Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
P
pytensor
项目
项目
详情
活动
周期分析
仓库
仓库
文件
提交
分支
标签
贡献者
图表
比较
统计图
议题
0
议题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
日程
统计图
Wiki
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
testgroup
pytensor
Commits
a6e0ff7a
提交
a6e0ff7a
authored
10月 26, 2015
作者:
Arnaud Bergeron
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Use the new name for op context (which is params now).
上级
51c981b0
隐藏空白字符变更
内嵌
并排
正在显示
11 个修改的文件
包含
56 行增加
和
56 行删除
+56
-56
basic_ops.py
theano/sandbox/gpuarray/basic_ops.py
+21
-21
conv.py
theano/sandbox/gpuarray/conv.py
+1
-1
dnn.py
theano/sandbox/gpuarray/dnn.py
+2
-2
dnn_base.c
theano/sandbox/gpuarray/dnn_base.c
+3
-3
elemwise.py
theano/sandbox/gpuarray/elemwise.py
+10
-10
gemm16.c
theano/sandbox/gpuarray/gemm16.c
+1
-1
neighbours.py
theano/sandbox/gpuarray/neighbours.py
+2
-2
nerv.py
theano/sandbox/gpuarray/nerv.py
+3
-3
nnet.py
theano/sandbox/gpuarray/nnet.py
+8
-8
subtensor.py
theano/sandbox/gpuarray/subtensor.py
+3
-3
rng_mrg.py
theano/sandbox/rng_mrg.py
+2
-2
没有找到文件。
theano/sandbox/gpuarray/basic_ops.py
浏览文件 @
a6e0ff7a
...
@@ -169,7 +169,7 @@ class Kernel(object):
...
@@ -169,7 +169,7 @@ class Kernel(object):
class
GpuKernelBase
(
object
):
class
GpuKernelBase
(
object
):
context
_type
=
gpu_context_type
params
_type
=
gpu_context_type
def
gpu_kernels
(
self
,
node
,
name
):
def
gpu_kernels
(
self
,
node
,
name
):
"""
"""
...
@@ -214,7 +214,7 @@ class GpuKernelBase(object):
...
@@ -214,7 +214,7 @@ class GpuKernelBase(object):
def
c_support_code_apply
(
self
,
node
,
name
):
def
c_support_code_apply
(
self
,
node
,
name
):
kernels
=
self
.
gpu_kernels
(
node
,
name
)
kernels
=
self
.
gpu_kernels
(
node
,
name
)
ctx
=
self
.
get_
context
(
node
)
ctx
=
self
.
get_
params
(
node
)
bins
=
'
\n
'
.
join
(
self
.
_generate_kernel_bin
(
k
,
ctx
)
for
k
in
kernels
)
bins
=
'
\n
'
.
join
(
self
.
_generate_kernel_bin
(
k
,
ctx
)
for
k
in
kernels
)
codes
=
'
\n
'
.
join
(
self
.
_generate_kernel_code
(
k
)
for
k
in
kernels
)
codes
=
'
\n
'
.
join
(
self
.
_generate_kernel_code
(
k
)
for
k
in
kernels
)
return
'
\n
'
.
join
([
bins
,
codes
])
return
'
\n
'
.
join
([
bins
,
codes
])
...
@@ -248,7 +248,7 @@ class GpuKernelBase(object):
...
@@ -248,7 +248,7 @@ class GpuKernelBase(object):
flags
=
k
.
_get_c_flags
(),
fail
=
fail
,
ctx
=
ctx
)
flags
=
k
.
_get_c_flags
(),
fail
=
fail
,
ctx
=
ctx
)
def
c_init_code_struct
(
self
,
node
,
name
,
sub
):
def
c_init_code_struct
(
self
,
node
,
name
,
sub
):
ctx
=
sub
[
'
context
'
]
ctx
=
sub
[
'
params
'
]
kernels
=
self
.
gpu_kernels
(
node
,
name
)
kernels
=
self
.
gpu_kernels
(
node
,
name
)
inits_0
=
'
\n
'
.
join
(
self
.
_generate_zeros
(
k
)
for
k
in
kernels
)
inits_0
=
'
\n
'
.
join
(
self
.
_generate_zeros
(
k
)
for
k
in
kernels
)
inits
=
'
\n
'
.
join
(
self
.
_generate_kernel_init
(
k
,
sub
[
'fail'
],
ctx
)
inits
=
'
\n
'
.
join
(
self
.
_generate_kernel_init
(
k
,
sub
[
'fail'
],
ctx
)
...
@@ -269,7 +269,7 @@ class GpuKernelBase(object):
...
@@ -269,7 +269,7 @@ class GpuKernelBase(object):
return
(
self
.
c_code_cache_version
(),
self
.
kernel_version
(
node
))
return
(
self
.
c_code_cache_version
(),
self
.
kernel_version
(
node
))
def
kernel_version
(
self
,
node
):
def
kernel_version
(
self
,
node
):
return
(
3
,
node
.
get_context
(
)
.
bin_id
)
return
(
3
,
self
.
get_params
(
node
)
.
bin_id
)
class
HostFromGpu
(
Op
):
class
HostFromGpu
(
Op
):
...
@@ -351,7 +351,7 @@ host_from_gpu = HostFromGpu()
...
@@ -351,7 +351,7 @@ host_from_gpu = HostFromGpu()
class
GpuFromHost
(
Op
):
class
GpuFromHost
(
Op
):
__props__
=
(
'context_name'
,)
__props__
=
(
'context_name'
,)
_f16_ok
=
True
_f16_ok
=
True
context
_type
=
gpu_context_type
params
_type
=
gpu_context_type
def
__init__
(
self
,
context_name
):
def
__init__
(
self
,
context_name
):
self
.
context_name
=
context_name
self
.
context_name
=
context_name
...
@@ -366,7 +366,7 @@ class GpuFromHost(Op):
...
@@ -366,7 +366,7 @@ class GpuFromHost(Op):
context_name
=
self
.
context_name
,
context_name
=
self
.
context_name
,
dtype
=
x
.
dtype
)()])
dtype
=
x
.
dtype
)()])
def
get_
context
(
self
,
node
):
def
get_
params
(
self
,
node
):
return
get_context
(
self
.
context_name
)
return
get_context
(
self
.
context_name
)
def
perform
(
self
,
node
,
inp
,
out
,
ctx
):
def
perform
(
self
,
node
,
inp
,
out
,
ctx
):
...
@@ -404,7 +404,7 @@ class GpuFromHost(Op):
...
@@ -404,7 +404,7 @@ class GpuFromHost(Op):
if (
%(out)
s == NULL) {
if (
%(out)
s == NULL) {
%(fail)
s
%(fail)
s
}
}
"""
%
{
'name'
:
name
,
'inp'
:
inputs
[
0
],
'ctx'
:
sub
[
'
context
'
],
"""
%
{
'name'
:
name
,
'inp'
:
inputs
[
0
],
'ctx'
:
sub
[
'
params
'
],
'out'
:
outputs
[
0
],
'fail'
:
sub
[
'fail'
]}
'out'
:
outputs
[
0
],
'fail'
:
sub
[
'fail'
]}
def
c_code_cache_version
(
self
):
def
c_code_cache_version
(
self
):
...
@@ -414,7 +414,7 @@ class GpuFromHost(Op):
...
@@ -414,7 +414,7 @@ class GpuFromHost(Op):
class
GpuToGpu
(
Op
):
class
GpuToGpu
(
Op
):
__props__
=
(
'context_name'
,)
__props__
=
(
'context_name'
,)
_f16_ok
=
True
_f16_ok
=
True
context
_type
=
gpu_context_type
params
_type
=
gpu_context_type
def
__init__
(
self
,
context_name
):
def
__init__
(
self
,
context_name
):
self
.
context_name
=
context_name
self
.
context_name
=
context_name
...
@@ -429,7 +429,7 @@ class GpuToGpu(Op):
...
@@ -429,7 +429,7 @@ class GpuToGpu(Op):
context_name
=
self
.
context_name
,
context_name
=
self
.
context_name
,
dtype
=
x
.
dtype
)()])
dtype
=
x
.
dtype
)()])
def
get_
context
(
self
,
node
):
def
get_
params
(
self
,
node
):
return
get_context
(
self
.
context_name
)
return
get_context
(
self
.
context_name
)
def
perform
(
self
,
node
,
inp
,
out
,
ctx
):
def
perform
(
self
,
node
,
inp
,
out
,
ctx
):
...
@@ -454,7 +454,7 @@ class GpuToGpu(Op):
...
@@ -454,7 +454,7 @@ class GpuToGpu(Op):
if (
%(out)
s == NULL) {
if (
%(out)
s == NULL) {
%(fail)
s
%(fail)
s
}
}
"""
%
{
'inp'
:
inputs
[
0
],
'ctx'
:
sub
[
'
context
'
],
"""
%
{
'inp'
:
inputs
[
0
],
'ctx'
:
sub
[
'
params
'
],
'out'
:
outputs
[
0
],
'fail'
:
sub
[
'fail'
]}
'out'
:
outputs
[
0
],
'fail'
:
sub
[
'fail'
]}
def
c_code_cache_version
(
self
):
def
c_code_cache_version
(
self
):
...
@@ -476,13 +476,13 @@ class GpuAlloc(HideC, Alloc):
...
@@ -476,13 +476,13 @@ class GpuAlloc(HideC, Alloc):
__props__
=
(
'memset_0'
,
'context_name'
)
__props__
=
(
'memset_0'
,
'context_name'
)
_f16_ok
=
True
_f16_ok
=
True
context
_type
=
gpu_context_type
params
_type
=
gpu_context_type
def
__init__
(
self
,
context_name
,
memset_0
=
False
):
def
__init__
(
self
,
context_name
,
memset_0
=
False
):
self
.
context_name
=
context_name
self
.
context_name
=
context_name
self
.
memset_0
=
memset_0
self
.
memset_0
=
memset_0
def
get_
context
(
self
,
node
):
def
get_
params
(
self
,
node
):
return
get_context
(
self
.
context_name
)
return
get_context
(
self
.
context_name
)
def
__str__
(
self
):
def
__str__
(
self
):
...
@@ -580,7 +580,7 @@ class GpuAlloc(HideC, Alloc):
...
@@ -580,7 +580,7 @@ class GpuAlloc(HideC, Alloc):
%(fail)
s
%(fail)
s
}
}
}
}
"""
%
dict
(
name
=
name
,
ndim
=
ndim
,
zz
=
zz
,
vv
=
vv
,
ctx
=
sub
[
'
context
'
],
"""
%
dict
(
name
=
name
,
ndim
=
ndim
,
zz
=
zz
,
vv
=
vv
,
ctx
=
sub
[
'
params
'
],
fail
=
sub
[
'fail'
],
memset_0
=
memset_0
)
fail
=
sub
[
'fail'
],
memset_0
=
memset_0
)
if
config
.
gpuarray
.
sync
:
if
config
.
gpuarray
.
sync
:
...
@@ -625,13 +625,13 @@ class GpuAlloc(HideC, Alloc):
...
@@ -625,13 +625,13 @@ class GpuAlloc(HideC, Alloc):
class
GpuAllocEmpty
(
HideC
,
Alloc
):
class
GpuAllocEmpty
(
HideC
,
Alloc
):
__props__
=
(
'dtype'
,
'context_name'
)
__props__
=
(
'dtype'
,
'context_name'
)
_f16_ok
=
True
_f16_ok
=
True
context
_type
=
gpu_context_type
params
_type
=
gpu_context_type
def
__init__
(
self
,
dtype
,
context_name
):
def
__init__
(
self
,
dtype
,
context_name
):
self
.
dtype
=
dtype
self
.
dtype
=
dtype
self
.
context_name
=
context_name
self
.
context_name
=
context_name
def
get_
context
(
self
,
node
):
def
get_
params
(
self
,
node
):
return
get_context
(
self
.
context_name
)
return
get_context
(
self
.
context_name
)
def
make_node
(
self
,
*
shape
):
def
make_node
(
self
,
*
shape
):
...
@@ -677,7 +677,7 @@ if (theano_prep_output(&%(zz)s, %(ndim)s, shape, %(type)s, GA_C_ORDER,
...
@@ -677,7 +677,7 @@ if (theano_prep_output(&%(zz)s, %(ndim)s, shape, %(type)s, GA_C_ORDER,
%(fail)
s
%(fail)
s
}
}
"""
%
dict
(
zz
=
zz
,
ndim
=
ndim
,
type
=
gpuarray
.
dtype_to_typecode
(
self
.
dtype
),
"""
%
dict
(
zz
=
zz
,
ndim
=
ndim
,
type
=
gpuarray
.
dtype_to_typecode
(
self
.
dtype
),
fail
=
fail
,
ctx
=
sub
[
'
context
'
]))
fail
=
fail
,
ctx
=
sub
[
'
params
'
]))
return
''
.
join
(
code
)
return
''
.
join
(
code
)
...
@@ -884,7 +884,7 @@ class GpuReshape(HideC, tensor.Reshape):
...
@@ -884,7 +884,7 @@ class GpuReshape(HideC, tensor.Reshape):
class
GpuJoin
(
HideC
,
Join
):
class
GpuJoin
(
HideC
,
Join
):
_f16_ok
=
True
_f16_ok
=
True
context
_type
=
gpu_context_type
params
_type
=
gpu_context_type
def
make_node
(
self
,
axis
,
*
tensors
):
def
make_node
(
self
,
axis
,
*
tensors
):
node
=
Join
.
make_node
(
self
,
axis
,
*
tensors
)
node
=
Join
.
make_node
(
self
,
axis
,
*
tensors
)
...
@@ -899,7 +899,7 @@ class GpuJoin(HideC, Join):
...
@@ -899,7 +899,7 @@ class GpuJoin(HideC, Join):
dtype
=
node
.
outputs
[
0
]
.
dtype
,
dtype
=
node
.
outputs
[
0
]
.
dtype
,
context_name
=
ctx_name
)()])
context_name
=
ctx_name
)()])
def
get_
context
(
self
,
node
):
def
get_
params
(
self
,
node
):
return
node
.
outputs
[
0
]
.
type
.
context
return
node
.
outputs
[
0
]
.
type
.
context
def
perform
(
self
,
node
,
axis_and_tensors
,
out_
,
ctx
):
def
perform
(
self
,
node
,
axis_and_tensors
,
out_
,
ctx
):
...
@@ -947,7 +947,7 @@ if (%(out)s == NULL)
...
@@ -947,7 +947,7 @@ if (%(out)s == NULL)
%(fail)
s
%(fail)
s
"""
%
dict
(
n
=
len
(
inputs
[
1
:]),
fail
=
sub
[
'fail'
],
out
=
out_
[
0
],
"""
%
dict
(
n
=
len
(
inputs
[
1
:]),
fail
=
sub
[
'fail'
],
out
=
out_
[
0
],
axis
=
inputs
[
0
],
copy_inputs_to_list
=
'
\n
'
.
join
(
copy_to_list
),
axis
=
inputs
[
0
],
copy_inputs_to_list
=
'
\n
'
.
join
(
copy_to_list
),
restype
=
restype
,
ctx
=
sub
[
'
context
'
])
restype
=
restype
,
ctx
=
sub
[
'
params
'
])
gpu_join
=
GpuJoin
()
gpu_join
=
GpuJoin
()
...
@@ -973,7 +973,7 @@ class GpuEye(GpuKernelBase, Op):
...
@@ -973,7 +973,7 @@ class GpuEye(GpuKernelBase, Op):
self
.
dtype
=
dtype
self
.
dtype
=
dtype
self
.
context_name
=
context_name
self
.
context_name
=
context_name
def
get_
context
(
self
,
node
):
def
get_
params
(
self
,
node
):
return
get_context
(
self
.
context_name
)
return
get_context
(
self
.
context_name
)
def
make_node
(
self
,
n
,
m
,
k
):
def
make_node
(
self
,
n
,
m
,
k
):
...
@@ -1018,7 +1018,7 @@ KERNEL void k(GLOBAL_MEM %(ctype)s *a, ga_size n, ga_size m) {
...
@@ -1018,7 +1018,7 @@ KERNEL void k(GLOBAL_MEM %(ctype)s *a, ga_size n, ga_size m) {
n
,
m
=
inp
n
,
m
=
inp
z
,
=
out
z
,
=
out
fail
=
sub
[
'fail'
]
fail
=
sub
[
'fail'
]
ctx
=
sub
[
'
context
'
]
ctx
=
sub
[
'
params
'
]
typecode
=
pygpu
.
gpuarray
.
dtype_to_typecode
(
self
.
dtype
)
typecode
=
pygpu
.
gpuarray
.
dtype_to_typecode
(
self
.
dtype
)
sync
=
bool
(
config
.
gpuarray
.
sync
)
sync
=
bool
(
config
.
gpuarray
.
sync
)
kname
=
self
.
gpu_kernels
(
node
,
name
)[
0
]
.
objvar
kname
=
self
.
gpu_kernels
(
node
,
name
)[
0
]
.
objvar
...
...
theano/sandbox/gpuarray/conv.py
浏览文件 @
a6e0ff7a
...
@@ -135,7 +135,7 @@ class GpuConv(GpuKernelBase, gof.Op):
...
@@ -135,7 +135,7 @@ class GpuConv(GpuKernelBase, gof.Op):
out
=
GpuArrayType
(
img
.
dtype
,
broadcastable
,
context_name
=
ctx_name
)()
out
=
GpuArrayType
(
img
.
dtype
,
broadcastable
,
context_name
=
ctx_name
)()
return
gof
.
Apply
(
self
,
[
img
,
kern
],
[
out
])
return
gof
.
Apply
(
self
,
[
img
,
kern
],
[
out
])
def
get_
context
(
self
,
node
):
def
get_
params
(
self
,
node
):
return
node
.
inputs
[
0
]
.
type
.
context
return
node
.
inputs
[
0
]
.
type
.
context
def
flops
(
self
,
inputs
,
outputs
):
def
flops
(
self
,
inputs
,
outputs
):
...
...
theano/sandbox/gpuarray/dnn.py
浏览文件 @
a6e0ff7a
...
@@ -133,9 +133,9 @@ class DnnBase(COp):
...
@@ -133,9 +133,9 @@ class DnnBase(COp):
# dnn does not know about broadcasting, so we do not need to assert
# dnn does not know about broadcasting, so we do not need to assert
# the input broadcasting pattern.
# the input broadcasting pattern.
check_broadcast
=
False
check_broadcast
=
False
context
_type
=
gpu_context_type
params
_type
=
gpu_context_type
def
get_
context
(
self
,
node
):
def
get_
params
(
self
,
node
):
return
node
.
outputs
[
0
]
.
type
.
context
return
node
.
outputs
[
0
]
.
type
.
context
def
__init__
(
self
,
files
=
None
,
c_func
=
None
):
def
__init__
(
self
,
files
=
None
,
c_func
=
None
):
...
...
theano/sandbox/gpuarray/dnn_base.c
浏览文件 @
a6e0ff7a
...
@@ -107,14 +107,14 @@ cudnnHandle_t APPLY_SPECIFIC(_handle);
...
@@ -107,14 +107,14 @@ cudnnHandle_t APPLY_SPECIFIC(_handle);
#section init_code_struct
#section init_code_struct
{
{
cuda_enter
(
CONTEXT
->
ctx
);
cuda_enter
(
PARAMS
->
ctx
);
cudnnStatus_t
err
;
cudnnStatus_t
err
;
APPLY_SPECIFIC
(
_handle
)
=
NULL
;
APPLY_SPECIFIC
(
_handle
)
=
NULL
;
if
((
err
=
cudnnCreate
(
&
APPLY_SPECIFIC
(
_handle
)))
!=
CUDNN_STATUS_SUCCESS
)
{
if
((
err
=
cudnnCreate
(
&
APPLY_SPECIFIC
(
_handle
)))
!=
CUDNN_STATUS_SUCCESS
)
{
PyErr_Format
(
PyExc_RuntimeError
,
"could not create cuDNN handle: %s"
,
PyErr_Format
(
PyExc_RuntimeError
,
"could not create cuDNN handle: %s"
,
cudnnGetErrorString
(
err
));
cudnnGetErrorString
(
err
));
cuda_exit
(
CONTEXT
->
ctx
);
cuda_exit
(
PARAMS
->
ctx
);
FAIL
;
FAIL
;
}
}
cuda_exit
(
CONTEXT
->
ctx
);
cuda_exit
(
PARAMS
->
ctx
);
}
}
theano/sandbox/gpuarray/elemwise.py
浏览文件 @
a6e0ff7a
...
@@ -101,7 +101,7 @@ class GpuElemwise(GpuKernelBase, HideC, Elemwise):
...
@@ -101,7 +101,7 @@ class GpuElemwise(GpuKernelBase, HideC, Elemwise):
return
node
return
node
def
get_
context
(
self
,
node
):
def
get_
params
(
self
,
node
):
return
node
.
inputs
[
0
]
.
type
.
context
return
node
.
inputs
[
0
]
.
type
.
context
def
generate_kernel
(
self
,
node
,
nodename
):
def
generate_kernel
(
self
,
node
,
nodename
):
...
@@ -173,7 +173,7 @@ class GpuElemwise(GpuKernelBase, HideC, Elemwise):
...
@@ -173,7 +173,7 @@ class GpuElemwise(GpuKernelBase, HideC, Elemwise):
(
"npy_float64"
,
"ga_double"
),
(
"npy_float64"
,
"ga_double"
),
]:
]:
kop
=
kop
.
replace
(
npy
,
ga
)
kop
=
kop
.
replace
(
npy
,
ga
)
return
ElemwiseKernel
(
self
.
get_
context
(
node
),
inps
+
outs
,
kop
,
return
ElemwiseKernel
(
self
.
get_
params
(
node
),
inps
+
outs
,
kop
,
preamble
=
support_code
)
preamble
=
support_code
)
def
c_headers
(
self
):
def
c_headers
(
self
):
...
@@ -222,7 +222,7 @@ class GpuElemwise(GpuKernelBase, HideC, Elemwise):
...
@@ -222,7 +222,7 @@ class GpuElemwise(GpuKernelBase, HideC, Elemwise):
fail
=
sub
[
"fail"
]
fail
=
sub
[
"fail"
]
initial_dims
=
','
.
join
(
'1'
for
i
in
xrange
(
nd
))
initial_dims
=
','
.
join
(
'1'
for
i
in
xrange
(
nd
))
opname
=
str
(
self
.
scalar_op
)
opname
=
str
(
self
.
scalar_op
)
ctx
=
sub
[
'
context
'
]
ctx
=
sub
[
'
params
'
]
# check that all inputs have valid dimensions
# check that all inputs have valid dimensions
emitted_inames
=
{}
emitted_inames
=
{}
...
@@ -650,7 +650,7 @@ class GpuCAReduceCuda(GpuKernelBase, HideC, CAReduceDtype):
...
@@ -650,7 +650,7 @@ class GpuCAReduceCuda(GpuKernelBase, HideC, CAReduceDtype):
ret
.
outputs
[
0
]
.
type
.
broadcastable
,
ret
.
outputs
[
0
]
.
type
.
broadcastable
,
context_name
=
x
.
type
.
context_name
)()])
context_name
=
x
.
type
.
context_name
)()])
def
get_
context
(
self
,
node
):
def
get_
params
(
self
,
node
):
return
node
.
inputs
[
0
]
.
type
.
context
return
node
.
inputs
[
0
]
.
type
.
context
def
perform
(
self
,
node
,
inp
,
out
,
ctx
):
def
perform
(
self
,
node
,
inp
,
out
,
ctx
):
...
@@ -683,7 +683,7 @@ class GpuCAReduceCuda(GpuKernelBase, HideC, CAReduceDtype):
...
@@ -683,7 +683,7 @@ class GpuCAReduceCuda(GpuKernelBase, HideC, CAReduceDtype):
inp
=
[
'fake_input_name_
%
d'
%
i
for
i
in
xrange
(
len
(
inputs
))]
inp
=
[
'fake_input_name_
%
d'
%
i
for
i
in
xrange
(
len
(
inputs
))]
out
=
[
'fake_output_name_
%
d'
%
i
for
i
in
xrange
(
len
(
node
.
outputs
))]
out
=
[
'fake_output_name_
%
d'
%
i
for
i
in
xrange
(
len
(
node
.
outputs
))]
sub
=
{
'fail'
:
'fake failure code'
,
'
context
'
:
'fake context'
}
sub
=
{
'fail'
:
'fake failure code'
,
'
params
'
:
'fake context'
}
try
:
try
:
self
.
c_code
(
node
,
name
,
inp
,
out
,
sub
)
self
.
c_code
(
node
,
name
,
inp
,
out
,
sub
)
...
@@ -711,7 +711,7 @@ class GpuCAReduceCuda(GpuKernelBase, HideC, CAReduceDtype):
...
@@ -711,7 +711,7 @@ class GpuCAReduceCuda(GpuKernelBase, HideC, CAReduceDtype):
sio
=
StringIO
()
sio
=
StringIO
()
fail
=
sub
[
'fail'
]
fail
=
sub
[
'fail'
]
ctx
=
sub
[
'
context
'
]
ctx
=
sub
[
'
params
'
]
# check input
# check input
print
(
"""
print
(
"""
...
@@ -2664,7 +2664,7 @@ class GpuCAReduceCPY(GpuKernelBase, HideC, CAReduceDtype):
...
@@ -2664,7 +2664,7 @@ class GpuCAReduceCPY(GpuKernelBase, HideC, CAReduceDtype):
return
Apply
(
res
.
op
,
[
input
],
[
otype
()])
return
Apply
(
res
.
op
,
[
input
],
[
otype
()])
def
get_
context
(
self
,
node
):
def
get_
params
(
self
,
node
):
return
node
.
outputs
[
0
]
.
type
.
context
return
node
.
outputs
[
0
]
.
type
.
context
def
make_thunk
(
self
,
node
,
storage_map
,
compute_map
,
no_recycling
):
def
make_thunk
(
self
,
node
,
storage_map
,
compute_map
,
no_recycling
):
...
@@ -2776,7 +2776,7 @@ class GpuCAReduceCPY(GpuKernelBase, HideC, CAReduceDtype):
...
@@ -2776,7 +2776,7 @@ class GpuCAReduceCPY(GpuKernelBase, HideC, CAReduceDtype):
}
}
}
}
"""
%
dict
(
output
=
output
,
nd_out
=
nd_out
,
fail
=
sub
[
'fail'
],
"""
%
dict
(
output
=
output
,
nd_out
=
nd_out
,
fail
=
sub
[
'fail'
],
ctx
=
sub
[
'
context
'
],
ctx
=
sub
[
'
params
'
],
out_type
=
dtype_to_typecode
(
node
.
outputs
[
0
]
.
type
.
dtype
))
out_type
=
dtype_to_typecode
(
node
.
outputs
[
0
]
.
type
.
dtype
))
else
:
else
:
code
+=
"""
code
+=
"""
...
@@ -2788,7 +2788,7 @@ class GpuCAReduceCPY(GpuKernelBase, HideC, CAReduceDtype):
...
@@ -2788,7 +2788,7 @@ class GpuCAReduceCPY(GpuKernelBase, HideC, CAReduceDtype):
%(fail)
s
%(fail)
s
}
}
}
}
"""
%
dict
(
output
=
output
,
fail
=
sub
[
'fail'
],
ctx
=
sub
[
'
context
'
],
"""
%
dict
(
output
=
output
,
fail
=
sub
[
'fail'
],
ctx
=
sub
[
'
params
'
],
out_type
=
dtype_to_typecode
(
node
.
outputs
[
0
]
.
type
.
dtype
))
out_type
=
dtype_to_typecode
(
node
.
outputs
[
0
]
.
type
.
dtype
))
if
acc_dtype
!=
node
.
outputs
[
0
]
.
type
.
dtype
:
if
acc_dtype
!=
node
.
outputs
[
0
]
.
type
.
dtype
:
...
@@ -2796,7 +2796,7 @@ class GpuCAReduceCPY(GpuKernelBase, HideC, CAReduceDtype):
...
@@ -2796,7 +2796,7 @@ class GpuCAReduceCPY(GpuKernelBase, HideC, CAReduceDtype):
tmp = pygpu_empty(
%(output)
s->ga.nd,
%(output)
s->ga.dimensions,
tmp = pygpu_empty(
%(output)
s->ga.nd,
%(output)
s->ga.dimensions,
%(acc_type)
s, GA_C_ORDER,
%(ctx)
s, Py_None);
%(acc_type)
s, GA_C_ORDER,
%(ctx)
s, Py_None);
if (!tmp)
%(fail)
s
if (!tmp)
%(fail)
s
"""
%
dict
(
output
=
output
,
fail
=
sub
[
'fail'
],
ctx
=
sub
[
'
context
'
],
"""
%
dict
(
output
=
output
,
fail
=
sub
[
'fail'
],
ctx
=
sub
[
'
params
'
],
acc_type
=
dtype_to_typecode
(
acc_dtype
))
acc_type
=
dtype_to_typecode
(
acc_dtype
))
else
:
else
:
code
+=
"""
code
+=
"""
...
...
theano/sandbox/gpuarray/gemm16.c
浏览文件 @
a6e0ff7a
...
@@ -2,7 +2,7 @@
...
@@ -2,7 +2,7 @@
/* Why do we need this? */
/* Why do we need this? */
size_t
dim
=
2048
*
32
;
size_t
dim
=
2048
*
32
;
rand_buf
=
pygpu_empty
(
1
,
&
dim
,
GA_UINT
,
GA_C_ORDER
,
CONTEXT
,
rand_buf
=
pygpu_empty
(
1
,
&
dim
,
GA_UINT
,
GA_C_ORDER
,
PARAMS
,
Py_None
);
Py_None
);
if
(
rand_buf
==
NULL
)
{
if
(
rand_buf
==
NULL
)
{
FAIL
;
FAIL
;
...
...
theano/sandbox/gpuarray/neighbours.py
浏览文件 @
a6e0ff7a
...
@@ -41,7 +41,7 @@ class GpuImages2Neibs(GpuKernelBase, Images2Neibs, Op):
...
@@ -41,7 +41,7 @@ class GpuImages2Neibs(GpuKernelBase, Images2Neibs, Op):
dtype
=
ten4
.
type
.
dtype
,
dtype
=
ten4
.
type
.
dtype
,
context_name
=
ten4
.
type
.
context_name
)()])
context_name
=
ten4
.
type
.
context_name
)()])
def
get_
context
(
self
,
node
):
def
get_
params
(
self
,
node
):
return
node
.
inputs
[
0
]
.
type
.
context
return
node
.
inputs
[
0
]
.
type
.
context
def
c_code_cache_version
(
self
):
def
c_code_cache_version
(
self
):
...
@@ -250,7 +250,7 @@ class GpuImages2Neibs(GpuKernelBase, Images2Neibs, Op):
...
@@ -250,7 +250,7 @@ class GpuImages2Neibs(GpuKernelBase, Images2Neibs, Op):
ten4
,
neib_shape
,
neib_step
=
inp
ten4
,
neib_shape
,
neib_step
=
inp
z
,
=
out
z
,
=
out
fail
=
sub
[
'fail'
]
fail
=
sub
[
'fail'
]
ctx
=
sub
[
'
context
'
]
ctx
=
sub
[
'
params
'
]
mode
=
self
.
mode
mode
=
self
.
mode
err_check
=
"""
err_check
=
"""
if (err != GA_NO_ERROR) {
if (err != GA_NO_ERROR) {
...
...
theano/sandbox/gpuarray/nerv.py
浏览文件 @
a6e0ff7a
...
@@ -43,7 +43,7 @@ def ensure_float(val, name):
...
@@ -43,7 +43,7 @@ def ensure_float(val, name):
class
Gemm16
(
COp
):
class
Gemm16
(
COp
):
__props__
=
(
'relu'
,
'inplace'
)
__props__
=
(
'relu'
,
'inplace'
)
_f16_ok
=
True
_f16_ok
=
True
context
_type
=
gpu_context_type
params
_type
=
gpu_context_type
KERN_NAMES
=
(
'nn_128x128'
,
'nn_128x64'
,
'nn_128x32'
,
KERN_NAMES
=
(
'nn_128x128'
,
'nn_128x64'
,
'nn_128x32'
,
'nn_vec_128x128'
,
'nn_vec_128x64'
,
'nn_vec_128x32'
,
'nn_vec_128x128'
,
'nn_vec_128x64'
,
'nn_vec_128x32'
,
'tn_128x128'
,
'tn_128x64'
,
'tn_128x32'
,
'tn_128x128'
,
'tn_128x64'
,
'tn_128x32'
,
...
@@ -75,7 +75,7 @@ class Gemm16(COp):
...
@@ -75,7 +75,7 @@ class Gemm16(COp):
return
Apply
(
self
,
[
C
,
alpha
,
A
,
B
,
beta
],
[
C
.
type
()])
return
Apply
(
self
,
[
C
,
alpha
,
A
,
B
,
beta
],
[
C
.
type
()])
def
get_
context
(
self
,
node
):
def
get_
params
(
self
,
node
):
return
node
.
inputs
[
0
]
.
type
.
context
return
node
.
inputs
[
0
]
.
type
.
context
def
c_headers
(
self
):
def
c_headers
(
self
):
...
@@ -128,7 +128,7 @@ if (GpuKernel_init(&k_%(name)s, c->ops, c->ctx, 1, &bcode, &sz,
...
@@ -128,7 +128,7 @@ if (GpuKernel_init(&k_%(name)s, c->ops, c->ctx, 1, &bcode, &sz,
codel
.
append
(
"memset(&k_{0}, 0, sizeof(GpuKernel));"
.
format
(
name
))
codel
.
append
(
"memset(&k_{0}, 0, sizeof(GpuKernel));"
.
format
(
name
))
codel
.
append
(
"const char *bcode;"
)
codel
.
append
(
"const char *bcode;"
)
codel
.
append
(
"size_t sz;"
)
codel
.
append
(
"size_t sz;"
)
codel
.
append
(
"PyGpuContextObject *c =
%
s;"
%
(
sub
[
'
context
'
],))
codel
.
append
(
"PyGpuContextObject *c =
%
s;"
%
(
sub
[
'
params
'
],))
codel
.
append
(
"int types[13] = {GA_BUFFER, GA_BUFFER, GA_BUFFER, "
codel
.
append
(
"int types[13] = {GA_BUFFER, GA_BUFFER, GA_BUFFER, "
"GA_BUFFER, GA_INT, GA_INT, GA_INT, GA_INT, GA_INT, "
"GA_BUFFER, GA_INT, GA_INT, GA_INT, GA_INT, GA_INT, "
"GA_INT, GA_FLOAT, GA_FLOAT, GA_INT};"
)
"GA_INT, GA_FLOAT, GA_FLOAT, GA_INT};"
)
...
...
theano/sandbox/gpuarray/nnet.py
浏览文件 @
a6e0ff7a
...
@@ -41,7 +41,7 @@ class GpuCrossentropySoftmaxArgmax1HotWithBias(GpuKernelBase, Op):
...
@@ -41,7 +41,7 @@ class GpuCrossentropySoftmaxArgmax1HotWithBias(GpuKernelBase, Op):
am
=
y_idx
.
type
()
am
=
y_idx
.
type
()
return
Apply
(
self
,
[
x
,
b
,
y_idx
],
[
nll
,
sm
,
am
])
return
Apply
(
self
,
[
x
,
b
,
y_idx
],
[
nll
,
sm
,
am
])
def
get_
context
(
self
,
node
):
def
get_
params
(
self
,
node
):
return
node
.
inputs
[
0
]
.
type
.
context
return
node
.
inputs
[
0
]
.
type
.
context
def
c_headers
(
self
):
def
c_headers
(
self
):
...
@@ -169,7 +169,7 @@ class GpuCrossentropySoftmaxArgmax1HotWithBias(GpuKernelBase, Op):
...
@@ -169,7 +169,7 @@ class GpuCrossentropySoftmaxArgmax1HotWithBias(GpuKernelBase, Op):
dtype_am
=
node
.
outputs
[
2
]
.
dtype
dtype_am
=
node
.
outputs
[
2
]
.
dtype
classname
=
self
.
__class__
.
__name__
classname
=
self
.
__class__
.
__name__
fail
=
sub
[
'fail'
]
fail
=
sub
[
'fail'
]
ctx
=
sub
[
'
context
'
]
ctx
=
sub
[
'
params
'
]
k_var
=
"k_xent_sm_1hot_bias_
%(nodename)
s"
%
locals
()
k_var
=
"k_xent_sm_1hot_bias_
%(nodename)
s"
%
locals
()
err_check
=
"""
err_check
=
"""
if (err != GA_NO_ERROR) {
if (err != GA_NO_ERROR) {
...
@@ -322,7 +322,7 @@ class GpuCrossentropySoftmax1HotWithBiasDx(GpuKernelBase, Op):
...
@@ -322,7 +322,7 @@ class GpuCrossentropySoftmax1HotWithBiasDx(GpuKernelBase, Op):
y_idx
=
as_gpuarray_variable
(
y_idx
,
ctx_name
)
y_idx
=
as_gpuarray_variable
(
y_idx
,
ctx_name
)
return
Apply
(
self
,
[
dnll
,
sm
,
y_idx
],
[
sm
.
type
()])
return
Apply
(
self
,
[
dnll
,
sm
,
y_idx
],
[
sm
.
type
()])
def
get_
context
(
self
,
node
):
def
get_
params
(
self
,
node
):
return
node
.
inputs
[
0
]
.
type
.
context
return
node
.
inputs
[
0
]
.
type
.
context
def
c_code_cache_version
(
self
):
def
c_code_cache_version
(
self
):
...
@@ -347,7 +347,7 @@ class GpuCrossentropySoftmax1HotWithBiasDx(GpuKernelBase, Op):
...
@@ -347,7 +347,7 @@ class GpuCrossentropySoftmax1HotWithBiasDx(GpuKernelBase, Op):
dnll
,
sm
,
y_idx
=
inp
dnll
,
sm
,
y_idx
=
inp
dx
,
=
out
dx
,
=
out
fail
=
sub
[
'fail'
]
fail
=
sub
[
'fail'
]
ctx
=
sub
[
'
context
'
]
ctx
=
sub
[
'
params
'
]
k_var
=
"kCrossEntropySoftmax1HotWithBiasDx_"
+
nodename
k_var
=
"kCrossEntropySoftmax1HotWithBiasDx_"
+
nodename
err_check
=
"""
err_check
=
"""
if (err != GA_NO_ERROR) {
if (err != GA_NO_ERROR) {
...
@@ -528,7 +528,7 @@ class GpuSoftmax(GpuKernelBase, Op):
...
@@ -528,7 +528,7 @@ class GpuSoftmax(GpuKernelBase, Op):
x
=
as_gpuarray_variable
(
x
,
infer_context_name
(
x
))
x
=
as_gpuarray_variable
(
x
,
infer_context_name
(
x
))
return
Apply
(
self
,
[
x
],
[
x
.
type
()])
return
Apply
(
self
,
[
x
],
[
x
.
type
()])
def
get_
context
(
self
,
node
):
def
get_
params
(
self
,
node
):
return
node
.
inputs
[
0
]
.
type
.
context
return
node
.
inputs
[
0
]
.
type
.
context
def
infer_shape
(
self
,
node
,
shape
):
def
infer_shape
(
self
,
node
,
shape
):
...
@@ -552,7 +552,7 @@ class GpuSoftmax(GpuKernelBase, Op):
...
@@ -552,7 +552,7 @@ class GpuSoftmax(GpuKernelBase, Op):
x
,
=
inp
x
,
=
inp
z
,
=
out
z
,
=
out
fail
=
sub
[
'fail'
]
fail
=
sub
[
'fail'
]
ctx
=
sub
[
'
context
'
]
ctx
=
sub
[
'
params
'
]
err_check
=
"""
err_check
=
"""
if (err != GA_NO_ERROR) {
if (err != GA_NO_ERROR) {
PyErr_Format(PyExc_RuntimeError, fmt_str, msg);
PyErr_Format(PyExc_RuntimeError, fmt_str, msg);
...
@@ -727,7 +727,7 @@ class GpuSoftmaxWithBias(GpuKernelBase, Op):
...
@@ -727,7 +727,7 @@ class GpuSoftmaxWithBias(GpuKernelBase, Op):
b
=
as_gpuarray_variable
(
b
,
ctx_name
)
b
=
as_gpuarray_variable
(
b
,
ctx_name
)
return
Apply
(
self
,
[
x
,
b
],
[
x
.
type
()])
return
Apply
(
self
,
[
x
,
b
],
[
x
.
type
()])
def
get_
context
(
self
,
node
):
def
get_
params
(
self
,
node
):
return
node
.
inputs
[
0
]
.
type
.
context
return
node
.
inputs
[
0
]
.
type
.
context
def
infer_shape
(
self
,
node
,
shape
):
def
infer_shape
(
self
,
node
,
shape
):
...
@@ -753,7 +753,7 @@ class GpuSoftmaxWithBias(GpuKernelBase, Op):
...
@@ -753,7 +753,7 @@ class GpuSoftmaxWithBias(GpuKernelBase, Op):
x
,
b
=
inp
x
,
b
=
inp
z
,
=
out
z
,
=
out
fail
=
sub
[
'fail'
]
fail
=
sub
[
'fail'
]
ctx
=
sub
[
'
context
'
]
ctx
=
sub
[
'
params
'
]
err_check
=
"""
err_check
=
"""
if (err != GA_NO_ERROR) {
if (err != GA_NO_ERROR) {
PyErr_Format(PyExc_RuntimeError, fmt_str, msg);
PyErr_Format(PyExc_RuntimeError, fmt_str, msg);
...
...
theano/sandbox/gpuarray/subtensor.py
浏览文件 @
a6e0ff7a
...
@@ -202,7 +202,7 @@ class GpuIncSubtensor(GpuKernelBase, IncSubtensor):
...
@@ -202,7 +202,7 @@ class GpuIncSubtensor(GpuKernelBase, IncSubtensor):
op
.
create_iadd_node
(
ret
)
op
.
create_iadd_node
(
ret
)
return
ret
return
ret
def
get_
context
(
self
,
node
):
def
get_
params
(
self
,
node
):
return
node
.
outputs
[
0
]
.
type
.
context
return
node
.
outputs
[
0
]
.
type
.
context
def
create_iadd_node
(
self
,
node
):
def
create_iadd_node
(
self
,
node
):
...
@@ -609,7 +609,7 @@ class GpuAdvancedIncSubtensor1_dev20(GpuKernelBase, GpuAdvancedIncSubtensor1):
...
@@ -609,7 +609,7 @@ class GpuAdvancedIncSubtensor1_dev20(GpuKernelBase, GpuAdvancedIncSubtensor1):
return
gof
.
Apply
(
self
,
[
x_
,
y_
,
ilist_
],
[
x_
.
type
()])
return
gof
.
Apply
(
self
,
[
x_
,
y_
,
ilist_
],
[
x_
.
type
()])
def
get_
context
(
self
,
node
):
def
get_
params
(
self
,
node
):
return
node
.
outputs
[
0
]
.
type
.
context
return
node
.
outputs
[
0
]
.
type
.
context
def
perform
(
self
,
node
,
inp
,
out
,
ctx
):
def
perform
(
self
,
node
,
inp
,
out
,
ctx
):
...
@@ -626,7 +626,7 @@ class GpuAdvancedIncSubtensor1_dev20(GpuKernelBase, GpuAdvancedIncSubtensor1):
...
@@ -626,7 +626,7 @@ class GpuAdvancedIncSubtensor1_dev20(GpuKernelBase, GpuAdvancedIncSubtensor1):
return
[
os
.
path
.
dirname
(
__file__
)]
return
[
os
.
path
.
dirname
(
__file__
)]
def
c_code
(
self
,
node
,
name
,
inputs
,
outputs
,
sub
):
def
c_code
(
self
,
node
,
name
,
inputs
,
outputs
,
sub
):
ctx
=
self
.
get_
context
(
node
)
ctx
=
self
.
get_
params
(
node
)
if
ctx
.
kind
!=
'cuda'
:
if
ctx
.
kind
!=
'cuda'
:
raise
NotImplementedError
(
"cuda only"
)
raise
NotImplementedError
(
"cuda only"
)
if
(
self
.
set_instead_of_inc
or
if
(
self
.
set_instead_of_inc
or
...
...
theano/sandbox/rng_mrg.py
浏览文件 @
a6e0ff7a
...
@@ -771,7 +771,7 @@ class GPUA_mrg_uniform(GpuKernelBase, mrg_uniform_base):
...
@@ -771,7 +771,7 @@ class GPUA_mrg_uniform(GpuKernelBase, mrg_uniform_base):
# GpuArray version
# GpuArray version
_f16_ok
=
True
_f16_ok
=
True
def
get_
context
(
self
,
node
):
def
get_
params
(
self
,
node
):
return
node
.
inputs
[
0
]
.
type
.
context
return
node
.
inputs
[
0
]
.
type
.
context
@classmethod
@classmethod
...
@@ -1014,7 +1014,7 @@ class GPUA_mrg_uniform(GpuKernelBase, mrg_uniform_base):
...
@@ -1014,7 +1014,7 @@ class GPUA_mrg_uniform(GpuKernelBase, mrg_uniform_base):
"""
%
locals
()
"""
%
locals
()
def
c_code_cache_version
(
self
):
def
c_code_cache_version
(
self
):
return
(
7
,
self
.
GpuKernelBase_version
)
return
(
7
,)
def
guess_n_streams
(
size
,
warn
=
False
):
def
guess_n_streams
(
size
,
warn
=
False
):
...
...
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论