Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
P
pytensor
项目
项目
详情
活动
周期分析
仓库
仓库
文件
提交
分支
标签
贡献者
图表
比较
统计图
议题
0
议题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
日程
统计图
Wiki
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
testgroup
pytensor
Commits
4d8e60e7
提交
4d8e60e7
authored
6月 25, 2015
作者:
Arnaud Bergeron
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Move over to the pygpu API.
上级
0f7d5930
隐藏空白字符变更
内嵌
并排
正在显示
6 个修改的文件
包含
246 行增加
和
174 行删除
+246
-174
basic_ops.py
theano/sandbox/gpuarray/basic_ops.py
+4
-0
dnn.py
theano/sandbox/gpuarray/dnn.py
+122
-131
dnn_fwd.c
theano/sandbox/gpuarray/dnn_fwd.c
+53
-19
dnn_gi.c
theano/sandbox/gpuarray/dnn_gi.c
+31
-12
dnn_gw.c
theano/sandbox/gpuarray/dnn_gw.c
+31
-12
gpuarray_helper.h
theano/sandbox/gpuarray/gpuarray_helper.h
+5
-0
没有找到文件。
theano/sandbox/gpuarray/basic_ops.py
浏览文件 @
4d8e60e7
...
@@ -783,6 +783,10 @@ if (theano_prep_output(&%(zz)s, %(ndim)s, shape, %(type)s, GA_C_ORDER,
...
@@ -783,6 +783,10 @@ if (theano_prep_output(&%(zz)s, %(ndim)s, shape, %(type)s, GA_C_ORDER,
raise
NotImplementedError
(
"grad disabled"
)
raise
NotImplementedError
(
"grad disabled"
)
def
empty_like
(
var
):
return
GpuAllocEmpty
(
var
.
type
.
dtype
)(
*
var
.
shape
)
class
GpuContiguous
(
Op
):
class
GpuContiguous
(
Op
):
"""
"""
Always return a c contiguous output. Copy the input only if it is
Always return a c contiguous output. Copy the input only if it is
...
...
theano/sandbox/gpuarray/dnn.py
浏览文件 @
4d8e60e7
...
@@ -18,8 +18,7 @@ from . import pygpu, init_dev
...
@@ -18,8 +18,7 @@ from . import pygpu, init_dev
from
.basic_ops
import
(
as_gpuarray_variable
,
from
.basic_ops
import
(
as_gpuarray_variable
,
host_from_gpu
,
host_from_gpu
,
gpu_contiguous
,
HostFromGpu
,
gpu_contiguous
,
HostFromGpu
,
# No GpuAllocEmpty (yet)
GpuAllocEmpty
,
empty_like
)
gpu_alloc
,
GpuAlloc
)
from
.conv
import
GpuConv
from
.conv
import
GpuConv
# These don't exist in gpuarray
# These don't exist in gpuarray
...
@@ -103,32 +102,32 @@ dnn_available.msg = None
...
@@ -103,32 +102,32 @@ dnn_available.msg = None
def
c_set_tensor4d
(
var
,
desc
,
err
,
fail
):
def
c_set_tensor4d
(
var
,
desc
,
err
,
fail
):
return
"""
return
"""
{
{
int str0, str1, str2, str3;
%(err)
s = cudnnSetTensor4dDescriptorEx(
str3 = CudaNdarray_HOST_STRIDES(
%(var)
s)[3]?CudaNdarray_HOST_STRIDES(
%(var)
s)[3]:1;
str2 = CudaNdarray_HOST_STRIDES(
%(var)
s)[2]?CudaNdarray_HOST_STRIDES(
%(var)
s)[2]:CudaNdarray_HOST_DIMS(
%(var)
s)[3];
str1 = CudaNdarray_HOST_STRIDES(
%(var)
s)[1]?CudaNdarray_HOST_STRIDES(
%(var)
s)[1]:CudaNdarray_HOST_DIMS(
%(var)
s)[2]*CudaNdarray_HOST_DIMS(
%(var)
s)[3];
str0 = CudaNdarray_HOST_STRIDES(
%(var)
s)[0]?CudaNdarray_HOST_STRIDES(
%(var)
s)[0]:CudaNdarray_HOST_DIMS(
%(var)
s)[2]*CudaNdarray_HOST_DIMS(
%(var)
s)[3]*CudaNdarray_HOST_DIMS(
%(var)
s)[1];
%(err)
s = cudnnSetTensor4dDescriptorEx(
%(desc)
s, CUDNN_DATA_FLOAT,
%(desc)
s, CUDNN_DATA_FLOAT,
CudaNdarray_HOST_DIMS(
%(var)
s)[0],
PyGpuArray_DIMS(
%(var)
s)[0],
CudaNdarray_HOST_DIMS(
%(var)
s)[1],
PyGpuArray_DIMS(
%(var)
s)[1],
CudaNdarray_HOST_DIMS(
%(var)
s)[2],
PyGpuArray_DIMS(
%(var)
s)[2],
CudaNdarray_HOST_DIMS(
%(var)
s)[3],
PyGpuArray_DIMS(
%(var)
s)[3],
str0, str1, str2, str3
PyGpuArray_STRIDES(
%(var)
s)[0],
);
PyGpuArray_STRIDES(
%(var)
s)[1],
if (
%(err)
s != CUDNN_STATUS_SUCCESS) {
PyGpuArray_STRIDES(
%(var)
s)[2],
PyGpuArray_STRIDES(
%(var)
s)[3]);
if (
%(err)
s != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError,
PyErr_Format(PyExc_RuntimeError,
"could not set tensor4d descriptor:
%%
s"
"could not set tensor4d descriptor:
%%
s"
"shapes=
%%
d
%%
d
%%
d
%%
d strides=
%%
d
%%
d
%%
d
%%
d",
"shapes=
%%
d
%%
d
%%
d
%%
d strides=
%%
d
%%
d
%%
d
%%
d",
cudnnGetErrorString(
%(err)
s),
cudnnGetErrorString(
%(err)
s),
CudaNdarray_HOST_DIMS(
%(var)
s)[0],
PyGpuArray_DIMS(
%(var)
s)[0],
CudaNdarray_HOST_DIMS(
%(var)
s)[1],
PyGpuArray_DIMS(
%(var)
s)[1],
CudaNdarray_HOST_DIMS(
%(var)
s)[2],
PyGpuArray_DIMS(
%(var)
s)[2],
CudaNdarray_HOST_DIMS(
%(var)
s)[3],
PyGpuArray_DIMS(
%(var)
s)[3],
str0, str1, str2, str3
PyGpuArray_STRIDES(
%(var)
s)[0],
);
PyGpuArray_STRIDES(
%(var)
s)[1],
PyGpuArray_STRIDES(
%(var)
s)[2],
PyGpuArray_STRIDES(
%(var)
s)[3]);
%(fail)
s
%(fail)
s
}
}
}
}
"""
%
dict
(
var
=
var
,
err
=
err
,
desc
=
desc
,
fail
=
fail
)
"""
%
dict
(
var
=
var
,
err
=
err
,
desc
=
desc
,
fail
=
fail
)
...
@@ -348,7 +347,7 @@ class GpuDnnConvDesc(Op):
...
@@ -348,7 +347,7 @@ class GpuDnnConvDesc(Op):
pad_h_spec
=
pad_h_spec
,
pad_w_spec
=
pad_w_spec
)
pad_h_spec
=
pad_h_spec
,
pad_w_spec
=
pad_w_spec
)
def
c_code_cache_version
(
self
):
def
c_code_cache_version
(
self
):
return
(
2
,
version
())
return
(
1
,
version
())
# This is to avoid conflict with the one in cuda/dnn.py
# This is to avoid conflict with the one in cuda/dnn.py
if
not
hasattr
(
config
,
'dnn'
):
if
not
hasattr
(
config
,
'dnn'
):
...
@@ -359,21 +358,21 @@ if not hasattr(config, 'dnn'):
...
@@ -359,21 +358,21 @@ if not hasattr(config, 'dnn'):
in_c_key
=
False
)
in_c_key
=
False
)
# scalar constants
# scalar constants
_zero
=
constant
(
numpy
.
asarray
(
0.0
,
dtype
=
'float
32
'
))
_zero
=
constant
(
numpy
.
asarray
(
0.0
,
dtype
=
'float
64
'
))
_one
=
constant
(
numpy
.
asarray
(
1.0
,
dtype
=
'float
32
'
))
_one
=
constant
(
numpy
.
asarray
(
1.0
,
dtype
=
'float
64
'
))
def
ensure_
float
(
val
,
default
,
name
):
def
ensure_
double
(
val
,
default
,
name
):
if
val
is
None
:
if
val
is
None
:
return
default
.
clone
()
return
default
.
clone
()
if
not
isinstance
(
val
,
Variable
):
if
not
isinstance
(
val
,
Variable
):
val
=
constant
(
val
)
val
=
constant
(
val
,
dtype
=
'float64'
)
if
hasattr
(
val
,
'ndim'
)
and
val
.
ndim
==
0
:
if
hasattr
(
val
,
'ndim'
)
and
val
.
ndim
==
0
:
val
=
as_scalar
(
val
)
val
=
as_scalar
(
val
)
if
not
isinstance
(
val
.
type
,
theano
.
scalar
.
Scalar
):
if
not
isinstance
(
val
.
type
,
theano
.
scalar
.
Scalar
):
raise
TypeError
(
"
%
s: expected a scalar value"
%
(
name
,))
raise
TypeError
(
"
%
s: expected a scalar value"
%
(
name
,))
if
not
val
.
type
.
dtype
==
'float
32
'
:
if
not
val
.
type
.
dtype
==
'float
64
'
:
raise
TypeError
(
"
%
s: type is not float
32
"
%
(
name
,))
raise
TypeError
(
"
%
s: type is not float
64
"
%
(
name
,))
return
val
return
val
...
@@ -386,8 +385,6 @@ class GpuDnnConv(DnnBase, COp):
...
@@ -386,8 +385,6 @@ class GpuDnnConv(DnnBase, COp):
:param descr: the convolution descriptor
:param descr: the convolution descriptor
"""
"""
__props__
=
(
'workmem'
,
'inplace'
)
__props__
=
(
'workmem'
,
'inplace'
)
__input_name__
=
(
'image'
,
'kernel'
,
'output'
,
'descriptor'
,
'alpha'
,
'beta'
)
def
__init__
(
self
,
workmem
=
None
,
inplace
=
False
):
def
__init__
(
self
,
workmem
=
None
,
inplace
=
False
):
"""
"""
...
@@ -404,13 +401,6 @@ class GpuDnnConv(DnnBase, COp):
...
@@ -404,13 +401,6 @@ class GpuDnnConv(DnnBase, COp):
self
.
destroy_map
=
{
0
:
[
2
]}
self
.
destroy_map
=
{
0
:
[
2
]}
assert
self
.
workmem
in
[
'none'
,
'small'
,
'large'
]
assert
self
.
workmem
in
[
'none'
,
'small'
,
'large'
]
def
__setstate__
(
self
,
d
):
self
.
__dict__
.
update
(
d
)
if
not
hasattr
(
self
,
'workmem'
):
self
.
workmem
=
'none'
if
not
hasattr
(
self
,
'inplace'
):
self
.
inplace
=
False
def
get_op_params
(
self
):
def
get_op_params
(
self
):
if
self
.
inplace
:
if
self
.
inplace
:
inpl_def
=
[(
'CONV_INPLACE'
,
'1'
)]
inpl_def
=
[(
'CONV_INPLACE'
,
'1'
)]
...
@@ -429,9 +419,9 @@ class GpuDnnConv(DnnBase, COp):
...
@@ -429,9 +419,9 @@ class GpuDnnConv(DnnBase, COp):
return
[
alg_def
]
+
inpl_def
return
[
alg_def
]
+
inpl_def
def
make_node
(
self
,
img
,
kern
,
output
,
desc
,
alpha
=
None
,
beta
=
None
):
def
make_node
(
self
,
img
,
kern
,
output
,
desc
,
alpha
=
None
,
beta
=
None
):
img
=
as_
cuda_nd
array_variable
(
img
)
img
=
as_
gpu
array_variable
(
img
)
kern
=
as_
cuda_nd
array_variable
(
kern
)
kern
=
as_
gpu
array_variable
(
kern
)
output
=
as_
cuda_nd
array_variable
(
output
)
output
=
as_
gpu
array_variable
(
output
)
if
img
.
type
.
ndim
!=
4
:
if
img
.
type
.
ndim
!=
4
:
raise
TypeError
(
'img must be 4D tensor'
)
raise
TypeError
(
'img must be 4D tensor'
)
if
kern
.
type
.
ndim
!=
4
:
if
kern
.
type
.
ndim
!=
4
:
...
@@ -443,8 +433,8 @@ class GpuDnnConv(DnnBase, COp):
...
@@ -443,8 +433,8 @@ class GpuDnnConv(DnnBase, COp):
or
desc
.
type
.
ctype
!=
'cudnnConvolutionDescriptor_t'
:
or
desc
.
type
.
ctype
!=
'cudnnConvolutionDescriptor_t'
:
raise
TypeError
(
'desc must be cudnnConvolutionDescriptor_t'
)
raise
TypeError
(
'desc must be cudnnConvolutionDescriptor_t'
)
alpha
=
ensure_
float
(
alpha
,
_one
,
'alpha'
)
alpha
=
ensure_
double
(
alpha
,
_one
,
'alpha'
)
beta
=
ensure_
float
(
beta
,
_zero
,
'beta'
)
beta
=
ensure_
double
(
beta
,
_zero
,
'beta'
)
return
Apply
(
self
,
[
img
,
kern
,
output
,
desc
,
alpha
,
beta
],
return
Apply
(
self
,
[
img
,
kern
,
output
,
desc
,
alpha
,
beta
],
[
output
.
type
()])
[
output
.
type
()])
...
@@ -455,8 +445,8 @@ class GpuDnnConv(DnnBase, COp):
...
@@ -455,8 +445,8 @@ class GpuDnnConv(DnnBase, COp):
top
=
gpu_contiguous
(
top
)
top
=
gpu_contiguous
(
top
)
d_img
=
GpuDnnConvGradI
()(
kerns
,
top
,
gpu_alloc_empty
(
*
img
.
shape
),
desc
)
d_img
=
GpuDnnConvGradI
()(
kerns
,
top
,
empty_like
(
img
),
desc
)
d_kerns
=
GpuDnnConvGradW
()(
img
,
top
,
gpu_alloc_empty
(
*
kerns
.
shape
),
desc
)
d_kerns
=
GpuDnnConvGradW
()(
img
,
top
,
empty_like
(
kerns
),
desc
)
d_alpha
=
grad_not_implemented
(
self
,
4
,
alpha
)
d_alpha
=
grad_not_implemented
(
self
,
4
,
alpha
)
d_beta
=
grad_not_implemented
(
self
,
5
,
beta
)
d_beta
=
grad_not_implemented
(
self
,
5
,
beta
)
...
@@ -512,7 +502,6 @@ class GpuDnnConvGradW(DnnBase, COp):
...
@@ -512,7 +502,6 @@ class GpuDnnConvGradW(DnnBase, COp):
"""
"""
__props__
=
(
'inplace'
,)
__props__
=
(
'inplace'
,)
__input_name__
=
(
'image'
,
'grad'
,
'output'
,
'descriptor'
,
'alpha'
,
'beta'
)
def
__init__
(
self
,
inplace
=
False
):
def
__init__
(
self
,
inplace
=
False
):
COp
.
__init__
(
self
,
[
"dnn_base.c"
,
"dnn_conv_base.c"
,
"dnn_gw.c"
],
COp
.
__init__
(
self
,
[
"dnn_base.c"
,
"dnn_conv_base.c"
,
"dnn_gw.c"
],
...
@@ -532,8 +521,8 @@ class GpuDnnConvGradW(DnnBase, COp):
...
@@ -532,8 +521,8 @@ class GpuDnnConvGradW(DnnBase, COp):
kerns
=
gpu_contiguous
(
kerns
)
kerns
=
gpu_contiguous
(
kerns
)
d_img
=
GpuDnnConvGradI
()(
kerns
,
top
,
gpu_alloc_empty
(
*
img
.
shape
),
desc
)
d_img
=
GpuDnnConvGradI
()(
kerns
,
top
,
empty_like
(
img
),
desc
)
d_top
=
GpuDnnConv
()(
img
,
kerns
,
gpu_alloc_empty
(
*
top
.
shape
),
desc
)
d_top
=
GpuDnnConv
()(
img
,
kerns
,
empty_like
(
top
),
desc
)
d_alpha
=
grad_not_implemented
(
self
,
4
,
alpha
)
d_alpha
=
grad_not_implemented
(
self
,
4
,
alpha
)
d_beta
=
grad_not_implemented
(
self
,
5
,
beta
)
d_beta
=
grad_not_implemented
(
self
,
5
,
beta
)
...
@@ -551,9 +540,9 @@ class GpuDnnConvGradW(DnnBase, COp):
...
@@ -551,9 +540,9 @@ class GpuDnnConvGradW(DnnBase, COp):
return
[]
return
[]
def
make_node
(
self
,
img
,
topgrad
,
output
,
desc
,
alpha
=
None
,
beta
=
None
):
def
make_node
(
self
,
img
,
topgrad
,
output
,
desc
,
alpha
=
None
,
beta
=
None
):
img
=
as_
cuda_nd
array_variable
(
img
)
img
=
as_
gpu
array_variable
(
img
)
topgrad
=
as_
cuda_nd
array_variable
(
topgrad
)
topgrad
=
as_
gpu
array_variable
(
topgrad
)
output
=
as_
cuda_nd
array_variable
(
output
)
output
=
as_
gpu
array_variable
(
output
)
if
img
.
type
.
ndim
!=
4
:
if
img
.
type
.
ndim
!=
4
:
raise
TypeError
(
'img must be 4D tensor'
)
raise
TypeError
(
'img must be 4D tensor'
)
if
topgrad
.
type
.
ndim
!=
4
:
if
topgrad
.
type
.
ndim
!=
4
:
...
@@ -565,8 +554,8 @@ class GpuDnnConvGradW(DnnBase, COp):
...
@@ -565,8 +554,8 @@ class GpuDnnConvGradW(DnnBase, COp):
or
desc
.
type
.
ctype
!=
'cudnnConvolutionDescriptor_t'
:
or
desc
.
type
.
ctype
!=
'cudnnConvolutionDescriptor_t'
:
raise
TypeError
(
'desc must be cudnnConvolutionDescriptor_t'
)
raise
TypeError
(
'desc must be cudnnConvolutionDescriptor_t'
)
alpha
=
ensure_
float
(
alpha
,
_one
,
'alpha'
)
alpha
=
ensure_
double
(
alpha
,
_one
,
'alpha'
)
beta
=
ensure_
float
(
beta
,
_zero
,
'beta'
)
beta
=
ensure_
double
(
beta
,
_zero
,
'beta'
)
return
Apply
(
self
,
[
img
,
topgrad
,
output
,
desc
,
alpha
,
beta
],
return
Apply
(
self
,
[
img
,
topgrad
,
output
,
desc
,
alpha
,
beta
],
[
output
.
type
()])
[
output
.
type
()])
...
@@ -585,8 +574,6 @@ class GpuDnnConvGradI(DnnBase):
...
@@ -585,8 +574,6 @@ class GpuDnnConvGradI(DnnBase):
"""
"""
__props__
=
(
'inplace'
,)
__props__
=
(
'inplace'
,)
__input_name__
=
(
'kernel'
,
'grad'
,
'output'
,
'descriptor'
,
'alpha'
,
'beta'
)
def
__init__
(
self
,
inplace
=
False
):
def
__init__
(
self
,
inplace
=
False
):
COp
.
__init__
(
self
,
[
"dnn_base.c"
,
"dnn_conv_base.c"
,
"dnn_gi.c"
],
COp
.
__init__
(
self
,
[
"dnn_base.c"
,
"dnn_conv_base.c"
,
"dnn_gi.c"
],
...
@@ -601,8 +588,8 @@ class GpuDnnConvGradI(DnnBase):
...
@@ -601,8 +588,8 @@ class GpuDnnConvGradI(DnnBase):
img
=
gpu_contiguous
(
img
)
img
=
gpu_contiguous
(
img
)
d_kerns
=
GpuDnnConvGradW
()(
img
,
top
,
gpu_alloc_empty
(
*
kerns
.
shape
),
desc
)
d_kerns
=
GpuDnnConvGradW
()(
img
,
top
,
empty_like
(
kerns
),
desc
)
d_top
=
GpuDnnConv
()(
img
,
kerns
,
gpu_alloc_empty
(
*
top
.
shape
),
desc
)
d_top
=
GpuDnnConv
()(
img
,
kerns
,
empty_like
(
top
),
desc
)
d_alpha
=
grad_not_implemented
(
self
,
4
,
alpha
)
d_alpha
=
grad_not_implemented
(
self
,
4
,
alpha
)
d_beta
=
grad_not_implemented
(
self
,
5
,
beta
)
d_beta
=
grad_not_implemented
(
self
,
5
,
beta
)
...
@@ -620,9 +607,9 @@ class GpuDnnConvGradI(DnnBase):
...
@@ -620,9 +607,9 @@ class GpuDnnConvGradI(DnnBase):
return
[]
return
[]
def
make_node
(
self
,
kern
,
topgrad
,
output
,
desc
,
alpha
=
None
,
beta
=
None
):
def
make_node
(
self
,
kern
,
topgrad
,
output
,
desc
,
alpha
=
None
,
beta
=
None
):
kern
=
as_
cuda_nd
array_variable
(
kern
)
kern
=
as_
gpu
array_variable
(
kern
)
topgrad
=
as_
cuda_nd
array_variable
(
topgrad
)
topgrad
=
as_
gpu
array_variable
(
topgrad
)
output
=
as_
cuda_nd
array_variable
(
output
)
output
=
as_
gpu
array_variable
(
output
)
if
kern
.
type
.
ndim
!=
4
:
if
kern
.
type
.
ndim
!=
4
:
raise
TypeError
(
'kern must be 4D tensor'
)
raise
TypeError
(
'kern must be 4D tensor'
)
if
topgrad
.
type
.
ndim
!=
4
:
if
topgrad
.
type
.
ndim
!=
4
:
...
@@ -634,8 +621,8 @@ class GpuDnnConvGradI(DnnBase):
...
@@ -634,8 +621,8 @@ class GpuDnnConvGradI(DnnBase):
or
desc
.
type
.
ctype
!=
'cudnnConvolutionDescriptor_t'
:
or
desc
.
type
.
ctype
!=
'cudnnConvolutionDescriptor_t'
:
raise
TypeError
(
'desc must be cudnnConvolutionDescriptor_t'
)
raise
TypeError
(
'desc must be cudnnConvolutionDescriptor_t'
)
alpha
=
ensure_
float
(
alpha
,
_one
,
'alpha'
)
alpha
=
ensure_
double
(
alpha
,
_one
,
'alpha'
)
beta
=
ensure_
float
(
beta
,
_zero
,
'beta'
)
beta
=
ensure_
double
(
beta
,
_zero
,
'beta'
)
return
Apply
(
self
,
[
kern
,
topgrad
,
output
,
desc
,
alpha
,
beta
],
return
Apply
(
self
,
[
kern
,
topgrad
,
output
,
desc
,
alpha
,
beta
],
[
output
.
type
()])
[
output
.
type
()])
...
@@ -690,12 +677,12 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1),
...
@@ -690,12 +677,12 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1),
kerns
=
gpu_contiguous
(
kerns
.
dimshuffle
(
1
,
0
,
2
,
3
))
kerns
=
gpu_contiguous
(
kerns
.
dimshuffle
(
1
,
0
,
2
,
3
))
shape2
=
shape_i
(
img
,
2
,
fgraph
)
-
shape_i
(
kerns
,
2
,
fgraph
)
+
1
shape2
=
shape_i
(
img
,
2
,
fgraph
)
-
shape_i
(
kerns
,
2
,
fgraph
)
+
1
shape3
=
shape_i
(
img
,
3
,
fgraph
)
-
shape_i
(
kerns
,
3
,
fgraph
)
+
1
shape3
=
shape_i
(
img
,
3
,
fgraph
)
-
shape_i
(
kerns
,
3
,
fgraph
)
+
1
out
=
gpu_alloc_empty
(
shape_i
(
kerns
,
1
,
fgraph
),
out
=
GpuAllocEmpty
(
img
.
dtype
)
(
shape_i
(
kerns
,
1
,
fgraph
),
shape_i
(
img
,
1
,
fgraph
),
shape2
,
shape3
)
shape_i
(
img
,
1
,
fgraph
),
shape2
,
shape3
)
desc
=
GpuDnnConvDesc
(
border_mode
=
'valid'
,
subsample
=
(
1
,
1
),
desc
=
GpuDnnConvDesc
(
border_mode
=
'valid'
,
subsample
=
(
1
,
1
),
conv_mode
=
'cross'
)(
img
.
shape
,
out
.
shape
)
conv_mode
=
'cross'
)(
img
.
shape
,
out
.
shape
)
conv
=
GpuDnnConvGradW
()(
img
,
kerns
,
out
,
desc
)
conv
=
GpuDnnConvGradW
()(
img
,
kerns
,
out
,
desc
)
return
as_
cuda_nd
array_variable
(
conv
.
dimshuffle
(
1
,
0
,
2
,
3
))
return
as_
gpu
array_variable
(
conv
.
dimshuffle
(
1
,
0
,
2
,
3
))
elif
(
border_mode
==
'full'
and
subsample
==
(
1
,
1
)
and
elif
(
border_mode
==
'full'
and
subsample
==
(
1
,
1
)
and
direction_hint
!=
'forward!'
):
direction_hint
!=
'forward!'
):
...
@@ -707,8 +694,9 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1),
...
@@ -707,8 +694,9 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1),
conv_mode
=
'cross'
if
conv_mode
==
'conv'
else
'conv'
conv_mode
=
'cross'
if
conv_mode
==
'conv'
else
'conv'
shape2
=
shape_i
(
img
,
2
,
fgraph
)
+
shape_i
(
kerns
,
2
,
fgraph
)
-
1
shape2
=
shape_i
(
img
,
2
,
fgraph
)
+
shape_i
(
kerns
,
2
,
fgraph
)
-
1
shape3
=
shape_i
(
img
,
3
,
fgraph
)
+
shape_i
(
kerns
,
3
,
fgraph
)
-
1
shape3
=
shape_i
(
img
,
3
,
fgraph
)
+
shape_i
(
kerns
,
3
,
fgraph
)
-
1
out
=
gpu_alloc_empty
(
shape_i
(
img
,
0
,
fgraph
),
out
=
GpuAllocEmpty
(
img
.
dtype
)(
shape_i
(
img
,
0
,
fgraph
),
shape_i
(
kerns
,
1
,
fgraph
),
shape2
,
shape3
)
shape_i
(
kerns
,
1
,
fgraph
),
shape2
,
shape3
)
desc
=
GpuDnnConvDesc
(
border_mode
=
'valid'
,
subsample
=
(
1
,
1
),
desc
=
GpuDnnConvDesc
(
border_mode
=
'valid'
,
subsample
=
(
1
,
1
),
conv_mode
=
conv_mode
)(
out
.
shape
,
kerns
.
shape
)
conv_mode
=
conv_mode
)(
out
.
shape
,
kerns
.
shape
)
return
GpuDnnConvGradI
()(
kerns
,
img
,
out
,
desc
)
return
GpuDnnConvGradI
()(
kerns
,
img
,
out
,
desc
)
...
@@ -724,7 +712,7 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1),
...
@@ -724,7 +712,7 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1),
out_shp
=
GpuDnnConv
.
get_out_shape
(
img
.
shape
,
kerns
.
shape
,
out_shp
=
GpuDnnConv
.
get_out_shape
(
img
.
shape
,
kerns
.
shape
,
desc_op
.
border_mode
,
desc_op
.
border_mode
,
desc_op
.
subsample
)
desc_op
.
subsample
)
out
=
gpu_alloc_empty
(
*
out_shp
)
out
=
GpuAllocEmpty
(
img
.
dtype
)
(
*
out_shp
)
return
GpuDnnConv
(
workmem
=
workmem
)(
img
,
kerns
,
out
,
desc
)
return
GpuDnnConv
(
workmem
=
workmem
)(
img
,
kerns
,
out
,
desc
)
...
@@ -848,7 +836,7 @@ class GpuDnnPool(DnnBase):
...
@@ -848,7 +836,7 @@ class GpuDnnPool(DnnBase):
__props__
=
()
__props__
=
()
def
make_node
(
self
,
img
,
desc
):
def
make_node
(
self
,
img
,
desc
):
img
=
as_
cuda_nd
array_variable
(
img
)
img
=
as_
gpu
array_variable
(
img
)
if
img
.
type
.
ndim
!=
4
:
if
img
.
type
.
ndim
!=
4
:
raise
TypeError
(
'img must be 4D tensor'
)
raise
TypeError
(
'img must be 4D tensor'
)
...
@@ -913,9 +901,9 @@ if (output%(name)s != NULL) { cudnnDestroyTensorDescriptor(output%(name)s); }
...
@@ -913,9 +901,9 @@ if (output%(name)s != NULL) { cudnnDestroyTensorDescriptor(output%(name)s); }
return
"""
return
"""
cudnnStatus_t err
%(name)
s;
cudnnStatus_t err
%(name)
s;
in
t
%(out)
s_dims[4];
size_
t
%(out)
s_dims[4];
if (!
CudaNdarray_is_c_contiguous(
%(input)
s
)) {
if (!
GpuArray_IS_C_CONTIGUOUS(&
%(input)
s->ga
)) {
PyErr_SetString(PyExc_ValueError, "Only contiguous inputs are supported.");
PyErr_SetString(PyExc_ValueError, "Only contiguous inputs are supported.");
%(fail)
s
%(fail)
s
}
}
...
@@ -944,13 +932,13 @@ if (err%(name)s != CUDNN_STATUS_SUCCESS) {
...
@@ -944,13 +932,13 @@ if (err%(name)s != CUDNN_STATUS_SUCCESS) {
%(fail)
s
%(fail)
s
}
}
%(out)
s_dims[0] =
CudaNdarray_HOST
_DIMS(
%(input)
s)[0];
%(out)
s_dims[0] =
PyGpuArray
_DIMS(
%(input)
s)[0];
%(out)
s_dims[1] =
CudaNdarray_HOST
_DIMS(
%(input)
s)[1];
%(out)
s_dims[1] =
PyGpuArray
_DIMS(
%(input)
s)[1];
%(out)
s_dims[2] = (
CudaNdarray_HOST
_DIMS(
%(input)
s)[2] + (vpad*2) - wsX) / strideX + 1;
%(out)
s_dims[2] = (
PyGpuArray
_DIMS(
%(input)
s)[2] + (vpad*2) - wsX) / strideX + 1;
%(out)
s_dims[3] = (
CudaNdarray_HOST
_DIMS(
%(input)
s)[3] + (hpad*2) - wsY) / strideY + 1;
%(out)
s_dims[3] = (
PyGpuArray
_DIMS(
%(input)
s)[3] + (hpad*2) - wsY) / strideY + 1;
if (
CudaNdarray_prep_output(&
%(out)
s, 4,
%(out)
s_dims) != 0)
if (
theano_prep_output(&
%(out)
s, 4,
%(out)
s_dims,
%(input)
s->ga.typecode,
{
GA_C_ORDER) != 0)
{
%(fail)
s
%(fail)
s
}
}
...
@@ -959,8 +947,8 @@ if (CudaNdarray_prep_output(&%(out)s, 4, %(out)s_dims) != 0)
...
@@ -959,8 +947,8 @@ if (CudaNdarray_prep_output(&%(out)s, 4, %(out)s_dims) != 0)
err
%(name)
s = cudnnPoolingForward(
err
%(name)
s = cudnnPoolingForward(
_handle,
_handle,
%(desc)
s,
%(desc)
s,
%(input_desc)
s,
CudaNda
rray_DEV_DATA(
%(input)
s),
%(input_desc)
s,
PyGpuA
rray_DEV_DATA(
%(input)
s),
%(output_desc)
s,
CudaNda
rray_DEV_DATA(
%(out)
s)
%(output_desc)
s,
PyGpuA
rray_DEV_DATA(
%(out)
s)
);
);
#else
#else
{
{
...
@@ -970,9 +958,9 @@ err%(name)s = cudnnPoolingForward(
...
@@ -970,9 +958,9 @@ err%(name)s = cudnnPoolingForward(
_handle,
_handle,
%(desc)
s,
%(desc)
s,
&alpha,
&alpha,
%(input_desc)
s,
CudaNda
rray_DEV_DATA(
%(input)
s),
%(input_desc)
s,
PyGpuA
rray_DEV_DATA(
%(input)
s),
&beta,
&beta,
%(output_desc)
s,
CudaNda
rray_DEV_DATA(
%(out)
s)
%(output_desc)
s,
PyGpuA
rray_DEV_DATA(
%(out)
s)
);
);
}
}
#endif
#endif
...
@@ -1020,15 +1008,15 @@ class GpuDnnPoolGrad(DnnBase):
...
@@ -1020,15 +1008,15 @@ class GpuDnnPoolGrad(DnnBase):
__props__
=
()
__props__
=
()
def
make_node
(
self
,
inp
,
out
,
inp_grad
,
desc
):
def
make_node
(
self
,
inp
,
out
,
inp_grad
,
desc
):
inp
=
as_
cuda_nd
array_variable
(
inp
)
inp
=
as_
gpu
array_variable
(
inp
)
if
inp
.
type
.
ndim
!=
4
:
if
inp
.
type
.
ndim
!=
4
:
raise
TypeError
(
'inp must be 4D tensor'
)
raise
TypeError
(
'inp must be 4D tensor'
)
inp_grad
=
as_
cuda_nd
array_variable
(
inp_grad
)
inp_grad
=
as_
gpu
array_variable
(
inp_grad
)
if
inp_grad
.
type
.
ndim
!=
4
:
if
inp_grad
.
type
.
ndim
!=
4
:
raise
TypeError
(
'inp_grad must be 4D tensor'
)
raise
TypeError
(
'inp_grad must be 4D tensor'
)
out
=
as_
cuda_nd
array_variable
(
out
)
out
=
as_
gpu
array_variable
(
out
)
if
out
.
type
.
ndim
!=
4
:
if
out
.
type
.
ndim
!=
4
:
raise
TypeError
(
'out must be 4D tensor'
)
raise
TypeError
(
'out must be 4D tensor'
)
...
@@ -1110,19 +1098,19 @@ if (output_grad%(name)s != NULL) { cudnnDestroyTensorDescriptor(output_grad%(nam
...
@@ -1110,19 +1098,19 @@ if (output_grad%(name)s != NULL) { cudnnDestroyTensorDescriptor(output_grad%(nam
return
"""
return
"""
cudnnStatus_t err
%(name)
s;
cudnnStatus_t err
%(name)
s;
if (!
CudaNdarray_is_c_contiguous(
%(input)
s
)) {
if (!
GpuArray_IS_C_CONTIGUOUS(&
%(input)
s->ga
)) {
PyErr_SetString(PyExc_ValueError,
PyErr_SetString(PyExc_ValueError,
"GpuDnnPoolGrad: Only contiguous inputs are supported.");
"GpuDnnPoolGrad: Only contiguous inputs are supported.");
%(fail)
s
%(fail)
s
}
}
if (!
CudaNdarray_is_c_contiguous(
%(input_grad)
s
)) {
if (!
GpuArray_IS_C_CONTIGUOUS(&
%(input_grad)
s->ga
)) {
PyErr_SetString(PyExc_ValueError,
PyErr_SetString(PyExc_ValueError,
"GpuDnnPoolGrad: Only contiguous input gradients are supported.");
"GpuDnnPoolGrad: Only contiguous input gradients are supported.");
%(fail)
s
%(fail)
s
}
}
if (!
CudaNdarray_is_c_contiguous
(
%(output)
s)) {
if (!
GpuArray_IS_C_CONTIGUOUS
(
%(output)
s)) {
PyErr_SetString(PyExc_ValueError,
PyErr_SetString(PyExc_ValueError,
"GpuDnnPoolGrad: Only contiguous outputs are supported.");
"GpuDnnPoolGrad: Only contiguous outputs are supported.");
%(fail)
s
%(fail)
s
...
@@ -1130,8 +1118,9 @@ if (!CudaNdarray_is_c_contiguous(%(output)s)) {
...
@@ -1130,8 +1118,9 @@ if (!CudaNdarray_is_c_contiguous(%(output)s)) {
%(set_in)
s
%(set_in)
s
if (CudaNdarray_prep_output(&
%(output_grad)
s, 4,
if (theano_prep_output(&
%(output_grad)
s, PyGpuArray_NDIM(
%(output)
s),
CudaNdarray_HOST_DIMS(
%(output)
s)) != 0)
PyGpuArray_DIMS(
%(output)
s,
%(output)
s->ga.typecode,
GA_C_ORDER)) != 0)
{
{
%(fail)
s
%(fail)
s
}
}
...
@@ -1141,10 +1130,10 @@ if (CudaNdarray_prep_output(&%(output_grad)s, 4,
...
@@ -1141,10 +1130,10 @@ if (CudaNdarray_prep_output(&%(output_grad)s, 4,
err
%(name)
s = cudnnPoolingBackward(
err
%(name)
s = cudnnPoolingBackward(
_handle,
_handle,
%(desc)
s,
%(desc)
s,
%(input_desc)
s,
CudaNda
rray_DEV_DATA(
%(input)
s),
%(input_desc)
s,
PyGpuA
rray_DEV_DATA(
%(input)
s),
%(input_grad_desc)
s,
CudaNda
rray_DEV_DATA(
%(input_grad)
s),
%(input_grad_desc)
s,
PyGpuA
rray_DEV_DATA(
%(input_grad)
s),
%(output_desc)
s,
CudaNda
rray_DEV_DATA(
%(output)
s),
%(output_desc)
s,
PyGpuA
rray_DEV_DATA(
%(output)
s),
%(output_grad_desc)
s,
CudaNda
rray_DEV_DATA(
%(output_grad)
s)
%(output_grad_desc)
s,
PyGpuA
rray_DEV_DATA(
%(output_grad)
s)
);
);
#else
#else
{
{
...
@@ -1154,11 +1143,11 @@ err%(name)s = cudnnPoolingBackward(
...
@@ -1154,11 +1143,11 @@ err%(name)s = cudnnPoolingBackward(
_handle,
_handle,
%(desc)
s,
%(desc)
s,
&alpha,
&alpha,
%(input_desc)
s,
CudaNda
rray_DEV_DATA(
%(input)
s),
%(input_desc)
s,
PyGpuA
rray_DEV_DATA(
%(input)
s),
%(input_grad_desc)
s,
CudaNda
rray_DEV_DATA(
%(input_grad)
s),
%(input_grad_desc)
s,
PyGpuA
rray_DEV_DATA(
%(input_grad)
s),
%(output_desc)
s,
CudaNda
rray_DEV_DATA(
%(output)
s),
%(output_desc)
s,
PyGpuA
rray_DEV_DATA(
%(output)
s),
&beta,
&beta,
%(output_grad_desc)
s,
CudaNda
rray_DEV_DATA(
%(output_grad)
s)
%(output_grad_desc)
s,
PyGpuA
rray_DEV_DATA(
%(output_grad)
s)
);
);
}
}
#endif
#endif
...
@@ -1170,22 +1159,22 @@ if (err%(name)s != CUDNN_STATUS_SUCCESS) {
...
@@ -1170,22 +1159,22 @@ if (err%(name)s != CUDNN_STATUS_SUCCESS) {
"output.shape=(
%%
d,
%%
d,
%%
d,
%%
d) "
"output.shape=(
%%
d,
%%
d,
%%
d,
%%
d) "
"output_grad.shape=(
%%
d,
%%
d,
%%
d,
%%
d)",
"output_grad.shape=(
%%
d,
%%
d,
%%
d,
%%
d)",
cudnnGetErrorString(err
%(name)
s),
cudnnGetErrorString(err
%(name)
s),
CudaNdarray_HOST
_DIMS(
%(input)
s)[0],
PyGpuArray
_DIMS(
%(input)
s)[0],
CudaNdarray_HOST
_DIMS(
%(input)
s)[1],
PyGpuArray
_DIMS(
%(input)
s)[1],
CudaNdarray_HOST
_DIMS(
%(input)
s)[2],
PyGpuArray
_DIMS(
%(input)
s)[2],
CudaNdarray_HOST
_DIMS(
%(input)
s)[3],
PyGpuArray
_DIMS(
%(input)
s)[3],
CudaNdarray_HOST
_DIMS(
%(input_grad)
s)[0],
PyGpuArray
_DIMS(
%(input_grad)
s)[0],
CudaNdarray_HOST
_DIMS(
%(input_grad)
s)[1],
PyGpuArray
_DIMS(
%(input_grad)
s)[1],
CudaNdarray_HOST
_DIMS(
%(input_grad)
s)[2],
PyGpuArray
_DIMS(
%(input_grad)
s)[2],
CudaNdarray_HOST
_DIMS(
%(input_grad)
s)[3],
PyGpuArray
_DIMS(
%(input_grad)
s)[3],
CudaNdarray_HOST
_DIMS(
%(output)
s)[0],
PyGpuArray
_DIMS(
%(output)
s)[0],
CudaNdarray_HOST
_DIMS(
%(output)
s)[1],
PyGpuArray
_DIMS(
%(output)
s)[1],
CudaNdarray_HOST
_DIMS(
%(output)
s)[2],
PyGpuArray
_DIMS(
%(output)
s)[2],
CudaNdarray_HOST
_DIMS(
%(output)
s)[3],
PyGpuArray
_DIMS(
%(output)
s)[3],
CudaNdarray_HOST
_DIMS(
%(output_grad)
s)[0],
PyGpuArray
_DIMS(
%(output_grad)
s)[0],
CudaNdarray_HOST
_DIMS(
%(output_grad)
s)[1],
PyGpuArray
_DIMS(
%(output_grad)
s)[1],
CudaNdarray_HOST
_DIMS(
%(output_grad)
s)[2],
PyGpuArray
_DIMS(
%(output_grad)
s)[2],
CudaNdarray_HOST
_DIMS(
%(output_grad)
s)[3]
PyGpuArray
_DIMS(
%(output_grad)
s)[3]
);
);
%(fail)
s
%(fail)
s
}
}
...
@@ -1357,7 +1346,9 @@ if (%(mode)d == 1)
...
@@ -1357,7 +1346,9 @@ if (%(mode)d == 1)
# Build and prepare the output variable.
# Build and prepare the output variable.
result
+=
"""
result
+=
"""
if (CudaNdarray_prep_output(&
%(outs)
s, 4, CudaNdarray_HOST_DIMS(
%(ins)
s)) != 0)
if (theano_prep_output(&
%(outs)
s, PyGpuArray_NDIM(
%(ins)
s),
PyGpuArray_DIMS(
%(ins)
s),
%(ins)
s->ga.typecode,
GA_C_ORDER) != 0)
{
{
%(fail)
s
%(fail)
s
}
}
...
@@ -1393,7 +1384,7 @@ class GpuDnnSoftmax(GpuDnnSoftmaxBase):
...
@@ -1393,7 +1384,7 @@ class GpuDnnSoftmax(GpuDnnSoftmaxBase):
softmax_inputs
=
[
'softmax_input'
]
softmax_inputs
=
[
'softmax_input'
]
def
make_node
(
self
,
x
):
def
make_node
(
self
,
x
):
x
=
as_
cuda_nd
array_variable
(
x
)
x
=
as_
gpu
array_variable
(
x
)
assert
x
.
ndim
==
4
assert
x
.
ndim
==
4
return
Apply
(
self
,
[
x
],
[
x
.
type
()])
return
Apply
(
self
,
[
x
],
[
x
.
type
()])
...
@@ -1405,9 +1396,9 @@ err%(name)s = cudnnSoftmaxForward(
...
@@ -1405,9 +1396,9 @@ err%(name)s = cudnnSoftmaxForward(
algo
%(name)
s,
algo
%(name)
s,
mode
%(name)
s,
mode
%(name)
s,
softmax_input_
%(name)
s,
softmax_input_
%(name)
s,
CudaNda
rray_DEV_DATA(
%(ins)
s),
PyGpuA
rray_DEV_DATA(
%(ins)
s),
softmax_output_
%(name)
s,
softmax_output_
%(name)
s,
CudaNda
rray_DEV_DATA(
%(outs)
s)
PyGpuA
rray_DEV_DATA(
%(outs)
s)
);
);
#else
#else
{
{
...
@@ -1419,10 +1410,10 @@ err%(name)s = cudnnSoftmaxForward(
...
@@ -1419,10 +1410,10 @@ err%(name)s = cudnnSoftmaxForward(
mode
%(name)
s,
mode
%(name)
s,
(void*) &alpha,
(void*) &alpha,
softmax_input_
%(name)
s,
softmax_input_
%(name)
s,
CudaNda
rray_DEV_DATA(
%(ins)
s),
PyGpuA
rray_DEV_DATA(
%(ins)
s),
(void*) &beta,
(void*) &beta,
softmax_output_
%(name)
s,
softmax_output_
%(name)
s,
CudaNda
rray_DEV_DATA(
%(outs)
s)
PyGpuA
rray_DEV_DATA(
%(outs)
s)
);
);
}
}
#endif
#endif
...
@@ -1454,8 +1445,8 @@ class GpuDnnSoftmaxGrad(GpuDnnSoftmaxBase):
...
@@ -1454,8 +1445,8 @@ class GpuDnnSoftmaxGrad(GpuDnnSoftmaxBase):
softmax_inputs
=
[
'softmax_gout'
,
'softmax_input'
]
softmax_inputs
=
[
'softmax_gout'
,
'softmax_input'
]
def
make_node
(
self
,
dy
,
sm
):
def
make_node
(
self
,
dy
,
sm
):
dy
=
as_
cuda_nd
array_variable
(
dy
)
dy
=
as_
gpu
array_variable
(
dy
)
sm
=
as_
cuda_nd
array_variable
(
sm
)
sm
=
as_
gpu
array_variable
(
sm
)
assert
dy
.
ndim
==
4
assert
dy
.
ndim
==
4
assert
sm
.
ndim
==
4
assert
sm
.
ndim
==
4
return
Apply
(
self
,
[
dy
,
sm
],
[
sm
.
type
.
make_variable
()])
return
Apply
(
self
,
[
dy
,
sm
],
[
sm
.
type
.
make_variable
()])
...
@@ -1468,11 +1459,11 @@ err%(name)s = cudnnSoftmaxBackward(
...
@@ -1468,11 +1459,11 @@ err%(name)s = cudnnSoftmaxBackward(
algo
%(name)
s,
algo
%(name)
s,
mode
%(name)
s,
mode
%(name)
s,
%(name1)
s_
%(name)
s,
%(name1)
s_
%(name)
s,
CudaNda
rray_DEV_DATA(
%(ins1)
s),
PyGpuA
rray_DEV_DATA(
%(ins1)
s),
%(name0)
s_
%(name)
s,
%(name0)
s_
%(name)
s,
CudaNda
rray_DEV_DATA(
%(ins0)
s),
PyGpuA
rray_DEV_DATA(
%(ins0)
s),
softmax_output_
%(name)
s,
softmax_output_
%(name)
s,
CudaNda
rray_DEV_DATA(
%(outs)
s)
PyGpuA
rray_DEV_DATA(
%(outs)
s)
);
);
#else
#else
{
{
...
@@ -1484,12 +1475,12 @@ err%(name)s = cudnnSoftmaxBackward(
...
@@ -1484,12 +1475,12 @@ err%(name)s = cudnnSoftmaxBackward(
mode
%(name)
s,
mode
%(name)
s,
(void*) &alpha,
(void*) &alpha,
%(name1)
s_
%(name)
s,
%(name1)
s_
%(name)
s,
CudaNda
rray_DEV_DATA(
%(ins1)
s),
PyGpuA
rray_DEV_DATA(
%(ins1)
s),
%(name0)
s_
%(name)
s,
%(name0)
s_
%(name)
s,
CudaNda
rray_DEV_DATA(
%(ins0)
s),
PyGpuA
rray_DEV_DATA(
%(ins0)
s),
(void*) &beta,
(void*) &beta,
softmax_output_
%(name)
s,
softmax_output_
%(name)
s,
CudaNda
rray_DEV_DATA(
%(outs)
s)
PyGpuA
rray_DEV_DATA(
%(outs)
s)
);
);
}
}
#endif
#endif
...
@@ -1560,7 +1551,7 @@ if False:
...
@@ -1560,7 +1551,7 @@ if False:
if
(
dest
.
owner
and
if
(
dest
.
owner
and
isinstance
(
dest
.
owner
.
op
,
GpuAllocEmpty
)
and
isinstance
(
dest
.
owner
.
op
,
GpuAllocEmpty
)
and
len
(
dest
.
clients
)
>
1
):
len
(
dest
.
clients
)
>
1
):
inputs
[
2
]
=
gpu_alloc_empty
(
*
dest
.
owner
.
inputs
)
inputs
[
2
]
=
GpuAllocEmpty
(
dest
.
owner
.
op
.
dtype
)
(
*
dest
.
owner
.
inputs
)
return
[
GpuDnnConv
(
workmem
=
node
.
op
.
workmem
,
inplace
=
True
)(
*
inputs
)]
return
[
GpuDnnConv
(
workmem
=
node
.
op
.
workmem
,
inplace
=
True
)(
*
inputs
)]
@local_optimizer
([
GpuDnnConvGradW
],
inplace
=
True
)
@local_optimizer
([
GpuDnnConvGradW
],
inplace
=
True
)
...
@@ -1572,7 +1563,7 @@ if False:
...
@@ -1572,7 +1563,7 @@ if False:
if
(
dest
.
owner
and
if
(
dest
.
owner
and
isinstance
(
dest
.
owner
.
op
,
GpuAllocEmpty
)
and
isinstance
(
dest
.
owner
.
op
,
GpuAllocEmpty
)
and
len
(
dest
.
clients
)
>
1
):
len
(
dest
.
clients
)
>
1
):
inputs
[
2
]
=
gpu_alloc_empty
(
*
dest
.
owner
.
inputs
)
inputs
[
2
]
=
GpuAllocEmpty
(
dest
.
owner
.
op
.
dtype
)
(
*
dest
.
owner
.
inputs
)
return
[
GpuDnnConvGradW
(
inplace
=
True
)(
*
inputs
)]
return
[
GpuDnnConvGradW
(
inplace
=
True
)(
*
inputs
)]
@local_optimizer
([
GpuDnnConvGradI
],
inplace
=
True
)
@local_optimizer
([
GpuDnnConvGradI
],
inplace
=
True
)
...
@@ -1584,7 +1575,7 @@ if False:
...
@@ -1584,7 +1575,7 @@ if False:
if
(
dest
.
owner
and
if
(
dest
.
owner
and
isinstance
(
dest
.
owner
.
op
,
GpuAllocEmpty
)
and
isinstance
(
dest
.
owner
.
op
,
GpuAllocEmpty
)
and
len
(
dest
.
clients
)
>
1
):
len
(
dest
.
clients
)
>
1
):
inputs
[
2
]
=
gpu_alloc_empty
(
*
dest
.
owner
.
inputs
)
inputs
[
2
]
=
GpuAllocEmpty
(
dest
.
owner
.
op
.
dtype
)
(
*
dest
.
owner
.
inputs
)
return
[
GpuDnnConvGradI
(
inplace
=
True
)(
*
inputs
)]
return
[
GpuDnnConvGradI
(
inplace
=
True
)(
*
inputs
)]
optdb
.
register
(
'local_dnn_conv_inplace'
,
optdb
.
register
(
'local_dnn_conv_inplace'
,
...
@@ -1714,7 +1705,7 @@ if False:
...
@@ -1714,7 +1705,7 @@ if False:
ins
=
node
.
inputs
[
0
]
.
dimshuffle
(
0
,
1
,
'x'
,
'x'
)
ins
=
node
.
inputs
[
0
]
.
dimshuffle
(
0
,
1
,
'x'
,
'x'
)
ins
=
gpu_contiguous
(
ins
)
ins
=
gpu_contiguous
(
ins
)
out
=
GpuDnnSoftmax
(
'bc01'
,
'accurate'
,
'channel'
)(
ins
)
out
=
GpuDnnSoftmax
(
'bc01'
,
'accurate'
,
'channel'
)(
ins
)
out
=
as_
cuda_nd
array_variable
(
out
.
dimshuffle
(
0
,
1
))
out
=
as_
gpu
array_variable
(
out
.
dimshuffle
(
0
,
1
))
return
[
out
]
return
[
out
]
class
NoCuDNNRaise
(
Optimizer
):
class
NoCuDNNRaise
(
Optimizer
):
...
...
theano/sandbox/gpuarray/dnn_fwd.c
浏览文件 @
4d8e60e7
#section support_code_struct
#section support_code_struct
int
int
APPLY_SPECIFIC
(
conv_fwd
)(
CudaNdarray
*
input
,
CudaNdarray
*
kerns
,
APPLY_SPECIFIC
(
conv_fwd
)(
PyGpuArrayObject
*
input
,
PyGpuArrayObject
*
kerns
,
CudaNdarray
*
om
,
cudnnConvolutionDescriptor_t
desc
,
PyGpuArrayObject
*
om
,
float
alpha
,
float
beta
,
CudaNdarray
**
output
)
{
cudnnConvolutionDescriptor_t
desc
,
double
alpha
,
double
beta
,
PyGpuArrayObject
**
output
)
{
cudnnStatus_t
err
=
CUDNN_STATUS_SUCCESS
;
cudnnStatus_t
err
=
CUDNN_STATUS_SUCCESS
;
if
(
CudaNdarray_HOST_DIMS
(
input
)[
1
]
!=
CudaNdarray_HOST_DIMS
(
kerns
)[
1
])
{
float
af
=
alpha
,
bf
=
beta
;
void
*
alpha_p
;
void
*
beta_p
;
if
(
PyGpuArray_DIMS
(
input
)[
1
]
!=
PyGpuArray_DIMS
(
kerns
)[
1
])
{
PyErr_SetString
(
PyExc_ValueError
,
PyErr_SetString
(
PyExc_ValueError
,
"GpuDnnConv images and kernel must have the same stack size
\n
"
);
"GpuDnnConv images and kernel must have the same stack size"
);
return
1
;
return
1
;
}
}
...
@@ -16,14 +22,29 @@ APPLY_SPECIFIC(conv_fwd)(CudaNdarray *input, CudaNdarray *kerns,
...
@@ -16,14 +22,29 @@ APPLY_SPECIFIC(conv_fwd)(CudaNdarray *input, CudaNdarray *kerns,
if
(
c_set_filter
(
kerns
,
APPLY_SPECIFIC
(
kerns
))
==
-
1
)
if
(
c_set_filter
(
kerns
,
APPLY_SPECIFIC
(
kerns
))
==
-
1
)
return
1
;
return
1
;
switch
(
input
->
ga
.
typecode
)
{
case
GA_DOUBLE
:
alpha_p
=
(
void
*
)
&
alpha
;
beta_p
=
(
void
*
)
&
beta
;
break
;
case
GA_FLOAT
:
alpha_p
=
(
void
*
)
&
af
;
beta_p
=
(
void
*
)
&
bf
;
break
;
default:
PyErr_SetString
(
PyExc_TypeError
,
"Unsupported type in convolution"
);
return
1
;
}
#ifdef CONV_INPLACE
#ifdef CONV_INPLACE
Py_XDECREF
(
*
output
);
Py_XDECREF
(
*
output
);
*
output
=
om
;
*
output
=
om
;
Py_INCREF
(
*
output
);
Py_INCREF
(
*
output
);
#else
#else
if
(
CudaNdarray_prep_output
(
output
,
4
,
CudaNdarray_HOST_DIMS
(
om
))
!=
0
)
if
(
theano_prep_output
(
output
,
PyGpuArray_NDIM
(
om
),
PyGpuArray_DIMS
(
om
),
om
->
ga
.
typecode
,
GA_C_ORDER
)
!=
0
)
return
1
;
return
1
;
if
(
beta
!=
0
.
0
&&
CudaNdarray_CopyFromCudaNdarray
(
*
output
,
om
))
if
(
beta
!=
0
.
0
&&
pygpu_move
(
*
output
,
om
))
return
1
;
return
1
;
#endif
#endif
...
@@ -32,7 +53,7 @@ APPLY_SPECIFIC(conv_fwd)(CudaNdarray *input, CudaNdarray *kerns,
...
@@ -32,7 +53,7 @@ APPLY_SPECIFIC(conv_fwd)(CudaNdarray *input, CudaNdarray *kerns,
{
{
size_t
worksize
;
size_t
worksize
;
void
*
workspace
;
gpudata
*
workspace
;
err
=
cudnnGetConvolutionForwardWorkspaceSize
(
_handle
,
err
=
cudnnGetConvolutionForwardWorkspaceSize
(
_handle
,
APPLY_SPECIFIC
(
input
),
APPLY_SPECIFIC
(
input
),
...
@@ -48,21 +69,34 @@ APPLY_SPECIFIC(conv_fwd)(CudaNdarray *input, CudaNdarray *kerns,
...
@@ -48,21 +69,34 @@ APPLY_SPECIFIC(conv_fwd)(CudaNdarray *input, CudaNdarray *kerns,
return
1
;
return
1
;
}
}
workspace
=
get_work_mem
(
worksize
);
/*
if
(
workspace
==
NULL
&&
worksize
!=
0
)
* This is less than ideal since we need to free it after (which
return
1
;
* introduces a synchronization point. But we don't have a module
* to place a nice get_work_mem() function in.
*/
if
(
worksize
!=
0
)
{
workspace
=
pygpu_default_context
->
ops
->
buffer_alloc
(
pygpu_default_context
->
ctx
,
worksize
,
NULL
,
0
,
NULL
);
if
(
workspace
==
NULL
)
{
PyErr_SetString
(
PyExc_RuntimeError
,
"Could not allocate working memory"
);
return
1
;
}
}
err
=
cudnnConvolutionForward
(
err
=
cudnnConvolutionForward
(
_handle
,
_handle
,
(
void
*
)
&
alpha
,
alpha_p
,
APPLY_SPECIFIC
(
input
),
CudaNdarray_DEV_DATA
(
input
),
APPLY_SPECIFIC
(
input
),
PyGpuArray_DEV_DATA
(
input
),
APPLY_SPECIFIC
(
kerns
),
CudaNdarray_DEV_DATA
(
kerns
),
APPLY_SPECIFIC
(
kerns
),
PyGpuArray_DEV_DATA
(
kerns
),
desc
,
desc
,
CONV_ALGO
,
CONV_ALGO
,
worksize
==
0
?
NULL
:
*
(
void
**
)
workspace
,
worksize
,
workspace
,
worksize
,
beta_p
,
(
void
*
)
&
beta
,
APPLY_SPECIFIC
(
output
),
PyGpuArray_DEV_DATA
(
*
output
));
APPLY_SPECIFIC
(
output
),
CudaNdarray_DEV_DATA
(
*
output
));
}
}
pygpu_default_context
->
ops
->
buffer_release
(
workspace
);
if
(
err
!=
CUDNN_STATUS_SUCCESS
)
{
if
(
err
!=
CUDNN_STATUS_SUCCESS
)
{
PyErr_Format
(
PyExc_RuntimeError
,
"GpuDnnConv: error doing operation: %s"
,
PyErr_Format
(
PyExc_RuntimeError
,
"GpuDnnConv: error doing operation: %s"
,
cudnnGetErrorString
(
err
));
cudnnGetErrorString
(
err
));
...
...
theano/sandbox/gpuarray/dnn_gi.c
浏览文件 @
4d8e60e7
#section support_code_struct
#section support_code_struct
int
int
APPLY_SPECIFIC
(
conv_gi
)(
CudaNdarray
*
kerns
,
CudaNdarray
*
output
,
APPLY_SPECIFIC
(
conv_gi
)(
PyGpuArrayObject
*
kerns
,
PyGpuArrayObject
*
output
,
CudaNdarray
*
im
,
cudnnConvolutionDescriptor_t
desc
,
PyGpuArrayObject
*
im
,
float
alpha
,
float
beta
,
CudaNdarray
**
input
)
{
cudnnConvolutionDescriptor_t
desc
,
double
alpha
,
double
beta
,
PyGpuArrayObject
**
input
)
{
cudnnStatus_t
err
=
CUDNN_STATUS_SUCCESS
;
cudnnStatus_t
err
=
CUDNN_STATUS_SUCCESS
;
float
af
=
alpha
,
bf
=
beta
;
void
*
alpha_p
;
void
*
beta_p
;
if
(
CudaNdarray_HOST_DIMS
(
im
)[
1
]
!=
CudaNdarray_HOST
_DIMS
(
kerns
)[
1
])
{
if
(
PyGpuArray_DIMS
(
im
)[
1
]
!=
PyGpuArray
_DIMS
(
kerns
)[
1
])
{
PyErr_SetString
(
PyExc_ValueError
,
PyErr_SetString
(
PyExc_ValueError
,
"GpuDnnConv images and kernel must have the same stack size
\n
"
);
"GpuDnnConv images and kernel must have the same stack size"
);
return
1
;
return
1
;
}
}
...
@@ -17,14 +21,29 @@ APPLY_SPECIFIC(conv_gi)(CudaNdarray *kerns, CudaNdarray *output,
...
@@ -17,14 +21,29 @@ APPLY_SPECIFIC(conv_gi)(CudaNdarray *kerns, CudaNdarray *output,
if
(
c_set_filter
(
kerns
,
APPLY_SPECIFIC
(
kerns
))
==
-
1
)
if
(
c_set_filter
(
kerns
,
APPLY_SPECIFIC
(
kerns
))
==
-
1
)
return
1
;
return
1
;
switch
(
input
->
ga
.
typecode
)
{
case
GA_DOUBLE
:
alpha_p
=
(
void
*
)
&
alpha
;
beta_p
=
(
void
*
)
&
beta
;
break
;
case
GA_FLOAT
:
alpha_p
=
(
void
*
)
&
af
;
beta_p
=
(
void
*
)
&
bf
;
break
;
default:
PyErr_SetString
(
PyExc_TypeError
,
"Unsupported type in convolution"
);
return
1
;
}
#ifdef CONV_INPLACE
#ifdef CONV_INPLACE
Py_XDECREF
(
*
input
);
Py_XDECREF
(
*
input
);
*
input
=
im
;
*
input
=
im
;
Py_INCREF
(
*
input
);
Py_INCREF
(
*
input
);
#else
#else
if
(
CudaNdarray_prep_output
(
input
,
4
,
CudaNdarray_HOST_DIMS
(
im
))
!=
0
)
if
(
theano_prep_output
(
input
,
PyGpuArray_NDIM
(
im
),
PyGpuArray_DIMS
(
im
),
im
->
ga
.
typecode
,
GA_C_ORDER
)
!=
0
)
return
1
;
return
1
;
if
(
beta
!=
0
.
0
&&
CudaNdarray_CopyFromCudaNdarray
(
*
input
,
im
))
if
(
beta
!=
0
.
0
&&
pygpu_move
(
*
input
,
im
))
return
1
;
return
1
;
#endif
#endif
...
@@ -33,12 +52,12 @@ APPLY_SPECIFIC(conv_gi)(CudaNdarray *kerns, CudaNdarray *output,
...
@@ -33,12 +52,12 @@ APPLY_SPECIFIC(conv_gi)(CudaNdarray *kerns, CudaNdarray *output,
err
=
cudnnConvolutionBackwardData
(
err
=
cudnnConvolutionBackwardData
(
_handle
,
_handle
,
(
void
*
)
&
alpha
,
alpha_p
,
APPLY_SPECIFIC
(
kerns
),
CudaNda
rray_DEV_DATA
(
kerns
),
APPLY_SPECIFIC
(
kerns
),
PyGpuA
rray_DEV_DATA
(
kerns
),
APPLY_SPECIFIC
(
output
),
CudaNda
rray_DEV_DATA
(
output
),
APPLY_SPECIFIC
(
output
),
PyGpuA
rray_DEV_DATA
(
output
),
desc
,
desc
,
(
void
*
)
&
beta
,
beta_p
,
APPLY_SPECIFIC
(
input
),
CudaNda
rray_DEV_DATA
(
*
input
));
APPLY_SPECIFIC
(
input
),
PyGpuA
rray_DEV_DATA
(
*
input
));
if
(
err
!=
CUDNN_STATUS_SUCCESS
)
{
if
(
err
!=
CUDNN_STATUS_SUCCESS
)
{
PyErr_Format
(
PyExc_RuntimeError
,
"GpuDnnConvGradI: error doing operation: %s"
,
PyErr_Format
(
PyExc_RuntimeError
,
"GpuDnnConvGradI: error doing operation: %s"
,
cudnnGetErrorString
(
err
));
cudnnGetErrorString
(
err
));
...
...
theano/sandbox/gpuarray/dnn_gw.c
浏览文件 @
4d8e60e7
#section support_code_struct
#section support_code_struct
int
int
APPLY_SPECIFIC
(
conv_gw
)(
CudaNdarray
*
input
,
CudaNdarray
*
output
,
APPLY_SPECIFIC
(
conv_gw
)(
PyGpuArrayObject
*
input
,
PyGpuArrayObject
*
output
,
CudaNdarray
*
km
,
cudnnConvolutionDescriptor_t
desc
,
PyGpuArrayObject
*
km
,
float
alpha
,
float
beta
,
CudaNdarray
**
kerns
)
{
cudnnConvolutionDescriptor_t
desc
,
double
alpha
,
double
beta
,
PyGpuArrayObject
**
kerns
)
{
cudnnStatus_t
err
=
CUDNN_STATUS_SUCCESS
;
cudnnStatus_t
err
=
CUDNN_STATUS_SUCCESS
;
float
af
=
alpha
,
bf
=
beta
;
void
*
alpha_p
;
void
*
beta_p
;
if
(
CudaNdarray_HOST_DIMS
(
input
)[
1
]
!=
CudaNdarray_HOST
_DIMS
(
km
)[
1
])
{
if
(
PyGpuArray_DIMS
(
input
)[
1
]
!=
PyGpuArray
_DIMS
(
km
)[
1
])
{
PyErr_SetString
(
PyExc_ValueError
,
PyErr_SetString
(
PyExc_ValueError
,
"GpuDnnConv images and kernel must have the same stack size
\n
"
);
"GpuDnnConv images and kernel must have the same stack size"
);
return
1
;
return
1
;
}
}
...
@@ -17,14 +21,29 @@ APPLY_SPECIFIC(conv_gw)(CudaNdarray *input, CudaNdarray *output,
...
@@ -17,14 +21,29 @@ APPLY_SPECIFIC(conv_gw)(CudaNdarray *input, CudaNdarray *output,
if
(
c_set_tensor4d
(
output
,
APPLY_SPECIFIC
(
output
))
==
-
1
)
if
(
c_set_tensor4d
(
output
,
APPLY_SPECIFIC
(
output
))
==
-
1
)
return
1
;
return
1
;
switch
(
input
->
ga
.
typecode
)
{
case
GA_DOUBLE
:
alpha_p
=
(
void
*
)
&
alpha
;
beta_p
=
(
void
*
)
&
beta
;
break
;
case
GA_FLOAT
:
alpha_p
=
(
void
*
)
&
af
;
beta_p
=
(
void
*
)
&
bf
;
break
;
default:
PyErr_SetString
(
PyExc_TypeError
,
"Unsupported type in convolution"
);
return
1
;
}
#ifdef CONV_INPLACE
#ifdef CONV_INPLACE
Py_XDECREF
(
*
kerns
);
Py_XDECREF
(
*
kerns
);
*
kerns
=
km
;
*
kerns
=
km
;
Py_INCREF
(
*
kerns
);
Py_INCREF
(
*
kerns
);
#else
#else
if
(
CudaNdarray_prep_output
(
kerns
,
4
,
CudaNdarray_HOST_DIMS
(
km
))
!=
0
)
if
(
theano_prep_output
(
kerns
,
PyGpuArray_NDIM
(
km
),
PyGpuArray_DIMS
(
km
),
km
->
ga
.
typecode
,
GA_C_ORDER
)
!=
0
)
return
1
;
return
1
;
if
(
beta
!=
0
.
0
&&
CudaNdarray_CopyFromCudaNdarray
(
*
kerns
,
km
))
if
(
beta
!=
0
.
0
&&
pygpu_move
(
*
kerns
,
km
))
return
1
;
return
1
;
#endif
#endif
...
@@ -33,12 +52,12 @@ APPLY_SPECIFIC(conv_gw)(CudaNdarray *input, CudaNdarray *output,
...
@@ -33,12 +52,12 @@ APPLY_SPECIFIC(conv_gw)(CudaNdarray *input, CudaNdarray *output,
err
=
cudnnConvolutionBackwardFilter
(
err
=
cudnnConvolutionBackwardFilter
(
_handle
,
_handle
,
(
void
*
)
&
alpha
,
alpha_p
,
APPLY_SPECIFIC
(
input
),
CudaNda
rray_DEV_DATA
(
input
),
APPLY_SPECIFIC
(
input
),
PyGpuA
rray_DEV_DATA
(
input
),
APPLY_SPECIFIC
(
output
),
CudaNda
rray_DEV_DATA
(
output
),
APPLY_SPECIFIC
(
output
),
PyGpuA
rray_DEV_DATA
(
output
),
desc
,
desc
,
(
void
*
)
&
beta
,
beta_p
,
APPLY_SPECIFIC
(
kerns
),
CudaNda
rray_DEV_DATA
(
*
kerns
));
APPLY_SPECIFIC
(
kerns
),
PyGpuA
rray_DEV_DATA
(
*
kerns
));
if
(
err
!=
CUDNN_STATUS_SUCCESS
)
{
if
(
err
!=
CUDNN_STATUS_SUCCESS
)
{
PyErr_Format
(
PyExc_RuntimeError
,
"GpuDnnConvGradW: error doing operation: %s"
,
PyErr_Format
(
PyExc_RuntimeError
,
"GpuDnnConvGradW: error doing operation: %s"
,
cudnnGetErrorString
(
err
));
cudnnGetErrorString
(
err
));
...
...
theano/sandbox/gpuarray/gpuarray_helper.h
浏览文件 @
4d8e60e7
...
@@ -42,4 +42,9 @@ static PyGpuArrayObject *theano_try_copy(PyGpuArrayObject *out,
...
@@ -42,4 +42,9 @@ static PyGpuArrayObject *theano_try_copy(PyGpuArrayObject *out,
return
out
;
return
out
;
}
}
/* This is guaranteed to work and return the raw CUDA/OpenCL object on
* all recent (as of June 2015) version of libgpuarray. This is also
* promised to keep working in future versions. */
#define PyGpuArray_DEV_DATA(ary) (*(void **)((ary)->ga.data))
#endif
#endif
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论