Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
P
pytensor
项目
项目
详情
活动
周期分析
仓库
仓库
文件
提交
分支
标签
贡献者
图表
比较
统计图
议题
0
议题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
日程
统计图
Wiki
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
testgroup
pytensor
Commits
3ae18f61
提交
3ae18f61
authored
4月 12, 2016
作者:
Frederic Bastien
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Use cuDNN instead of CuDNN.
上级
5855468f
隐藏空白字符变更
内嵌
并排
正在显示
4 个修改的文件
包含
67 行增加
和
65 行删除
+67
-65
__init__.py
theano/sandbox/cuda/__init__.py
+5
-3
dnn.py
theano/sandbox/cuda/dnn.py
+31
-31
__init__.py
theano/sandbox/gpuarray/__init__.py
+4
-4
dnn.py
theano/sandbox/gpuarray/dnn.py
+27
-27
没有找到文件。
theano/sandbox/cuda/__init__.py
浏览文件 @
3ae18f61
...
...
@@ -329,12 +329,14 @@ if ((err = cudnnCreate(&_handle)) != CUDNN_STATUS_SUCCESS) {
if
v
==
-
1
or
v
[
0
]
<
4007
:
# 4007 is the final release of cudnn v4
dnn_available
.
avail
=
False
dnn_available
.
msg
=
"Version too old. Update to v5, was
%
d"
%
v
[
0
]
dnn_available
.
msg
=
"Version too old. Update to v5, was
%
d
.
"
%
v
[
0
]
raise
RuntimeError
(
dnn_available
.
msg
)
else
:
dnn_available
.
avail
=
comp
if
config
.
dnn
.
enabled
==
"True"
:
if
not
dnn_available
.
avail
:
raise
RuntimeError
(
"You enabled
C
uDNN, but we aren't able to use it:
%
s"
%
"You enabled
c
uDNN, but we aren't able to use it:
%
s"
%
dnn_available
.
msg
)
return
dnn_available
.
avail
...
...
@@ -587,7 +589,7 @@ def use(device,
" downgrading cuDNN to version 5."
)
except
Exception
:
cudnn_version
=
dnn_available
.
msg
print
(
"Using gpu device
%
d:
%
s (CNMeM is
%
s,
C
uDNN
%
s)"
%
(
print
(
"Using gpu device
%
d:
%
s (CNMeM is
%
s,
c
uDNN
%
s)"
%
(
active_device_number
(),
active_device_name
(),
cnmem_enabled
,
...
...
theano/sandbox/cuda/dnn.py
浏览文件 @
3ae18f61
...
...
@@ -322,30 +322,30 @@ class GpuDnnConv(DnnBase, COp):
if
self
.
inplace
:
self
.
destroy_map
=
{
0
:
[
2
]}
# In
C
uDNN version older than V3, the FFT implementation and the
# In
c
uDNN version older than V3, the FFT implementation and the
# option to time the different implementations to get the fastest
# are both unavailable.
if
version
()
<
(
3000
,
3000
):
if
self
.
algo
==
'fft'
:
raise
RuntimeError
(
"
CuDNN FFT convolution requires C
uDNN v3"
)
raise
RuntimeError
(
"
cuDNN FFT convolution requires c
uDNN v3"
)
elif
self
.
algo
in
[
'guess_once'
,
'guess_on_shape_change'
]:
raise
RuntimeError
(
"
C
uDNN selection of convolution "
raise
RuntimeError
(
"
c
uDNN selection of convolution "
"implementation based on heuristics "
"requires
C
uDNN v3"
)
"requires
c
uDNN v3"
)
elif
self
.
algo
in
[
'time_once'
,
'time_on_shape_change'
]:
raise
RuntimeError
(
"
CuDNN convolution timing requires C
uDNN "
raise
RuntimeError
(
"
cuDNN convolution timing requires c
uDNN "
"v3"
)
# The fft_tiling implementation is only available from
C
uDNN V4 onward
# The fft_tiling implementation is only available from
c
uDNN V4 onward
if
version
()
<
(
4000
,
4000
):
if
self
.
algo
==
'fft_tiling'
:
raise
RuntimeError
(
"
C
uDNN tiled-FFT convolution requires "
"
C
uDNN v4 or more recent"
)
raise
RuntimeError
(
"
c
uDNN tiled-FFT convolution requires "
"
c
uDNN v4 or more recent"
)
if
version
()
<
(
5000
,
5000
):
if
self
.
algo
==
'winograd'
:
raise
RuntimeError
(
"
C
uDNN winograd convolution requires "
"
C
uDNN v5 or more recent"
)
raise
RuntimeError
(
"
c
uDNN winograd convolution requires "
"
c
uDNN v5 or more recent"
)
assert
self
.
algo
in
[
'none'
,
'small'
,
'large'
,
'fft'
,
'fft_tiling'
,
'winograd'
,
'guess_once'
,
'guess_on_shape_change'
,
...
...
@@ -516,11 +516,11 @@ class GpuDnnConv3d(GpuDnnConv):
if
version
()
<
(
5000
,
5000
):
if
self
.
algo
==
'fft_tiling'
:
raise
RuntimeError
(
"
C
uDNN 3d tiled-FFT convolution requires "
"
C
uDNN v5 or more recent"
)
raise
RuntimeError
(
"
c
uDNN 3d tiled-FFT convolution requires "
"
c
uDNN v5 or more recent"
)
elif
self
.
algo
==
'winograd'
:
raise
RuntimeError
(
"
C
uDNN 3d winograd convolution requires "
"
C
uDNN v5 or more recent"
)
raise
RuntimeError
(
"
c
uDNN 3d winograd convolution requires "
"
c
uDNN v5 or more recent"
)
def
make_node
(
self
,
img
,
kern
,
output
,
desc
,
alpha
=
None
,
beta
=
None
):
...
...
@@ -833,17 +833,17 @@ class GpuDnnConvGradI(DnnBase, COp):
if
self
.
inplace
:
self
.
destroy_map
=
{
0
:
[
2
]}
# The small-workspace implementation is only available from
C
uDNN V4
# The small-workspace implementation is only available from
c
uDNN V4
# onward.
if
version
()
<
(
4000
,
4000
):
if
self
.
algo
==
'fft_tiling'
:
raise
RuntimeError
(
"
C
uDNN's tiled-FFT convolution requires "
"
C
uDNN v4 or more recent"
)
raise
RuntimeError
(
"
c
uDNN's tiled-FFT convolution requires "
"
c
uDNN v4 or more recent"
)
if
version
()
<
(
5000
,
5000
):
if
self
.
algo
==
'winograd'
:
raise
RuntimeError
(
"
C
uDNN's winograd convolution requires "
"
C
uDNN v5 or more recent"
)
raise
RuntimeError
(
"
c
uDNN's winograd convolution requires "
"
c
uDNN v5 or more recent"
)
assert
self
.
algo
in
[
'none'
,
'deterministic'
,
'fft'
,
'fft_tiling'
,
'winograd'
,
'guess_once'
,
'guess_on_shape_change'
,
...
...
@@ -996,11 +996,11 @@ class GpuDnnConv3dGradI(GpuDnnConvGradI):
assert
self
.
algo
in
good_algo
if
version
()
<
(
5000
,
5000
):
if
self
.
algo
==
'fft_tiling'
:
raise
RuntimeError
(
"
C
uDNN 3d tiled-FFT convolution requires "
"
C
uDNN v5 or more recent"
)
raise
RuntimeError
(
"
c
uDNN 3d tiled-FFT convolution requires "
"
c
uDNN v5 or more recent"
)
elif
self
.
algo
==
'winograd'
:
raise
RuntimeError
(
"
C
uDNN 3d winograd convolution requires "
"
C
uDNN v5 or more recent"
)
raise
RuntimeError
(
"
c
uDNN 3d winograd convolution requires "
"
c
uDNN v5 or more recent"
)
def
grad
(
self
,
inp
,
grads
):
kerns
,
top
,
output
,
desc
,
alpha
,
beta
=
inp
...
...
@@ -1078,7 +1078,7 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1),
*deprecated*, use parameter algo instead.
algo : {'none', 'small', 'large', 'fft', 'guess_once', 'guess_on_shape_change', 'time_once', 'time_on_shape_change'}
Convolution implementation to use. Some of its values may require certain
versions of
C
uDNN to be installed. Default is the value of
versions of
c
uDNN to be installed. Default is the value of
:attr:`config.dnn.conv.algo_fwd`.
precision : {'as_input', 'float16', 'float32', 'float64'}
Description of the dtype in which the computation of the convolution
...
...
@@ -1364,13 +1364,13 @@ class GpuDnnPoolDesc(GpuOp):
self
.
pad
=
pad
if
(
pad
[
0
]
!=
0
or
pad
[
1
]
!=
0
)
and
version
()
==
-
1
:
raise
RuntimeError
(
"
CuDNN pooling with padding requires C
uDNN v2"
)
raise
RuntimeError
(
"
cuDNN pooling with padding requires c
uDNN v2"
)
if
self
.
get_ndim
()
==
3
and
version
()
<
(
3000
,
3000
):
raise
RuntimeError
(
"
CuDNN 3d pooling requires C
uDNN v3"
)
raise
RuntimeError
(
"
cuDNN 3d pooling requires c
uDNN v3"
)
if
(
mode
==
'average_exc_pad'
and
max
(
pad
)
>
0
and
version
()
<
(
4004
,
4004
)):
raise
RuntimeError
(
"
C
uDNN pooling mode 'average_exc_pad' requires at least v4"
)
"
c
uDNN pooling mode 'average_exc_pad' requires at least v4"
)
def
get_ndim
(
self
):
return
len
(
self
.
ws
)
...
...
@@ -1382,7 +1382,7 @@ class GpuDnnPoolDesc(GpuOp):
def
make_node
(
self
):
if
self
.
pad
!=
(
0
,
0
)
and
version
()
==
-
1
:
raise
RuntimeError
(
"
CuDNN pooling with padding requires C
uDNN v2"
)
raise
RuntimeError
(
"
cuDNN pooling with padding requires c
uDNN v2"
)
node
=
Apply
(
self
,
[],
[
CDataType
(
"cudnnPoolingDescriptor_t"
,
...
...
@@ -1982,7 +1982,7 @@ class GpuDnnSoftmaxBase(DnnBase):
Always set this to 'bc01'.
algo : {'fast', 'accurate', 'log'}
Indicating whether, respectively, computations should be optimized for
speed, for accuracy, or if
C
uDNN should rather compute the log-softmax instead.
speed, for accuracy, or if
c
uDNN should rather compute the log-softmax instead.
mode : {'instance', 'channel'}
Indicating whether the softmax should be computed per image across 'c01'
or per spatial location '01' per image across 'c'.
...
...
@@ -2003,7 +2003,7 @@ class GpuDnnSoftmaxBase(DnnBase):
self
.
tensor_format
=
tensor_format
if
algo
==
'log'
and
version
()
<
(
3000
,
3000
):
raise
RuntimeError
(
"
CuDNN log-softmax requires C
uDNN v3"
)
raise
RuntimeError
(
"
cuDNN log-softmax requires c
uDNN v3"
)
assert
(
algo
in
(
'fast'
,
'accurate'
,
'log'
))
self
.
algo
=
algo
...
...
@@ -2525,7 +2525,7 @@ if True:
@register_opt
(
'cudnn'
)
@local_optimizer
([
GpuElemwise
,
LogSoftmax
])
def
local_log_softmax_dnn
(
node
):
# The log-softmax implementation is only available starting at
C
uDNN V3
# The log-softmax implementation is only available starting at
c
uDNN V3
if
not
dnn_available
()
or
version
()
<
(
3000
,
3000
):
return
...
...
theano/sandbox/gpuarray/__init__.py
浏览文件 @
3ae18f61
...
...
@@ -68,15 +68,15 @@ def init_dev(dev, name=None):
warn
=
None
cudnn_version
=
""
if
dev
.
startswith
(
'cuda'
):
cudnn_version
=
" (
C
uDNN not available)"
cudnn_version
=
" (
c
uDNN not available)"
try
:
cudnn_version
=
dnn
.
version
()
# 5100 should not print warning with cudnn 5 final.
if
cudnn_version
>
5100
:
warn
=
(
"Your
C
uDNN version is more recent than Theano."
warn
=
(
"Your
c
uDNN version is more recent than Theano."
" If you see problems, try updating Theano or"
" downgrading
C
uDNN to version 5."
)
cudnn_version
=
" (
C
uDNN version
%
s)"
%
cudnn_version
" downgrading
c
uDNN to version 5."
)
cudnn_version
=
" (
c
uDNN version
%
s)"
%
cudnn_version
except
Exception
:
cudnn_version
=
dnn
.
dnn_present
.
msg
print
(
"Mapped name
%
s to device
%
s:
%
s
%
s"
%
(
...
...
theano/sandbox/gpuarray/dnn.py
浏览文件 @
3ae18f61
...
...
@@ -36,7 +36,7 @@ from .opt_util import alpha_merge, output_merge, inplace_allocempty
from
theano.configdefaults
import
SUPPORTED_DNN_CONV_ALGO_BWD_FILTER
def
raise_no_cudnn
(
msg
=
"
C
uDNN is required for convolution and pooling"
):
def
raise_no_cudnn
(
msg
=
"
c
uDNN is required for convolution and pooling"
):
raise
RuntimeError
(
msg
)
...
...
@@ -74,7 +74,7 @@ if ((err = cudnnCreate(&_handle)) != CUDNN_STATUS_SUCCESS) {
try_run
=
False
,
output
=
True
)
if
not
avail
:
return
False
,
(
"cannot compile with
C
uDNN. "
return
False
,
(
"cannot compile with
c
uDNN. "
"We got this error:
\n
"
+
str
(
err
))
return
True
,
None
...
...
@@ -108,7 +108,7 @@ def dnn_present():
if
config
.
dnn
.
enabled
==
"True"
:
if
not
dnn_present
.
avail
:
raise
RuntimeError
(
"You enabled
C
uDNN, but we aren't able to use it:
%
s"
%
"You enabled
c
uDNN, but we aren't able to use it:
%
s"
%
dnn_present
.
msg
)
return
dnn_present
.
avail
...
...
@@ -229,7 +229,7 @@ def version(raises=True):
This also does a check that the header version matches the runtime version.
:raises: If True, raise an exception if
C
uDNN is not present or badly installed.
:raises: If True, raise an exception if
c
uDNN is not present or badly installed.
Otherwise, return -1.
"""
...
...
@@ -421,23 +421,23 @@ class GpuDnnConv(DnnBase):
if
version
()
<
3000
:
if
self
.
algo
==
'fft'
:
raise
RuntimeError
(
"
CuDNN FFT convolution requires C
uDNN v3"
)
raise
RuntimeError
(
"
cuDNN FFT convolution requires c
uDNN v3"
)
elif
self
.
algo
in
[
'guess_once'
,
'guess_on_shape_change'
]:
raise
RuntimeError
(
"
C
uDNN selection of convolution "
raise
RuntimeError
(
"
c
uDNN selection of convolution "
"implementation based on heuristics "
"requires
C
uDNN v3"
)
"requires
c
uDNN v3"
)
elif
self
.
algo
in
[
'time_once'
,
'time_on_shape_change'
]:
raise
RuntimeError
(
"
CuDNN convolution timing requires C
uDNN v3"
)
raise
RuntimeError
(
"
cuDNN convolution timing requires c
uDNN v3"
)
# The fft_tiling implementation is only available from
C
uDNN V4 onward
# The fft_tiling implementation is only available from
c
uDNN V4 onward
if
version
()
<
4000
:
if
self
.
algo
==
'fft_tiling'
:
raise
RuntimeError
(
"
C
uDNN tiled-FFT convolution requires "
"
C
uDNN v4 or more recent"
)
raise
RuntimeError
(
"
c
uDNN tiled-FFT convolution requires "
"
c
uDNN v4 or more recent"
)
if
version
()
<
5000
and
self
.
algo
==
'winograd'
:
raise
RuntimeError
(
"
C
uDNN winograd convolution requires "
"
C
uDNN v5 or more recent"
)
raise
RuntimeError
(
"
c
uDNN winograd convolution requires "
"
c
uDNN v5 or more recent"
)
assert
self
.
algo
in
[
'none'
,
'small'
,
'large'
,
'fft'
,
'fft_tiling'
,
'winograd'
,
'guess_once'
,
'guess_on_shape_change'
,
...
...
@@ -714,13 +714,13 @@ class GpuDnnConvGradI(DnnBase):
algo
=
config
.
dnn
.
conv
.
algo_bwd_data
self
.
algo
=
algo
# The small-workspace implementation is only available from
C
uDNN V4
# The small-workspace implementation is only available from
c
uDNN V4
# onward.
if
version
()
<
4000
and
self
.
algo
==
'fft_tiling'
:
raise
RuntimeError
(
"
CuDNN's tiled-FFT convolution requires C
uDNN "
raise
RuntimeError
(
"
cuDNN's tiled-FFT convolution requires c
uDNN "
"v4 or more recent"
)
if
version
()
<
5000
and
self
.
algo
==
'winograd'
:
raise
RuntimeError
(
"
CuDNN's winograd convolution requires C
uDNN "
raise
RuntimeError
(
"
cuDNN's winograd convolution requires c
uDNN "
"v5 or more recent"
)
assert
self
.
algo
in
[
'none'
,
'deterministic'
,
'fft'
,
'fft_tiling'
,
...
...
@@ -856,7 +856,7 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1),
removed at any time without a deprecation period. You have been warned.
algo : {'none', 'small', 'large', 'fft', 'guess_once', 'guess_on_shape_change', 'time_once', 'time_on_shape_change'}
Convolution implementation to use. Some of its values may
require certain versions of
C
uDNN to be installed. Default is
require certain versions of
c
uDNN to be installed. Default is
the value of :attr:`config.dnn.conv.algo_fwd`.
precision : {'as_input', 'float16', 'float32', 'float64'}
Description of the dtype in which the computation of the convolution
...
...
@@ -1014,10 +1014,10 @@ class GpuDnnPoolDesc(Op):
self
.
pad
=
pad
if
self
.
get_ndim
()
==
3
and
version
()
<
3000
:
raise
RuntimeError
(
"
C
uDNN 3d pooling requires v3"
)
raise
RuntimeError
(
"
c
uDNN 3d pooling requires v3"
)
if
mode
==
'average_exc_pad'
and
max
(
pad
)
>
0
and
version
()
<
4004
:
raise
RuntimeError
(
"
C
uDNN pooling mode 'average_exc_pad' requires at least v4"
)
"
c
uDNN pooling mode 'average_exc_pad' requires at least v4"
)
def
get_ndim
(
self
):
return
len
(
self
.
ws
)
...
...
@@ -1277,7 +1277,7 @@ class GpuDnnSoftmaxBase(DnnBase):
----------
algo : {'fast', 'accurate', 'log'}
Indicating whether, respectively, computations should be optimized for
speed, for accuracy, or if
C
uDNN should rather compute the log-softmax instead.
speed, for accuracy, or if
c
uDNN should rather compute the log-softmax instead.
mode : {'instance', 'channel'}
Indicating whether the softmax should be computed per image across 'c01'
or per spatial location '01' per image across 'c'.
...
...
@@ -1291,7 +1291,7 @@ class GpuDnnSoftmaxBase(DnnBase):
assert
(
algo
in
(
'fast'
,
'accurate'
,
'log'
))
if
algo
==
'log'
and
version
(
raises
=
False
)
<
3000
:
raise
RuntimeError
(
"Need
C
uDNN v3 for log-softmax"
)
raise
RuntimeError
(
"Need
c
uDNN v3 for log-softmax"
)
self
.
algo
=
algo
assert
(
mode
in
(
'instance'
,
'channel'
))
...
...
@@ -1326,7 +1326,7 @@ class GpuDnnSoftmax(GpuDnnSoftmaxBase):
algo : {'fast', 'accurate', 'log'}
Indicating whether, respectively, computations should be optimized for
speed, for accuracy, or if
C
uDNN should rather compute the log-softmax instead.
speed, for accuracy, or if
c
uDNN should rather compute the log-softmax instead.
mode : {'instance', 'channel'}
Indicating whether the softmax should be computed per image across 'c01'
or per spatial location '01' per image across 'c'.
...
...
@@ -1360,7 +1360,7 @@ class GpuDnnSoftmaxGrad(GpuDnnSoftmaxBase):
----------
algo
'fast', 'accurate' or 'log' indicating whether, respectively,
computations should be optimized for speed, for accuracy, or if
C
uDNN
computations should be optimized for speed, for accuracy, or if
c
uDNN
should rather compute the gradient of the log-softmax instead.
mode
'instance' or 'channel' indicating whether the softmax should
...
...
@@ -1549,7 +1549,7 @@ def local_avg_pool_dnn_grad_stride(node, ctx_name):
cg
=
gpu_contiguous
(
out_grad
)
# We reuse cg because
C
uDNN does not use the value of the `out`
# We reuse cg because
c
uDNN does not use the value of the `out`
# argument but still checks its shape for average pooling. This
# has been observed in v2 and v3 as far as I know.
return
GpuDnnPoolGrad
(
mode
=
mode
)(
gpu_contiguous
(
inp
),
cg
,
cg
,
ds
,
st
,
pad
)
...
...
@@ -1579,7 +1579,7 @@ def local_log_softmax_dnn(node):
len
(
node
.
inputs
[
0
]
.
clients
)
==
1
):
if
version
(
raises
=
False
)
<
3000
:
# No log-softmax before cudnn v3
raise_no_cudnn
(
"Need
C
uDNN v3 for LogSoftmax"
)
raise_no_cudnn
(
"Need
c
uDNN v3 for LogSoftmax"
)
softmax_node
=
node
.
inputs
[
0
]
.
owner
new_softmax
=
GpuDnnSoftmax
(
'log'
,
softmax_node
.
op
.
mode
)
return
[
new_softmax
(
softmax_node
.
inputs
[
0
])]
...
...
@@ -1594,7 +1594,7 @@ def local_logsoftmax_to_dnn(node, ctx_name):
return
if
not
dnn_available
(
ctx_name
)
or
version
(
raises
=
False
)
<
3000
:
# No log-softmax before cudnn v3
raise_no_cudnn
(
"Need
C
uDNN v3 for LogSoftmax"
)
raise_no_cudnn
(
"Need
c
uDNN v3 for LogSoftmax"
)
inp
=
inp
.
dimshuffle
(
0
,
1
,
'x'
,
'x'
)
inp
.
tag
.
context_name
=
ctx_name
...
...
@@ -1627,7 +1627,7 @@ gpu_seqopt.register("NoCuDNNRaise", NoCuDNNRaise(), 0, 'cudnn')
@op_lifter
([
SoftmaxGrad
])
def
local_softmax_dnn_grad
(
node
,
ctx_name
):
if
not
dnn_available
(
ctx_name
):
raise_no_cudnn
(
"
C
uDNN needed for SoftmaxGrad"
)
raise_no_cudnn
(
"
c
uDNN needed for SoftmaxGrad"
)
ins
=
[]
for
n
in
node
.
inputs
:
n
=
as_gpuarray_variable
(
n
,
ctx_name
)
...
...
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论