Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
P
pytensor
项目
项目
详情
活动
周期分析
仓库
仓库
文件
提交
分支
标签
贡献者
图表
比较
统计图
议题
0
议题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
日程
统计图
Wiki
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
testgroup
pytensor
Commits
3ae18f61
提交
3ae18f61
authored
4月 12, 2016
作者:
Frederic Bastien
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Use cuDNN instead of CuDNN.
上级
5855468f
隐藏空白字符变更
内嵌
并排
正在显示
4 个修改的文件
包含
67 行增加
和
65 行删除
+67
-65
__init__.py
theano/sandbox/cuda/__init__.py
+5
-3
dnn.py
theano/sandbox/cuda/dnn.py
+31
-31
__init__.py
theano/sandbox/gpuarray/__init__.py
+4
-4
dnn.py
theano/sandbox/gpuarray/dnn.py
+27
-27
没有找到文件。
theano/sandbox/cuda/__init__.py
浏览文件 @
3ae18f61
...
@@ -329,12 +329,14 @@ if ((err = cudnnCreate(&_handle)) != CUDNN_STATUS_SUCCESS) {
...
@@ -329,12 +329,14 @@ if ((err = cudnnCreate(&_handle)) != CUDNN_STATUS_SUCCESS) {
if
v
==
-
1
or
v
[
0
]
<
4007
:
if
v
==
-
1
or
v
[
0
]
<
4007
:
# 4007 is the final release of cudnn v4
# 4007 is the final release of cudnn v4
dnn_available
.
avail
=
False
dnn_available
.
avail
=
False
dnn_available
.
msg
=
"Version too old. Update to v5, was
%
d"
%
v
[
0
]
dnn_available
.
msg
=
"Version too old. Update to v5, was
%
d
.
"
%
v
[
0
]
raise
RuntimeError
(
dnn_available
.
msg
)
raise
RuntimeError
(
dnn_available
.
msg
)
else
:
dnn_available
.
avail
=
comp
if
config
.
dnn
.
enabled
==
"True"
:
if
config
.
dnn
.
enabled
==
"True"
:
if
not
dnn_available
.
avail
:
if
not
dnn_available
.
avail
:
raise
RuntimeError
(
raise
RuntimeError
(
"You enabled
C
uDNN, but we aren't able to use it:
%
s"
%
"You enabled
c
uDNN, but we aren't able to use it:
%
s"
%
dnn_available
.
msg
)
dnn_available
.
msg
)
return
dnn_available
.
avail
return
dnn_available
.
avail
...
@@ -587,7 +589,7 @@ def use(device,
...
@@ -587,7 +589,7 @@ def use(device,
" downgrading cuDNN to version 5."
)
" downgrading cuDNN to version 5."
)
except
Exception
:
except
Exception
:
cudnn_version
=
dnn_available
.
msg
cudnn_version
=
dnn_available
.
msg
print
(
"Using gpu device
%
d:
%
s (CNMeM is
%
s,
C
uDNN
%
s)"
%
(
print
(
"Using gpu device
%
d:
%
s (CNMeM is
%
s,
c
uDNN
%
s)"
%
(
active_device_number
(),
active_device_number
(),
active_device_name
(),
active_device_name
(),
cnmem_enabled
,
cnmem_enabled
,
...
...
theano/sandbox/cuda/dnn.py
浏览文件 @
3ae18f61
...
@@ -322,30 +322,30 @@ class GpuDnnConv(DnnBase, COp):
...
@@ -322,30 +322,30 @@ class GpuDnnConv(DnnBase, COp):
if
self
.
inplace
:
if
self
.
inplace
:
self
.
destroy_map
=
{
0
:
[
2
]}
self
.
destroy_map
=
{
0
:
[
2
]}
# In
C
uDNN version older than V3, the FFT implementation and the
# In
c
uDNN version older than V3, the FFT implementation and the
# option to time the different implementations to get the fastest
# option to time the different implementations to get the fastest
# are both unavailable.
# are both unavailable.
if
version
()
<
(
3000
,
3000
):
if
version
()
<
(
3000
,
3000
):
if
self
.
algo
==
'fft'
:
if
self
.
algo
==
'fft'
:
raise
RuntimeError
(
"
CuDNN FFT convolution requires C
uDNN v3"
)
raise
RuntimeError
(
"
cuDNN FFT convolution requires c
uDNN v3"
)
elif
self
.
algo
in
[
'guess_once'
,
'guess_on_shape_change'
]:
elif
self
.
algo
in
[
'guess_once'
,
'guess_on_shape_change'
]:
raise
RuntimeError
(
"
C
uDNN selection of convolution "
raise
RuntimeError
(
"
c
uDNN selection of convolution "
"implementation based on heuristics "
"implementation based on heuristics "
"requires
C
uDNN v3"
)
"requires
c
uDNN v3"
)
elif
self
.
algo
in
[
'time_once'
,
'time_on_shape_change'
]:
elif
self
.
algo
in
[
'time_once'
,
'time_on_shape_change'
]:
raise
RuntimeError
(
"
CuDNN convolution timing requires C
uDNN "
raise
RuntimeError
(
"
cuDNN convolution timing requires c
uDNN "
"v3"
)
"v3"
)
# The fft_tiling implementation is only available from
C
uDNN V4 onward
# The fft_tiling implementation is only available from
c
uDNN V4 onward
if
version
()
<
(
4000
,
4000
):
if
version
()
<
(
4000
,
4000
):
if
self
.
algo
==
'fft_tiling'
:
if
self
.
algo
==
'fft_tiling'
:
raise
RuntimeError
(
"
C
uDNN tiled-FFT convolution requires "
raise
RuntimeError
(
"
c
uDNN tiled-FFT convolution requires "
"
C
uDNN v4 or more recent"
)
"
c
uDNN v4 or more recent"
)
if
version
()
<
(
5000
,
5000
):
if
version
()
<
(
5000
,
5000
):
if
self
.
algo
==
'winograd'
:
if
self
.
algo
==
'winograd'
:
raise
RuntimeError
(
"
C
uDNN winograd convolution requires "
raise
RuntimeError
(
"
c
uDNN winograd convolution requires "
"
C
uDNN v5 or more recent"
)
"
c
uDNN v5 or more recent"
)
assert
self
.
algo
in
[
'none'
,
'small'
,
'large'
,
'fft'
,
'fft_tiling'
,
assert
self
.
algo
in
[
'none'
,
'small'
,
'large'
,
'fft'
,
'fft_tiling'
,
'winograd'
,
'guess_once'
,
'guess_on_shape_change'
,
'winograd'
,
'guess_once'
,
'guess_on_shape_change'
,
...
@@ -516,11 +516,11 @@ class GpuDnnConv3d(GpuDnnConv):
...
@@ -516,11 +516,11 @@ class GpuDnnConv3d(GpuDnnConv):
if
version
()
<
(
5000
,
5000
):
if
version
()
<
(
5000
,
5000
):
if
self
.
algo
==
'fft_tiling'
:
if
self
.
algo
==
'fft_tiling'
:
raise
RuntimeError
(
"
C
uDNN 3d tiled-FFT convolution requires "
raise
RuntimeError
(
"
c
uDNN 3d tiled-FFT convolution requires "
"
C
uDNN v5 or more recent"
)
"
c
uDNN v5 or more recent"
)
elif
self
.
algo
==
'winograd'
:
elif
self
.
algo
==
'winograd'
:
raise
RuntimeError
(
"
C
uDNN 3d winograd convolution requires "
raise
RuntimeError
(
"
c
uDNN 3d winograd convolution requires "
"
C
uDNN v5 or more recent"
)
"
c
uDNN v5 or more recent"
)
def
make_node
(
self
,
img
,
kern
,
output
,
desc
,
alpha
=
None
,
beta
=
None
):
def
make_node
(
self
,
img
,
kern
,
output
,
desc
,
alpha
=
None
,
beta
=
None
):
...
@@ -833,17 +833,17 @@ class GpuDnnConvGradI(DnnBase, COp):
...
@@ -833,17 +833,17 @@ class GpuDnnConvGradI(DnnBase, COp):
if
self
.
inplace
:
if
self
.
inplace
:
self
.
destroy_map
=
{
0
:
[
2
]}
self
.
destroy_map
=
{
0
:
[
2
]}
# The small-workspace implementation is only available from
C
uDNN V4
# The small-workspace implementation is only available from
c
uDNN V4
# onward.
# onward.
if
version
()
<
(
4000
,
4000
):
if
version
()
<
(
4000
,
4000
):
if
self
.
algo
==
'fft_tiling'
:
if
self
.
algo
==
'fft_tiling'
:
raise
RuntimeError
(
"
C
uDNN's tiled-FFT convolution requires "
raise
RuntimeError
(
"
c
uDNN's tiled-FFT convolution requires "
"
C
uDNN v4 or more recent"
)
"
c
uDNN v4 or more recent"
)
if
version
()
<
(
5000
,
5000
):
if
version
()
<
(
5000
,
5000
):
if
self
.
algo
==
'winograd'
:
if
self
.
algo
==
'winograd'
:
raise
RuntimeError
(
"
C
uDNN's winograd convolution requires "
raise
RuntimeError
(
"
c
uDNN's winograd convolution requires "
"
C
uDNN v5 or more recent"
)
"
c
uDNN v5 or more recent"
)
assert
self
.
algo
in
[
'none'
,
'deterministic'
,
'fft'
,
'fft_tiling'
,
assert
self
.
algo
in
[
'none'
,
'deterministic'
,
'fft'
,
'fft_tiling'
,
'winograd'
,
'guess_once'
,
'guess_on_shape_change'
,
'winograd'
,
'guess_once'
,
'guess_on_shape_change'
,
...
@@ -996,11 +996,11 @@ class GpuDnnConv3dGradI(GpuDnnConvGradI):
...
@@ -996,11 +996,11 @@ class GpuDnnConv3dGradI(GpuDnnConvGradI):
assert
self
.
algo
in
good_algo
assert
self
.
algo
in
good_algo
if
version
()
<
(
5000
,
5000
):
if
version
()
<
(
5000
,
5000
):
if
self
.
algo
==
'fft_tiling'
:
if
self
.
algo
==
'fft_tiling'
:
raise
RuntimeError
(
"
C
uDNN 3d tiled-FFT convolution requires "
raise
RuntimeError
(
"
c
uDNN 3d tiled-FFT convolution requires "
"
C
uDNN v5 or more recent"
)
"
c
uDNN v5 or more recent"
)
elif
self
.
algo
==
'winograd'
:
elif
self
.
algo
==
'winograd'
:
raise
RuntimeError
(
"
C
uDNN 3d winograd convolution requires "
raise
RuntimeError
(
"
c
uDNN 3d winograd convolution requires "
"
C
uDNN v5 or more recent"
)
"
c
uDNN v5 or more recent"
)
def
grad
(
self
,
inp
,
grads
):
def
grad
(
self
,
inp
,
grads
):
kerns
,
top
,
output
,
desc
,
alpha
,
beta
=
inp
kerns
,
top
,
output
,
desc
,
alpha
,
beta
=
inp
...
@@ -1078,7 +1078,7 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1),
...
@@ -1078,7 +1078,7 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1),
*deprecated*, use parameter algo instead.
*deprecated*, use parameter algo instead.
algo : {'none', 'small', 'large', 'fft', 'guess_once', 'guess_on_shape_change', 'time_once', 'time_on_shape_change'}
algo : {'none', 'small', 'large', 'fft', 'guess_once', 'guess_on_shape_change', 'time_once', 'time_on_shape_change'}
Convolution implementation to use. Some of its values may require certain
Convolution implementation to use. Some of its values may require certain
versions of
C
uDNN to be installed. Default is the value of
versions of
c
uDNN to be installed. Default is the value of
:attr:`config.dnn.conv.algo_fwd`.
:attr:`config.dnn.conv.algo_fwd`.
precision : {'as_input', 'float16', 'float32', 'float64'}
precision : {'as_input', 'float16', 'float32', 'float64'}
Description of the dtype in which the computation of the convolution
Description of the dtype in which the computation of the convolution
...
@@ -1364,13 +1364,13 @@ class GpuDnnPoolDesc(GpuOp):
...
@@ -1364,13 +1364,13 @@ class GpuDnnPoolDesc(GpuOp):
self
.
pad
=
pad
self
.
pad
=
pad
if
(
pad
[
0
]
!=
0
or
pad
[
1
]
!=
0
)
and
version
()
==
-
1
:
if
(
pad
[
0
]
!=
0
or
pad
[
1
]
!=
0
)
and
version
()
==
-
1
:
raise
RuntimeError
(
"
CuDNN pooling with padding requires C
uDNN v2"
)
raise
RuntimeError
(
"
cuDNN pooling with padding requires c
uDNN v2"
)
if
self
.
get_ndim
()
==
3
and
version
()
<
(
3000
,
3000
):
if
self
.
get_ndim
()
==
3
and
version
()
<
(
3000
,
3000
):
raise
RuntimeError
(
"
CuDNN 3d pooling requires C
uDNN v3"
)
raise
RuntimeError
(
"
cuDNN 3d pooling requires c
uDNN v3"
)
if
(
mode
==
'average_exc_pad'
and
max
(
pad
)
>
0
and
if
(
mode
==
'average_exc_pad'
and
max
(
pad
)
>
0
and
version
()
<
(
4004
,
4004
)):
version
()
<
(
4004
,
4004
)):
raise
RuntimeError
(
raise
RuntimeError
(
"
C
uDNN pooling mode 'average_exc_pad' requires at least v4"
)
"
c
uDNN pooling mode 'average_exc_pad' requires at least v4"
)
def
get_ndim
(
self
):
def
get_ndim
(
self
):
return
len
(
self
.
ws
)
return
len
(
self
.
ws
)
...
@@ -1382,7 +1382,7 @@ class GpuDnnPoolDesc(GpuOp):
...
@@ -1382,7 +1382,7 @@ class GpuDnnPoolDesc(GpuOp):
def
make_node
(
self
):
def
make_node
(
self
):
if
self
.
pad
!=
(
0
,
0
)
and
version
()
==
-
1
:
if
self
.
pad
!=
(
0
,
0
)
and
version
()
==
-
1
:
raise
RuntimeError
(
"
CuDNN pooling with padding requires C
uDNN v2"
)
raise
RuntimeError
(
"
cuDNN pooling with padding requires c
uDNN v2"
)
node
=
Apply
(
self
,
[],
node
=
Apply
(
self
,
[],
[
CDataType
(
"cudnnPoolingDescriptor_t"
,
[
CDataType
(
"cudnnPoolingDescriptor_t"
,
...
@@ -1982,7 +1982,7 @@ class GpuDnnSoftmaxBase(DnnBase):
...
@@ -1982,7 +1982,7 @@ class GpuDnnSoftmaxBase(DnnBase):
Always set this to 'bc01'.
Always set this to 'bc01'.
algo : {'fast', 'accurate', 'log'}
algo : {'fast', 'accurate', 'log'}
Indicating whether, respectively, computations should be optimized for
Indicating whether, respectively, computations should be optimized for
speed, for accuracy, or if
C
uDNN should rather compute the log-softmax instead.
speed, for accuracy, or if
c
uDNN should rather compute the log-softmax instead.
mode : {'instance', 'channel'}
mode : {'instance', 'channel'}
Indicating whether the softmax should be computed per image across 'c01'
Indicating whether the softmax should be computed per image across 'c01'
or per spatial location '01' per image across 'c'.
or per spatial location '01' per image across 'c'.
...
@@ -2003,7 +2003,7 @@ class GpuDnnSoftmaxBase(DnnBase):
...
@@ -2003,7 +2003,7 @@ class GpuDnnSoftmaxBase(DnnBase):
self
.
tensor_format
=
tensor_format
self
.
tensor_format
=
tensor_format
if
algo
==
'log'
and
version
()
<
(
3000
,
3000
):
if
algo
==
'log'
and
version
()
<
(
3000
,
3000
):
raise
RuntimeError
(
"
CuDNN log-softmax requires C
uDNN v3"
)
raise
RuntimeError
(
"
cuDNN log-softmax requires c
uDNN v3"
)
assert
(
algo
in
(
'fast'
,
'accurate'
,
'log'
))
assert
(
algo
in
(
'fast'
,
'accurate'
,
'log'
))
self
.
algo
=
algo
self
.
algo
=
algo
...
@@ -2525,7 +2525,7 @@ if True:
...
@@ -2525,7 +2525,7 @@ if True:
@register_opt
(
'cudnn'
)
@register_opt
(
'cudnn'
)
@local_optimizer
([
GpuElemwise
,
LogSoftmax
])
@local_optimizer
([
GpuElemwise
,
LogSoftmax
])
def
local_log_softmax_dnn
(
node
):
def
local_log_softmax_dnn
(
node
):
# The log-softmax implementation is only available starting at
C
uDNN V3
# The log-softmax implementation is only available starting at
c
uDNN V3
if
not
dnn_available
()
or
version
()
<
(
3000
,
3000
):
if
not
dnn_available
()
or
version
()
<
(
3000
,
3000
):
return
return
...
...
theano/sandbox/gpuarray/__init__.py
浏览文件 @
3ae18f61
...
@@ -68,15 +68,15 @@ def init_dev(dev, name=None):
...
@@ -68,15 +68,15 @@ def init_dev(dev, name=None):
warn
=
None
warn
=
None
cudnn_version
=
""
cudnn_version
=
""
if
dev
.
startswith
(
'cuda'
):
if
dev
.
startswith
(
'cuda'
):
cudnn_version
=
" (
C
uDNN not available)"
cudnn_version
=
" (
c
uDNN not available)"
try
:
try
:
cudnn_version
=
dnn
.
version
()
cudnn_version
=
dnn
.
version
()
# 5100 should not print warning with cudnn 5 final.
# 5100 should not print warning with cudnn 5 final.
if
cudnn_version
>
5100
:
if
cudnn_version
>
5100
:
warn
=
(
"Your
C
uDNN version is more recent than Theano."
warn
=
(
"Your
c
uDNN version is more recent than Theano."
" If you see problems, try updating Theano or"
" If you see problems, try updating Theano or"
" downgrading
C
uDNN to version 5."
)
" downgrading
c
uDNN to version 5."
)
cudnn_version
=
" (
C
uDNN version
%
s)"
%
cudnn_version
cudnn_version
=
" (
c
uDNN version
%
s)"
%
cudnn_version
except
Exception
:
except
Exception
:
cudnn_version
=
dnn
.
dnn_present
.
msg
cudnn_version
=
dnn
.
dnn_present
.
msg
print
(
"Mapped name
%
s to device
%
s:
%
s
%
s"
%
(
print
(
"Mapped name
%
s to device
%
s:
%
s
%
s"
%
(
...
...
theano/sandbox/gpuarray/dnn.py
浏览文件 @
3ae18f61
...
@@ -36,7 +36,7 @@ from .opt_util import alpha_merge, output_merge, inplace_allocempty
...
@@ -36,7 +36,7 @@ from .opt_util import alpha_merge, output_merge, inplace_allocempty
from
theano.configdefaults
import
SUPPORTED_DNN_CONV_ALGO_BWD_FILTER
from
theano.configdefaults
import
SUPPORTED_DNN_CONV_ALGO_BWD_FILTER
def
raise_no_cudnn
(
msg
=
"
C
uDNN is required for convolution and pooling"
):
def
raise_no_cudnn
(
msg
=
"
c
uDNN is required for convolution and pooling"
):
raise
RuntimeError
(
msg
)
raise
RuntimeError
(
msg
)
...
@@ -74,7 +74,7 @@ if ((err = cudnnCreate(&_handle)) != CUDNN_STATUS_SUCCESS) {
...
@@ -74,7 +74,7 @@ if ((err = cudnnCreate(&_handle)) != CUDNN_STATUS_SUCCESS) {
try_run
=
False
,
output
=
True
)
try_run
=
False
,
output
=
True
)
if
not
avail
:
if
not
avail
:
return
False
,
(
"cannot compile with
C
uDNN. "
return
False
,
(
"cannot compile with
c
uDNN. "
"We got this error:
\n
"
+
str
(
err
))
"We got this error:
\n
"
+
str
(
err
))
return
True
,
None
return
True
,
None
...
@@ -108,7 +108,7 @@ def dnn_present():
...
@@ -108,7 +108,7 @@ def dnn_present():
if
config
.
dnn
.
enabled
==
"True"
:
if
config
.
dnn
.
enabled
==
"True"
:
if
not
dnn_present
.
avail
:
if
not
dnn_present
.
avail
:
raise
RuntimeError
(
raise
RuntimeError
(
"You enabled
C
uDNN, but we aren't able to use it:
%
s"
%
"You enabled
c
uDNN, but we aren't able to use it:
%
s"
%
dnn_present
.
msg
)
dnn_present
.
msg
)
return
dnn_present
.
avail
return
dnn_present
.
avail
...
@@ -229,7 +229,7 @@ def version(raises=True):
...
@@ -229,7 +229,7 @@ def version(raises=True):
This also does a check that the header version matches the runtime version.
This also does a check that the header version matches the runtime version.
:raises: If True, raise an exception if
C
uDNN is not present or badly installed.
:raises: If True, raise an exception if
c
uDNN is not present or badly installed.
Otherwise, return -1.
Otherwise, return -1.
"""
"""
...
@@ -421,23 +421,23 @@ class GpuDnnConv(DnnBase):
...
@@ -421,23 +421,23 @@ class GpuDnnConv(DnnBase):
if
version
()
<
3000
:
if
version
()
<
3000
:
if
self
.
algo
==
'fft'
:
if
self
.
algo
==
'fft'
:
raise
RuntimeError
(
"
CuDNN FFT convolution requires C
uDNN v3"
)
raise
RuntimeError
(
"
cuDNN FFT convolution requires c
uDNN v3"
)
elif
self
.
algo
in
[
'guess_once'
,
'guess_on_shape_change'
]:
elif
self
.
algo
in
[
'guess_once'
,
'guess_on_shape_change'
]:
raise
RuntimeError
(
"
C
uDNN selection of convolution "
raise
RuntimeError
(
"
c
uDNN selection of convolution "
"implementation based on heuristics "
"implementation based on heuristics "
"requires
C
uDNN v3"
)
"requires
c
uDNN v3"
)
elif
self
.
algo
in
[
'time_once'
,
'time_on_shape_change'
]:
elif
self
.
algo
in
[
'time_once'
,
'time_on_shape_change'
]:
raise
RuntimeError
(
"
CuDNN convolution timing requires C
uDNN v3"
)
raise
RuntimeError
(
"
cuDNN convolution timing requires c
uDNN v3"
)
# The fft_tiling implementation is only available from
C
uDNN V4 onward
# The fft_tiling implementation is only available from
c
uDNN V4 onward
if
version
()
<
4000
:
if
version
()
<
4000
:
if
self
.
algo
==
'fft_tiling'
:
if
self
.
algo
==
'fft_tiling'
:
raise
RuntimeError
(
"
C
uDNN tiled-FFT convolution requires "
raise
RuntimeError
(
"
c
uDNN tiled-FFT convolution requires "
"
C
uDNN v4 or more recent"
)
"
c
uDNN v4 or more recent"
)
if
version
()
<
5000
and
self
.
algo
==
'winograd'
:
if
version
()
<
5000
and
self
.
algo
==
'winograd'
:
raise
RuntimeError
(
"
C
uDNN winograd convolution requires "
raise
RuntimeError
(
"
c
uDNN winograd convolution requires "
"
C
uDNN v5 or more recent"
)
"
c
uDNN v5 or more recent"
)
assert
self
.
algo
in
[
'none'
,
'small'
,
'large'
,
'fft'
,
'fft_tiling'
,
assert
self
.
algo
in
[
'none'
,
'small'
,
'large'
,
'fft'
,
'fft_tiling'
,
'winograd'
,
'guess_once'
,
'guess_on_shape_change'
,
'winograd'
,
'guess_once'
,
'guess_on_shape_change'
,
...
@@ -714,13 +714,13 @@ class GpuDnnConvGradI(DnnBase):
...
@@ -714,13 +714,13 @@ class GpuDnnConvGradI(DnnBase):
algo
=
config
.
dnn
.
conv
.
algo_bwd_data
algo
=
config
.
dnn
.
conv
.
algo_bwd_data
self
.
algo
=
algo
self
.
algo
=
algo
# The small-workspace implementation is only available from
C
uDNN V4
# The small-workspace implementation is only available from
c
uDNN V4
# onward.
# onward.
if
version
()
<
4000
and
self
.
algo
==
'fft_tiling'
:
if
version
()
<
4000
and
self
.
algo
==
'fft_tiling'
:
raise
RuntimeError
(
"
CuDNN's tiled-FFT convolution requires C
uDNN "
raise
RuntimeError
(
"
cuDNN's tiled-FFT convolution requires c
uDNN "
"v4 or more recent"
)
"v4 or more recent"
)
if
version
()
<
5000
and
self
.
algo
==
'winograd'
:
if
version
()
<
5000
and
self
.
algo
==
'winograd'
:
raise
RuntimeError
(
"
CuDNN's winograd convolution requires C
uDNN "
raise
RuntimeError
(
"
cuDNN's winograd convolution requires c
uDNN "
"v5 or more recent"
)
"v5 or more recent"
)
assert
self
.
algo
in
[
'none'
,
'deterministic'
,
'fft'
,
'fft_tiling'
,
assert
self
.
algo
in
[
'none'
,
'deterministic'
,
'fft'
,
'fft_tiling'
,
...
@@ -856,7 +856,7 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1),
...
@@ -856,7 +856,7 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1),
removed at any time without a deprecation period. You have been warned.
removed at any time without a deprecation period. You have been warned.
algo : {'none', 'small', 'large', 'fft', 'guess_once', 'guess_on_shape_change', 'time_once', 'time_on_shape_change'}
algo : {'none', 'small', 'large', 'fft', 'guess_once', 'guess_on_shape_change', 'time_once', 'time_on_shape_change'}
Convolution implementation to use. Some of its values may
Convolution implementation to use. Some of its values may
require certain versions of
C
uDNN to be installed. Default is
require certain versions of
c
uDNN to be installed. Default is
the value of :attr:`config.dnn.conv.algo_fwd`.
the value of :attr:`config.dnn.conv.algo_fwd`.
precision : {'as_input', 'float16', 'float32', 'float64'}
precision : {'as_input', 'float16', 'float32', 'float64'}
Description of the dtype in which the computation of the convolution
Description of the dtype in which the computation of the convolution
...
@@ -1014,10 +1014,10 @@ class GpuDnnPoolDesc(Op):
...
@@ -1014,10 +1014,10 @@ class GpuDnnPoolDesc(Op):
self
.
pad
=
pad
self
.
pad
=
pad
if
self
.
get_ndim
()
==
3
and
version
()
<
3000
:
if
self
.
get_ndim
()
==
3
and
version
()
<
3000
:
raise
RuntimeError
(
"
C
uDNN 3d pooling requires v3"
)
raise
RuntimeError
(
"
c
uDNN 3d pooling requires v3"
)
if
mode
==
'average_exc_pad'
and
max
(
pad
)
>
0
and
version
()
<
4004
:
if
mode
==
'average_exc_pad'
and
max
(
pad
)
>
0
and
version
()
<
4004
:
raise
RuntimeError
(
raise
RuntimeError
(
"
C
uDNN pooling mode 'average_exc_pad' requires at least v4"
)
"
c
uDNN pooling mode 'average_exc_pad' requires at least v4"
)
def
get_ndim
(
self
):
def
get_ndim
(
self
):
return
len
(
self
.
ws
)
return
len
(
self
.
ws
)
...
@@ -1277,7 +1277,7 @@ class GpuDnnSoftmaxBase(DnnBase):
...
@@ -1277,7 +1277,7 @@ class GpuDnnSoftmaxBase(DnnBase):
----------
----------
algo : {'fast', 'accurate', 'log'}
algo : {'fast', 'accurate', 'log'}
Indicating whether, respectively, computations should be optimized for
Indicating whether, respectively, computations should be optimized for
speed, for accuracy, or if
C
uDNN should rather compute the log-softmax instead.
speed, for accuracy, or if
c
uDNN should rather compute the log-softmax instead.
mode : {'instance', 'channel'}
mode : {'instance', 'channel'}
Indicating whether the softmax should be computed per image across 'c01'
Indicating whether the softmax should be computed per image across 'c01'
or per spatial location '01' per image across 'c'.
or per spatial location '01' per image across 'c'.
...
@@ -1291,7 +1291,7 @@ class GpuDnnSoftmaxBase(DnnBase):
...
@@ -1291,7 +1291,7 @@ class GpuDnnSoftmaxBase(DnnBase):
assert
(
algo
in
(
'fast'
,
'accurate'
,
'log'
))
assert
(
algo
in
(
'fast'
,
'accurate'
,
'log'
))
if
algo
==
'log'
and
version
(
raises
=
False
)
<
3000
:
if
algo
==
'log'
and
version
(
raises
=
False
)
<
3000
:
raise
RuntimeError
(
"Need
C
uDNN v3 for log-softmax"
)
raise
RuntimeError
(
"Need
c
uDNN v3 for log-softmax"
)
self
.
algo
=
algo
self
.
algo
=
algo
assert
(
mode
in
(
'instance'
,
'channel'
))
assert
(
mode
in
(
'instance'
,
'channel'
))
...
@@ -1326,7 +1326,7 @@ class GpuDnnSoftmax(GpuDnnSoftmaxBase):
...
@@ -1326,7 +1326,7 @@ class GpuDnnSoftmax(GpuDnnSoftmaxBase):
algo : {'fast', 'accurate', 'log'}
algo : {'fast', 'accurate', 'log'}
Indicating whether, respectively, computations should be optimized for
Indicating whether, respectively, computations should be optimized for
speed, for accuracy, or if
C
uDNN should rather compute the log-softmax instead.
speed, for accuracy, or if
c
uDNN should rather compute the log-softmax instead.
mode : {'instance', 'channel'}
mode : {'instance', 'channel'}
Indicating whether the softmax should be computed per image across 'c01'
Indicating whether the softmax should be computed per image across 'c01'
or per spatial location '01' per image across 'c'.
or per spatial location '01' per image across 'c'.
...
@@ -1360,7 +1360,7 @@ class GpuDnnSoftmaxGrad(GpuDnnSoftmaxBase):
...
@@ -1360,7 +1360,7 @@ class GpuDnnSoftmaxGrad(GpuDnnSoftmaxBase):
----------
----------
algo
algo
'fast', 'accurate' or 'log' indicating whether, respectively,
'fast', 'accurate' or 'log' indicating whether, respectively,
computations should be optimized for speed, for accuracy, or if
C
uDNN
computations should be optimized for speed, for accuracy, or if
c
uDNN
should rather compute the gradient of the log-softmax instead.
should rather compute the gradient of the log-softmax instead.
mode
mode
'instance' or 'channel' indicating whether the softmax should
'instance' or 'channel' indicating whether the softmax should
...
@@ -1549,7 +1549,7 @@ def local_avg_pool_dnn_grad_stride(node, ctx_name):
...
@@ -1549,7 +1549,7 @@ def local_avg_pool_dnn_grad_stride(node, ctx_name):
cg
=
gpu_contiguous
(
out_grad
)
cg
=
gpu_contiguous
(
out_grad
)
# We reuse cg because
C
uDNN does not use the value of the `out`
# We reuse cg because
c
uDNN does not use the value of the `out`
# argument but still checks its shape for average pooling. This
# argument but still checks its shape for average pooling. This
# has been observed in v2 and v3 as far as I know.
# has been observed in v2 and v3 as far as I know.
return
GpuDnnPoolGrad
(
mode
=
mode
)(
gpu_contiguous
(
inp
),
cg
,
cg
,
ds
,
st
,
pad
)
return
GpuDnnPoolGrad
(
mode
=
mode
)(
gpu_contiguous
(
inp
),
cg
,
cg
,
ds
,
st
,
pad
)
...
@@ -1579,7 +1579,7 @@ def local_log_softmax_dnn(node):
...
@@ -1579,7 +1579,7 @@ def local_log_softmax_dnn(node):
len
(
node
.
inputs
[
0
]
.
clients
)
==
1
):
len
(
node
.
inputs
[
0
]
.
clients
)
==
1
):
if
version
(
raises
=
False
)
<
3000
:
if
version
(
raises
=
False
)
<
3000
:
# No log-softmax before cudnn v3
# No log-softmax before cudnn v3
raise_no_cudnn
(
"Need
C
uDNN v3 for LogSoftmax"
)
raise_no_cudnn
(
"Need
c
uDNN v3 for LogSoftmax"
)
softmax_node
=
node
.
inputs
[
0
]
.
owner
softmax_node
=
node
.
inputs
[
0
]
.
owner
new_softmax
=
GpuDnnSoftmax
(
'log'
,
softmax_node
.
op
.
mode
)
new_softmax
=
GpuDnnSoftmax
(
'log'
,
softmax_node
.
op
.
mode
)
return
[
new_softmax
(
softmax_node
.
inputs
[
0
])]
return
[
new_softmax
(
softmax_node
.
inputs
[
0
])]
...
@@ -1594,7 +1594,7 @@ def local_logsoftmax_to_dnn(node, ctx_name):
...
@@ -1594,7 +1594,7 @@ def local_logsoftmax_to_dnn(node, ctx_name):
return
return
if
not
dnn_available
(
ctx_name
)
or
version
(
raises
=
False
)
<
3000
:
if
not
dnn_available
(
ctx_name
)
or
version
(
raises
=
False
)
<
3000
:
# No log-softmax before cudnn v3
# No log-softmax before cudnn v3
raise_no_cudnn
(
"Need
C
uDNN v3 for LogSoftmax"
)
raise_no_cudnn
(
"Need
c
uDNN v3 for LogSoftmax"
)
inp
=
inp
.
dimshuffle
(
0
,
1
,
'x'
,
'x'
)
inp
=
inp
.
dimshuffle
(
0
,
1
,
'x'
,
'x'
)
inp
.
tag
.
context_name
=
ctx_name
inp
.
tag
.
context_name
=
ctx_name
...
@@ -1627,7 +1627,7 @@ gpu_seqopt.register("NoCuDNNRaise", NoCuDNNRaise(), 0, 'cudnn')
...
@@ -1627,7 +1627,7 @@ gpu_seqopt.register("NoCuDNNRaise", NoCuDNNRaise(), 0, 'cudnn')
@op_lifter
([
SoftmaxGrad
])
@op_lifter
([
SoftmaxGrad
])
def
local_softmax_dnn_grad
(
node
,
ctx_name
):
def
local_softmax_dnn_grad
(
node
,
ctx_name
):
if
not
dnn_available
(
ctx_name
):
if
not
dnn_available
(
ctx_name
):
raise_no_cudnn
(
"
C
uDNN needed for SoftmaxGrad"
)
raise_no_cudnn
(
"
c
uDNN needed for SoftmaxGrad"
)
ins
=
[]
ins
=
[]
for
n
in
node
.
inputs
:
for
n
in
node
.
inputs
:
n
=
as_gpuarray_variable
(
n
,
ctx_name
)
n
=
as_gpuarray_variable
(
n
,
ctx_name
)
...
...
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论