Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
P
pytensor
项目
项目
详情
活动
周期分析
仓库
仓库
文件
提交
分支
标签
贡献者
图表
比较
统计图
议题
0
议题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
日程
统计图
Wiki
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
testgroup
pytensor
Commits
d5373b57
提交
d5373b57
authored
7月 20, 2016
作者:
Frédéric Bastien
提交者:
GitHub
7月 20, 2016
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #4764 from abergeron/dnn_f16
Float16 convolutions
上级
cb6c5b9c
ed9b3cf2
显示空白字符变更
内嵌
并排
正在显示
3 个修改的文件
包含
18 行增加
和
44 行删除
+18
-44
configdefaults.py
theano/configdefaults.py
+3
-2
dnn.py
theano/gpuarray/dnn.py
+13
-42
basic.py
theano/scalar/basic.py
+2
-0
没有找到文件。
theano/configdefaults.py
浏览文件 @
d5373b57
...
@@ -345,8 +345,9 @@ AddConfigVar('dnn.conv.algo_bwd_filter',
...
@@ -345,8 +345,9 @@ AddConfigVar('dnn.conv.algo_bwd_filter',
AddConfigVar
(
'dnn.conv.precision'
,
AddConfigVar
(
'dnn.conv.precision'
,
"Default data precision to use for the computation in cuDNN "
"Default data precision to use for the computation in cuDNN "
"convolutions (defaults to the same dtype as the inputs of the "
"convolutions (defaults to the same dtype as the inputs of the "
"convolutions)."
,
"convolutions, or float32 if inputs are float16)."
,
EnumStr
(
'as_input'
,
'float16'
,
'float32'
,
'float64'
),
EnumStr
(
'as_input_f32'
,
'as_input'
,
'float16'
,
'float32'
,
'float64'
),
in_c_key
=
False
)
in_c_key
=
False
)
...
...
theano/gpuarray/dnn.py
浏览文件 @
d5373b57
...
@@ -393,6 +393,8 @@ _one = constant(numpy.asarray(1.0, dtype='float64'))
...
@@ -393,6 +393,8 @@ _one = constant(numpy.asarray(1.0, dtype='float64'))
def
ensure_dt
(
val
,
default
,
name
,
dtype
):
def
ensure_dt
(
val
,
default
,
name
,
dtype
):
if
dtype
==
'float16'
:
dtype
=
'float32'
if
val
is
None
:
if
val
is
None
:
val
=
default
.
clone
()
val
=
default
.
clone
()
if
not
isinstance
(
val
,
Variable
):
if
not
isinstance
(
val
,
Variable
):
...
@@ -422,7 +424,7 @@ class GpuDnnConv(DnnBase):
...
@@ -422,7 +424,7 @@ class GpuDnnConv(DnnBase):
Default is the value of :attr:`config.dnn.conv.algo_fwd`.
Default is the value of :attr:`config.dnn.conv.algo_fwd`.
"""
"""
_f16_ok
=
True
__props__
=
(
'algo'
,
'inplace'
)
__props__
=
(
'algo'
,
'inplace'
)
def
__init__
(
self
,
algo
=
None
,
inplace
=
False
):
def
__init__
(
self
,
algo
=
None
,
inplace
=
False
):
...
@@ -437,22 +439,6 @@ class GpuDnnConv(DnnBase):
...
@@ -437,22 +439,6 @@ class GpuDnnConv(DnnBase):
if
self
.
inplace
:
if
self
.
inplace
:
self
.
destroy_map
=
{
0
:
[
2
]}
self
.
destroy_map
=
{
0
:
[
2
]}
if
version
()
<
3000
:
if
self
.
algo
==
'fft'
:
raise
RuntimeError
(
"cuDNN FFT convolution requires cuDNN v3"
)
elif
self
.
algo
in
[
'guess_once'
,
'guess_on_shape_change'
]:
raise
RuntimeError
(
"cuDNN selection of convolution "
"implementation based on heuristics "
"requires cuDNN v3"
)
elif
self
.
algo
in
[
'time_once'
,
'time_on_shape_change'
]:
raise
RuntimeError
(
"cuDNN convolution timing requires cuDNN v3"
)
# The fft_tiling implementation is only available from cuDNN V4 onward
if
version
()
<
4000
:
if
self
.
algo
==
'fft_tiling'
:
raise
RuntimeError
(
"cuDNN tiled-FFT convolution requires "
"cuDNN v4 or more recent"
)
if
version
()
<
5000
and
self
.
algo
==
'winograd'
:
if
version
()
<
5000
and
self
.
algo
==
'winograd'
:
raise
RuntimeError
(
"cuDNN winograd convolution requires "
raise
RuntimeError
(
"cuDNN winograd convolution requires "
"cuDNN v5 or more recent"
)
"cuDNN v5 or more recent"
)
...
@@ -488,7 +474,6 @@ class GpuDnnConv(DnnBase):
...
@@ -488,7 +474,6 @@ class GpuDnnConv(DnnBase):
elif
self
.
algo
==
'fft'
:
elif
self
.
algo
==
'fft'
:
alg
=
'CUDNN_CONVOLUTION_FWD_ALGO_FFT'
alg
=
'CUDNN_CONVOLUTION_FWD_ALGO_FFT'
elif
self
.
algo
==
'fft_tiling'
:
elif
self
.
algo
==
'fft_tiling'
:
# need v4
alg
=
'CUDNN_CONVOLUTION_FWD_ALGO_FFT_TILING'
alg
=
'CUDNN_CONVOLUTION_FWD_ALGO_FFT_TILING'
elif
self
.
algo
==
'winograd'
:
elif
self
.
algo
==
'winograd'
:
# need v5
# need v5
...
@@ -605,7 +590,7 @@ class GpuDnnConvGradW(DnnBase):
...
@@ -605,7 +590,7 @@ class GpuDnnConvGradW(DnnBase):
Default is the value of :attr:`config.dnn.conv.algo_bwd_filter`.
Default is the value of :attr:`config.dnn.conv.algo_bwd_filter`.
"""
"""
_f16_ok
=
True
__props__
=
(
'algo'
,
'inplace'
)
__props__
=
(
'algo'
,
'inplace'
)
def
__init__
(
self
,
inplace
=
False
,
algo
=
None
):
def
__init__
(
self
,
inplace
=
False
,
algo
=
None
):
...
@@ -650,9 +635,6 @@ class GpuDnnConvGradW(DnnBase):
...
@@ -650,9 +635,6 @@ class GpuDnnConvGradW(DnnBase):
if
self
.
inplace
:
if
self
.
inplace
:
defs
.
append
((
'CONV_INPLACE'
,
'1'
))
defs
.
append
((
'CONV_INPLACE'
,
'1'
))
if
version
()
<
3000
:
alg
=
'0'
else
:
alg
=
'CUDNN_CONVOLUTION_BWD_FILTER_ALGO_0'
alg
=
'CUDNN_CONVOLUTION_BWD_FILTER_ALGO_0'
if
self
.
algo
==
'none'
:
if
self
.
algo
==
'none'
:
alg
=
'CUDNN_CONVOLUTION_BWD_FILTER_ALGO_0'
alg
=
'CUDNN_CONVOLUTION_BWD_FILTER_ALGO_0'
...
@@ -720,7 +702,6 @@ gpu_dnn_conv_gradW.cache = {}
...
@@ -720,7 +702,6 @@ gpu_dnn_conv_gradW.cache = {}
class
GpuDnnConvGradI
(
DnnBase
):
class
GpuDnnConvGradI
(
DnnBase
):
"""
"""
The convolution gradient with respect to the inputs.
The convolution gradient with respect to the inputs.
...
@@ -735,7 +716,7 @@ class GpuDnnConvGradI(DnnBase):
...
@@ -735,7 +716,7 @@ class GpuDnnConvGradI(DnnBase):
Default is the value of :attr:`config.dnn.conv.algo_bwd_data`.
Default is the value of :attr:`config.dnn.conv.algo_bwd_data`.
"""
"""
_f16_ok
=
True
__props__
=
(
'algo'
,
'inplace'
,)
__props__
=
(
'algo'
,
'inplace'
,)
def
__init__
(
self
,
inplace
=
False
,
algo
=
None
):
def
__init__
(
self
,
inplace
=
False
,
algo
=
None
):
...
@@ -748,11 +729,6 @@ class GpuDnnConvGradI(DnnBase):
...
@@ -748,11 +729,6 @@ class GpuDnnConvGradI(DnnBase):
algo
=
config
.
dnn
.
conv
.
algo_bwd_data
algo
=
config
.
dnn
.
conv
.
algo_bwd_data
self
.
algo
=
algo
self
.
algo
=
algo
# The small-workspace implementation is only available from cuDNN V4
# onward.
if
version
()
<
4000
and
self
.
algo
==
'fft_tiling'
:
raise
RuntimeError
(
"cuDNN's tiled-FFT convolution requires cuDNN "
"v4 or more recent"
)
if
version
()
<
5000
and
self
.
algo
==
'winograd'
:
if
version
()
<
5000
and
self
.
algo
==
'winograd'
:
raise
RuntimeError
(
"cuDNN's winograd convolution requires cuDNN "
raise
RuntimeError
(
"cuDNN's winograd convolution requires cuDNN "
"v5 or more recent"
)
"v5 or more recent"
)
...
@@ -791,9 +767,6 @@ class GpuDnnConvGradI(DnnBase):
...
@@ -791,9 +767,6 @@ class GpuDnnConvGradI(DnnBase):
if
self
.
inplace
:
if
self
.
inplace
:
defs
.
append
((
'CONV_INPLACE'
,
'1'
))
defs
.
append
((
'CONV_INPLACE'
,
'1'
))
if
version
()
<
3000
:
alg
=
'0'
else
:
alg
=
'CUDNN_CONVOLUTION_BWD_DATA_ALGO_0'
alg
=
'CUDNN_CONVOLUTION_BWD_DATA_ALGO_0'
if
self
.
algo
==
'none'
:
if
self
.
algo
==
'none'
:
alg
=
'CUDNN_CONVOLUTION_BWD_DATA_ALGO_0'
alg
=
'CUDNN_CONVOLUTION_BWD_DATA_ALGO_0'
...
@@ -915,8 +888,12 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1),
...
@@ -915,8 +888,12 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1),
# Establish dtype in which to perform the computation of the convolution
# Establish dtype in which to perform the computation of the convolution
if
precision
is
None
:
if
precision
is
None
:
precision
=
theano
.
config
.
dnn
.
conv
.
precision
precision
=
theano
.
config
.
dnn
.
conv
.
precision
if
precision
==
'as_input'
:
if
precision
==
'as_input'
or
precision
==
'as_input_f32'
:
precision
=
theano
.
scalar
.
upcast
(
img
.
dtype
,
kerns
.
dtype
)
nprec
=
theano
.
scalar
.
upcast
(
img
.
dtype
,
kerns
.
dtype
)
if
nprec
==
'float16'
and
precision
==
'as_input_f32'
:
precision
=
'float32'
else
:
precision
=
nprec
if
workmem
is
not
None
:
if
workmem
is
not
None
:
if
algo
is
not
None
:
if
algo
is
not
None
:
...
@@ -1059,12 +1036,6 @@ class GpuDnnPoolDesc(Op):
...
@@ -1059,12 +1036,6 @@ class GpuDnnPoolDesc(Op):
self
.
stride
=
stride
self
.
stride
=
stride
self
.
pad
=
pad
self
.
pad
=
pad
if
self
.
get_ndim
()
==
3
and
version
()
<
3000
:
raise
RuntimeError
(
"cuDNN 3d pooling requires v3"
)
if
mode
==
'average_exc_pad'
and
max
(
pad
)
>
0
and
version
()
<
4004
:
raise
RuntimeError
(
"cuDNN pooling mode 'average_exc_pad' requires at least v4"
)
def
get_ndim
(
self
):
def
get_ndim
(
self
):
return
len
(
self
.
ws
)
return
len
(
self
.
ws
)
...
@@ -1149,7 +1120,7 @@ class GpuDnnPool(DnnBase):
...
@@ -1149,7 +1120,7 @@ class GpuDnnPool(DnnBase):
(padX, padY) or (padX, padY, padZ)
(padX, padY) or (padX, padY, padZ)
"""
"""
_f16_ok
=
True
__props__
=
(
'mode'
,)
__props__
=
(
'mode'
,)
def
__init__
(
self
,
mode
=
'max'
):
def
__init__
(
self
,
mode
=
'max'
):
...
@@ -1234,7 +1205,7 @@ class GpuDnnPoolGrad(DnnBase):
...
@@ -1234,7 +1205,7 @@ class GpuDnnPoolGrad(DnnBase):
(padX, padY) or (padX, padY, padZ)
(padX, padY) or (padX, padY, padZ)
"""
"""
_f16_ok
=
True
__props__
=
(
'mode'
,)
__props__
=
(
'mode'
,)
def
__init__
(
self
,
mode
=
'max'
):
def
__init__
(
self
,
mode
=
'max'
):
...
...
theano/scalar/basic.py
浏览文件 @
d5373b57
...
@@ -308,6 +308,8 @@ class Scalar(Type):
...
@@ -308,6 +308,8 @@ class Scalar(Type):
"""
%
locals
()
"""
%
locals
()
def
c_extract
(
self
,
name
,
sub
,
check_input
=
True
):
def
c_extract
(
self
,
name
,
sub
,
check_input
=
True
):
if
self
.
dtype
==
'float16'
:
raise
NotImplementedError
(
'float16'
)
specs
=
self
.
dtype_specs
()
specs
=
self
.
dtype_specs
()
if
(
check_input
):
if
(
check_input
):
pre
=
"""
pre
=
"""
...
...
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论