Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
P
pytensor
项目
项目
详情
活动
周期分析
仓库
仓库
文件
提交
分支
标签
贡献者
图表
比较
统计图
议题
0
议题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
日程
统计图
Wiki
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
testgroup
pytensor
Commits
ea927aef
提交
ea927aef
authored
2月 03, 2017
作者:
Frédéric Bastien
提交者:
GitHub
2月 03, 2017
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #5474 from nouiz/abstract_bn
Follow up from abstract bn PR
上级
466cdaa8
475c1335
显示空白字符变更
内嵌
并排
正在显示
10 个修改的文件
包含
227 行增加
和
88 行删除
+227
-88
dnn.py
theano/gpuarray/dnn.py
+12
-33
opt.py
theano/gpuarray/opt.py
+56
-3
config.py
theano/gpuarray/tests/config.py
+1
-0
test_dnn.py
theano/gpuarray/tests/test_dnn.py
+45
-3
dnn.py
theano/sandbox/cuda/dnn.py
+16
-0
test_dnn.py
theano/sandbox/cuda/tests/test_dnn.py
+38
-36
basic.py
theano/scalar/basic.py
+8
-0
bn.py
theano/tensor/nnet/bn.py
+46
-9
test_bn.py
theano/tensor/nnet/tests/test_bn.py
+1
-1
type.py
theano/tensor/type.py
+4
-3
没有找到文件。
theano/gpuarray/dnn.py
浏览文件 @
ea927aef
...
@@ -28,7 +28,6 @@ from theano.tensor.nnet.abstract_conv import (AbstractConv2d,
...
@@ -28,7 +28,6 @@ from theano.tensor.nnet.abstract_conv import (AbstractConv2d,
assert_conv_shape
)
assert_conv_shape
)
from
theano.tensor.signal.pool
import
(
from
theano.tensor.signal.pool
import
(
Pool
,
MaxPoolGrad
,
AveragePoolGrad
)
Pool
,
MaxPoolGrad
,
AveragePoolGrad
)
from
theano.tensor.nnet
import
bn
from
.
import
pygpu
from
.
import
pygpu
from
.type
import
(
get_context
,
gpu_context_type
,
list_contexts
,
from
.type
import
(
get_context
,
gpu_context_type
,
list_contexts
,
GpuArraySharedVariable
)
GpuArraySharedVariable
)
...
@@ -87,10 +86,6 @@ def _make_handle(ctx):
...
@@ -87,10 +86,6 @@ def _make_handle(ctx):
return
handle
return
handle
def
raise_no_cudnn
(
msg
=
"cuDNN is required for convolution and pooling"
):
raise
RuntimeError
(
msg
)
def
_dnn_check_compile
():
def
_dnn_check_compile
():
preambule
=
"""
preambule
=
"""
#include <stdio.h>
#include <stdio.h>
...
@@ -2733,7 +2728,7 @@ def local_abstractconv_cudnn_graph(op, context_name, inputs, outputs):
...
@@ -2733,7 +2728,7 @@ def local_abstractconv_cudnn_graph(op, context_name, inputs, outputs):
inp2
=
inputs
[
1
]
inp2
=
inputs
[
1
]
if
not
dnn_available
(
inp1
.
type
.
context_name
):
if
not
dnn_available
(
inp1
.
type
.
context_name
):
r
aise_no_cudnn
()
r
eturn
if
op
.
filter_flip
:
if
op
.
filter_flip
:
conv_mode
=
'conv'
conv_mode
=
'conv'
...
@@ -2776,7 +2771,7 @@ def local_abstractconv3d_cudnn_graph(op, context_name, inputs, outputs):
...
@@ -2776,7 +2771,7 @@ def local_abstractconv3d_cudnn_graph(op, context_name, inputs, outputs):
inp2
=
inputs
[
1
]
inp2
=
inputs
[
1
]
if
not
dnn_available
(
inp1
.
type
.
context_name
):
if
not
dnn_available
(
inp1
.
type
.
context_name
):
r
aise_no_cudnn
()
r
eturn
if
op
.
filter_flip
:
if
op
.
filter_flip
:
conv_mode
=
'conv'
conv_mode
=
'conv'
...
@@ -2902,7 +2897,7 @@ def local_dnn_convi_output_merge(node, *inputs):
...
@@ -2902,7 +2897,7 @@ def local_dnn_convi_output_merge(node, *inputs):
def
local_gpua_pool_dnn_alternative
(
op
,
ctx_name
,
inputs
,
outputs
):
def
local_gpua_pool_dnn_alternative
(
op
,
ctx_name
,
inputs
,
outputs
):
if
not
dnn_available
(
ctx_name
):
if
not
dnn_available
(
ctx_name
):
r
aise_no_cudnn
()
r
eturn
if
not
op
.
ignore_border
:
if
not
op
.
ignore_border
:
return
return
img
,
ws
,
stride
,
pad
=
inputs
img
,
ws
,
stride
,
pad
=
inputs
...
@@ -2931,7 +2926,7 @@ pool_db2.register("local_gpua_pool_dnn_alternative",
...
@@ -2931,7 +2926,7 @@ pool_db2.register("local_gpua_pool_dnn_alternative",
def
local_gpua_pool_dnn_grad_stride
(
op
,
ctx_name
,
inputs
,
outputs
):
def
local_gpua_pool_dnn_grad_stride
(
op
,
ctx_name
,
inputs
,
outputs
):
if
not
dnn_available
(
ctx_name
):
if
not
dnn_available
(
ctx_name
):
r
aise_no_cudnn
()
r
eturn
if
not
op
.
ignore_border
:
if
not
op
.
ignore_border
:
return
return
inp
,
out
,
out_grad
,
ws
,
stride
,
pad
=
inputs
inp
,
out
,
out_grad
,
ws
,
stride
,
pad
=
inputs
...
@@ -2975,7 +2970,7 @@ pool_db2.register("local_gpua_pool_dnn_grad_stride",
...
@@ -2975,7 +2970,7 @@ pool_db2.register("local_gpua_pool_dnn_grad_stride",
def
local_gpua_avg_pool_dnn_grad_stride
(
op
,
ctx_name
,
inputs
,
outputs
):
def
local_gpua_avg_pool_dnn_grad_stride
(
op
,
ctx_name
,
inputs
,
outputs
):
if
not
dnn_available
(
ctx_name
):
if
not
dnn_available
(
ctx_name
):
r
aise_no_cudnn
()
r
eturn
if
not
op
.
ignore_border
:
if
not
op
.
ignore_border
:
return
return
inp
,
out_grad
,
ws
,
stride
,
pad
=
inputs
inp
,
out_grad
,
ws
,
stride
,
pad
=
inputs
...
@@ -3018,7 +3013,7 @@ pool_db2.register("local_gpua_avg_pool_dnn_grad_stride",
...
@@ -3018,7 +3013,7 @@ pool_db2.register("local_gpua_avg_pool_dnn_grad_stride",
def
local_softmax_dnn
(
node
):
def
local_softmax_dnn
(
node
):
if
isinstance
(
node
.
op
,
GpuSoftmax
):
if
isinstance
(
node
.
op
,
GpuSoftmax
):
if
not
dnn_available
(
node
.
outputs
[
0
]
.
type
.
context_name
):
if
not
dnn_available
(
node
.
outputs
[
0
]
.
type
.
context_name
):
r
aise_no_cudnn
()
r
eturn
ins
=
node
.
inputs
[
0
]
.
dimshuffle
(
0
,
1
,
'x'
,
'x'
)
ins
=
node
.
inputs
[
0
]
.
dimshuffle
(
0
,
1
,
'x'
,
'x'
)
ins
=
gpu_contiguous
(
ins
)
ins
=
gpu_contiguous
(
ins
)
out
=
GpuDnnSoftmax
(
'accurate'
,
'channel'
)(
ins
)
out
=
GpuDnnSoftmax
(
'accurate'
,
'channel'
)(
ins
)
...
@@ -3035,9 +3030,6 @@ def local_log_softmax_dnn(node):
...
@@ -3035,9 +3030,6 @@ def local_log_softmax_dnn(node):
node
.
inputs
[
0
]
.
owner
and
node
.
inputs
[
0
]
.
owner
and
isinstance
(
node
.
inputs
[
0
]
.
owner
.
op
,
GpuDnnSoftmax
)
and
isinstance
(
node
.
inputs
[
0
]
.
owner
.
op
,
GpuDnnSoftmax
)
and
len
(
node
.
inputs
[
0
]
.
clients
)
==
1
):
len
(
node
.
inputs
[
0
]
.
clients
)
==
1
):
if
version
(
raises
=
False
)
<
3000
:
# No log-softmax before cudnn v3
raise_no_cudnn
(
"Need cuDNN v3 for LogSoftmax"
)
softmax_node
=
node
.
inputs
[
0
]
.
owner
softmax_node
=
node
.
inputs
[
0
]
.
owner
new_softmax
=
GpuDnnSoftmax
(
'log'
,
softmax_node
.
op
.
mode
)
new_softmax
=
GpuDnnSoftmax
(
'log'
,
softmax_node
.
op
.
mode
)
return
[
new_softmax
(
softmax_node
.
inputs
[
0
])]
return
[
new_softmax
(
softmax_node
.
inputs
[
0
])]
...
@@ -3051,9 +3043,8 @@ def local_gpua_logsoftmax_to_dnn(op, ctx_name, inputs, outputs):
...
@@ -3051,9 +3043,8 @@ def local_gpua_logsoftmax_to_dnn(op, ctx_name, inputs, outputs):
inp
=
inputs
[
0
]
inp
=
inputs
[
0
]
if
inp
.
ndim
!=
2
:
if
inp
.
ndim
!=
2
:
return
return
if
not
dnn_available
(
ctx_name
)
or
version
(
raises
=
False
)
<
3000
:
if
not
dnn_available
(
ctx_name
):
# No log-softmax before cudnn v3
return
raise_no_cudnn
(
"Need cuDNN v3 for LogSoftmax"
)
inp
=
inp
.
dimshuffle
(
0
,
1
,
'x'
,
'x'
)
inp
=
inp
.
dimshuffle
(
0
,
1
,
'x'
,
'x'
)
inp
.
tag
.
context_name
=
ctx_name
inp
.
tag
.
context_name
=
ctx_name
...
@@ -3087,7 +3078,7 @@ gpu_seqopt.register("NoCuDNNRaise", NoCuDNNRaise(), 0, 'cudnn')
...
@@ -3087,7 +3078,7 @@ gpu_seqopt.register("NoCuDNNRaise", NoCuDNNRaise(), 0, 'cudnn')
@register_opt2
([
SoftmaxGrad
],
'cudnn'
,
'fast_compile'
)
@register_opt2
([
SoftmaxGrad
],
'cudnn'
,
'fast_compile'
)
def
local_gpua_softmax_dnn_grad
(
op
,
ctx_name
,
inputs
,
outputs
):
def
local_gpua_softmax_dnn_grad
(
op
,
ctx_name
,
inputs
,
outputs
):
if
not
dnn_available
(
ctx_name
):
if
not
dnn_available
(
ctx_name
):
r
aise_no_cudnn
(
"cuDNN needed for SoftmaxGrad"
)
r
eturn
ins
=
[]
ins
=
[]
for
n
in
inputs
:
for
n
in
inputs
:
n
=
as_gpuarray_variable
(
n
,
ctx_name
)
n
=
as_gpuarray_variable
(
n
,
ctx_name
)
...
@@ -3100,9 +3091,6 @@ def local_gpua_softmax_dnn_grad(op, ctx_name, inputs, outputs):
...
@@ -3100,9 +3091,6 @@ def local_gpua_softmax_dnn_grad(op, ctx_name, inputs, outputs):
return
[
out
.
dimshuffle
(
0
,
2
)]
return
[
out
.
dimshuffle
(
0
,
2
)]
@register_opt
(
'cudnn'
,
'fast_compile'
)
@op_lifter
([
bn
.
AbstractBatchNormTrain
])
@register_opt2
([
bn
.
AbstractBatchNormTrain
],
'cudnn'
,
'fast_compile'
)
def
local_abstract_batch_norm_train_cudnn
(
op
,
ctx_name
,
inputs
,
outputs
):
def
local_abstract_batch_norm_train_cudnn
(
op
,
ctx_name
,
inputs
,
outputs
):
x
,
scale
,
bias
,
epsilon
,
running_average_factor
=
inputs
[:
5
]
x
,
scale
,
bias
,
epsilon
,
running_average_factor
=
inputs
[:
5
]
running_mean
=
inputs
[
5
]
if
len
(
inputs
)
>
5
else
None
running_mean
=
inputs
[
5
]
if
len
(
inputs
)
>
5
else
None
...
@@ -3130,8 +3118,7 @@ def local_abstract_batch_norm_train_cudnn(op, ctx_name, inputs, outputs):
...
@@ -3130,8 +3118,7 @@ def local_abstract_batch_norm_train_cudnn(op, ctx_name, inputs, outputs):
ctx
=
infer_context_name
(
*
inputs
)
ctx
=
infer_context_name
(
*
inputs
)
if
not
dnn_available
(
ctx
):
if
not
dnn_available
(
ctx
):
# TODO should this raise_no_cudnn?
return
return
None
x
=
as_gpuarray_variable
(
x
,
context_name
=
ctx
)
x
=
as_gpuarray_variable
(
x
,
context_name
=
ctx
)
scale
=
as_gpuarray_variable
(
scale
,
context_name
=
ctx
)
scale
=
as_gpuarray_variable
(
scale
,
context_name
=
ctx
)
bias
=
as_gpuarray_variable
(
bias
,
context_name
=
ctx
)
bias
=
as_gpuarray_variable
(
bias
,
context_name
=
ctx
)
...
@@ -3186,9 +3173,6 @@ def local_batch_norm_inference_inplace(node):
...
@@ -3186,9 +3173,6 @@ def local_batch_norm_inference_inplace(node):
return
[
GpuDnnBatchNormInference
(
mode
=
node
.
op
.
mode
,
inplace
=
True
)(
*
node
.
inputs
)]
return
[
GpuDnnBatchNormInference
(
mode
=
node
.
op
.
mode
,
inplace
=
True
)(
*
node
.
inputs
)]
@register_opt
(
'cudnn'
,
'fast_compile'
)
@op_lifter
([
bn
.
AbstractBatchNormTrainGrad
])
@register_opt2
([
bn
.
AbstractBatchNormTrainGrad
],
'cudnn'
,
'fast_compile'
)
def
local_abstract_batch_norm_train_grad_cudnn
(
op
,
ctx_name
,
inputs
,
outputs
):
def
local_abstract_batch_norm_train_grad_cudnn
(
op
,
ctx_name
,
inputs
,
outputs
):
x
,
dy
,
scale
,
x_mean
,
x_invstd
,
epsilon
=
inputs
x
,
dy
,
scale
,
x_mean
,
x_invstd
,
epsilon
=
inputs
...
@@ -3234,8 +3218,7 @@ def local_abstract_batch_norm_train_grad_cudnn(op, ctx_name, inputs, outputs):
...
@@ -3234,8 +3218,7 @@ def local_abstract_batch_norm_train_grad_cudnn(op, ctx_name, inputs, outputs):
ctx
=
infer_context_name
(
*
inputs
)
ctx
=
infer_context_name
(
*
inputs
)
if
not
dnn_available
(
ctx
):
if
not
dnn_available
(
ctx
):
# TODO should this raise_no_cudnn?
return
return
None
x
=
as_gpuarray_variable
(
x
,
context_name
=
ctx
)
x
=
as_gpuarray_variable
(
x
,
context_name
=
ctx
)
dy
=
as_gpuarray_variable
(
dy
,
context_name
=
ctx
)
dy
=
as_gpuarray_variable
(
dy
,
context_name
=
ctx
)
scale
=
as_gpuarray_variable
(
scale
,
context_name
=
ctx
)
scale
=
as_gpuarray_variable
(
scale
,
context_name
=
ctx
)
...
@@ -3257,9 +3240,6 @@ def local_abstract_batch_norm_train_grad_cudnn(op, ctx_name, inputs, outputs):
...
@@ -3257,9 +3240,6 @@ def local_abstract_batch_norm_train_grad_cudnn(op, ctx_name, inputs, outputs):
return
[
g_wrt_inputs
,
g_wrt_scale
,
g_wrt_bias
]
return
[
g_wrt_inputs
,
g_wrt_scale
,
g_wrt_bias
]
@register_opt
(
'cudnn'
,
'fast_compile'
)
@op_lifter
([
bn
.
AbstractBatchNormInference
])
@register_opt2
([
bn
.
AbstractBatchNormInference
],
'cudnn'
,
'fast_compile'
)
def
local_abstract_batch_norm_inference_cudnn
(
op
,
ctx_name
,
inputs
,
outputs
):
def
local_abstract_batch_norm_inference_cudnn
(
op
,
ctx_name
,
inputs
,
outputs
):
x
,
scale
,
bias
,
estimated_mean
,
estimated_variance
,
epsilon
=
inputs
x
,
scale
,
bias
,
estimated_mean
,
estimated_variance
,
epsilon
=
inputs
...
@@ -3280,8 +3260,7 @@ def local_abstract_batch_norm_inference_cudnn(op, ctx_name, inputs, outputs):
...
@@ -3280,8 +3260,7 @@ def local_abstract_batch_norm_inference_cudnn(op, ctx_name, inputs, outputs):
ctx
=
infer_context_name
(
*
inputs
)
ctx
=
infer_context_name
(
*
inputs
)
if
not
dnn_available
(
ctx
):
if
not
dnn_available
(
ctx
):
# TODO should this raise_no_cudnn?
return
return
None
x
=
as_gpuarray_variable
(
x
,
context_name
=
ctx
)
x
=
as_gpuarray_variable
(
x
,
context_name
=
ctx
)
scale
=
as_gpuarray_variable
(
scale
,
context_name
=
ctx
)
scale
=
as_gpuarray_variable
(
scale
,
context_name
=
ctx
)
bias
=
as_gpuarray_variable
(
bias
,
context_name
=
ctx
)
bias
=
as_gpuarray_variable
(
bias
,
context_name
=
ctx
)
...
...
theano/gpuarray/opt.py
浏览文件 @
ea927aef
...
@@ -22,6 +22,7 @@ from theano.scalar.basic import Scalar, Pow, Cast
...
@@ -22,6 +22,7 @@ from theano.scalar.basic import Scalar, Pow, Cast
from
theano.scalar.basic_scipy
import
Erfinv
,
Erfcinv
from
theano.scalar.basic_scipy
import
Erfinv
,
Erfcinv
from
theano.scan_module
import
scan_utils
,
scan_op
,
scan_opt
from
theano.scan_module
import
scan_utils
,
scan_op
,
scan_opt
from
theano.tensor.nnet
import
bn
from
theano.tensor.nnet.conv
import
ConvOp
from
theano.tensor.nnet.conv
import
ConvOp
from
theano.tensor.nnet.blocksparse
import
SparseBlockGemv
,
SparseBlockOuter
from
theano.tensor.nnet.blocksparse
import
SparseBlockGemv
,
SparseBlockOuter
from
theano.tensor.nnet.abstract_conv
import
(
BaseAbstractConv
,
from
theano.tensor.nnet.abstract_conv
import
(
BaseAbstractConv
,
...
@@ -1964,9 +1965,8 @@ abstractconv_groupopt = theano.gof.optdb.LocalGroupDB()
...
@@ -1964,9 +1965,8 @@ abstractconv_groupopt = theano.gof.optdb.LocalGroupDB()
abstractconv_groupopt
.
__name__
=
"gpuarray_abstractconv_opts"
abstractconv_groupopt
.
__name__
=
"gpuarray_abstractconv_opts"
register_opt
(
'fast_compile'
)(
abstractconv_groupopt
)
register_opt
(
'fast_compile'
)(
abstractconv_groupopt
)
# cuDNN is first, but only registered if cuDNN is available.
# We import these opts here instead of at the top of this file
# (we import these opts here instead of at the top of this file
# to avoid a circular dependency problem with dnn
# to avoid a circular dependency problem with dnn)
from
.dnn
import
(
local_abstractconv_cudnn
,
local_abstractconv_gw_cudnn
,
from
.dnn
import
(
local_abstractconv_cudnn
,
local_abstractconv_gw_cudnn
,
local_abstractconv_gi_cudnn
)
# noqa: 402
local_abstractconv_gi_cudnn
)
# noqa: 402
abstractconv_groupopt
.
register
(
'local_abstractconv_dnn'
,
abstractconv_groupopt
.
register
(
'local_abstractconv_dnn'
,
...
@@ -2005,3 +2005,56 @@ abstractconv_groupopt.register('local_abstractconv3d_gradinputs',
...
@@ -2005,3 +2005,56 @@ abstractconv_groupopt.register('local_abstractconv3d_gradinputs',
local_abstractconv3d_gradinputs_gemm
,
30
,
local_abstractconv3d_gradinputs_gemm
,
30
,
'conv_gemm'
,
'conv_gemm'
,
'gpuarray'
,
'fast_compile'
,
'fast_run'
)
'gpuarray'
,
'fast_compile'
,
'fast_run'
)
# Register the batch normalization optimizations (cuDNN first, then the
# generic Theano-graph fallback).

# These optimizers are imported here rather than at the top of the file
# because importing dnn at the top would create a circular dependency.
from .dnn import (local_abstract_batch_norm_train_cudnn,
                  local_abstract_batch_norm_train_grad_cudnn,
                  local_abstract_batch_norm_inference_cudnn)  # noqa: 402

abstract_batch_norm_groupopt = theano.gof.optdb.LocalGroupDB()
abstract_batch_norm_groupopt.__name__ = "gpuarray_batchnorm_opts"
register_opt('fast_compile')(abstract_batch_norm_groupopt)

# Two databases are filled below: one registered through register_opt and
# one, built on GraphToGPULocalOptGroup, registered through register_opt2.
abstract_batch_norm_db = LocalGroupDB()
abstract_batch_norm_db2 = LocalGroupDB(
    local_opt=theano.gof.opt.GraphToGPULocalOptGroup)
abstract_batch_norm_db2.__name__ = "abstract_batch_norm_db2"
register_opt('fast_compile',
             name='abstract_batch_norm_db')(abstract_batch_norm_db)
register_opt2([bn.AbstractBatchNormTrain,
               bn.AbstractBatchNormTrainGrad,
               bn.AbstractBatchNormInference],
              'fast_compile',
              name='abstract_batch_norm_db2')(abstract_batch_norm_db2)

# Each entry is (abstract op, cuDNN optimizer, generic optimizer from bn).
for op, fct, cpu in [(bn.AbstractBatchNormTrain,
                      local_abstract_batch_norm_train_cudnn,
                      bn.local_abstract_batch_norm_train),
                     (bn.AbstractBatchNormTrainGrad,
                      local_abstract_batch_norm_train_grad_cudnn,
                      bn.local_abstract_batch_norm_train_grad),
                     (bn.AbstractBatchNormInference,
                      local_abstract_batch_norm_inference_cudnn,
                      bn.local_abstract_batch_norm_inference)]:
    lifter = op_lifter([op])(fct)
    # The cuDNN version is registered at position 1 in both databases.
    abstract_batch_norm_db.register(
        fct.__name__, lifter,
        'gpuarray', 'fast_compile', 'fast_run', 'cudnn', 'batchnorm_dnn',
        position=1)
    abstract_batch_norm_db2.register(
        fct.__name__, local_optimizer([op])(fct),
        'gpuarray', 'fast_compile', 'fast_run', 'cudnn', 'batchnorm_dnn',
        position=1)
    # `cpu` is a normal optimization, so it cannot be registered in
    # GraphToGPU; for now it is only added to the slower EQ phase. When
    # cuDNN is missing we still want the abstract op expanded into a
    # Theano graph at this point so that this graph ends up on the GPU.
    abstract_batch_norm_db.register(
        cpu.__name__, cpu,
        'gpuarray', 'fast_compile', 'fast_run',
        position='last')
theano/gpuarray/tests/config.py
浏览文件 @
ea927aef
...
@@ -22,6 +22,7 @@ if theano.config.mode == 'FAST_COMPILE':
...
@@ -22,6 +22,7 @@ if theano.config.mode == 'FAST_COMPILE':
else
:
else
:
mode_with_gpu
=
theano
.
compile
.
mode
.
get_default_mode
()
.
including
(
'gpuarray'
)
.
excluding
(
'gpu'
)
mode_with_gpu
=
theano
.
compile
.
mode
.
get_default_mode
()
.
including
(
'gpuarray'
)
.
excluding
(
'gpu'
)
mode_without_gpu
=
theano
.
compile
.
mode
.
get_default_mode
()
.
excluding
(
'gpuarray'
)
mode_without_gpu
=
theano
.
compile
.
mode
.
get_default_mode
()
.
excluding
(
'gpuarray'
)
mode_without_gpu
.
check_py_code
=
False
# If using float16, cast reference input to float32
# If using float16, cast reference input to float32
...
...
theano/gpuarray/tests/test_dnn.py
浏览文件 @
ea927aef
...
@@ -26,6 +26,10 @@ from .rnn_support import Model, GRU, LSTM, WrapperLayer
...
@@ -26,6 +26,10 @@ from .rnn_support import Model, GRU, LSTM, WrapperLayer
from
theano.configdefaults
import
SUPPORTED_DNN_CONV_ALGO_FWD
from
theano.configdefaults
import
SUPPORTED_DNN_CONV_ALGO_FWD
mode_with_gpu
=
mode_with_gpu
.
including
()
# Globally disabled for mode_without_gpu
mode_with_gpu
.
check_py_code
=
False
# If using float16, set CUDNN precision to float32
# If using float16, set CUDNN precision to float32
def
set_precision
(
floatX
):
def
set_precision
(
floatX
):
...
@@ -1451,7 +1455,7 @@ def test_dnn_batchnorm_train():
...
@@ -1451,7 +1455,7 @@ def test_dnn_batchnorm_train():
bn
.
AbstractBatchNormTrainGrad
))
for
n
bn
.
AbstractBatchNormTrainGrad
))
for
n
in
f_abstract
.
maker
.
fgraph
.
toposort
()])
in
f_abstract
.
maker
.
fgraph
.
toposort
()])
# run
# run
for
data_shape
in
((
5
,
10
,
30
,
4
0
,
10
,
5
),
(
4
,
3
,
1
,
1
,
1
,
1
),
(
1
,
1
,
5
,
5
,
5
,
5
)):
for
data_shape
in
((
5
,
10
,
30
,
4
,
10
,
5
),
(
4
,
3
,
1
,
1
,
1
,
1
),
(
2
,
3
,
5
,
5
,
5
,
5
)):
data_shape
=
data_shape
[:
ndim
]
data_shape
=
data_shape
[:
ndim
]
param_shape
=
tuple
(
1
if
d
in
axes
else
s
param_shape
=
tuple
(
1
if
d
in
axes
else
s
for
d
,
s
in
enumerate
(
data_shape
))
for
d
,
s
in
enumerate
(
data_shape
))
...
@@ -1505,7 +1509,7 @@ def test_dnn_batchnorm_train_without_running_averages():
...
@@ -1505,7 +1509,7 @@ def test_dnn_batchnorm_train_without_running_averages():
bn
.
batch_normalization_train
(
x
,
scale
,
bias
,
'per-activation'
)
bn
.
batch_normalization_train
(
x
,
scale
,
bias
,
'per-activation'
)
# backward pass
# backward pass
grads_gpu
=
T
.
grad
(
None
,
wrt
=
[
x
,
scale
,
bias
],
known_grads
=
{
out_gpu
:
dy
})
grads_gpu
=
T
.
grad
(
None
,
wrt
=
[
x
,
scale
,
bias
],
known_grads
=
{
out_gpu
:
dy
})
grads_abstract
=
T
.
grad
(
None
,
wrt
=
[
x
,
scale
,
bias
],
known_grads
=
{
out_
gpu
:
dy
})
grads_abstract
=
T
.
grad
(
None
,
wrt
=
[
x
,
scale
,
bias
],
known_grads
=
{
out_
abstract
:
dy
})
# compile
# compile
f_gpu
=
theano
.
function
([
x
,
scale
,
bias
,
dy
],
f_gpu
=
theano
.
function
([
x
,
scale
,
bias
,
dy
],
[
out_gpu
,
x_mean_gpu
,
x_invstd_gpu
]
+
[
out_gpu
,
x_mean_gpu
,
x_invstd_gpu
]
+
...
@@ -1533,6 +1537,44 @@ def test_dnn_batchnorm_train_without_running_averages():
...
@@ -1533,6 +1537,44 @@ def test_dnn_batchnorm_train_without_running_averages():
f_abstract
(
X
,
Scale
,
Bias
,
Dy
)
f_abstract
(
X
,
Scale
,
Bias
,
Dy
)
def test_without_dnn_batchnorm_train_without_running_averages():
    """
    Compile and run batch_normalization_train without running averages
    with the 'cudnn' optimizations excluded, and check that the compiled
    graph holds no cuDNN batch-norm op, no abstract batch-norm op, and at
    least one GpuElemwise node.
    """
    utt.seed_rng()

    x = T.tensor4('x')
    scale = T.tensor4('scale')
    bias = T.tensor4('bias')
    dy = T.tensor4('dy')
    data_shape = (5, 10, 30, 25)
    param_shape = (1, 10, 30, 25)

    # Forward pass through the abstract batch normalization.
    fwd = bn.batch_normalization_train(x, scale, bias, 'per-activation')
    out_abstract, x_mean_abstract, x_invstd_abstract = fwd

    # Backward pass: dy is supplied as the known gradient of the output.
    grads_abstract = T.grad(None,
                            wrt=[x, scale, bias],
                            known_grads={out_abstract: dy})

    # Compile with the cuDNN optimizations switched off.
    fn_outputs = ([out_abstract, x_mean_abstract, x_invstd_abstract] +
                  grads_abstract)
    f_abstract = theano.function([x, scale, bias, dy],
                                 fn_outputs,
                                 mode=mode_with_gpu.excluding('cudnn'))

    # Inspect the optimized graph once and reuse the node list.
    topo = f_abstract.maker.fgraph.toposort()
    abstract_ops = (bn.AbstractBatchNormTrain,
                    bn.AbstractBatchNormInference,
                    bn.AbstractBatchNormTrainGrad)
    # No cuDNN implementation may have been selected ...
    assert not any(isinstance(node.op, dnn.GpuDnnBatchNorm)
                   for node in topo)
    assert not any(isinstance(node.op, dnn.GpuDnnBatchNormGrad)
                   for node in topo)
    # ... the abstract ops must have been replaced ...
    assert not any(isinstance(node.op, abstract_ops) for node in topo)
    # ... and elementwise GPU ops must be present in the graph.
    assert any(isinstance(node.op, dnn.GpuElemwise) for node in topo)

    # Run once on random data (draw order kept: X, Dy, Scale, Bias).
    floatX = theano.config.floatX
    X = 4 + 3 * np.random.randn(*data_shape).astype(floatX)
    Dy = -1 + 2 * np.random.randn(*data_shape).astype(floatX)
    Scale = np.random.randn(*param_shape).astype(floatX)
    Bias = np.random.randn(*param_shape).astype(floatX)
    f_abstract(X, Scale, Bias, Dy)
def
test_dnn_batchnorm_train_inplace
():
def
test_dnn_batchnorm_train_inplace
():
# test inplace_running_mean and inplace_running_var
# test inplace_running_mean and inplace_running_var
if
not
dnn
.
dnn_available
(
test_ctx_name
):
if
not
dnn
.
dnn_available
(
test_ctx_name
):
...
@@ -1628,7 +1670,7 @@ def test_batchnorm_inference():
...
@@ -1628,7 +1670,7 @@ def test_batchnorm_inference():
bn
.
AbstractBatchNormTrainGrad
))
for
n
bn
.
AbstractBatchNormTrainGrad
))
for
n
in
f_abstract
.
maker
.
fgraph
.
toposort
()])
in
f_abstract
.
maker
.
fgraph
.
toposort
()])
# run
# run
for
data_shape
in
((
10
,
2
0
,
30
,
40
,
10
,
5
),
(
4
,
3
,
1
,
1
,
1
,
1
),
(
1
,
1
,
5
,
5
,
5
,
5
)):
for
data_shape
in
((
10
,
2
,
30
,
4
,
10
,
5
),
(
4
,
3
,
1
,
1
,
1
,
1
),
(
1
,
1
,
5
,
5
,
5
,
5
)):
data_shape
=
data_shape
[:
ndim
]
data_shape
=
data_shape
[:
ndim
]
param_shape
=
tuple
(
1
if
d
in
axes
else
s
param_shape
=
tuple
(
1
if
d
in
axes
else
s
for
d
,
s
in
enumerate
(
data_shape
))
for
d
,
s
in
enumerate
(
data_shape
))
...
...
theano/sandbox/cuda/dnn.py
浏览文件 @
ea927aef
...
@@ -3705,6 +3705,19 @@ def local_gpu_batch_norm_inference_inplace(node):
...
@@ -3705,6 +3705,19 @@ def local_gpu_batch_norm_inference_inplace(node):
inplace
=
True
)(
*
node
.
inputs
)]
inplace
=
True
)(
*
node
.
inputs
)]
def values_eq_approx_high_tol(a, b):
    """
    Compare `a` and `b` with a loosened absolute tolerance.

    Delegates to ``tensor.TensorType.values_eq_approx`` with
    ``atol=0.015``. The looser tolerance keeps DebugMode from raising
    useless errors caused by rounding: the computation reduces over the
    last two dimensions, so the absolute error can grow when the number
    of reduced elements is significant.

    """
    loose_atol = 0.015
    return tensor.TensorType.values_eq_approx(a, b, atol=loose_atol)
@local_optimizer
([
bn
.
AbstractBatchNormTrainGrad
])
@local_optimizer
([
bn
.
AbstractBatchNormTrainGrad
])
def
local_abstract_batch_norm_train_grad_cudnn
(
node
):
def
local_abstract_batch_norm_train_grad_cudnn
(
node
):
if
not
isinstance
(
node
.
op
,
bn
.
AbstractBatchNormTrainGrad
):
if
not
isinstance
(
node
.
op
,
bn
.
AbstractBatchNormTrainGrad
):
...
@@ -3781,6 +3794,9 @@ def local_abstract_batch_norm_train_grad_cudnn(node):
...
@@ -3781,6 +3794,9 @@ def local_abstract_batch_norm_train_grad_cudnn(node):
if
isinstance
(
node
.
outputs
[
2
]
.
type
,
tensor
.
TensorType
):
if
isinstance
(
node
.
outputs
[
2
]
.
type
,
tensor
.
TensorType
):
g_wrt_bias
=
tensor
.
as_tensor_variable
(
g_wrt_bias
)
g_wrt_bias
=
tensor
.
as_tensor_variable
(
g_wrt_bias
)
# TODO copy_stack_trace?
# TODO copy_stack_trace?
g_wrt_inputs
.
tag
.
values_eq_approx
=
values_eq_approx_high_tol
g_wrt_scale
.
tag
.
values_eq_approx
=
values_eq_approx_high_tol
return
[
g_wrt_inputs
,
g_wrt_scale
,
g_wrt_bias
]
return
[
g_wrt_inputs
,
g_wrt_scale
,
g_wrt_bias
]
...
...
theano/sandbox/cuda/tests/test_dnn.py
浏览文件 @
ea927aef
...
@@ -38,6 +38,8 @@ if theano.config.mode == 'FAST_COMPILE':
...
@@ -38,6 +38,8 @@ if theano.config.mode == 'FAST_COMPILE':
else
:
else
:
mode_with_gpu
=
theano
.
compile
.
mode
.
get_default_mode
()
.
including
(
'gpu'
)
mode_with_gpu
=
theano
.
compile
.
mode
.
get_default_mode
()
.
including
(
'gpu'
)
mode_without_gpu
=
theano
.
compile
.
mode
.
get_default_mode
()
.
excluding
(
'gpu'
)
mode_without_gpu
=
theano
.
compile
.
mode
.
get_default_mode
()
.
excluding
(
'gpu'
)
mode_with_gpu
.
check_py_code
=
False
mode_without_gpu
.
check_py_code
=
False
def
test_dnn_conv_desc_merge
():
def
test_dnn_conv_desc_merge
():
...
@@ -732,7 +734,7 @@ def test_batchnorm_train():
...
@@ -732,7 +734,7 @@ def test_batchnorm_train():
raise
SkipTest
(
"batch normalization requires cudnn v5+"
)
raise
SkipTest
(
"batch normalization requires cudnn v5+"
)
utt
.
seed_rng
()
utt
.
seed_rng
()
tensor6
=
T
.
TensorType
(
theano
.
config
.
floatX
,
(
False
,)
*
6
)
tensor6
=
T
.
TensorType
(
'float32'
,
(
False
,)
*
6
)
for
mode
in
(
'per-activation'
,
'spatial'
):
for
mode
in
(
'per-activation'
,
'spatial'
):
for
vartype
in
(
tensor6
,
T
.
ftensor5
,
T
.
ftensor4
,
T
.
ftensor3
,
T
.
fmatrix
,
T
.
fvector
):
for
vartype
in
(
tensor6
,
T
.
ftensor5
,
T
.
ftensor4
,
T
.
ftensor3
,
T
.
fmatrix
,
T
.
fvector
):
...
@@ -766,7 +768,7 @@ def test_batchnorm_train():
...
@@ -766,7 +768,7 @@ def test_batchnorm_train():
x_invstd_ref
=
T
.
inv
(
T
.
sqrt
(
x_var_ref
+
eps
))
x_invstd_ref
=
T
.
inv
(
T
.
sqrt
(
x_var_ref
+
eps
))
scale_ref
=
T
.
addbroadcast
(
scale
,
*
axes
)
scale_ref
=
T
.
addbroadcast
(
scale
,
*
axes
)
bias_ref
=
T
.
addbroadcast
(
bias
,
*
axes
)
bias_ref
=
T
.
addbroadcast
(
bias
,
*
axes
)
m
=
T
.
cast
(
T
.
prod
(
x
.
shape
)
/
T
.
prod
(
scale
.
shape
),
theano
.
config
.
floatX
)
m
=
T
.
cast
(
T
.
prod
(
x
.
shape
)
/
T
.
prod
(
scale
.
shape
),
'float32'
)
out_ref
=
(
x
-
x_mean_ref
)
*
(
scale_ref
*
x_invstd_ref
)
+
bias_ref
out_ref
=
(
x
-
x_mean_ref
)
*
(
scale_ref
*
x_invstd_ref
)
+
bias_ref
out_running_mean_ref
=
running_mean
*
(
1
-
running_average_factor
)
+
\
out_running_mean_ref
=
running_mean
*
(
1
-
running_average_factor
)
+
\
x_mean_ref
*
running_average_factor
x_mean_ref
*
running_average_factor
...
@@ -801,16 +803,16 @@ def test_batchnorm_train():
...
@@ -801,16 +803,16 @@ def test_batchnorm_train():
bn
.
AbstractBatchNormTrainGrad
))
for
n
bn
.
AbstractBatchNormTrainGrad
))
for
n
in
f_abstract
.
maker
.
fgraph
.
toposort
()])
in
f_abstract
.
maker
.
fgraph
.
toposort
()])
# run
# run
for
data_shape
in
((
5
,
10
,
30
,
40
,
10
,
5
),
(
4
,
3
,
1
,
1
,
1
,
1
),
(
1
,
1
,
5
,
5
,
5
,
5
)):
for
data_shape
in
((
5
,
2
,
30
,
4
,
10
,
5
),
(
4
,
3
,
1
,
1
,
1
,
1
),
(
2
,
3
,
5
,
5
,
5
,
5
)):
data_shape
=
data_shape
[:
ndim
]
data_shape
=
data_shape
[:
ndim
]
param_shape
=
tuple
(
1
if
d
in
axes
else
s
param_shape
=
tuple
(
1
if
d
in
axes
else
s
for
d
,
s
in
enumerate
(
data_shape
))
for
d
,
s
in
enumerate
(
data_shape
))
X
=
4
+
3
*
numpy
.
random
.
randn
(
*
data_shape
)
.
astype
(
theano
.
config
.
floatX
)
X
=
4
+
3
*
numpy
.
random
.
randn
(
*
data_shape
)
.
astype
(
'float32'
)
Dy
=
-
1
+
2
*
numpy
.
random
.
randn
(
*
data_shape
)
.
astype
(
theano
.
config
.
floatX
)
Dy
=
-
1
+
2
*
numpy
.
random
.
randn
(
*
data_shape
)
.
astype
(
'float32'
)
Scale
=
numpy
.
random
.
randn
(
*
param_shape
)
.
astype
(
theano
.
config
.
floatX
)
Scale
=
numpy
.
random
.
randn
(
*
param_shape
)
.
astype
(
'float32'
)
Bias
=
numpy
.
random
.
randn
(
*
param_shape
)
.
astype
(
theano
.
config
.
floatX
)
Bias
=
numpy
.
random
.
randn
(
*
param_shape
)
.
astype
(
'float32'
)
Running_mean
=
numpy
.
random
.
randn
(
*
param_shape
)
.
astype
(
theano
.
config
.
floatX
)
Running_mean
=
numpy
.
random
.
randn
(
*
param_shape
)
.
astype
(
'float32'
)
Running_var
=
numpy
.
random
.
randn
(
*
param_shape
)
.
astype
(
theano
.
config
.
floatX
)
Running_var
=
numpy
.
random
.
randn
(
*
param_shape
)
.
astype
(
'float32'
)
outputs_gpu
=
f_gpu
(
X
,
Scale
,
Bias
,
Running_mean
,
Running_var
,
Dy
)
outputs_gpu
=
f_gpu
(
X
,
Scale
,
Bias
,
Running_mean
,
Running_var
,
Dy
)
outputs_abstract
=
f_abstract
(
X
,
Scale
,
Bias
,
Running_mean
,
Running_var
,
Dy
)
outputs_abstract
=
f_abstract
(
X
,
Scale
,
Bias
,
Running_mean
,
Running_var
,
Dy
)
outputs_ref
=
f_ref
(
X
,
Scale
,
Bias
,
Running_mean
,
Running_var
,
Dy
)
outputs_ref
=
f_ref
(
X
,
Scale
,
Bias
,
Running_mean
,
Running_var
,
Dy
)
...
@@ -844,7 +846,7 @@ def test_dnn_batchnorm_train_without_running_averages():
...
@@ -844,7 +846,7 @@ def test_dnn_batchnorm_train_without_running_averages():
raise
SkipTest
(
"batch normalization requires cudnn v5+"
)
raise
SkipTest
(
"batch normalization requires cudnn v5+"
)
utt
.
seed_rng
()
utt
.
seed_rng
()
x
,
scale
,
bias
,
dy
=
T
.
tensor4
(
'x'
),
T
.
tensor4
(
'scale'
),
T
.
tensor4
(
'bias'
),
T
.
tensor4
(
'dy'
)
x
,
scale
,
bias
,
dy
=
T
.
ftensor4
(
'x'
),
T
.
ftensor4
(
'scale'
),
T
.
ftensor4
(
'bias'
),
T
.
f
tensor4
(
'dy'
)
data_shape
=
(
5
,
10
,
30
,
25
)
data_shape
=
(
5
,
10
,
30
,
25
)
param_shape
=
(
1
,
10
,
30
,
25
)
param_shape
=
(
1
,
10
,
30
,
25
)
...
@@ -875,10 +877,10 @@ def test_dnn_batchnorm_train_without_running_averages():
...
@@ -875,10 +877,10 @@ def test_dnn_batchnorm_train_without_running_averages():
bn
.
AbstractBatchNormTrainGrad
))
bn
.
AbstractBatchNormTrainGrad
))
for
n
in
f_abstract
.
maker
.
fgraph
.
toposort
()])
for
n
in
f_abstract
.
maker
.
fgraph
.
toposort
()])
# run
# run
X
=
4
+
3
*
numpy
.
random
.
randn
(
*
data_shape
)
.
astype
(
theano
.
config
.
floatX
)
X
=
4
+
3
*
numpy
.
random
.
randn
(
*
data_shape
)
.
astype
(
'float32'
)
Dy
=
-
1
+
2
*
numpy
.
random
.
randn
(
*
data_shape
)
.
astype
(
theano
.
config
.
floatX
)
Dy
=
-
1
+
2
*
numpy
.
random
.
randn
(
*
data_shape
)
.
astype
(
'float32'
)
Scale
=
numpy
.
random
.
randn
(
*
param_shape
)
.
astype
(
theano
.
config
.
floatX
)
Scale
=
numpy
.
random
.
randn
(
*
param_shape
)
.
astype
(
'float32'
)
Bias
=
numpy
.
random
.
randn
(
*
param_shape
)
.
astype
(
theano
.
config
.
floatX
)
Bias
=
numpy
.
random
.
randn
(
*
param_shape
)
.
astype
(
'float32'
)
f_gpu
(
X
,
Scale
,
Bias
,
Dy
)
f_gpu
(
X
,
Scale
,
Bias
,
Dy
)
f_abstract
(
X
,
Scale
,
Bias
,
Dy
)
f_abstract
(
X
,
Scale
,
Bias
,
Dy
)
...
@@ -891,14 +893,14 @@ def test_dnn_batchnorm_train_inplace():
...
@@ -891,14 +893,14 @@ def test_dnn_batchnorm_train_inplace():
raise
SkipTest
(
"batch normalization requires cudnn v5+"
)
raise
SkipTest
(
"batch normalization requires cudnn v5+"
)
utt
.
seed_rng
()
utt
.
seed_rng
()
x
,
scale
,
bias
=
T
.
tensor4
(
'x'
),
T
.
tensor4
(
'scale'
),
T
.
tensor4
(
'bias'
)
x
,
scale
,
bias
=
T
.
ftensor4
(
'x'
),
T
.
ftensor4
(
'scale'
),
T
.
f
tensor4
(
'bias'
)
data_shape
=
(
5
,
10
,
30
,
25
)
data_shape
=
(
5
,
10
,
30
,
25
)
param_shape
=
(
1
,
10
,
30
,
25
)
param_shape
=
(
1
,
10
,
30
,
25
)
running_mean
=
shared
(
running_mean
=
shared
(
numpy
.
random
.
randn
(
*
param_shape
)
.
astype
(
theano
.
config
.
floatX
),
numpy
.
random
.
randn
(
*
param_shape
)
.
astype
(
'float32'
),
broadcastable
=
(
True
,
False
,
False
,
False
))
broadcastable
=
(
True
,
False
,
False
,
False
))
running_var
=
shared
(
running_var
=
shared
(
numpy
.
random
.
randn
(
*
param_shape
)
.
astype
(
theano
.
config
.
floatX
),
numpy
.
random
.
randn
(
*
param_shape
)
.
astype
(
'float32'
),
broadcastable
=
(
True
,
False
,
False
,
False
))
broadcastable
=
(
True
,
False
,
False
,
False
))
# forward pass
# forward pass
...
@@ -923,9 +925,9 @@ def test_dnn_batchnorm_train_inplace():
...
@@ -923,9 +925,9 @@ def test_dnn_batchnorm_train_inplace():
assert
nodes
[
0
]
.
op
.
inplace_running_var
assert
nodes
[
0
]
.
op
.
inplace_running_var
assert
nodes
[
0
]
.
op
.
inplace_output
assert
nodes
[
0
]
.
op
.
inplace_output
# run
# run
X
=
4
+
3
*
numpy
.
random
.
randn
(
*
data_shape
)
.
astype
(
theano
.
config
.
floatX
)
X
=
4
+
3
*
numpy
.
random
.
randn
(
*
data_shape
)
.
astype
(
'float32'
)
Scale
=
numpy
.
random
.
randn
(
*
param_shape
)
.
astype
(
theano
.
config
.
floatX
)
Scale
=
numpy
.
random
.
randn
(
*
param_shape
)
.
astype
(
'float32'
)
Bias
=
numpy
.
random
.
randn
(
*
param_shape
)
.
astype
(
theano
.
config
.
floatX
)
Bias
=
numpy
.
random
.
randn
(
*
param_shape
)
.
astype
(
'float32'
)
f
(
X
,
Scale
,
Bias
)
f
(
X
,
Scale
,
Bias
)
...
@@ -936,10 +938,10 @@ def test_batchnorm_inference():
...
@@ -936,10 +938,10 @@ def test_batchnorm_inference():
raise
SkipTest
(
"batch normalization requires cudnn v5+"
)
raise
SkipTest
(
"batch normalization requires cudnn v5+"
)
utt
.
seed_rng
()
utt
.
seed_rng
()
tensor6
=
T
.
TensorType
(
theano
.
config
.
floatX
,
(
False
,)
*
6
)
tensor6
=
T
.
TensorType
(
'float32'
,
(
False
,)
*
6
)
for
mode
in
(
'per-activation'
,
'spatial'
):
for
mode
in
(
'per-activation'
,
'spatial'
):
for
vartype
in
(
tensor6
,
T
.
tensor5
,
T
.
tensor4
,
T
.
tensor3
,
T
.
matrix
,
T
.
vector
):
for
vartype
in
(
tensor6
,
T
.
ftensor5
,
T
.
ftensor4
,
T
.
ftensor3
,
T
.
fmatrix
,
T
.
f
vector
):
x
,
scale
,
bias
,
mean
,
var
=
(
vartype
(
n
)
x
,
scale
,
bias
,
mean
,
var
=
(
vartype
(
n
)
for
n
in
(
'x'
,
'scale'
,
'bias'
,
'mean'
,
'var'
))
for
n
in
(
'x'
,
'scale'
,
'bias'
,
'mean'
,
'var'
))
ndim
=
x
.
ndim
ndim
=
x
.
ndim
...
@@ -980,16 +982,16 @@ def test_batchnorm_inference():
...
@@ -980,16 +982,16 @@ def test_batchnorm_inference():
bn
.
AbstractBatchNormTrainGrad
))
for
n
bn
.
AbstractBatchNormTrainGrad
))
for
n
in
f_abstract
.
maker
.
fgraph
.
toposort
()])
in
f_abstract
.
maker
.
fgraph
.
toposort
()])
# run
# run
for
data_shape
in
((
10
,
2
0
,
30
,
40
,
10
,
5
),
(
4
,
3
,
1
,
1
,
1
,
1
),
(
1
,
1
,
5
,
5
,
5
,
5
)):
for
data_shape
in
((
10
,
2
,
15
,
4
,
6
,
5
),
(
4
,
3
,
1
,
1
,
1
,
1
),
(
1
,
1
,
5
,
5
,
5
,
5
)):
data_shape
=
data_shape
[:
ndim
]
data_shape
=
data_shape
[:
ndim
]
param_shape
=
tuple
(
1
if
d
in
axes
else
s
param_shape
=
tuple
(
1
if
d
in
axes
else
s
for
d
,
s
in
enumerate
(
data_shape
))
for
d
,
s
in
enumerate
(
data_shape
))
X
=
4
+
3
*
numpy
.
random
.
randn
(
*
data_shape
)
.
astype
(
theano
.
config
.
floatX
)
X
=
4
+
3
*
numpy
.
random
.
randn
(
*
data_shape
)
.
astype
(
'float32'
)
Dy
=
-
1
+
2
*
numpy
.
random
.
randn
(
*
data_shape
)
.
astype
(
theano
.
config
.
floatX
)
Dy
=
-
1
+
2
*
numpy
.
random
.
randn
(
*
data_shape
)
.
astype
(
'float32'
)
Scale
=
numpy
.
random
.
randn
(
*
param_shape
)
.
astype
(
theano
.
config
.
floatX
)
Scale
=
numpy
.
random
.
randn
(
*
param_shape
)
.
astype
(
'float32'
)
Bias
=
numpy
.
random
.
randn
(
*
param_shape
)
.
astype
(
theano
.
config
.
floatX
)
Bias
=
numpy
.
random
.
randn
(
*
param_shape
)
.
astype
(
'float32'
)
Mean
=
numpy
.
random
.
randn
(
*
param_shape
)
.
astype
(
theano
.
config
.
floatX
)
Mean
=
numpy
.
random
.
randn
(
*
param_shape
)
.
astype
(
'float32'
)
Var
=
numpy
.
random
.
rand
(
*
param_shape
)
.
astype
(
theano
.
config
.
floatX
)
Var
=
numpy
.
random
.
rand
(
*
param_shape
)
.
astype
(
'float32'
)
outputs_gpu
=
f_gpu
(
X
,
Scale
,
Bias
,
Mean
,
Var
,
Dy
)
outputs_gpu
=
f_gpu
(
X
,
Scale
,
Bias
,
Mean
,
Var
,
Dy
)
outputs_abstract
=
f_abstract
(
X
,
Scale
,
Bias
,
Mean
,
Var
,
Dy
)
outputs_abstract
=
f_abstract
(
X
,
Scale
,
Bias
,
Mean
,
Var
,
Dy
)
outputs_ref
=
f_ref
(
X
,
Scale
,
Bias
,
Mean
,
Var
,
Dy
)
outputs_ref
=
f_ref
(
X
,
Scale
,
Bias
,
Mean
,
Var
,
Dy
)
...
@@ -1017,7 +1019,7 @@ def test_batchnorm_inference_inplace():
...
@@ -1017,7 +1019,7 @@ def test_batchnorm_inference_inplace():
raise
SkipTest
(
"batch normalization requires cudnn v5+"
)
raise
SkipTest
(
"batch normalization requires cudnn v5+"
)
utt
.
seed_rng
()
utt
.
seed_rng
()
x
,
scale
,
bias
,
mean
,
var
=
(
T
.
tensor4
(
n
)
for
n
in
(
'x'
,
'scale'
,
'bias'
,
'mean'
,
'var'
))
x
,
scale
,
bias
,
mean
,
var
=
(
T
.
f
tensor4
(
n
)
for
n
in
(
'x'
,
'scale'
,
'bias'
,
'mean'
,
'var'
))
data_shape
=
(
5
,
10
,
30
,
25
)
data_shape
=
(
5
,
10
,
30
,
25
)
param_shape
=
(
1
,
10
,
30
,
25
)
param_shape
=
(
1
,
10
,
30
,
25
)
...
@@ -1031,11 +1033,11 @@ def test_batchnorm_inference_inplace():
...
@@ -1031,11 +1033,11 @@ def test_batchnorm_inference_inplace():
assert
nodes
[
0
]
.
op
.
inplace
assert
nodes
[
0
]
.
op
.
inplace
# run
# run
X
=
4
+
3
*
numpy
.
random
.
randn
(
*
data_shape
)
.
astype
(
theano
.
config
.
floatX
)
X
=
4
+
3
*
numpy
.
random
.
randn
(
*
data_shape
)
.
astype
(
'float32'
)
Scale
=
numpy
.
random
.
randn
(
*
param_shape
)
.
astype
(
theano
.
config
.
floatX
)
Scale
=
numpy
.
random
.
randn
(
*
param_shape
)
.
astype
(
'float32'
)
Bias
=
numpy
.
random
.
randn
(
*
param_shape
)
.
astype
(
theano
.
config
.
floatX
)
Bias
=
numpy
.
random
.
randn
(
*
param_shape
)
.
astype
(
'float32'
)
Mean
=
numpy
.
random
.
randn
(
*
param_shape
)
.
astype
(
theano
.
config
.
floatX
)
Mean
=
numpy
.
random
.
randn
(
*
param_shape
)
.
astype
(
'float32'
)
Var
=
numpy
.
random
.
rand
(
*
param_shape
)
.
astype
(
theano
.
config
.
floatX
)
Var
=
numpy
.
random
.
rand
(
*
param_shape
)
.
astype
(
'float32'
)
f
(
X
,
Scale
,
Bias
,
Mean
,
Var
)
f
(
X
,
Scale
,
Bias
,
Mean
,
Var
)
...
@@ -1045,7 +1047,7 @@ def test_dnn_batchnorm_valid_and_invalid_axes():
...
@@ -1045,7 +1047,7 @@ def test_dnn_batchnorm_valid_and_invalid_axes():
if
cuda
.
dnn
.
version
()
<
(
5000
,
5000
):
if
cuda
.
dnn
.
version
()
<
(
5000
,
5000
):
raise
SkipTest
(
"batch normalization requires cudnn v5+"
)
raise
SkipTest
(
"batch normalization requires cudnn v5+"
)
for
vartype
in
(
T
.
tensor5
,
T
.
tensor4
,
T
.
tensor3
,
T
.
matrix
):
for
vartype
in
(
T
.
ftensor5
,
T
.
ftensor4
,
T
.
ftensor3
,
T
.
f
matrix
):
x
,
scale
,
bias
,
mean
,
var
,
dy
=
(
vartype
(
n
)
x
,
scale
,
bias
,
mean
,
var
,
dy
=
(
vartype
(
n
)
for
n
in
(
'x'
,
'scale'
,
'bias'
,
'mean'
,
'var'
,
'dy'
))
for
n
in
(
'x'
,
'scale'
,
'bias'
,
'mean'
,
'var'
,
'dy'
))
ndim
=
x
.
ndim
ndim
=
x
.
ndim
...
...
theano/scalar/basic.py
浏览文件 @
ea927aef
...
@@ -88,6 +88,14 @@ def upcast(dtype, *dtypes):
...
@@ -88,6 +88,14 @@ def upcast(dtype, *dtypes):
return
rval
return
rval
def as_common_dtype(*vars):
    """
    Cast all the given variables to one shared dtype.

    The shared dtype is whatever `upcast` returns when called with the
    ``dtype`` of every argument. Each argument only needs a ``dtype``
    attribute and an ``astype`` method (intended for theano.scalar.Scalar
    and TensorVariable).

    Returns a generator yielding the converted variables in the order
    they were passed; the conversions run as the generator is consumed
    (e.g. by tuple unpacking at the call site).
    """
    # The common dtype is resolved eagerly, before the generator is built.
    common = upcast(*(var.dtype for var in vars))
    return (var.astype(common) for var in vars)
def
get_scalar_type
(
dtype
):
def
get_scalar_type
(
dtype
):
"""
"""
Return a Scalar(dtype) object.
Return a Scalar(dtype) object.
...
...
theano/tensor/nnet/bn.py
浏览文件 @
ea927aef
...
@@ -7,7 +7,7 @@ from theano.gof.opt import copy_stack_trace
...
@@ -7,7 +7,7 @@ from theano.gof.opt import copy_stack_trace
from
theano.tensor
import
as_tensor_variable
,
TensorType
from
theano.tensor
import
as_tensor_variable
,
TensorType
from
theano.tensor
import
basic
as
T
from
theano.tensor
import
basic
as
T
from
theano.tensor.opt
import
register_specialize_device
from
theano.tensor.opt
import
register_specialize_device
from
theano.scalar
import
Composite
from
theano.scalar
import
Composite
,
as_common_dtype
from
theano.scalar
import
add
,
sub
,
true_div
,
mul
from
theano.scalar
import
add
,
sub
,
true_div
,
mul
...
@@ -413,15 +413,27 @@ class AbstractBatchNormTrain(Op):
...
@@ -413,15 +413,27 @@ class AbstractBatchNormTrain(Op):
def
make_node
(
self
,
x
,
scale
,
bias
,
epsilon
=
1e-4
,
def
make_node
(
self
,
x
,
scale
,
bias
,
epsilon
=
1e-4
,
running_average_factor
=
0.1
,
running_average_factor
=
0.1
,
running_mean
=
None
,
running_var
=
None
):
running_mean
=
None
,
running_var
=
None
):
x
=
as_tensor_variable
(
x
)
scale
=
as_tensor_variable
(
scale
)
bias
=
as_tensor_variable
(
bias
)
epsilon
=
as_tensor_variable
(
epsilon
)
running_average_factor
=
as_tensor_variable
(
running_average_factor
)
if
running_mean
is
not
None
:
running_mean
=
as_tensor_variable
(
running_mean
)
if
running_var
is
not
None
:
running_var
=
as_tensor_variable
(
running_var
)
assert
x
.
ndim
==
scale
.
ndim
==
bias
.
ndim
assert
x
.
ndim
==
scale
.
ndim
==
bias
.
ndim
assert
((
running_mean
is
None
and
running_var
is
None
)
or
assert
((
running_mean
is
None
and
running_var
is
None
)
or
(
running_mean
is
not
None
and
running_var
is
not
None
))
(
running_mean
is
not
None
and
running_var
is
not
None
))
assert
(
running_mean
is
None
or
running_mean
.
ndim
==
x
.
ndim
)
assert
(
running_mean
is
None
or
running_mean
.
ndim
==
x
.
ndim
)
assert
(
running_var
is
None
or
running_var
.
ndim
==
x
.
ndim
)
assert
(
running_var
is
None
or
running_var
.
ndim
==
x
.
ndim
)
if
not
isinstance
(
epsilon
,
theano
.
Variable
):
# Upcast to common dtype on the non-scalar
epsilon
=
as_tensor_variable
(
epsilon
)
# Keep as is dtype of scalar (epsilon and running_average_factor)
if
not
isinstance
(
running_average_factor
,
theano
.
Variable
):
if
running_mean
:
running_average_factor
=
as_tensor_variable
(
running_average_factor
)
x
,
scale
,
bias
,
running_mean
,
running_var
=
as_common_dtype
(
x
,
scale
,
bias
,
running_mean
,
running_var
)
else
:
x
,
scale
,
bias
=
as_common_dtype
(
x
,
scale
,
bias
)
inputs
=
[
x
,
scale
,
bias
,
epsilon
,
running_average_factor
]
inputs
=
[
x
,
scale
,
bias
,
epsilon
,
running_average_factor
]
output_types
=
[
x
.
type
(),
scale
.
type
(),
scale
.
type
()]
output_types
=
[
x
.
type
(),
scale
.
type
(),
scale
.
type
()]
if
running_mean
is
not
None
and
running_var
is
not
None
:
if
running_mean
is
not
None
and
running_var
is
not
None
:
...
@@ -513,9 +525,18 @@ class AbstractBatchNormInference(Op):
...
@@ -513,9 +525,18 @@ class AbstractBatchNormInference(Op):
return
[
shape
[
0
]]
return
[
shape
[
0
]]
def
make_node
(
self
,
x
,
scale
,
bias
,
estimated_mean
,
estimated_variance
,
epsilon
=
1e-4
):
def
make_node
(
self
,
x
,
scale
,
bias
,
estimated_mean
,
estimated_variance
,
epsilon
=
1e-4
):
assert
x
.
ndim
==
scale
.
ndim
==
bias
.
ndim
==
estimated_mean
.
ndim
==
estimated_variance
.
ndim
x
=
as_tensor_variable
(
x
)
if
not
isinstance
(
epsilon
,
theano
.
Variable
):
scale
=
as_tensor_variable
(
scale
)
bias
=
as_tensor_variable
(
bias
)
estimated_mean
=
as_tensor_variable
(
estimated_mean
)
estimated_variance
=
as_tensor_variable
(
estimated_variance
)
epsilon
=
as_tensor_variable
(
epsilon
)
epsilon
=
as_tensor_variable
(
epsilon
)
# Upcast to common dtype on the non-scalar
# Keep as is dtype of scalar (epsilon)
x
,
scale
,
bias
,
estimated_mean
,
estimated_variance
=
as_common_dtype
(
x
,
scale
,
bias
,
estimated_mean
,
estimated_variance
)
assert
x
.
ndim
==
scale
.
ndim
==
bias
.
ndim
==
estimated_mean
.
ndim
==
estimated_variance
.
ndim
return
Apply
(
self
,
[
x
,
scale
,
bias
,
estimated_mean
,
estimated_variance
,
epsilon
],
[
x
.
type
()])
return
Apply
(
self
,
[
x
,
scale
,
bias
,
estimated_mean
,
estimated_variance
,
epsilon
],
[
x
.
type
()])
def
grad
(
self
,
inputs
,
grads
):
def
grad
(
self
,
inputs
,
grads
):
...
@@ -561,9 +582,18 @@ class AbstractBatchNormTrainGrad(Op):
...
@@ -561,9 +582,18 @@ class AbstractBatchNormTrainGrad(Op):
self
.
axes
=
axes
self
.
axes
=
axes
def
make_node
(
self
,
x
,
dy
,
scale
,
x_mean
,
x_invstd
,
epsilon
=
1e-4
):
def
make_node
(
self
,
x
,
dy
,
scale
,
x_mean
,
x_invstd
,
epsilon
=
1e-4
):
assert
x
.
ndim
==
dy
.
ndim
==
scale
.
ndim
==
x_mean
.
ndim
==
x_invstd
.
ndim
x
=
as_tensor_variable
(
x
)
if
not
isinstance
(
epsilon
,
theano
.
Variable
):
dy
=
as_tensor_variable
(
dy
)
scale
=
as_tensor_variable
(
scale
)
x_mean
=
as_tensor_variable
(
x_mean
)
x_invstd
=
as_tensor_variable
(
x_invstd
)
epsilon
=
as_tensor_variable
(
epsilon
)
epsilon
=
as_tensor_variable
(
epsilon
)
# Upcast to common dtype on the non-scalar
# Keep as is dtype of scalar (epsilon)
x
,
dy
,
scale
,
x_mean
,
x_invstd
=
as_common_dtype
(
x
,
dy
,
scale
,
x_mean
,
x_invstd
)
assert
x
.
ndim
==
dy
.
ndim
==
scale
.
ndim
==
x_mean
.
ndim
==
x_invstd
.
ndim
return
Apply
(
self
,
[
x
,
dy
,
scale
,
x_mean
,
x_invstd
,
epsilon
],
return
Apply
(
self
,
[
x
,
dy
,
scale
,
x_mean
,
x_invstd
,
epsilon
],
[
x
.
type
(),
scale
.
type
(),
scale
.
type
()])
[
x
.
type
(),
scale
.
type
(),
scale
.
type
()])
...
@@ -612,6 +642,9 @@ def local_abstract_batch_norm_train(node):
...
@@ -612,6 +642,9 @@ def local_abstract_batch_norm_train(node):
mean
=
x
.
mean
(
axes
,
keepdims
=
True
)
mean
=
x
.
mean
(
axes
,
keepdims
=
True
)
var
=
x
.
var
(
axes
,
keepdims
=
True
)
var
=
x
.
var
(
axes
,
keepdims
=
True
)
# The epsilon should not upcast the dtype.
if
var
.
dtype
==
'float32'
and
epsilon
.
dtype
==
'float64'
:
epsilon
=
epsilon
.
astype
(
'float32'
)
invstd
=
T
.
inv
(
T
.
sqrt
(
var
+
epsilon
))
invstd
=
T
.
inv
(
T
.
sqrt
(
var
+
epsilon
))
out
=
(
x
-
mean
)
*
(
scale
*
invstd
)
+
bias
out
=
(
x
-
mean
)
*
(
scale
*
invstd
)
+
bias
results
=
[
out
,
mean
,
invstd
]
results
=
[
out
,
mean
,
invstd
]
...
@@ -687,6 +720,10 @@ def local_abstract_batch_norm_inference(node):
...
@@ -687,6 +720,10 @@ def local_abstract_batch_norm_inference(node):
not
isinstance
(
epsilon
.
type
,
TensorType
):
not
isinstance
(
epsilon
.
type
,
TensorType
):
return
None
return
None
# The epsilon should not upcast the dtype.
if
estimated_variance
.
dtype
==
'float32'
and
epsilon
.
dtype
==
'float64'
:
epsilon
=
epsilon
.
astype
(
'float32'
)
result
=
(
x
-
estimated_mean
)
*
(
scale
/
T
.
sqrt
(
estimated_variance
+
epsilon
))
+
bias
result
=
(
x
-
estimated_mean
)
*
(
scale
/
T
.
sqrt
(
estimated_variance
+
epsilon
))
+
bias
result
=
T
.
patternbroadcast
(
result
,
node
.
outputs
[
0
]
.
broadcastable
)
result
=
T
.
patternbroadcast
(
result
,
node
.
outputs
[
0
]
.
broadcastable
)
...
...
theano/tensor/nnet/tests/test_bn.py
浏览文件 @
ea927aef
...
@@ -201,7 +201,7 @@ def test_batch_normalization_train():
...
@@ -201,7 +201,7 @@ def test_batch_normalization_train():
bn
.
AbstractBatchNormTrainGrad
))
bn
.
AbstractBatchNormTrainGrad
))
for
n
in
f
.
maker
.
fgraph
.
toposort
()])
for
n
in
f
.
maker
.
fgraph
.
toposort
()])
# run
# run
for
data_shape
in
((
5
,
10
,
30
,
40
,
10
),
(
4
,
3
,
1
,
1
,
1
),
(
1
,
1
,
5
,
5
,
5
)):
for
data_shape
in
((
5
,
10
,
30
,
40
,
10
),
(
4
,
3
,
1
,
1
,
1
),
(
2
,
3
,
5
,
5
,
5
)):
data_shape
=
data_shape
[:
ndim
]
data_shape
=
data_shape
[:
ndim
]
param_shape
=
tuple
(
1
if
d
in
axes2
else
s
param_shape
=
tuple
(
1
if
d
in
axes2
else
s
for
d
,
s
in
enumerate
(
data_shape
))
for
d
,
s
in
enumerate
(
data_shape
))
...
...
theano/tensor/type.py
浏览文件 @
ea927aef
...
@@ -203,9 +203,10 @@ class TensorType(Type):
...
@@ -203,9 +203,10 @@ class TensorType(Type):
"""
"""
Convert a symbolic Variable into a TensorType, if compatible.
Convert a symbolic Variable into a TensorType, if compatible.
For the moment, only a TensorType or CudaNdarrayType will be
For the moment, only a TensorType, GpuArrayType and
converted, provided they have the same number of dimensions,
CudaNdarrayType will be
broadcastable pattern, and dtype.
converted, provided they have the same number of dimensions and
dtype and have "compatible" broadcastable pattern.
"""
"""
if
hasattr
(
other
,
'_as_TensorVariable'
):
if
hasattr
(
other
,
'_as_TensorVariable'
):
...
...
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论