Commit ea927aef, authored by Frédéric Bastien, committed by GitHub

Merge pull request #5474 from nouiz/abstract_bn

Follow up from abstract bn PR
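For readers skimming the diff below, the interface being lifted to the GPU here is `theano.tensor.nnet.bn.batch_normalization_train`. A minimal usage sketch follows; the `'per-activation'` mode, the gradient construction via `known_grads`, and the shapes mirror the tests changed in this commit, while the surrounding setup is an assumed but typical one and not part of the change:

```python
import numpy as np
import theano
import theano.tensor as T
from theano.tensor.nnet import bn

x, scale, bias, dy = (T.tensor4(n) for n in ('x', 'scale', 'bias', 'dy'))

# Forward pass builds AbstractBatchNormTrain; the optimizations touched below
# replace it with the cuDNN op or, failing that, with a plain Theano graph.
out, x_mean, x_invstd = bn.batch_normalization_train(x, scale, bias,
                                                     'per-activation')
# Backward pass through the abstract op.
grads = T.grad(None, wrt=[x, scale, bias], known_grads={out: dy})

f = theano.function([x, scale, bias, dy], [out, x_mean, x_invstd] + grads)

data_shape, param_shape = (5, 10, 30, 25), (1, 10, 30, 25)
X = np.random.randn(*data_shape).astype(theano.config.floatX)
Dy = np.random.randn(*data_shape).astype(theano.config.floatX)
Scale = np.random.randn(*param_shape).astype(theano.config.floatX)
Bias = np.random.randn(*param_shape).astype(theano.config.floatX)
f(X, Scale, Bias, Dy)
```

With the gpuarray backend enabled, the registrations added in opt.py in this commit try the cuDNN lifters first (tags 'cudnn', 'batchnorm_dnn') and fall back to a plain Theano graph on the GPU when cuDNN is unavailable.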
@@ -28,7 +28,6 @@ from theano.tensor.nnet.abstract_conv import (AbstractConv2d,
assert_conv_shape)
from theano.tensor.signal.pool import (
Pool, MaxPoolGrad, AveragePoolGrad)
-from theano.tensor.nnet import bn
from . import pygpu
from .type import (get_context, gpu_context_type, list_contexts,
GpuArraySharedVariable)
@@ -87,10 +86,6 @@ def _make_handle(ctx):
return handle
-def raise_no_cudnn(msg="cuDNN is required for convolution and pooling"):
-raise RuntimeError(msg)
def _dnn_check_compile():
preambule = """
#include <stdio.h>
@@ -2733,7 +2728,7 @@ def local_abstractconv_cudnn_graph(op, context_name, inputs, outputs):
inp2 = inputs[1]
if not dnn_available(inp1.type.context_name):
-raise_no_cudnn()
+return
if op.filter_flip:
conv_mode = 'conv'
@@ -2776,7 +2771,7 @@ def local_abstractconv3d_cudnn_graph(op, context_name, inputs, outputs):
inp2 = inputs[1]
if not dnn_available(inp1.type.context_name):
-raise_no_cudnn()
+return
if op.filter_flip:
conv_mode = 'conv'
@@ -2902,7 +2897,7 @@ def local_dnn_convi_output_merge(node, *inputs):
def local_gpua_pool_dnn_alternative(op, ctx_name, inputs, outputs):
if not dnn_available(ctx_name):
-raise_no_cudnn()
+return
if not op.ignore_border:
return
img, ws, stride, pad = inputs
@@ -2931,7 +2926,7 @@ pool_db2.register("local_gpua_pool_dnn_alternative",
def local_gpua_pool_dnn_grad_stride(op, ctx_name, inputs, outputs):
if not dnn_available(ctx_name):
-raise_no_cudnn()
+return
if not op.ignore_border:
return
inp, out, out_grad, ws, stride, pad = inputs
@@ -2975,7 +2970,7 @@ pool_db2.register("local_gpua_pool_dnn_grad_stride",
def local_gpua_avg_pool_dnn_grad_stride(op, ctx_name, inputs, outputs):
if not dnn_available(ctx_name):
-raise_no_cudnn()
+return
if not op.ignore_border:
return
inp, out_grad, ws, stride, pad = inputs
@@ -3018,7 +3013,7 @@ pool_db2.register("local_gpua_avg_pool_dnn_grad_stride",
def local_softmax_dnn(node):
if isinstance(node.op, GpuSoftmax):
if not dnn_available(node.outputs[0].type.context_name):
-raise_no_cudnn()
+return
ins = node.inputs[0].dimshuffle(0, 1, 'x', 'x')
ins = gpu_contiguous(ins)
out = GpuDnnSoftmax('accurate', 'channel')(ins)
@@ -3035,9 +3030,6 @@ def local_log_softmax_dnn(node):
node.inputs[0].owner and
isinstance(node.inputs[0].owner.op, GpuDnnSoftmax) and
len(node.inputs[0].clients) == 1):
-if version(raises=False) < 3000:
-# No log-softmax before cudnn v3
-raise_no_cudnn("Need cuDNN v3 for LogSoftmax")
softmax_node = node.inputs[0].owner
new_softmax = GpuDnnSoftmax('log', softmax_node.op.mode)
return [new_softmax(softmax_node.inputs[0])]
@@ -3051,9 +3043,8 @@ def local_gpua_logsoftmax_to_dnn(op, ctx_name, inputs, outputs):
inp = inputs[0]
if inp.ndim != 2:
return
-if not dnn_available(ctx_name) or version(raises=False) < 3000:
-# No log-softmax before cudnn v3
-raise_no_cudnn("Need cuDNN v3 for LogSoftmax")
+if not dnn_available(ctx_name):
+return
inp = inp.dimshuffle(0, 1, 'x', 'x')
inp.tag.context_name = ctx_name
@@ -3087,7 +3078,7 @@ gpu_seqopt.register("NoCuDNNRaise", NoCuDNNRaise(), 0, 'cudnn')
@register_opt2([SoftmaxGrad], 'cudnn', 'fast_compile')
def local_gpua_softmax_dnn_grad(op, ctx_name, inputs, outputs):
if not dnn_available(ctx_name):
-raise_no_cudnn("cuDNN needed for SoftmaxGrad")
+return
ins = []
for n in inputs:
n = as_gpuarray_variable(n, ctx_name)
@@ -3100,9 +3091,6 @@ def local_gpua_softmax_dnn_grad(op, ctx_name, inputs, outputs):
return [out.dimshuffle(0, 2)]
-@register_opt('cudnn', 'fast_compile')
-@op_lifter([bn.AbstractBatchNormTrain])
-@register_opt2([bn.AbstractBatchNormTrain], 'cudnn', 'fast_compile')
def local_abstract_batch_norm_train_cudnn(op, ctx_name, inputs, outputs):
x, scale, bias, epsilon, running_average_factor = inputs[:5]
running_mean = inputs[5] if len(inputs) > 5 else None
@@ -3130,8 +3118,7 @@ def local_abstract_batch_norm_train_cudnn(op, ctx_name, inputs, outputs):
ctx = infer_context_name(*inputs)
if not dnn_available(ctx):
-# TODO should this raise_no_cudnn?
-return None
+return
x = as_gpuarray_variable(x, context_name=ctx)
scale = as_gpuarray_variable(scale, context_name=ctx)
bias = as_gpuarray_variable(bias, context_name=ctx)
@@ -3186,9 +3173,6 @@ def local_batch_norm_inference_inplace(node):
return [GpuDnnBatchNormInference(mode=node.op.mode, inplace=True)(*node.inputs)]
-@register_opt('cudnn', 'fast_compile')
-@op_lifter([bn.AbstractBatchNormTrainGrad])
-@register_opt2([bn.AbstractBatchNormTrainGrad], 'cudnn', 'fast_compile')
def local_abstract_batch_norm_train_grad_cudnn(op, ctx_name, inputs, outputs):
x, dy, scale, x_mean, x_invstd, epsilon = inputs
@@ -3234,8 +3218,7 @@ def local_abstract_batch_norm_train_grad_cudnn(op, ctx_name, inputs, outputs):
ctx = infer_context_name(*inputs)
if not dnn_available(ctx):
-# TODO should this raise_no_cudnn?
-return None
+return
x = as_gpuarray_variable(x, context_name=ctx)
dy = as_gpuarray_variable(dy, context_name=ctx)
scale = as_gpuarray_variable(scale, context_name=ctx)
@@ -3257,9 +3240,6 @@ def local_abstract_batch_norm_train_grad_cudnn(op, ctx_name, inputs, outputs):
return [g_wrt_inputs, g_wrt_scale, g_wrt_bias]
-@register_opt('cudnn', 'fast_compile')
-@op_lifter([bn.AbstractBatchNormInference])
-@register_opt2([bn.AbstractBatchNormInference], 'cudnn', 'fast_compile')
def local_abstract_batch_norm_inference_cudnn(op, ctx_name, inputs, outputs):
x, scale, bias, estimated_mean, estimated_variance, epsilon = inputs
@@ -3280,8 +3260,7 @@ def local_abstract_batch_norm_inference_cudnn(op, ctx_name, inputs, outputs):
ctx = infer_context_name(*inputs)
if not dnn_available(ctx):
-# TODO should this raise_no_cudnn?
-return None
+return
x = as_gpuarray_variable(x, context_name=ctx)
scale = as_gpuarray_variable(scale, context_name=ctx)
bias = as_gpuarray_variable(bias, context_name=ctx)
...
@@ -22,6 +22,7 @@ from theano.scalar.basic import Scalar, Pow, Cast
from theano.scalar.basic_scipy import Erfinv, Erfcinv
from theano.scan_module import scan_utils, scan_op, scan_opt
+from theano.tensor.nnet import bn
from theano.tensor.nnet.conv import ConvOp
from theano.tensor.nnet.blocksparse import SparseBlockGemv, SparseBlockOuter
from theano.tensor.nnet.abstract_conv import (BaseAbstractConv,
@@ -1964,9 +1965,8 @@ abstractconv_groupopt = theano.gof.optdb.LocalGroupDB()
abstractconv_groupopt.__name__ = "gpuarray_abstractconv_opts"
register_opt('fast_compile')(abstractconv_groupopt)
-# cuDNN is first, but only registered if cuDNN is available.
-# (we import these opts here instead of at the top of this file
-# to avoid a circular dependency problem with dnn)
+# We import these opts here instead of at the top of this file
+# to avoid a circular dependency problem with dnn
from .dnn import (local_abstractconv_cudnn, local_abstractconv_gw_cudnn,
local_abstractconv_gi_cudnn)  # noqa: 402
abstractconv_groupopt.register('local_abstractconv_dnn',
@@ -2005,3 +2005,56 @@ abstractconv_groupopt.register('local_abstractconv3d_gradinputs',
local_abstractconv3d_gradinputs_gemm, 30,
'conv_gemm',
'gpuarray', 'fast_compile', 'fast_run')
+# Register cuDNN batch normalization implementation
+# We import these opts here instead of at the top of this file
+# to avoid a circular dependency problem with dnn
+from .dnn import (local_abstract_batch_norm_train_cudnn,
+local_abstract_batch_norm_train_grad_cudnn,
+local_abstract_batch_norm_inference_cudnn)  # noqa: 402
+abstract_batch_norm_groupopt = theano.gof.optdb.LocalGroupDB()
+abstract_batch_norm_groupopt.__name__ = "gpuarray_batchnorm_opts"
+register_opt('fast_compile')(abstract_batch_norm_groupopt)
+abstract_batch_norm_db = LocalGroupDB()
+abstract_batch_norm_db2 = LocalGroupDB(
+local_opt=theano.gof.opt.GraphToGPULocalOptGroup)
+abstract_batch_norm_db2.__name__ = "abstract_batch_norm_db2"
+register_opt('fast_compile', name='abstract_batch_norm_db')(
+abstract_batch_norm_db)
+register_opt2([bn.AbstractBatchNormTrain,
+bn.AbstractBatchNormTrainGrad,
+bn.AbstractBatchNormInference],
+'fast_compile', name='abstract_batch_norm_db2')(
+abstract_batch_norm_db2)
+for op, fct, cpu in [(bn.AbstractBatchNormTrain,
+local_abstract_batch_norm_train_cudnn,
+bn.local_abstract_batch_norm_train),
+(bn.AbstractBatchNormTrainGrad,
+local_abstract_batch_norm_train_grad_cudnn,
+bn.local_abstract_batch_norm_train_grad),
+(bn.AbstractBatchNormInference,
+local_abstract_batch_norm_inference_cudnn,
+bn.local_abstract_batch_norm_inference)]:
+lifter = op_lifter([op])(fct)
+abstract_batch_norm_db.register(fct.__name__,
+lifter,
+'gpuarray', 'fast_compile', 'fast_run',
+'cudnn', 'batchnorm_dnn',
+position=1)
+abstract_batch_norm_db2.register(fct.__name__,
+local_optimizer([op])(fct),
+'gpuarray', 'fast_compile', 'fast_run',
+'cudnn', 'batchnorm_dnn',
+position=1)
+# cpu is a normal optimization. We can't register it in
+# GraphToGPU. So for now, only add it to the slower EQ phase. If
+# there is no cuDNN, we still want to move it to the GPU now with
+# a Theano graph so to have this graph on the GPU.
+abstract_batch_norm_db.register(cpu.__name__, cpu,
+'gpuarray', 'fast_compile', 'fast_run',
+position='last')
...
@@ -22,6 +22,7 @@ if theano.config.mode == 'FAST_COMPILE':
else:
mode_with_gpu = theano.compile.mode.get_default_mode().including('gpuarray').excluding('gpu')
mode_without_gpu = theano.compile.mode.get_default_mode().excluding('gpuarray')
+mode_without_gpu.check_py_code = False
# If using float16, cast reference input to float32
...
@@ -26,6 +26,10 @@ from .rnn_support import Model, GRU, LSTM, WrapperLayer
from theano.configdefaults import SUPPORTED_DNN_CONV_ALGO_FWD
+mode_with_gpu = mode_with_gpu.including()
+# Globally disabled for mode_without_gpu
+mode_with_gpu.check_py_code = False
# If using float16, set CUDNN precision to float32
def set_precision(floatX):
@@ -1451,7 +1455,7 @@ def test_dnn_batchnorm_train():
bn.AbstractBatchNormTrainGrad)) for n
in f_abstract.maker.fgraph.toposort()])
# run
-for data_shape in ((5, 10, 30, 40, 10, 5), (4, 3, 1, 1, 1, 1), (1, 1, 5, 5, 5, 5)):
+for data_shape in ((5, 10, 30, 4, 10, 5), (4, 3, 1, 1, 1, 1), (2, 3, 5, 5, 5, 5)):
data_shape = data_shape[:ndim]
param_shape = tuple(1 if d in axes else s
for d, s in enumerate(data_shape))
@@ -1505,7 +1509,7 @@ def test_dnn_batchnorm_train_without_running_averages():
bn.batch_normalization_train(x, scale, bias, 'per-activation')
# backward pass
grads_gpu = T.grad(None, wrt=[x, scale, bias], known_grads={out_gpu: dy})
-grads_abstract = T.grad(None, wrt=[x, scale, bias], known_grads={out_gpu: dy})
+grads_abstract = T.grad(None, wrt=[x, scale, bias], known_grads={out_abstract: dy})
# compile
f_gpu = theano.function([x, scale, bias, dy],
[out_gpu, x_mean_gpu, x_invstd_gpu] +
@@ -1533,6 +1537,44 @@ def test_dnn_batchnorm_train_without_running_averages():
f_abstract(X, Scale, Bias, Dy)
+def test_without_dnn_batchnorm_train_without_running_averages():
+# compile and run batch_normalization_train without running averages
+# But disable cudnn and make sure it run on the GPU.
+utt.seed_rng()
+x, scale, bias, dy = T.tensor4('x'), T.tensor4('scale'), T.tensor4('bias'), T.tensor4('dy')
+data_shape = (5, 10, 30, 25)
+param_shape = (1, 10, 30, 25)
+# forward pass
+out_abstract, x_mean_abstract, x_invstd_abstract = \
+bn.batch_normalization_train(x, scale, bias, 'per-activation')
+# backward pass
+grads_abstract = T.grad(None, wrt=[x, scale, bias], known_grads={out_abstract: dy})
+# compile
+f_abstract = theano.function([x, scale, bias, dy],
+[out_abstract, x_mean_abstract, x_invstd_abstract] +
+grads_abstract,
+mode=mode_with_gpu.excluding('cudnn'))
+# check if the abstract Ops have been replaced
+assert not any([isinstance(n.op, dnn.GpuDnnBatchNorm)
+for n in f_abstract.maker.fgraph.toposort()])
+assert not any([isinstance(n.op, dnn.GpuDnnBatchNormGrad)
+for n in f_abstract.maker.fgraph.toposort()])
+assert not any([isinstance(n.op, (bn.AbstractBatchNormTrain,
+bn.AbstractBatchNormInference,
+bn.AbstractBatchNormTrainGrad))
+for n in f_abstract.maker.fgraph.toposort()])
+assert any([isinstance(n.op, dnn.GpuElemwise)
+for n in f_abstract.maker.fgraph.toposort()])
+# run
+X = 4 + 3 * np.random.randn(*data_shape).astype(theano.config.floatX)
+Dy = -1 + 2 * np.random.randn(*data_shape).astype(theano.config.floatX)
+Scale = np.random.randn(*param_shape).astype(theano.config.floatX)
+Bias = np.random.randn(*param_shape).astype(theano.config.floatX)
+f_abstract(X, Scale, Bias, Dy)
def test_dnn_batchnorm_train_inplace():
# test inplace_running_mean and inplace_running_var
if not dnn.dnn_available(test_ctx_name):
@@ -1628,7 +1670,7 @@ def test_batchnorm_inference():
bn.AbstractBatchNormTrainGrad)) for n
in f_abstract.maker.fgraph.toposort()])
# run
-for data_shape in ((10, 20, 30, 40, 10, 5), (4, 3, 1, 1, 1, 1), (1, 1, 5, 5, 5, 5)):
+for data_shape in ((10, 2, 30, 4, 10, 5), (4, 3, 1, 1, 1, 1), (1, 1, 5, 5, 5, 5)):
data_shape = data_shape[:ndim]
param_shape = tuple(1 if d in axes else s
for d, s in enumerate(data_shape))
...
@@ -3705,6 +3705,19 @@ def local_gpu_batch_norm_inference_inplace(node):
inplace=True)(*node.inputs)]
+def values_eq_approx_high_tol(a, b):
+"""
+This fct is needed to don't have DebugMode raise useless
+errors due to rounding error.
+This happen as we reduce on the two last dimensions, so this
+can raise the absolute error if the number of elements we
+reduce on is significant.
+"""
+return tensor.TensorType.values_eq_approx(a, b, atol=0.015)
@local_optimizer([bn.AbstractBatchNormTrainGrad])
def local_abstract_batch_norm_train_grad_cudnn(node):
if not isinstance(node.op, bn.AbstractBatchNormTrainGrad):
@@ -3781,6 +3794,9 @@ def local_abstract_batch_norm_train_grad_cudnn(node):
if isinstance(node.outputs[2].type, tensor.TensorType):
g_wrt_bias = tensor.as_tensor_variable(g_wrt_bias)
# TODO copy_stack_trace?
+g_wrt_inputs.tag.values_eq_approx = values_eq_approx_high_tol
+g_wrt_scale.tag.values_eq_approx = values_eq_approx_high_tol
return [g_wrt_inputs, g_wrt_scale, g_wrt_bias]
...
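As a side note, the comparator added above only loosens the absolute tolerance that DebugMode uses when it compares results for the tagged outputs. A hedged, self-contained illustration of the call it wraps (only `TensorType.values_eq_approx` and NumPy are assumed; the sample values are made up):

```python
import numpy as np
from theano import tensor

a = np.asarray([1.0, 2.0, 3.0], dtype='float32')
b = a + 0.01  # a rounding-error-sized difference from reducing many elements

# With the default tolerances this comparison may well fail...
print(tensor.TensorType.values_eq_approx(a, b))
# ...while the atol=0.015 used by values_eq_approx_high_tol accepts it.
print(tensor.TensorType.values_eq_approx(a, b, atol=0.015))  # True
```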
@@ -38,6 +38,8 @@ if theano.config.mode == 'FAST_COMPILE':
else:
mode_with_gpu = theano.compile.mode.get_default_mode().including('gpu')
mode_without_gpu = theano.compile.mode.get_default_mode().excluding('gpu')
+mode_with_gpu.check_py_code = False
+mode_without_gpu.check_py_code = False
def test_dnn_conv_desc_merge():
@@ -732,7 +734,7 @@ def test_batchnorm_train():
raise SkipTest("batch normalization requires cudnn v5+")
utt.seed_rng()
-tensor6 = T.TensorType(theano.config.floatX, (False,) * 6)
+tensor6 = T.TensorType('float32', (False,) * 6)
for mode in ('per-activation', 'spatial'):
for vartype in (tensor6, T.ftensor5, T.ftensor4, T.ftensor3, T.fmatrix, T.fvector):
@@ -766,7 +768,7 @@ def test_batchnorm_train():
x_invstd_ref = T.inv(T.sqrt(x_var_ref + eps))
scale_ref = T.addbroadcast(scale, *axes)
bias_ref = T.addbroadcast(bias, *axes)
-m = T.cast(T.prod(x.shape) / T.prod(scale.shape), theano.config.floatX)
+m = T.cast(T.prod(x.shape) / T.prod(scale.shape), 'float32')
out_ref = (x - x_mean_ref) * (scale_ref * x_invstd_ref) + bias_ref
out_running_mean_ref = running_mean * (1 - running_average_factor) + \
x_mean_ref * running_average_factor
@@ -801,16 +803,16 @@ def test_batchnorm_train():
bn.AbstractBatchNormTrainGrad)) for n
in f_abstract.maker.fgraph.toposort()])
# run
-for data_shape in ((5, 10, 30, 40, 10, 5), (4, 3, 1, 1, 1, 1), (1, 1, 5, 5, 5, 5)):
+for data_shape in ((5, 2, 30, 4, 10, 5), (4, 3, 1, 1, 1, 1), (2, 3, 5, 5, 5, 5)):
data_shape = data_shape[:ndim]
param_shape = tuple(1 if d in axes else s
for d, s in enumerate(data_shape))
-X = 4 + 3 * numpy.random.randn(*data_shape).astype(theano.config.floatX)
-Dy = -1 + 2 * numpy.random.randn(*data_shape).astype(theano.config.floatX)
-Scale = numpy.random.randn(*param_shape).astype(theano.config.floatX)
-Bias = numpy.random.randn(*param_shape).astype(theano.config.floatX)
-Running_mean = numpy.random.randn(*param_shape).astype(theano.config.floatX)
-Running_var = numpy.random.randn(*param_shape).astype(theano.config.floatX)
+X = 4 + 3 * numpy.random.randn(*data_shape).astype('float32')
+Dy = -1 + 2 * numpy.random.randn(*data_shape).astype('float32')
+Scale = numpy.random.randn(*param_shape).astype('float32')
+Bias = numpy.random.randn(*param_shape).astype('float32')
+Running_mean = numpy.random.randn(*param_shape).astype('float32')
+Running_var = numpy.random.randn(*param_shape).astype('float32')
outputs_gpu = f_gpu(X, Scale, Bias, Running_mean, Running_var, Dy)
outputs_abstract = f_abstract(X, Scale, Bias, Running_mean, Running_var, Dy)
outputs_ref = f_ref(X, Scale, Bias, Running_mean, Running_var, Dy)
@@ -844,7 +846,7 @@ def test_dnn_batchnorm_train_without_running_averages():
raise SkipTest("batch normalization requires cudnn v5+")
utt.seed_rng()
-x, scale, bias, dy = T.tensor4('x'), T.tensor4('scale'), T.tensor4('bias'), T.tensor4('dy')
+x, scale, bias, dy = T.ftensor4('x'), T.ftensor4('scale'), T.ftensor4('bias'), T.ftensor4('dy')
data_shape = (5, 10, 30, 25)
param_shape = (1, 10, 30, 25)
@@ -875,10 +877,10 @@ def test_dnn_batchnorm_train_without_running_averages():
bn.AbstractBatchNormTrainGrad))
for n in f_abstract.maker.fgraph.toposort()])
# run
-X = 4 + 3 * numpy.random.randn(*data_shape).astype(theano.config.floatX)
-Dy = -1 + 2 * numpy.random.randn(*data_shape).astype(theano.config.floatX)
-Scale = numpy.random.randn(*param_shape).astype(theano.config.floatX)
-Bias = numpy.random.randn(*param_shape).astype(theano.config.floatX)
+X = 4 + 3 * numpy.random.randn(*data_shape).astype('float32')
+Dy = -1 + 2 * numpy.random.randn(*data_shape).astype('float32')
+Scale = numpy.random.randn(*param_shape).astype('float32')
+Bias = numpy.random.randn(*param_shape).astype('float32')
f_gpu(X, Scale, Bias, Dy)
f_abstract(X, Scale, Bias, Dy)
@@ -891,14 +893,14 @@ def test_dnn_batchnorm_train_inplace():
raise SkipTest("batch normalization requires cudnn v5+")
utt.seed_rng()
-x, scale, bias = T.tensor4('x'), T.tensor4('scale'), T.tensor4('bias')
+x, scale, bias = T.ftensor4('x'), T.ftensor4('scale'), T.ftensor4('bias')
data_shape = (5, 10, 30, 25)
param_shape = (1, 10, 30, 25)
running_mean = shared(
-numpy.random.randn(*param_shape).astype(theano.config.floatX),
+numpy.random.randn(*param_shape).astype('float32'),
broadcastable=(True, False, False, False))
running_var = shared(
-numpy.random.randn(*param_shape).astype(theano.config.floatX),
+numpy.random.randn(*param_shape).astype('float32'),
broadcastable=(True, False, False, False))
# forward pass
@@ -923,9 +925,9 @@ def test_dnn_batchnorm_train_inplace():
assert nodes[0].op.inplace_running_var
assert nodes[0].op.inplace_output
# run
-X = 4 + 3 * numpy.random.randn(*data_shape).astype(theano.config.floatX)
-Scale = numpy.random.randn(*param_shape).astype(theano.config.floatX)
-Bias = numpy.random.randn(*param_shape).astype(theano.config.floatX)
+X = 4 + 3 * numpy.random.randn(*data_shape).astype('float32')
+Scale = numpy.random.randn(*param_shape).astype('float32')
+Bias = numpy.random.randn(*param_shape).astype('float32')
f(X, Scale, Bias)
@@ -936,10 +938,10 @@ def test_batchnorm_inference():
raise SkipTest("batch normalization requires cudnn v5+")
utt.seed_rng()
-tensor6 = T.TensorType(theano.config.floatX, (False,) * 6)
+tensor6 = T.TensorType('float32', (False,) * 6)
for mode in ('per-activation', 'spatial'):
-for vartype in (tensor6, T.tensor5, T.tensor4, T.tensor3, T.matrix, T.vector):
+for vartype in (tensor6, T.ftensor5, T.ftensor4, T.ftensor3, T.fmatrix, T.fvector):
x, scale, bias, mean, var = (vartype(n)
for n in ('x', 'scale', 'bias', 'mean', 'var'))
ndim = x.ndim
@@ -980,16 +982,16 @@ def test_batchnorm_inference():
bn.AbstractBatchNormTrainGrad)) for n
in f_abstract.maker.fgraph.toposort()])
# run
-for data_shape in ((10, 20, 30, 40, 10, 5), (4, 3, 1, 1, 1, 1), (1, 1, 5, 5, 5, 5)):
+for data_shape in ((10, 2, 15, 4, 6, 5), (4, 3, 1, 1, 1, 1), (1, 1, 5, 5, 5, 5)):
data_shape = data_shape[:ndim]
param_shape = tuple(1 if d in axes else s
for d, s in enumerate(data_shape))
-X = 4 + 3 * numpy.random.randn(*data_shape).astype(theano.config.floatX)
-Dy = -1 + 2 * numpy.random.randn(*data_shape).astype(theano.config.floatX)
-Scale = numpy.random.randn(*param_shape).astype(theano.config.floatX)
-Bias = numpy.random.randn(*param_shape).astype(theano.config.floatX)
-Mean = numpy.random.randn(*param_shape).astype(theano.config.floatX)
-Var = numpy.random.rand(*param_shape).astype(theano.config.floatX)
+X = 4 + 3 * numpy.random.randn(*data_shape).astype('float32')
+Dy = -1 + 2 * numpy.random.randn(*data_shape).astype('float32')
+Scale = numpy.random.randn(*param_shape).astype('float32')
+Bias = numpy.random.randn(*param_shape).astype('float32')
+Mean = numpy.random.randn(*param_shape).astype('float32')
+Var = numpy.random.rand(*param_shape).astype('float32')
outputs_gpu = f_gpu(X, Scale, Bias, Mean, Var, Dy)
outputs_abstract = f_abstract(X, Scale, Bias, Mean, Var, Dy)
outputs_ref = f_ref(X, Scale, Bias, Mean, Var, Dy)
@@ -1017,7 +1019,7 @@ def test_batchnorm_inference_inplace():
raise SkipTest("batch normalization requires cudnn v5+")
utt.seed_rng()
-x, scale, bias, mean, var = (T.tensor4(n) for n in ('x', 'scale', 'bias', 'mean', 'var'))
+x, scale, bias, mean, var = (T.ftensor4(n) for n in ('x', 'scale', 'bias', 'mean', 'var'))
data_shape = (5, 10, 30, 25)
param_shape = (1, 10, 30, 25)
@@ -1031,11 +1033,11 @@ def test_batchnorm_inference_inplace():
assert nodes[0].op.inplace
# run
-X = 4 + 3 * numpy.random.randn(*data_shape).astype(theano.config.floatX)
-Scale = numpy.random.randn(*param_shape).astype(theano.config.floatX)
-Bias = numpy.random.randn(*param_shape).astype(theano.config.floatX)
-Mean = numpy.random.randn(*param_shape).astype(theano.config.floatX)
-Var = numpy.random.rand(*param_shape).astype(theano.config.floatX)
+X = 4 + 3 * numpy.random.randn(*data_shape).astype('float32')
+Scale = numpy.random.randn(*param_shape).astype('float32')
+Bias = numpy.random.randn(*param_shape).astype('float32')
+Mean = numpy.random.randn(*param_shape).astype('float32')
+Var = numpy.random.rand(*param_shape).astype('float32')
f(X, Scale, Bias, Mean, Var)
@@ -1045,7 +1047,7 @@ def test_dnn_batchnorm_valid_and_invalid_axes():
if cuda.dnn.version() < (5000, 5000):
raise SkipTest("batch normalization requires cudnn v5+")
-for vartype in (T.tensor5, T.tensor4, T.tensor3, T.matrix):
+for vartype in (T.ftensor5, T.ftensor4, T.ftensor3, T.fmatrix):
x, scale, bias, mean, var, dy = (vartype(n)
for n in ('x', 'scale', 'bias', 'mean', 'var', 'dy'))
ndim = x.ndim
...
@@ -88,6 +88,14 @@ def upcast(dtype, *dtypes):
return rval
+def as_common_dtype(*vars):
+"""
+For for theano.scalar.Scalar and TensorVariable.
+"""
+dtype = upcast(*[v.dtype for v in vars])
+return (v.astype(dtype) for v in vars)
def get_scalar_type(dtype):
"""
Return a Scalar(dtype) object.
...
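A brief, hedged illustration of what the new `as_common_dtype` helper does. Only `theano.scalar.basic.upcast` (the function the hunk above extends) and standard Theano tensor types are assumed; the variable names are made up:

```python
import theano.tensor as T
from theano.scalar.basic import upcast

x = T.fvector('x')   # float32
y = T.dvector('y')   # float64

# upcast picks the common dtype following the usual promotion rules...
common = upcast(x.dtype, y.dtype)   # 'float64'

# ...and as_common_dtype simply casts every argument to that dtype.
x_up, y_up = (v.astype(common) for v in (x, y))
assert x_up.dtype == y_up.dtype == 'float64'
```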
@@ -7,7 +7,7 @@ from theano.gof.opt import copy_stack_trace
from theano.tensor import as_tensor_variable, TensorType
from theano.tensor import basic as T
from theano.tensor.opt import register_specialize_device
-from theano.scalar import Composite
+from theano.scalar import Composite, as_common_dtype
from theano.scalar import add, sub, true_div, mul
@@ -413,15 +413,27 @@ class AbstractBatchNormTrain(Op):
def make_node(self, x, scale, bias, epsilon=1e-4,
running_average_factor=0.1,
running_mean=None, running_var=None):
+x = as_tensor_variable(x)
+scale = as_tensor_variable(scale)
+bias = as_tensor_variable(bias)
+epsilon = as_tensor_variable(epsilon)
+running_average_factor = as_tensor_variable(running_average_factor)
+if running_mean is not None:
+running_mean = as_tensor_variable(running_mean)
+if running_var is not None:
+running_var = as_tensor_variable(running_var)
assert x.ndim == scale.ndim == bias.ndim
assert ((running_mean is None and running_var is None) or
(running_mean is not None and running_var is not None))
assert (running_mean is None or running_mean.ndim == x.ndim)
assert (running_var is None or running_var.ndim == x.ndim)
-if not isinstance(epsilon, theano.Variable):
-epsilon = as_tensor_variable(epsilon)
-if not isinstance(running_average_factor, theano.Variable):
-running_average_factor = as_tensor_variable(running_average_factor)
+# Upcast to common dtype on the non-scalar
+# Keep as is dtype of scalar (epsilon and running_average_factor)
+if running_mean:
+x, scale, bias, running_mean, running_var = as_common_dtype(
+x, scale, bias, running_mean, running_var)
+else:
+x, scale, bias = as_common_dtype(x, scale, bias)
inputs = [x, scale, bias, epsilon, running_average_factor]
output_types = [x.type(), scale.type(), scale.type()]
if running_mean is not None and running_var is not None:
@@ -513,9 +525,18 @@ class AbstractBatchNormInference(Op):
return [shape[0]]
def make_node(self, x, scale, bias, estimated_mean, estimated_variance, epsilon=1e-4):
+x = as_tensor_variable(x)
+scale = as_tensor_variable(scale)
+bias = as_tensor_variable(bias)
+estimated_mean = as_tensor_variable(estimated_mean)
+estimated_variance = as_tensor_variable(estimated_variance)
+epsilon = as_tensor_variable(epsilon)
+# Upcast to common dtype on the non-scalar
+# Keep as is dtype of scalar (epsilon)
+x, scale, bias, estimated_mean, estimated_variance = as_common_dtype(
+x, scale, bias, estimated_mean, estimated_variance)
assert x.ndim == scale.ndim == bias.ndim == estimated_mean.ndim == estimated_variance.ndim
-if not isinstance(epsilon, theano.Variable):
-epsilon = as_tensor_variable(epsilon)
return Apply(self, [x, scale, bias, estimated_mean, estimated_variance, epsilon], [x.type()])
def grad(self, inputs, grads):
@@ -561,9 +582,18 @@ class AbstractBatchNormTrainGrad(Op):
self.axes = axes
def make_node(self, x, dy, scale, x_mean, x_invstd, epsilon=1e-4):
+x = as_tensor_variable(x)
+dy = as_tensor_variable(dy)
+scale = as_tensor_variable(scale)
+x_mean = as_tensor_variable(x_mean)
+x_invstd = as_tensor_variable(x_invstd)
+epsilon = as_tensor_variable(epsilon)
+# Upcast to common dtype on the non-scalar
+# Keep as is dtype of scalar (epsilon)
+x, dy, scale, x_mean, x_invstd = as_common_dtype(
+x, dy, scale, x_mean, x_invstd)
assert x.ndim == dy.ndim == scale.ndim == x_mean.ndim == x_invstd.ndim
-if not isinstance(epsilon, theano.Variable):
-epsilon = as_tensor_variable(epsilon)
return Apply(self, [x, dy, scale, x_mean, x_invstd, epsilon],
[x.type(), scale.type(), scale.type()])
@@ -612,6 +642,9 @@ def local_abstract_batch_norm_train(node):
mean = x.mean(axes, keepdims=True)
var = x.var(axes, keepdims=True)
+# The epsilon should not upcast the dtype.
+if var.dtype == 'float32' and epsilon.dtype == 'float64':
+epsilon = epsilon.astype('float32')
invstd = T.inv(T.sqrt(var + epsilon))
out = (x - mean) * (scale * invstd) + bias
results = [out, mean, invstd]
@@ -687,6 +720,10 @@ def local_abstract_batch_norm_inference(node):
not isinstance(epsilon.type, TensorType):
return None
+# The epsilon should not upcast the dtype.
+if estimated_variance.dtype == 'float32' and epsilon.dtype == 'float64':
+epsilon = epsilon.astype('float32')
result = (x - estimated_mean) * (scale / T.sqrt(estimated_variance + epsilon)) + bias
result = T.patternbroadcast(result, node.outputs[0].broadcastable)
...
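The two `epsilon` guards added above exist because a float32 graph combined with a float64 epsilon constant would otherwise upcast the whole expression. A minimal sketch of the behaviour they prevent, assuming standard Theano dtype promotion; the variables are illustrative and `epsilon` given as a Python float typically becomes a float64 constant via `as_tensor_variable`:

```python
import theano.tensor as T

var = T.fvector('var')                     # float32 graph
eps64 = T.constant(1e-4, dtype='float64')  # the dtype a plain Python float gets

print((var + eps64).dtype)                    # 'float64' -- unwanted upcast
print((var + eps64.astype('float32')).dtype)  # 'float32' -- what the guard keeps
```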
@@ -201,7 +201,7 @@ def test_batch_normalization_train():
bn.AbstractBatchNormTrainGrad))
for n in f.maker.fgraph.toposort()])
# run
-for data_shape in ((5, 10, 30, 40, 10), (4, 3, 1, 1, 1), (1, 1, 5, 5, 5)):
+for data_shape in ((5, 10, 30, 40, 10), (4, 3, 1, 1, 1), (2, 3, 5, 5, 5)):
data_shape = data_shape[:ndim]
param_shape = tuple(1 if d in axes2 else s
for d, s in enumerate(data_shape))
...
@@ -203,9 +203,10 @@ class TensorType(Type):
"""
Convert a symbolic Variable into a TensorType, if compatible.
-For the moment, only a TensorType or CudaNdarrayType will be
-converted, provided they have the same number of dimensions,
-broadcastable pattern, and dtype.
+For the moment, only a TensorType, GpuArrayType and
+CudaNdarrayType will be
+converted, provided they have the same number of dimensions and
+dtype and have "compatible" broadcastable pattern.
"""
if hasattr(other, '_as_TensorVariable'):
...