提交 51a2a510 authored 作者: Gijs van Tulder's avatar Gijs van Tulder

Now also run 6d+ batch norm on cuDNN.

上级 38e40c70
...@@ -2451,9 +2451,6 @@ def dnn_batch_normalization_train(inputs, gamma, beta, mode='per-activation', ...@@ -2451,9 +2451,6 @@ def dnn_batch_normalization_train(inputs, gamma, beta, mode='per-activation',
For 5d tensors, the axes are (0, 2, 3, 4).
""" """
ndim = inputs.ndim ndim = inputs.ndim
if ndim > 5:
raise ValueError("dnn_batch_normalization_train currently supports "
"up to 5-dimensional tensors only, got %d" % ndim)
if gamma.ndim != ndim or beta.ndim != ndim: if gamma.ndim != ndim or beta.ndim != ndim:
raise ValueError("gamma and beta must be of the same dimensionality " raise ValueError("gamma and beta must be of the same dimensionality "
"as inputs; got %d and %d instead of %d" % "as inputs; got %d and %d instead of %d" %
...@@ -2465,11 +2462,21 @@ def dnn_batch_normalization_train(inputs, gamma, beta, mode='per-activation', ...@@ -2465,11 +2462,21 @@ def dnn_batch_normalization_train(inputs, gamma, beta, mode='per-activation',
inputs = theano.tensor.shape_padright(inputs, 4 - ndim) inputs = theano.tensor.shape_padright(inputs, 4 - ndim)
gamma = theano.tensor.shape_padright(gamma, 4 - ndim) gamma = theano.tensor.shape_padright(gamma, 4 - ndim)
beta = theano.tensor.shape_padright(beta, 4 - ndim) beta = theano.tensor.shape_padright(beta, 4 - ndim)
elif ndim > 5:
inputs_shape = inputs.shape
params_shape = gamma.shape
inputs = theano.tensor.flatten(inputs, 5)
gamma = theano.tensor.flatten(gamma, 5)
beta = theano.tensor.flatten(beta, 5)
batchnorm_op = GpuDnnBatchNorm(mode=mode) batchnorm_op = GpuDnnBatchNorm(mode=mode)
result = tuple(batchnorm_op(gpu_contiguous(inputs), gpu_contiguous(gamma), result = tuple(batchnorm_op(gpu_contiguous(inputs), gpu_contiguous(gamma),
gpu_contiguous(beta), epsilon=epsilon)) gpu_contiguous(beta), epsilon=epsilon))
if ndim < 4: if ndim < 4:
result = tuple(theano.tensor.flatten(r, ndim) for r in result) result = tuple(theano.tensor.flatten(r, ndim) for r in result)
elif ndim > 5:
result = (theano.tensor.reshape(result[0], inputs_shape),
theano.tensor.reshape(result[1], params_shape),
theano.tensor.reshape(result[2], params_shape))
return result return result
...@@ -2522,9 +2529,6 @@ def dnn_batch_normalization_test(inputs, gamma, beta, mean, var, ...@@ -2522,9 +2529,6 @@ def dnn_batch_normalization_test(inputs, gamma, beta, mean, var,
For 5d tensors, the axes would be (0, 2, 3, 4).
""" """
ndim = inputs.ndim ndim = inputs.ndim
if ndim > 5:
raise ValueError("dnn_batch_normalization_test currently supports "
"up to 5-dimensional tensors only, got %d" % ndim)
if gamma.ndim != ndim or beta.ndim != ndim: if gamma.ndim != ndim or beta.ndim != ndim:
raise ValueError("gamma and beta must be of the same dimensionality " raise ValueError("gamma and beta must be of the same dimensionality "
"as inputs; got %d and %d instead of %d" % "as inputs; got %d and %d instead of %d" %
...@@ -2542,12 +2546,21 @@ def dnn_batch_normalization_test(inputs, gamma, beta, mean, var, ...@@ -2542,12 +2546,21 @@ def dnn_batch_normalization_test(inputs, gamma, beta, mean, var,
beta = theano.tensor.shape_padright(beta, 4 - ndim) beta = theano.tensor.shape_padright(beta, 4 - ndim)
mean = theano.tensor.shape_padright(mean, 4 - ndim) mean = theano.tensor.shape_padright(mean, 4 - ndim)
var = theano.tensor.shape_padright(var, 4 - ndim) var = theano.tensor.shape_padright(var, 4 - ndim)
elif ndim > 5:
inputs_shape = inputs.shape
inputs = theano.tensor.flatten(inputs, 5)
gamma = theano.tensor.flatten(gamma, 5)
beta = theano.tensor.flatten(beta, 5)
mean = theano.tensor.flatten(mean, 5)
var = theano.tensor.flatten(var, 5)
batchnorm_op = GpuDnnBatchNormInference(mode=mode) batchnorm_op = GpuDnnBatchNormInference(mode=mode)
result = batchnorm_op(gpu_contiguous(inputs), gpu_contiguous(gamma), result = batchnorm_op(gpu_contiguous(inputs), gpu_contiguous(gamma),
gpu_contiguous(beta), gpu_contiguous(mean), gpu_contiguous(beta), gpu_contiguous(mean),
gpu_contiguous(var), epsilon=epsilon) gpu_contiguous(var), epsilon=epsilon)
if ndim < 4: if ndim < 4:
result = theano.tensor.flatten(result, ndim) result = theano.tensor.flatten(result, ndim)
elif ndim > 5:
result = theano.tensor.reshape(result, inputs_shape)
return result return result
...@@ -2938,10 +2951,6 @@ def local_abstract_batch_norm_train_cudnn(node): ...@@ -2938,10 +2951,6 @@ def local_abstract_batch_norm_train_cudnn(node):
x, scale, bias, epsilon = node.inputs x, scale, bias, epsilon = node.inputs
if x.ndim > 5:
# TODO do something better than this (reshape?)
return None
# input on gpu? TODO what about the output? # input on gpu? TODO what about the output?
x_on_gpu = (isinstance(x.type, GpuArrayType) or x_on_gpu = (isinstance(x.type, GpuArrayType) or
(x.owner and isinstance(x.owner.op, HostFromGpu))) (x.owner and isinstance(x.owner.op, HostFromGpu)))
...@@ -2992,10 +3001,6 @@ def local_abstract_batch_norm_train_grad_cudnn(node): ...@@ -2992,10 +3001,6 @@ def local_abstract_batch_norm_train_grad_cudnn(node):
x, dy, scale, x_mean, x_invstd, epsilon = node.inputs x, dy, scale, x_mean, x_invstd, epsilon = node.inputs
if x.ndim > 5:
# TODO do something better than this (reshape?)
return None
# input on gpu? TODO what about the output? # input on gpu? TODO what about the output?
x_on_gpu = (isinstance(x.type, GpuArrayType) or x_on_gpu = (isinstance(x.type, GpuArrayType) or
(x.owner and isinstance(x.owner.op, HostFromGpu))) (x.owner and isinstance(x.owner.op, HostFromGpu)))
...@@ -3020,6 +3025,14 @@ def local_abstract_batch_norm_train_grad_cudnn(node): ...@@ -3020,6 +3025,14 @@ def local_abstract_batch_norm_train_grad_cudnn(node):
scale = theano.tensor.shape_padright(scale, 4 - ndim) scale = theano.tensor.shape_padright(scale, 4 - ndim)
x_mean = theano.tensor.shape_padright(x_mean, 4 - ndim) x_mean = theano.tensor.shape_padright(x_mean, 4 - ndim)
x_invstd = theano.tensor.shape_padright(x_invstd, 4 - ndim) x_invstd = theano.tensor.shape_padright(x_invstd, 4 - ndim)
elif ndim > 5:
x_shape = x.shape
params_shape = scale.shape
x = theano.tensor.flatten(x, 5)
dy = theano.tensor.flatten(dy, 5)
scale = theano.tensor.flatten(scale, 5)
x_mean = theano.tensor.flatten(x_mean, 5)
x_invstd = theano.tensor.flatten(x_invstd, 5)
try: try:
eps = theano.tensor.get_scalar_constant_value(epsilon) eps = theano.tensor.get_scalar_constant_value(epsilon)
...@@ -3045,6 +3058,10 @@ def local_abstract_batch_norm_train_grad_cudnn(node): ...@@ -3045,6 +3058,10 @@ def local_abstract_batch_norm_train_grad_cudnn(node):
g_wrt_inputs = theano.tensor.flatten(g_wrt_inputs, ndim) g_wrt_inputs = theano.tensor.flatten(g_wrt_inputs, ndim)
g_wrt_scale = theano.tensor.flatten(g_wrt_scale, ndim) g_wrt_scale = theano.tensor.flatten(g_wrt_scale, ndim)
g_wrt_bias = theano.tensor.flatten(g_wrt_bias, ndim) g_wrt_bias = theano.tensor.flatten(g_wrt_bias, ndim)
elif ndim > 5:
g_wrt_inputs = theano.tensor.reshape(g_wrt_inputs, x_shape)
g_wrt_scale = theano.tensor.reshape(g_wrt_scale, params_shape)
g_wrt_bias = theano.tensor.reshape(g_wrt_bias, params_shape)
# If the original output was on CPU, we have to transfer it # If the original output was on CPU, we have to transfer it
if isinstance(node.outputs[0].type, tensor.TensorType): if isinstance(node.outputs[0].type, tensor.TensorType):
...@@ -3064,10 +3081,6 @@ def local_abstract_batch_norm_inference_cudnn(node): ...@@ -3064,10 +3081,6 @@ def local_abstract_batch_norm_inference_cudnn(node):
x, scale, bias, estimated_mean, estimated_variance, epsilon = node.inputs x, scale, bias, estimated_mean, estimated_variance, epsilon = node.inputs
if x.ndim > 5:
# TODO do something better than this (reshape?)
return None
axes = tuple(node.op.axes) axes = tuple(node.op.axes)
if axes == (0,): if axes == (0,):
mode = 'per-activation' mode = 'per-activation'
......
...@@ -1380,8 +1380,10 @@ def test_dnn_batchnorm_train(): ...@@ -1380,8 +1380,10 @@ def test_dnn_batchnorm_train():
raise SkipTest(dnn.dnn_available.msg) raise SkipTest(dnn.dnn_available.msg)
utt.seed_rng() utt.seed_rng()
tensor6 = T.TensorType(theano.config.floatX, (False,) * 6)
for mode in ('per-activation', 'spatial'): for mode in ('per-activation', 'spatial'):
for vartype in (T.tensor5, T.tensor4, T.tensor3, T.matrix, T.vector): for vartype in (tensor6, T.tensor5, T.tensor4, T.tensor3, T.matrix, T.vector):
x, scale, bias = (vartype(n) for n in ('x', 'scale', 'bias')) x, scale, bias = (vartype(n) for n in ('x', 'scale', 'bias'))
ndim = x.ndim ndim = x.ndim
eps = 5e-3 # some non-standard value to test if it's used eps = 5e-3 # some non-standard value to test if it's used
...@@ -1428,7 +1430,7 @@ def test_dnn_batchnorm_train(): ...@@ -1428,7 +1430,7 @@ def test_dnn_batchnorm_train():
bn.AbstractBatchNormTrainGrad)) for n bn.AbstractBatchNormTrainGrad)) for n
in f_abstract.maker.fgraph.toposort()]) in f_abstract.maker.fgraph.toposort()])
# run # run
for data_shape in ((5, 10, 30, 40, 10), (4, 3, 1, 1, 1), (1, 1, 5, 5, 5)): for data_shape in ((5, 10, 30, 40, 10, 5), (4, 3, 1, 1, 1, 1), (1, 1, 5, 5, 5, 5)):
data_shape = data_shape[:ndim] data_shape = data_shape[:ndim]
param_shape = tuple(1 if d in axes else s param_shape = tuple(1 if d in axes else s
for d, s in enumerate(data_shape)) for d, s in enumerate(data_shape))
...@@ -1447,10 +1449,10 @@ def test_dnn_batchnorm_train(): ...@@ -1447,10 +1449,10 @@ def test_dnn_batchnorm_train():
utt.assert_allclose(outputs_abstract[1], outputs_ref[1]) # mean utt.assert_allclose(outputs_abstract[1], outputs_ref[1]) # mean
utt.assert_allclose(outputs_abstract[2], outputs_ref[2]) # invstd utt.assert_allclose(outputs_abstract[2], outputs_ref[2]) # invstd
# compare gradients # compare gradients
utt.assert_allclose(outputs_gpu[3], outputs_ref[3], atol=1e-4) # dx utt.assert_allclose(outputs_gpu[3], outputs_ref[3], atol=2e-4) # dx
utt.assert_allclose(outputs_gpu[4], outputs_ref[4], rtol=2e-4, atol=1e-4) # dscale utt.assert_allclose(outputs_gpu[4], outputs_ref[4], rtol=2e-4, atol=1e-4) # dscale
utt.assert_allclose(outputs_gpu[5], outputs_ref[5]) # dbias utt.assert_allclose(outputs_gpu[5], outputs_ref[5]) # dbias
utt.assert_allclose(outputs_abstract[3], outputs_ref[3], atol=1e-4) # dx utt.assert_allclose(outputs_abstract[3], outputs_ref[3], atol=2e-4) # dx
utt.assert_allclose(outputs_abstract[4], outputs_ref[4], rtol=2e-4, atol=1e-4) # dscale utt.assert_allclose(outputs_abstract[4], outputs_ref[4], rtol=2e-4, atol=1e-4) # dscale
utt.assert_allclose(outputs_abstract[5], outputs_ref[5]) # dbias utt.assert_allclose(outputs_abstract[5], outputs_ref[5]) # dbias
...@@ -1460,8 +1462,10 @@ def test_batchnorm_inference(): ...@@ -1460,8 +1462,10 @@ def test_batchnorm_inference():
raise SkipTest(dnn.dnn_available.msg) raise SkipTest(dnn.dnn_available.msg)
utt.seed_rng() utt.seed_rng()
tensor6 = T.TensorType(theano.config.floatX, (False,) * 6)
for mode in ('per-activation', 'spatial'): for mode in ('per-activation', 'spatial'):
for vartype in (T.tensor5, T.tensor4, T.tensor3, T.matrix, T.vector): for vartype in (tensor6, T.tensor5, T.tensor4, T.tensor3, T.matrix, T.vector):
x, scale, bias, mean, var = (vartype(n) x, scale, bias, mean, var = (vartype(n)
for n in ('x', 'scale', 'bias', 'mean', 'var')) for n in ('x', 'scale', 'bias', 'mean', 'var'))
ndim = x.ndim ndim = x.ndim
...@@ -1502,7 +1506,7 @@ def test_batchnorm_inference(): ...@@ -1502,7 +1506,7 @@ def test_batchnorm_inference():
bn.AbstractBatchNormTrainGrad)) for n bn.AbstractBatchNormTrainGrad)) for n
in f_abstract.maker.fgraph.toposort()]) in f_abstract.maker.fgraph.toposort()])
# run # run
for data_shape in ((10, 20, 30, 40, 10), (4, 3, 1, 1, 1), (1, 1, 5, 5, 5)): for data_shape in ((10, 20, 30, 40, 10, 5), (4, 3, 1, 1, 1, 1), (1, 1, 5, 5, 5, 5)):
data_shape = data_shape[:ndim] data_shape = data_shape[:ndim]
param_shape = tuple(1 if d in axes else s param_shape = tuple(1 if d in axes else s
for d, s in enumerate(data_shape)) for d, s in enumerate(data_shape))
......
...@@ -119,9 +119,8 @@ def batch_normalization_train(inputs, gamma, beta, axes='per-activation', ...@@ -119,9 +119,8 @@ def batch_normalization_train(inputs, gamma, beta, axes='per-activation',
Notes Notes
----- -----
If per-activation or spatial normalization is selected, this operation
will use the cuDNN implementation. (This requires cuDNN 5 or newer.)
The returned values are equivalent to: The returned values are equivalent to:
...@@ -205,9 +204,8 @@ def batch_normalization_test(inputs, gamma, beta, mean, var, ...@@ -205,9 +204,8 @@ def batch_normalization_test(inputs, gamma, beta, mean, var,
Notes Notes
----- -----
If per-activation or spatial normalization is selected, this operation
will use the cuDNN implementation. (This requires cuDNN 5 or newer.)
The returned value is equivalent to: The returned value is equivalent to:
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论