提交 aac38eea 作者: abergeron 提交者: GitHub

Merge pull request #4948 from gvtulder/f-batchnorm-3d

GpuDnnBatchNorm with 5d inputs
...@@ -1604,9 +1604,8 @@ class GpuDnnBatchNorm(DnnBase): ...@@ -1604,9 +1604,8 @@ class GpuDnnBatchNorm(DnnBase):
scale = as_gpuarray_variable(scale, ctx_name) scale = as_gpuarray_variable(scale, ctx_name)
bias = as_gpuarray_variable(bias, ctx_name) bias = as_gpuarray_variable(bias, ctx_name)
epsilon = as_scalar(epsilon).astype('float64') epsilon = as_scalar(epsilon).astype('float64')
assert x.ndim == 4 assert x.ndim == scale.ndim == bias.ndim
assert scale.ndim == 4 assert x.ndim in (4, 5)
assert bias.ndim == 4
return Apply(self, [x, scale, bias, epsilon], [x.type(), scale.type(), scale.type()]) return Apply(self, [x, scale, bias, epsilon], [x.type(), scale.type(), scale.type()])
def grad(self, inputs, grads): def grad(self, inputs, grads):
...@@ -1668,11 +1667,8 @@ class GpuDnnBatchNormInference(DnnBase): ...@@ -1668,11 +1667,8 @@ class GpuDnnBatchNormInference(DnnBase):
estimated_mean = as_gpuarray_variable(estimated_mean, ctx_name) estimated_mean = as_gpuarray_variable(estimated_mean, ctx_name)
estimated_variance = as_gpuarray_variable(estimated_variance, ctx_name) estimated_variance = as_gpuarray_variable(estimated_variance, ctx_name)
epsilon = as_scalar(epsilon).astype('float64') epsilon = as_scalar(epsilon).astype('float64')
assert x.ndim == 4 assert x.ndim == scale.ndim == bias.ndim == estimated_mean.ndim == estimated_variance.ndim
assert scale.ndim == 4 assert x.ndim in (4, 5)
assert bias.ndim == 4
assert estimated_mean.ndim == 4
assert estimated_variance.ndim == 4
return Apply(self, [x, scale, bias, estimated_mean, estimated_variance, epsilon], [x.type()]) return Apply(self, [x, scale, bias, estimated_mean, estimated_variance, epsilon], [x.type()])
def grad(self, inputs, grads): def grad(self, inputs, grads):
...@@ -1682,7 +1678,7 @@ class GpuDnnBatchNormInference(DnnBase): ...@@ -1682,7 +1678,7 @@ class GpuDnnBatchNormInference(DnnBase):
if self.mode == "per-activation": if self.mode == "per-activation":
axes = (0,) axes = (0,)
elif self.mode == "spatial": elif self.mode == "spatial":
axes = (0, 2, 3) axes = (0,) + tuple(range(2, x.ndim))
scale, bias, est_mean, est_var = (theano.tensor.addbroadcast(t, *axes) scale, bias, est_mean, est_var = (theano.tensor.addbroadcast(t, *axes)
for t in (scale, bias, est_mean, est_var)) for t in (scale, bias, est_mean, est_var))
...@@ -1731,7 +1727,8 @@ class GpuDnnBatchNormGrad(DnnBase): ...@@ -1731,7 +1727,8 @@ class GpuDnnBatchNormGrad(DnnBase):
x_mean = as_gpuarray_variable(x_mean, ctx_name) x_mean = as_gpuarray_variable(x_mean, ctx_name)
x_invstd = as_gpuarray_variable(x_invstd, ctx_name) x_invstd = as_gpuarray_variable(x_invstd, ctx_name)
epsilon = as_scalar(epsilon).astype('float64') epsilon = as_scalar(epsilon).astype('float64')
assert x.ndim == 4 and dy.ndim == 4 and scale.ndim == 4 and x_mean.ndim == 4 and x_invstd.ndim == 4 assert x.ndim == dy.ndim == scale.ndim == x_mean.ndim == x_invstd.ndim
assert x.ndim in (4, 5)
return Apply(self, [x, dy, scale, x_mean, x_invstd, epsilon], [x.type(), scale.type(), scale.type()]) return Apply(self, [x, dy, scale, x_mean, x_invstd, epsilon], [x.type(), scale.type(), scale.type()])
def infer_shape(self, node, shape): def infer_shape(self, node, shape):
...@@ -1781,11 +1778,13 @@ def dnn_batch_normalization_train(inputs, gamma, beta, mode='per-activation', ...@@ -1781,11 +1778,13 @@ def dnn_batch_normalization_train(inputs, gamma, beta, mode='per-activation',
mean = inputs.mean(axes, keepdims=True) mean = inputs.mean(axes, keepdims=True)
stdinv = T.inv(T.sqrt(inputs.var(axes, keepdims=True) + epsilon)) stdinv = T.inv(T.sqrt(inputs.var(axes, keepdims=True) + epsilon))
out = (inputs - mean) * gamma * stdinv + beta out = (inputs - mean) * gamma * stdinv + beta
For 5d tensors, the axes are (0, 2, 3, 4).
""" """
ndim = inputs.ndim ndim = inputs.ndim
if ndim > 4: if ndim > 5:
raise ValueError("dnn_batch_normalization_train currently supports " raise ValueError("dnn_batch_normalization_train currently supports "
"up to 4-dimensional tensors only, got %d" % ndim) "up to 5-dimensional tensors only, got %d" % ndim)
if gamma.ndim != ndim or beta.ndim != ndim: if gamma.ndim != ndim or beta.ndim != ndim:
raise ValueError("gamma and beta must be of the same dimensionality " raise ValueError("gamma and beta must be of the same dimensionality "
"as inputs; got %d and %d instead of %d" % "as inputs; got %d and %d instead of %d" %
...@@ -1850,11 +1849,13 @@ def dnn_batch_normalization_test(inputs, gamma, beta, mean, var, ...@@ -1850,11 +1849,13 @@ def dnn_batch_normalization_test(inputs, gamma, beta, mean, var,
gamma, beta, mean, var = (T.addbroadcast(t, *axes) gamma, beta, mean, var = (T.addbroadcast(t, *axes)
for t in (gamma, beta, mean, var)) for t in (gamma, beta, mean, var))
out = (inputs - mean) * gamma / T.sqrt(var + epsilon) + beta out = (inputs - mean) * gamma / T.sqrt(var + epsilon) + beta
For 5d tensors, the axes would be (0, 2, 3, 4).
""" """
ndim = inputs.ndim ndim = inputs.ndim
if ndim > 4: if ndim > 5:
raise ValueError("dnn_batch_normalization_test currently supports " raise ValueError("dnn_batch_normalization_test currently supports "
"up to 4-dimensional tensors only, got %d" % ndim) "up to 5-dimensional tensors only, got %d" % ndim)
if gamma.ndim != ndim or beta.ndim != ndim: if gamma.ndim != ndim or beta.ndim != ndim:
raise ValueError("gamma and beta must be of the same dimensionality " raise ValueError("gamma and beta must be of the same dimensionality "
"as inputs; got %d and %d instead of %d" % "as inputs; got %d and %d instead of %d" %
......
...@@ -1391,7 +1391,7 @@ def test_dnn_batchnorm_train(): ...@@ -1391,7 +1391,7 @@ def test_dnn_batchnorm_train():
utt.seed_rng() utt.seed_rng()
for mode in ('per-activation', 'spatial'): for mode in ('per-activation', 'spatial'):
for vartype in (T.ftensor4, T.ftensor3, T.fmatrix, T.fvector): for vartype in (T.ftensor5, T.ftensor4, T.ftensor3, T.fmatrix, T.fvector):
x, scale, bias = (vartype(n) for n in ('x', 'scale', 'bias')) x, scale, bias = (vartype(n) for n in ('x', 'scale', 'bias'))
ndim = x.ndim ndim = x.ndim
eps = 5e-3 # some non-standard value to test if it's used eps = 5e-3 # some non-standard value to test if it's used
...@@ -1419,7 +1419,7 @@ def test_dnn_batchnorm_train(): ...@@ -1419,7 +1419,7 @@ def test_dnn_batchnorm_train():
[out, x_mean, x_invstd, out2, x_mean2, x_invstd2] + [out, x_mean, x_invstd, out2, x_mean2, x_invstd2] +
grads + grads2, mode=mode_with_gpu) grads + grads2, mode=mode_with_gpu)
# run # run
for data_shape in ((10, 20, 30, 40), (4, 3, 1, 1), (1, 1, 5, 5)): for data_shape in ((5, 10, 30, 40, 10), (4, 3, 1, 1, 1), (1, 1, 5, 5, 5)):
data_shape = data_shape[:ndim] data_shape = data_shape[:ndim]
param_shape = tuple(1 if d in axes else s param_shape = tuple(1 if d in axes else s
for d, s in enumerate(data_shape)) for d, s in enumerate(data_shape))
...@@ -1433,8 +1433,8 @@ def test_dnn_batchnorm_train(): ...@@ -1433,8 +1433,8 @@ def test_dnn_batchnorm_train():
utt.assert_allclose(outputs[1], outputs[1 + 3]) # mean utt.assert_allclose(outputs[1], outputs[1 + 3]) # mean
utt.assert_allclose(outputs[2], outputs[2 + 3]) # invstd utt.assert_allclose(outputs[2], outputs[2 + 3]) # invstd
# compare gradients # compare gradients
utt.assert_allclose(outputs[6], outputs[6 + 3]) # dx utt.assert_allclose(outputs[6], outputs[6 + 3], atol=1e-4) # dx
utt.assert_allclose(outputs[7], outputs[7 + 3], rtol=3e-3) # dscale utt.assert_allclose(outputs[7], outputs[7 + 3], rtol=2e-4, atol=1e-4) # dscale
utt.assert_allclose(outputs[8], outputs[8 + 3]) # dbias utt.assert_allclose(outputs[8], outputs[8 + 3]) # dbias
...@@ -1446,7 +1446,7 @@ def test_batchnorm_inference(): ...@@ -1446,7 +1446,7 @@ def test_batchnorm_inference():
utt.seed_rng() utt.seed_rng()
for mode in ('per-activation', 'spatial'): for mode in ('per-activation', 'spatial'):
for vartype in (T.ftensor4, T.ftensor3, T.fmatrix, T.fvector): for vartype in (T.ftensor5, T.ftensor4, T.ftensor3, T.fmatrix, T.fvector):
x, scale, bias, mean, var = (vartype(n) for n in ('x', 'scale', x, scale, bias, mean, var = (vartype(n) for n in ('x', 'scale',
'bias', 'mean', 'bias', 'mean',
'var')) 'var'))
...@@ -1473,7 +1473,7 @@ def test_batchnorm_inference(): ...@@ -1473,7 +1473,7 @@ def test_batchnorm_inference():
f = theano.function([x, scale, bias, mean, var, dy], f = theano.function([x, scale, bias, mean, var, dy],
[out, out2] + grads + grads2, mode=mode_with_gpu) [out, out2] + grads + grads2, mode=mode_with_gpu)
# run # run
for data_shape in ((10, 20, 30, 40), (4, 3, 1, 1), (1, 1, 5, 5)): for data_shape in ((10, 20, 30, 40, 10), (4, 3, 1, 1, 1), (1, 1, 5, 5, 5)):
data_shape = data_shape[:ndim] data_shape = data_shape[:ndim]
param_shape = tuple(1 if d in axes else s param_shape = tuple(1 if d in axes else s
for d, s in enumerate(data_shape)) for d, s in enumerate(data_shape))
...@@ -1487,8 +1487,8 @@ def test_batchnorm_inference(): ...@@ -1487,8 +1487,8 @@ def test_batchnorm_inference():
# compare outputs # compare outputs
utt.assert_allclose(outputs[0], outputs[1]) # out utt.assert_allclose(outputs[0], outputs[1]) # out
# compare gradients # compare gradients
utt.assert_allclose(outputs[2], outputs[2 + 5]) # dx utt.assert_allclose(outputs[2], outputs[2 + 5], atol=4e-5) # dx
utt.assert_allclose(outputs[3], outputs[3 + 5]) # dscale utt.assert_allclose(outputs[3], outputs[3 + 5], atol=4e-5) # dscale
utt.assert_allclose(outputs[4], outputs[4 + 5]) # dbias utt.assert_allclose(outputs[4], outputs[4 + 5]) # dbias
utt.assert_allclose(outputs[5], outputs[5 + 5]) # dmean utt.assert_allclose(outputs[5], outputs[5 + 5]) # dmean
utt.assert_allclose(outputs[6], outputs[6 + 5], atol=2e-5) # dvar utt.assert_allclose(outputs[6], outputs[6 + 5], rtol=2e-3, atol=4e-5) # dvar
...@@ -2364,7 +2364,7 @@ class GpuDnnBatchNormBase(DnnBase): ...@@ -2364,7 +2364,7 @@ class GpuDnnBatchNormBase(DnnBase):
""" """
__props__ = ('mode', 'epsilon') __props__ = ('mode', 'epsilon')
tensor_4d_descs = [] tensor_descs = []
def __init__(self, mode='per-activation', epsilon=1e-4): def __init__(self, mode='per-activation', epsilon=1e-4):
DnnBase.__init__(self) DnnBase.__init__(self)
...@@ -2380,7 +2380,7 @@ class GpuDnnBatchNormBase(DnnBase): ...@@ -2380,7 +2380,7 @@ class GpuDnnBatchNormBase(DnnBase):
def c_support_code_struct(self, node, name): def c_support_code_struct(self, node, name):
result = '' result = ''
for id in self.tensor_4d_descs: for id in self.tensor_descs:
result += c_define_tensor_desc('%s_%s' % (id, name)) result += c_define_tensor_desc('%s_%s' % (id, name))
return result return result
...@@ -2389,13 +2389,13 @@ class GpuDnnBatchNormBase(DnnBase): ...@@ -2389,13 +2389,13 @@ class GpuDnnBatchNormBase(DnnBase):
cudnnStatus_t err%(name)s; cudnnStatus_t err%(name)s;
""" % dict(name=name) """ % dict(name=name)
for id in self.tensor_4d_descs: for id in self.tensor_descs:
result += c_init_tensor_desc('%s_%s' % (id, name), 'err' + name, sub['fail']) result += c_init_tensor_desc('%s_%s' % (id, name), 'err' + name, sub['fail'])
return result return result
def c_cleanup_code_struct(self, node, name): def c_cleanup_code_struct(self, node, name):
result = '' result = ''
for id in self.tensor_4d_descs: for id in self.tensor_descs:
result += c_clean_tensor_desc('%s_%s' % (id, name)) result += c_clean_tensor_desc('%s_%s' % (id, name))
return result return result
...@@ -2419,7 +2419,7 @@ double epsilon%(name)s = %(epsilon)e; ...@@ -2419,7 +2419,7 @@ double epsilon%(name)s = %(epsilon)e;
return result return result
def c_code_cache_version(self): def c_code_cache_version(self):
return (2, version()) return (3, version())
class GpuDnnBatchNormInference(GpuDnnBatchNormBase): class GpuDnnBatchNormInference(GpuDnnBatchNormBase):
...@@ -2436,7 +2436,7 @@ class GpuDnnBatchNormInference(GpuDnnBatchNormBase): ...@@ -2436,7 +2436,7 @@ class GpuDnnBatchNormInference(GpuDnnBatchNormBase):
Note: scale, bias, mean and variance must follow the same tensor layout! Note: scale, bias, mean and variance must follow the same tensor layout!
""" """
tensor_4d_descs = ['bn_input', 'bn_output', 'bn_params'] tensor_descs = ['bn_input', 'bn_output', 'bn_params']
def infer_shape(self, node, shape): def infer_shape(self, node, shape):
# output shape equals shape of x # output shape equals shape of x
...@@ -2448,11 +2448,8 @@ class GpuDnnBatchNormInference(GpuDnnBatchNormBase): ...@@ -2448,11 +2448,8 @@ class GpuDnnBatchNormInference(GpuDnnBatchNormBase):
bias = as_cuda_ndarray_variable(bias) bias = as_cuda_ndarray_variable(bias)
estimated_mean = as_cuda_ndarray_variable(estimated_mean) estimated_mean = as_cuda_ndarray_variable(estimated_mean)
estimated_variance = as_cuda_ndarray_variable(estimated_variance) estimated_variance = as_cuda_ndarray_variable(estimated_variance)
assert x.ndim == 4 assert x.ndim == scale.ndim == bias.ndim == estimated_mean.ndim == estimated_variance.ndim
assert scale.ndim == 4 assert x.ndim in (4, 5)
assert bias.ndim == 4
assert estimated_mean.ndim == 4
assert estimated_variance.ndim == 4
return Apply(self, [x, scale, bias, estimated_mean, estimated_variance], return Apply(self, [x, scale, bias, estimated_mean, estimated_variance],
[x.type()]) [x.type()])
...@@ -2464,23 +2461,30 @@ class GpuDnnBatchNormInference(GpuDnnBatchNormBase): ...@@ -2464,23 +2461,30 @@ class GpuDnnBatchNormInference(GpuDnnBatchNormBase):
inp, scale, bias, est_mean, est_var = inputs inp, scale, bias, est_mean, est_var = inputs
outp, = outputs outp, = outputs
# set input tensor descriptors from input tensors # call cuDNN function
result += c_set_tensor4d(inp, 'bn_input_' + name, 'err' + name, sub['fail'])
result += c_set_tensor4d(scale, 'bn_params_' + name, 'err' + name, sub['fail'])
# build and prepare the output variable
result += """ result += """
if (CudaNdarray_prep_output(&%(outp)s, 4, CudaNdarray_HOST_DIMS(%(inp)s)) != 0) // set input tensor descriptors from input tensors
if (c_set_tensorNd(%(inp)s, bn_input_%(name)s) != 0)
{
%(fail)s
}
if (c_set_tensorNd(%(scale)s, bn_params_%(name)s) != 0)
{ {
%(fail)s %(fail)s
} }
""" % dict(outp=outp, inp=inp, fail=sub['fail'])
# set output tensor descriptor from output tensor // build and prepare the output variable
result += c_set_tensor4d(outp, 'bn_output_' + name, 'err' + name, sub['fail']) if (CudaNdarray_prep_output(&%(outp)s, %(inp)s->nd, CudaNdarray_HOST_DIMS(%(inp)s)) != 0)
{
%(fail)s
}
// set output tensor descriptor from output tensor
if (c_set_tensorNd(%(outp)s, bn_output_%(name)s) != 0)
{
%(fail)s
}
# call cuDNN function
result += """
{ {
const float alpha = 1.; const float alpha = 1.;
const float beta = 0.; const float beta = 0.;
...@@ -2502,7 +2506,7 @@ err%(name)s = cudnnBatchNormalizationForwardInference( ...@@ -2502,7 +2506,7 @@ err%(name)s = cudnnBatchNormalizationForwardInference(
); );
} }
""" % dict(name=name, inp=inp, scale=scale, bias=bias, est_mean=est_mean, """ % dict(name=name, inp=inp, scale=scale, bias=bias, est_mean=est_mean,
est_var=est_var, outp=outp) est_var=est_var, outp=outp, fail=sub['fail'])
return result return result
...@@ -2514,7 +2518,7 @@ err%(name)s = cudnnBatchNormalizationForwardInference( ...@@ -2514,7 +2518,7 @@ err%(name)s = cudnnBatchNormalizationForwardInference(
if self.mode == 'per-activation': if self.mode == 'per-activation':
axes = (0,) axes = (0,)
elif self.mode == 'spatial': elif self.mode == 'spatial':
axes = (0, 2, 3) axes = (0,) + tuple(range(2, x.ndim))
scale, bias, est_mean, est_var = (theano.tensor.addbroadcast(t, *axes) scale, bias, est_mean, est_var = (theano.tensor.addbroadcast(t, *axes)
for t in (scale, bias, est_mean, est_var)) for t in (scale, bias, est_mean, est_var))
...@@ -2547,7 +2551,7 @@ class GpuDnnBatchNorm(GpuDnnBatchNormBase): ...@@ -2547,7 +2551,7 @@ class GpuDnnBatchNorm(GpuDnnBatchNormBase):
Note: scale and bias must follow the same tensor layout! Note: scale and bias must follow the same tensor layout!
""" """
tensor_4d_descs = ['bn_input', 'bn_output', 'bn_params'] tensor_descs = ['bn_input', 'bn_output', 'bn_params']
def infer_shape(self, node, shape): def infer_shape(self, node, shape):
# first output equals shape of x # first output equals shape of x
...@@ -2558,9 +2562,8 @@ class GpuDnnBatchNorm(GpuDnnBatchNormBase): ...@@ -2558,9 +2562,8 @@ class GpuDnnBatchNorm(GpuDnnBatchNormBase):
x = as_cuda_ndarray_variable(x) x = as_cuda_ndarray_variable(x)
scale = as_cuda_ndarray_variable(scale) scale = as_cuda_ndarray_variable(scale)
bias = as_cuda_ndarray_variable(bias) bias = as_cuda_ndarray_variable(bias)
assert x.ndim == 4 assert x.ndim == scale.ndim == bias.ndim
assert scale.ndim == 4 assert x.ndim in (4, 5)
assert bias.ndim == 4
return Apply(self, [x, scale, bias], [x.type(), scale.type(), scale.type()]) return Apply(self, [x, scale, bias], [x.type(), scale.type(), scale.type()])
def c_code(self, node, name, inputs, outputs, sub): def c_code(self, node, name, inputs, outputs, sub):
...@@ -2572,25 +2575,31 @@ class GpuDnnBatchNorm(GpuDnnBatchNormBase): ...@@ -2572,25 +2575,31 @@ class GpuDnnBatchNorm(GpuDnnBatchNormBase):
outp, x_mean, x_invstd = outputs outp, x_mean, x_invstd = outputs
# set input tensor descriptors from input tensors # set input tensor descriptors from input tensors
result += c_set_tensor4d(inp, 'bn_input_' + name, 'err' + name, sub['fail'])
result += c_set_tensor4d(scale, 'bn_params_' + name, 'err' + name, sub['fail'])
# build and prepare the output variables
result += """ result += """
if ((CudaNdarray_prep_output(&%(outp)s, 4, CudaNdarray_HOST_DIMS(%(inp)s)) != 0) || // set input tensor descriptors from input tensors
(CudaNdarray_prep_output(&%(x_mean)s, 4, CudaNdarray_HOST_DIMS(%(scale)s)) != 0) || if (c_set_tensorNd(%(inp)s, bn_input_%(name)s) != 0)
(CudaNdarray_prep_output(&%(x_invstd)s, 4, CudaNdarray_HOST_DIMS(%(scale)s)) != 0)) {
%(fail)s
}
if (c_set_tensorNd(%(scale)s, bn_params_%(name)s) != 0)
{
%(fail)s
}
// build and prepare the output variables
if ((CudaNdarray_prep_output(&%(outp)s, %(inp)s->nd, CudaNdarray_HOST_DIMS(%(inp)s)) != 0) ||
(CudaNdarray_prep_output(&%(x_mean)s, %(inp)s->nd, CudaNdarray_HOST_DIMS(%(scale)s)) != 0) ||
(CudaNdarray_prep_output(&%(x_invstd)s, %(inp)s->nd, CudaNdarray_HOST_DIMS(%(scale)s)) != 0))
{ {
%(fail)s %(fail)s
} }
""" % dict(outp=outp, inp=inp, x_mean=x_mean, x_invstd=x_invstd, scale=scale,
fail=sub['fail'])
# set output tensor descriptor from output tensor // set output tensor descriptor from output tensor
result += c_set_tensor4d(outp, 'bn_output_' + name, 'err' + name, sub['fail']) if (c_set_tensorNd(%(outp)s, bn_output_%(name)s) != 0)
{
%(fail)s
}
# call cuDNN function
result += """
{ {
const float alpha = 1.; const float alpha = 1.;
const float beta = 0.; const float beta = 0.;
...@@ -2615,7 +2624,7 @@ err%(name)s = cudnnBatchNormalizationForwardTraining( ...@@ -2615,7 +2624,7 @@ err%(name)s = cudnnBatchNormalizationForwardTraining(
); );
} }
""" % dict(name=name, inp=inp, scale=scale, bias=bias, outp=outp, """ % dict(name=name, inp=inp, scale=scale, bias=bias, outp=outp,
x_mean=x_mean, x_invstd=x_invstd) x_mean=x_mean, x_invstd=x_invstd, fail=sub['fail'])
return result return result
...@@ -2638,7 +2647,7 @@ class GpuDnnBatchNormGrad(GpuDnnBatchNormBase): ...@@ -2638,7 +2647,7 @@ class GpuDnnBatchNormGrad(GpuDnnBatchNormBase):
Note: scale, mean and invstd must follow the same tensor layout! Note: scale, mean and invstd must follow the same tensor layout!
""" """
tensor_4d_descs = ['bn_input', 'bn_doutput', 'bn_dinput', 'bn_params'] tensor_descs = ['bn_input', 'bn_doutput', 'bn_dinput', 'bn_params']
def infer_shape(self, node, shape): def infer_shape(self, node, shape):
# first output equals shape of x # first output equals shape of x
...@@ -2651,7 +2660,8 @@ class GpuDnnBatchNormGrad(GpuDnnBatchNormBase): ...@@ -2651,7 +2660,8 @@ class GpuDnnBatchNormGrad(GpuDnnBatchNormBase):
scale = as_cuda_ndarray_variable(scale) scale = as_cuda_ndarray_variable(scale)
x_mean = as_cuda_ndarray_variable(x_mean) x_mean = as_cuda_ndarray_variable(x_mean)
x_invstd = as_cuda_ndarray_variable(x_invstd) x_invstd = as_cuda_ndarray_variable(x_invstd)
assert x.ndim == 4 and dy.ndim == 4 and scale.ndim == 4 and x_mean.ndim == 4 and x_invstd.ndim == 4 assert x.ndim == dy.ndim == scale.ndim == x_mean.ndim == x_invstd.ndim
assert x.ndim in (4, 5)
return Apply(self, [x, dy, scale, x_mean, x_invstd], [x.type(), scale.type(), scale.type()]) return Apply(self, [x, dy, scale, x_mean, x_invstd], [x.type(), scale.type(), scale.type()])
def c_code(self, node, name, inputs, outputs, sub): def c_code(self, node, name, inputs, outputs, sub):
...@@ -2662,27 +2672,36 @@ class GpuDnnBatchNormGrad(GpuDnnBatchNormBase): ...@@ -2662,27 +2672,36 @@ class GpuDnnBatchNormGrad(GpuDnnBatchNormBase):
inp, doutp, scale, x_mean, x_invstd = inputs inp, doutp, scale, x_mean, x_invstd = inputs
dinp, dscale, dbias = outputs dinp, dscale, dbias = outputs
# set input tensor descriptors from input tensors # call cuDNN function
result += c_set_tensor4d(inp, 'bn_input_' + name, 'err' + name, sub['fail'])
result += c_set_tensor4d(doutp, 'bn_doutput_' + name, 'err' + name, sub['fail'])
result += c_set_tensor4d(scale, 'bn_params_' + name, 'err' + name, sub['fail'])
# build and prepare the output variables
result += """ result += """
if ((CudaNdarray_prep_output(&%(dinp)s, 4, CudaNdarray_HOST_DIMS(%(inp)s)) != 0) || // set input tensor descriptors from input tensors
(CudaNdarray_prep_output(&%(dscale)s, 4, CudaNdarray_HOST_DIMS(%(scale)s)) != 0) || if (c_set_tensorNd(%(inp)s, bn_input_%(name)s) != 0)
(CudaNdarray_prep_output(&%(dbias)s, 4, CudaNdarray_HOST_DIMS(%(scale)s)) != 0)) {
%(fail)s
}
if (c_set_tensorNd(%(doutp)s, bn_doutput_%(name)s) != 0)
{
%(fail)s
}
if (c_set_tensorNd(%(scale)s, bn_params_%(name)s) != 0)
{ {
%(fail)s %(fail)s
} }
""" % dict(dinp=dinp, inp=inp, dscale=dscale, scale=scale, dbias=dbias,
fail=sub['fail'])
# set output tensor descriptor from output tensor // build and prepare the output variables
result += c_set_tensor4d(dinp, 'bn_dinput_' + name, 'err' + name, sub['fail']) if ((CudaNdarray_prep_output(&%(dinp)s, %(inp)s->nd, CudaNdarray_HOST_DIMS(%(inp)s)) != 0) ||
(CudaNdarray_prep_output(&%(dscale)s, %(inp)s->nd, CudaNdarray_HOST_DIMS(%(scale)s)) != 0) ||
(CudaNdarray_prep_output(&%(dbias)s, %(inp)s->nd, CudaNdarray_HOST_DIMS(%(scale)s)) != 0))
{
%(fail)s
}
// set output tensor descriptor from output tensor
if (c_set_tensorNd(%(dinp)s, bn_dinput_%(name)s) != 0)
{
%(fail)s
}
# call cuDNN function
result += """
{ {
const float alphaData = 1.; const float alphaData = 1.;
const float betaData = 0.; const float betaData = 0.;
...@@ -2711,7 +2730,7 @@ err%(name)s = cudnnBatchNormalizationBackward( ...@@ -2711,7 +2730,7 @@ err%(name)s = cudnnBatchNormalizationBackward(
); );
} }
""" % dict(name=name, inp=inp, doutp=doutp, scale=scale, x_mean=x_mean, """ % dict(name=name, inp=inp, doutp=doutp, scale=scale, x_mean=x_mean,
x_invstd=x_invstd, dinp=dinp, dscale=dscale, dbias=dbias) x_invstd=x_invstd, dinp=dinp, dscale=dscale, dbias=dbias, fail=sub['fail'])
return result return result
...@@ -2759,11 +2778,13 @@ def dnn_batch_normalization_train(inputs, gamma, beta, mode='per-activation', ...@@ -2759,11 +2778,13 @@ def dnn_batch_normalization_train(inputs, gamma, beta, mode='per-activation',
mean = inputs.mean(axes, keepdims=True) mean = inputs.mean(axes, keepdims=True)
stdinv = T.inv(T.sqrt(inputs.var(axes, keepdims=True) + epsilon)) stdinv = T.inv(T.sqrt(inputs.var(axes, keepdims=True) + epsilon))
out = (inputs - mean) * gamma * stdinv + beta out = (inputs - mean) * gamma * stdinv + beta
For 5d tensors, the axes are (0, 2, 3, 4).
""" """
ndim = inputs.ndim ndim = inputs.ndim
if ndim > 4: if ndim > 5:
raise ValueError("dnn_batch_normalization_train currently supports " raise ValueError("dnn_batch_normalization_train currently supports "
"up to 4-dimensional tensors only, got %d" % ndim) "up to 5-dimensional tensors only, got %d" % ndim)
if gamma.ndim != ndim or beta.ndim != ndim: if gamma.ndim != ndim or beta.ndim != ndim:
raise ValueError("gamma and beta must be of the same dimensionality " raise ValueError("gamma and beta must be of the same dimensionality "
"as inputs; got %d and %d instead of %d" % "as inputs; got %d and %d instead of %d" %
...@@ -2828,11 +2849,13 @@ def dnn_batch_normalization_test(inputs, gamma, beta, mean, var, ...@@ -2828,11 +2849,13 @@ def dnn_batch_normalization_test(inputs, gamma, beta, mean, var,
gamma, beta, mean, var = (T.addbroadcast(t, *axes) gamma, beta, mean, var = (T.addbroadcast(t, *axes)
for t in (gamma, beta, mean, var)) for t in (gamma, beta, mean, var))
out = (inputs - mean) * gamma / T.sqrt(var + epsilon) + beta out = (inputs - mean) * gamma / T.sqrt(var + epsilon) + beta
For 5d tensors, the axes would be (0, 2, 3, 4).
""" """
ndim = inputs.ndim ndim = inputs.ndim
if ndim > 4: if ndim > 5:
raise ValueError("dnn_batch_normalization_test currently supports " raise ValueError("dnn_batch_normalization_test currently supports "
"up to 4-dimensional tensors only, got %d" % ndim) "up to 5-dimensional tensors only, got %d" % ndim)
if gamma.ndim != ndim or beta.ndim != ndim: if gamma.ndim != ndim or beta.ndim != ndim:
raise ValueError("gamma and beta must be of the same dimensionality " raise ValueError("gamma and beta must be of the same dimensionality "
"as inputs; got %d and %d instead of %d" % "as inputs; got %d and %d instead of %d" %
......
...@@ -729,7 +729,7 @@ def test_batchnorm_train(): ...@@ -729,7 +729,7 @@ def test_batchnorm_train():
utt.seed_rng() utt.seed_rng()
for mode in ('per-activation', 'spatial'): for mode in ('per-activation', 'spatial'):
for vartype in (T.ftensor4, T.ftensor3, T.fmatrix, T.fvector): for vartype in (T.ftensor5, T.ftensor4, T.ftensor3, T.fmatrix, T.fvector):
x, scale, bias = (vartype(n) for n in ('x', 'scale', 'bias')) x, scale, bias = (vartype(n) for n in ('x', 'scale', 'bias'))
ndim = x.ndim ndim = x.ndim
eps = 5e-3 # some non-standard value to test if it's used eps = 5e-3 # some non-standard value to test if it's used
...@@ -757,7 +757,7 @@ def test_batchnorm_train(): ...@@ -757,7 +757,7 @@ def test_batchnorm_train():
[out, x_mean, x_invstd, out2, x_mean2, x_invstd2] + [out, x_mean, x_invstd, out2, x_mean2, x_invstd2] +
grads + grads2, mode=mode_with_gpu) grads + grads2, mode=mode_with_gpu)
# run # run
for data_shape in ((10, 20, 30, 40), (4, 3, 1, 1), (1, 1, 5, 5)): for data_shape in ((5, 10, 30, 40, 10), (4, 3, 1, 1, 1), (1, 1, 5, 5, 5)):
data_shape = data_shape[:ndim] data_shape = data_shape[:ndim]
param_shape = tuple(1 if d in axes else s param_shape = tuple(1 if d in axes else s
for d, s in enumerate(data_shape)) for d, s in enumerate(data_shape))
...@@ -771,8 +771,8 @@ def test_batchnorm_train(): ...@@ -771,8 +771,8 @@ def test_batchnorm_train():
utt.assert_allclose(outputs[1], outputs[1 + 3]) # mean utt.assert_allclose(outputs[1], outputs[1 + 3]) # mean
utt.assert_allclose(outputs[2], outputs[2 + 3]) # invstd utt.assert_allclose(outputs[2], outputs[2 + 3]) # invstd
# compare gradients # compare gradients
utt.assert_allclose(outputs[6], outputs[6 + 3]) # dx utt.assert_allclose(outputs[6], outputs[6 + 3], atol=1e-4) # dx
utt.assert_allclose(outputs[7], outputs[7 + 3], rtol=3e-3) # dscale utt.assert_allclose(outputs[7], outputs[7 + 3], rtol=2e-4, atol=1e-4) # dscale
utt.assert_allclose(outputs[8], outputs[8 + 3]) # dbias utt.assert_allclose(outputs[8], outputs[8 + 3]) # dbias
...@@ -784,7 +784,7 @@ def test_batchnorm_inference(): ...@@ -784,7 +784,7 @@ def test_batchnorm_inference():
utt.seed_rng() utt.seed_rng()
for mode in ('per-activation', 'spatial'): for mode in ('per-activation', 'spatial'):
for vartype in (T.ftensor4, T.ftensor3, T.fmatrix, T.fvector): for vartype in (T.ftensor5, T.ftensor4, T.ftensor3, T.fmatrix, T.fvector):
x, scale, bias, mean, var = (vartype(n) for n in ('x', 'scale', x, scale, bias, mean, var = (vartype(n) for n in ('x', 'scale',
'bias', 'mean', 'bias', 'mean',
'var')) 'var'))
...@@ -811,7 +811,7 @@ def test_batchnorm_inference(): ...@@ -811,7 +811,7 @@ def test_batchnorm_inference():
f = theano.function([x, scale, bias, mean, var, dy], f = theano.function([x, scale, bias, mean, var, dy],
[out, out2] + grads + grads2, mode=mode_with_gpu) [out, out2] + grads + grads2, mode=mode_with_gpu)
# run # run
for data_shape in ((10, 20, 30, 40), (4, 3, 1, 1), (1, 1, 5, 5)): for data_shape in ((5, 10, 30, 40, 10), (4, 3, 1, 1, 1), (1, 1, 5, 5, 5)):
data_shape = data_shape[:ndim] data_shape = data_shape[:ndim]
param_shape = tuple(1 if d in axes else s param_shape = tuple(1 if d in axes else s
for d, s in enumerate(data_shape)) for d, s in enumerate(data_shape))
...@@ -825,11 +825,11 @@ def test_batchnorm_inference(): ...@@ -825,11 +825,11 @@ def test_batchnorm_inference():
# compare outputs # compare outputs
utt.assert_allclose(outputs[0], outputs[1]) # out utt.assert_allclose(outputs[0], outputs[1]) # out
# compare gradients # compare gradients
utt.assert_allclose(outputs[2], outputs[2 + 5]) # dx utt.assert_allclose(outputs[2], outputs[2 + 5], atol=4e-5) # dx
utt.assert_allclose(outputs[3], outputs[3 + 5]) # dscale utt.assert_allclose(outputs[3], outputs[3 + 5], atol=4e-5) # dscale
utt.assert_allclose(outputs[4], outputs[4 + 5]) # dbias utt.assert_allclose(outputs[4], outputs[4 + 5]) # dbias
utt.assert_allclose(outputs[5], outputs[5 + 5]) # dmean utt.assert_allclose(outputs[5], outputs[5 + 5]) # dmean
utt.assert_allclose(outputs[6], outputs[6 + 5], rtol=2e-3, atol=5e-5) # dvar utt.assert_allclose(outputs[6], outputs[6 + 5], rtol=2e-3, atol=4e-5) # dvar
def test_dnn_tag(): def test_dnn_tag():
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
请 注册 或者 登录 后发表评论