提交 f5a51eaa 作者: Gijs van Tulder

GpuDnnBatchNorm now accepts 5d inputs.

上级 8f0b0888
......@@ -1447,9 +1447,8 @@ class GpuDnnBatchNorm(DnnBase):
scale = as_gpuarray_variable(scale, ctx_name)
bias = as_gpuarray_variable(bias, ctx_name)
epsilon = as_scalar(epsilon).astype('float64')
assert x.ndim == 4
assert scale.ndim == 4
assert bias.ndim == 4
assert x.ndim == scale.ndim == bias.ndim
assert x.ndim in (4, 5)
return Apply(self, [x, scale, bias, epsilon], [x.type(), scale.type(), scale.type()])
def grad(self, inputs, grads):
......@@ -1511,11 +1510,8 @@ class GpuDnnBatchNormInference(DnnBase):
estimated_mean = as_gpuarray_variable(estimated_mean, ctx_name)
estimated_variance = as_gpuarray_variable(estimated_variance, ctx_name)
epsilon = as_scalar(epsilon).astype('float64')
assert x.ndim == 4
assert scale.ndim == 4
assert bias.ndim == 4
assert estimated_mean.ndim == 4
assert estimated_variance.ndim == 4
assert x.ndim == scale.ndim == bias.ndim == estimated_mean.ndim == estimated_variance.ndim
assert x.ndim in (4, 5)
return Apply(self, [x, scale, bias, estimated_mean, estimated_variance, epsilon], [x.type()])
def grad(self, inputs, grads):
......@@ -1525,7 +1521,7 @@ class GpuDnnBatchNormInference(DnnBase):
if self.mode == "per-activation":
axes = (0,)
elif self.mode == "spatial":
axes = (0, 2, 3)
axes = (0,) + tuple(range(2, x.ndim))
scale, bias, est_mean, est_var = (theano.tensor.addbroadcast(t, *axes)
for t in (scale, bias, est_mean, est_var))
......@@ -1574,7 +1570,8 @@ class GpuDnnBatchNormGrad(DnnBase):
x_mean = as_gpuarray_variable(x_mean, ctx_name)
x_invstd = as_gpuarray_variable(x_invstd, ctx_name)
epsilon = as_scalar(epsilon).astype('float64')
assert x.ndim == 4 and dy.ndim == 4 and scale.ndim == 4 and x_mean.ndim == 4 and x_invstd.ndim == 4
assert x.ndim == dy.ndim == scale.ndim == x_mean.ndim == x_invstd.ndim
assert x.ndim in (4, 5)
return Apply(self, [x, dy, scale, x_mean, x_invstd, epsilon], [x.type(), scale.type(), scale.type()])
def infer_shape(self, node, shape):
......@@ -1624,11 +1621,13 @@ def dnn_batch_normalization_train(inputs, gamma, beta, mode='per-activation',
mean = inputs.mean(axes, keepdims=True)
stdinv = T.inv(T.sqrt(inputs.var(axes, keepdims=True) + epsilon))
out = (inputs - mean) * gamma * stdinv + beta
For 5d tensors, the axes are (0, 2, 3, 4).
"""
ndim = inputs.ndim
if ndim > 4:
if ndim > 5:
raise ValueError("dnn_batch_normalization_train currently supports "
"up to 4-dimensional tensors only, got %d" % ndim)
"up to 5-dimensional tensors only, got %d" % ndim)
if gamma.ndim != ndim or beta.ndim != ndim:
raise ValueError("gamma and beta must be of the same dimensionality "
"as inputs; got %d and %d instead of %d" %
......@@ -1693,11 +1692,13 @@ def dnn_batch_normalization_test(inputs, gamma, beta, mean, var,
gamma, beta, mean, var = (T.addbroadcast(t, *axes)
for t in (gamma, beta, mean, var))
out = (inputs - mean) * gamma / T.sqrt(var + epsilon) + beta
For 5d tensors, the axes would be (0, 2, 3, 4).
"""
ndim = inputs.ndim
if ndim > 4:
if ndim > 5:
raise ValueError("dnn_batch_normalization_test currently supports "
"up to 4-dimensional tensors only, got %d" % ndim)
"up to 5-dimensional tensors only, got %d" % ndim)
if gamma.ndim != ndim or beta.ndim != ndim:
raise ValueError("gamma and beta must be of the same dimensionality "
"as inputs; got %d and %d instead of %d" %
......
......@@ -994,7 +994,7 @@ def test_dnn_batchnorm_train():
utt.seed_rng()
for mode in ('per-activation', 'spatial'):
for vartype in (T.ftensor4, T.ftensor3, T.fmatrix, T.fvector):
for vartype in (T.ftensor5, T.ftensor4, T.ftensor3, T.fmatrix, T.fvector):
x, scale, bias = (vartype(n) for n in ('x', 'scale', 'bias'))
ndim = x.ndim
eps = 5e-3 # some non-standard value to test if it's used
......@@ -1022,7 +1022,7 @@ def test_dnn_batchnorm_train():
[out, x_mean, x_invstd, out2, x_mean2, x_invstd2] +
grads + grads2, mode=mode_with_gpu)
# run
for data_shape in ((10, 20, 30, 40), (4, 3, 1, 1), (1, 1, 5, 5)):
for data_shape in ((5, 10, 30, 40, 10), (4, 3, 1, 1, 1), (1, 1, 5, 5, 5)):
data_shape = data_shape[:ndim]
param_shape = tuple(1 if d in axes else s
for d, s in enumerate(data_shape))
......@@ -1036,8 +1036,8 @@ def test_dnn_batchnorm_train():
utt.assert_allclose(outputs[1], outputs[1 + 3]) # mean
utt.assert_allclose(outputs[2], outputs[2 + 3]) # invstd
# compare gradients
utt.assert_allclose(outputs[6], outputs[6 + 3]) # dx
utt.assert_allclose(outputs[7], outputs[7 + 3], rtol=3e-3) # dscale
utt.assert_allclose(outputs[6], outputs[6 + 3], atol=1e-4) # dx
utt.assert_allclose(outputs[7], outputs[7 + 3], rtol=2e-4, atol=1e-4) # dscale
utt.assert_allclose(outputs[8], outputs[8 + 3]) # dbias
......@@ -1049,7 +1049,7 @@ def test_batchnorm_inference():
utt.seed_rng()
for mode in ('per-activation', 'spatial'):
for vartype in (T.ftensor4, T.ftensor3, T.fmatrix, T.fvector):
for vartype in (T.ftensor5, T.ftensor4, T.ftensor3, T.fmatrix, T.fvector):
x, scale, bias, mean, var = (vartype(n) for n in ('x', 'scale',
'bias', 'mean',
'var'))
......@@ -1076,7 +1076,7 @@ def test_batchnorm_inference():
f = theano.function([x, scale, bias, mean, var, dy],
[out, out2] + grads + grads2, mode=mode_with_gpu)
# run
for data_shape in ((10, 20, 30, 40), (4, 3, 1, 1), (1, 1, 5, 5)):
for data_shape in ((10, 20, 30, 40, 10), (4, 3, 1, 1, 1), (1, 1, 5, 5, 5)):
data_shape = data_shape[:ndim]
param_shape = tuple(1 if d in axes else s
for d, s in enumerate(data_shape))
......@@ -1090,8 +1090,8 @@ def test_batchnorm_inference():
# compare outputs
utt.assert_allclose(outputs[0], outputs[1]) # out
# compare gradients
utt.assert_allclose(outputs[2], outputs[2 + 5]) # dx
utt.assert_allclose(outputs[3], outputs[3 + 5]) # dscale
utt.assert_allclose(outputs[2], outputs[2 + 5], atol=4e-5) # dx
utt.assert_allclose(outputs[3], outputs[3 + 5], atol=4e-5) # dscale
utt.assert_allclose(outputs[4], outputs[4 + 5]) # dbias
utt.assert_allclose(outputs[5], outputs[5 + 5]) # dmean
utt.assert_allclose(outputs[6], outputs[6 + 5], atol=2e-5) # dvar
utt.assert_allclose(outputs[6], outputs[6 + 5], rtol=2e-3, atol=4e-5) # dvar
差异被折叠。
......@@ -734,7 +734,7 @@ def test_batchnorm_train():
utt.seed_rng()
for mode in ('per-activation', 'spatial'):
for vartype in (T.ftensor4, T.ftensor3, T.fmatrix, T.fvector):
for vartype in (T.ftensor5, T.ftensor4, T.ftensor3, T.fmatrix, T.fvector):
x, scale, bias = (vartype(n) for n in ('x', 'scale', 'bias'))
ndim = x.ndim
eps = 5e-3 # some non-standard value to test if it's used
......@@ -762,7 +762,7 @@ def test_batchnorm_train():
[out, x_mean, x_invstd, out2, x_mean2, x_invstd2] +
grads + grads2, mode=mode_with_gpu)
# run
for data_shape in ((10, 20, 30, 40), (4, 3, 1, 1), (1, 1, 5, 5)):
for data_shape in ((5, 10, 30, 40, 10), (4, 3, 1, 1, 1), (1, 1, 5, 5, 5)):
data_shape = data_shape[:ndim]
param_shape = tuple(1 if d in axes else s
for d, s in enumerate(data_shape))
......@@ -776,8 +776,8 @@ def test_batchnorm_train():
utt.assert_allclose(outputs[1], outputs[1 + 3]) # mean
utt.assert_allclose(outputs[2], outputs[2 + 3]) # invstd
# compare gradients
utt.assert_allclose(outputs[6], outputs[6 + 3]) # dx
utt.assert_allclose(outputs[7], outputs[7 + 3], rtol=3e-3) # dscale
utt.assert_allclose(outputs[6], outputs[6 + 3], atol=1e-4) # dx
utt.assert_allclose(outputs[7], outputs[7 + 3], rtol=2e-4, atol=1e-4) # dscale
utt.assert_allclose(outputs[8], outputs[8 + 3]) # dbias
......@@ -789,7 +789,7 @@ def test_batchnorm_inference():
utt.seed_rng()
for mode in ('per-activation', 'spatial'):
for vartype in (T.ftensor4, T.ftensor3, T.fmatrix, T.fvector):
for vartype in (T.ftensor5, T.ftensor4, T.ftensor3, T.fmatrix, T.fvector):
x, scale, bias, mean, var = (vartype(n) for n in ('x', 'scale',
'bias', 'mean',
'var'))
......@@ -816,7 +816,7 @@ def test_batchnorm_inference():
f = theano.function([x, scale, bias, mean, var, dy],
[out, out2] + grads + grads2, mode=mode_with_gpu)
# run
for data_shape in ((10, 20, 30, 40), (4, 3, 1, 1), (1, 1, 5, 5)):
for data_shape in ((5, 10, 30, 40, 10), (4, 3, 1, 1, 1), (1, 1, 5, 5, 5)):
data_shape = data_shape[:ndim]
param_shape = tuple(1 if d in axes else s
for d, s in enumerate(data_shape))
......@@ -830,11 +830,11 @@ def test_batchnorm_inference():
# compare outputs
utt.assert_allclose(outputs[0], outputs[1]) # out
# compare gradients
utt.assert_allclose(outputs[2], outputs[2 + 5]) # dx
utt.assert_allclose(outputs[3], outputs[3 + 5]) # dscale
utt.assert_allclose(outputs[2], outputs[2 + 5], atol=4e-5) # dx
utt.assert_allclose(outputs[3], outputs[3 + 5], atol=4e-5) # dscale
utt.assert_allclose(outputs[4], outputs[4 + 5]) # dbias
utt.assert_allclose(outputs[5], outputs[5 + 5]) # dmean
utt.assert_allclose(outputs[6], outputs[6 + 5], rtol=2e-3, atol=5e-5) # dvar
utt.assert_allclose(outputs[6], outputs[6 + 5], rtol=2e-3, atol=4e-5) # dvar
def test_dnn_tag():
......
Markdown 格式
0%
您即将添加 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
请 注册 或者 登录 后发表评论