Commit acf5edee, authored by f0k

Add tests for cuDNN batch normalization convenience functions (replacing Op tests)

Parent commit: e09caa8a
...@@ -715,7 +715,7 @@ class test_DnnSoftMax(test_nnet.test_SoftMax): ...@@ -715,7 +715,7 @@ class test_DnnSoftMax(test_nnet.test_SoftMax):
utt.assert_allclose(f(inp), f_ref(inp)) utt.assert_allclose(f(inp), f_ref(inp))
def test_batchnorm_train():
    """Check dnn_batch_normalization_train against a pure-Theano reference.

    For both cuDNN modes ('per-activation' and 'spatial') and for input
    ranks 1-4, builds the cuDNN-based training-time batch normalization
    graph and an equivalent symbolic reference, then compares forward
    outputs (out, mean, invstd) and the gradients w.r.t. x, scale and
    bias on several data shapes.

    Requires cuDNN v5+; skipped otherwise.
    """
    if not cuda.dnn.dnn_available():
        raise SkipTest(cuda.dnn.dnn_available.msg)
    if cuda.dnn.version() < (5000, 5000):
        # NOTE(review): this skip body was elided in the diff view;
        # reconstructed by symmetry with the guard above — confirm message.
        raise SkipTest("batch normalization requires cudnn v5")
    utt.seed_rng()

    for mode in ('per-activation', 'spatial'):
        # Exercise every supported input rank, not just 4-D tensors.
        for vartype in (T.tensor4, T.tensor3, T.matrix, T.vector):
            x, scale, bias = (vartype(n) for n in ('x', 'scale', 'bias'))
            ndim = x.ndim
            eps = 5e-3  # some non-standard value to test if it's used

            # forward pass (cuDNN convenience function under test)
            out, x_mean, x_invstd = cuda.dnn.dnn_batch_normalization_train(
                x, scale, bias, mode, eps)
            # reference forward pass in plain Theano
            if mode == 'per-activation':
                axes = (0,)
            elif mode == 'spatial':
                # normalize over batch and all spatial dims, keep channels
                axes = (0,) + tuple(range(2, ndim))
            x_mean2 = x.mean(axis=axes, keepdims=True)
            x_invstd2 = T.inv(T.sqrt(x.var(axis=axes, keepdims=True) + eps))
            scale2 = T.addbroadcast(scale, *axes)
            bias2 = T.addbroadcast(bias, *axes)
            out2 = (x - x_mean2) * (scale2 * x_invstd2) + bias2
            # backward pass
            dy = vartype('dy')
            grads = T.grad(None, wrt=[x, scale, bias], known_grads={out: dy})
            # reference backward pass
            grads2 = T.grad(None, wrt=[x, scale, bias], known_grads={out2: dy})
            # compile: evaluate cuDNN and reference graphs in one function
            f = theano.function([x, scale, bias, dy],
                                [out, x_mean, x_invstd,
                                 out2, x_mean2, x_invstd2] +
                                grads + grads2, mode=mode_with_gpu)
            # run
            floatX = theano.config.floatX
            for data_shape in ((10, 20, 30, 40), (4, 3, 1, 1), (1, 1, 5, 5)):
                data_shape = data_shape[:ndim]  # truncate to input rank
                # parameters are broadcastable along the normalized axes
                param_shape = tuple(1 if d in axes else s
                                    for d, s in enumerate(data_shape))
                X = 4 + 3 * numpy.random.randn(*data_shape).astype(floatX)
                Dy = -1 + 2 * numpy.random.randn(*data_shape).astype(floatX)
                Scale = numpy.random.randn(*param_shape).astype(floatX)
                Bias = numpy.random.randn(*param_shape).astype(floatX)
                outputs = f(X, Scale, Bias, Dy)
                # compare outputs
                utt.assert_allclose(outputs[0], outputs[0 + 3])  # out
                utt.assert_allclose(outputs[1], outputs[1 + 3])  # mean
                utt.assert_allclose(outputs[2], outputs[2 + 3])  # invstd
                # compare gradients
                utt.assert_allclose(outputs[6], outputs[6 + 3])  # dx
                utt.assert_allclose(outputs[7], outputs[7 + 3],
                                    rtol=3e-3)  # dscale
                utt.assert_allclose(outputs[8], outputs[8 + 3])  # dbias
def test_batchnorm_inference():
    """Check dnn_batch_normalization_test against a pure-Theano reference.

    For both cuDNN modes ('per-activation' and 'spatial') and for input
    ranks 1-4, builds the cuDNN-based inference-time batch normalization
    graph (using externally supplied mean/var) and an equivalent symbolic
    reference, then compares the forward output and the gradients w.r.t.
    x, scale, bias, mean and var on several data shapes.

    Requires cuDNN v5+; skipped otherwise.
    """
    if not cuda.dnn.dnn_available():
        raise SkipTest(cuda.dnn.dnn_available.msg)
    if cuda.dnn.version() < (5000, 5000):
        # NOTE(review): this skip body was elided in the diff view;
        # reconstructed by symmetry with the guard above — confirm message.
        raise SkipTest("batch normalization requires cudnn v5")
    utt.seed_rng()

    for mode in ('per-activation', 'spatial'):
        # Exercise every supported input rank, not just 4-D tensors.
        for vartype in (T.tensor4, T.tensor3, T.matrix, T.vector):
            x, scale, bias, mean, var = (vartype(n)
                                         for n in ('x', 'scale', 'bias',
                                                   'mean', 'var'))
            ndim = x.ndim
            eps = 5e-3  # some non-standard value to test if it's used

            # forward pass (cuDNN convenience function under test)
            out = cuda.dnn.dnn_batch_normalization_test(x, scale, bias,
                                                        mean, var, mode, eps)
            # reference forward pass in plain Theano
            if mode == 'per-activation':
                axes = (0,)
            elif mode == 'spatial':
                # normalize over batch and all spatial dims, keep channels
                axes = (0,) + tuple(range(2, ndim))
            scale2, bias2, mean2, var2 = (T.addbroadcast(t, *axes)
                                          for t in (scale, bias, mean, var))
            out2 = (x - mean2) * (scale2 / T.sqrt(var2 + eps)) + bias2
            # backward pass
            dy = vartype('dy')
            grads = T.grad(None, wrt=[x, scale, bias, mean, var],
                           known_grads={out: dy})
            # reference backward pass
            grads2 = T.grad(None, wrt=[x, scale, bias, mean, var],
                            known_grads={out2: dy})
            # compile: evaluate cuDNN and reference graphs in one function
            f = theano.function([x, scale, bias, mean, var, dy],
                                [out, out2] + grads + grads2,
                                mode=mode_with_gpu)
            # run
            floatX = theano.config.floatX
            for data_shape in ((10, 20, 30, 40), (4, 3, 1, 1), (1, 1, 5, 5)):
                data_shape = data_shape[:ndim]  # truncate to input rank
                # parameters are broadcastable along the normalized axes
                param_shape = tuple(1 if d in axes else s
                                    for d, s in enumerate(data_shape))
                X = 4 + 3 * numpy.random.randn(*data_shape).astype(floatX)
                Dy = -1 + 2 * numpy.random.randn(*data_shape).astype(floatX)
                Scale = numpy.random.randn(*param_shape).astype(floatX)
                Bias = numpy.random.randn(*param_shape).astype(floatX)
                Mean = numpy.random.randn(*param_shape).astype(floatX)
                # variance must be non-negative, hence rand, not randn
                Var = numpy.random.rand(*param_shape).astype(floatX)
                outputs = f(X, Scale, Bias, Mean, Var, Dy)
                # compare outputs
                utt.assert_allclose(outputs[0], outputs[1])  # out
                # compare gradients
                utt.assert_allclose(outputs[2], outputs[2 + 5])  # dx
                utt.assert_allclose(outputs[3], outputs[3 + 5])  # dscale
                utt.assert_allclose(outputs[4], outputs[4 + 5])  # dbias
                utt.assert_allclose(outputs[5], outputs[5 + 5])  # dmean
                utt.assert_allclose(outputs[6], outputs[6 + 5])  # dvar
def test_dnn_tag(): def test_dnn_tag():
......
Markdown is supported
0%
You are adding 0 people to this discussion. Please proceed with caution.
Please finish editing this comment first!
Register or sign in to post a comment