提交 e4e08782 作者: Frédéric Bastien 提交者: GitHub

Merge pull request #4768 from abergeron/gpua_bn

GpuArray BatchNorm
......@@ -1398,6 +1398,326 @@ class GpuDnnSoftmaxGrad(GpuDnnSoftmaxBase):
return Apply(self, [dy, sm], [sm.type()])
class GpuDnnBatchNorm(DnnBase):
    """
    Op computing batch normalization with cuDNN from the statistics of
    its own input.

    A node takes ``(x, scale, bias)``, all 4-dimensional, and has three
    outputs: one typed like `x` and two typed like `scale` (consumed as
    the mean and the inverse standard deviation by :meth:`grad`).

    Parameters
    ----------
    mode : {'per-activation', 'spatial'}
        'per-activation' normalizes every activation separately (bias and
        scale tensor dimensions are then 1xCxHxW); 'spatial' shares the
        normalization factors across the spatial dimensions (bias and
        scale tensor dimensions are then 1xCx1x1).
    epsilon
        Epsilon value used in the batch normalization formula. cuDNN
        imposes a minimum of 1e-5.

    """

    __props__ = ('mode', 'epsilon')

    def __init__(self, mode='per-activation', epsilon=1e-4):
        c_files = ['dnn_batchnorm_base.c', 'dnn_batchnorm.c']
        DnnBase.__init__(self, c_files, 'dnn_batchnorm_op')

        if version() < 5000:
            raise RuntimeError("cuDNN Batch Normalization requires cuDNN v5 or later")

        assert mode in ('per-activation', 'spatial')
        self.mode = mode
        assert epsilon >= 1e-5
        self.epsilon = epsilon

    def get_op_params(self):
        """Return the ``(name, value)`` macro pairs used by the C code."""
        if self.mode == "spatial":
            mode_macro = "CUDNN_BATCHNORM_SPATIAL"
        else:
            mode_macro = "CUDNN_BATCHNORM_PER_ACTIVATION"
        return [('MODE', mode_macro), ('EPSILON', str(self.epsilon))]

    def infer_shape(self, node, shape):
        """First output is shaped like `x`, the other two like `scale`."""
        x_shape, param_shape = shape[0], shape[1]
        return [x_shape, param_shape, param_shape]

    def make_node(self, x, scale, bias):
        """Build the Apply node after moving all inputs to one context."""
        ctx_name = infer_context_name(x, scale, bias)
        x, scale, bias = [as_gpuarray_variable(var, ctx_name)
                          for var in (x, scale, bias)]
        for var in (x, scale, bias):
            assert var.ndim == 4

        out_types = [x.type(), scale.type(), scale.type()]
        return Apply(self, [x, scale, bias], out_types)

    def grad(self, inputs, grads):
        """
        Gradients with respect to ``(x, scale, bias)``.

        Only the gradient flowing into the first output is used; the
        statistics are obtained from a fresh node built on the same
        inputs and handed to `GpuDnnBatchNormGrad`.
        """
        x, scale, bias = inputs
        dy = grads[0]
        fwd_outputs = self.make_node(x, scale, bias).outputs
        _, x_mean, x_invstd = fwd_outputs
        grad_op = GpuDnnBatchNormGrad(self.mode, self.epsilon)
        return grad_op(x, dy, scale, x_mean, x_invstd)
class GpuDnnBatchNormInference(DnnBase):
    """
    Op applying cuDNN batch normalization with externally supplied
    statistics.

    A node takes ``(x, scale, bias, estimated_mean, estimated_variance)``,
    all 4-dimensional, and has a single output typed like `x`.

    Parameters
    ----------
    mode : {'per-activation', 'spatial'}
        'per-activation' normalizes every activation separately (bias and
        scale tensor dimensions are then 1xCxHxW); 'spatial' shares the
        normalization factors across the spatial dimensions (bias and
        scale tensor dimensions are then 1xCx1x1).
    epsilon
        Epsilon value used in the batch normalization formula. cuDNN
        imposes a minimum of 1e-5.

    """

    __props__ = ('mode', 'epsilon')

    def __init__(self, mode='per-activation', epsilon=1e-4):
        c_files = ['dnn_batchnorm_base.c', 'dnn_batchnorm_inf.c']
        DnnBase.__init__(self, c_files, 'dnn_batchnorm_op')

        if version() < 5000:
            raise RuntimeError("cuDNN Batch Normalization requires cuDNN v5 or later")

        assert mode in ('per-activation', 'spatial')
        self.mode = mode
        assert epsilon >= 1e-5
        self.epsilon = epsilon

    def get_op_params(self):
        """Return the ``(name, value)`` macro pairs used by the C code."""
        if self.mode == "spatial":
            mode_macro = "CUDNN_BATCHNORM_SPATIAL"
        else:
            mode_macro = "CUDNN_BATCHNORM_PER_ACTIVATION"
        return [('MODE', mode_macro), ('EPSILON', str(self.epsilon))]

    def infer_shape(self, node, shape):
        """The single output is shaped like `x`."""
        x_shape = shape[0]
        return [x_shape]

    def make_node(self, x, scale, bias, estimated_mean, estimated_variance):
        """Build the Apply node after moving all inputs to one context."""
        ctx_name = infer_context_name(x, scale, bias, estimated_mean,
                                      estimated_variance)
        operands = [as_gpuarray_variable(var, ctx_name)
                    for var in (x, scale, bias, estimated_mean,
                                estimated_variance)]
        for var in operands:
            assert var.ndim == 4

        x = operands[0]
        return Apply(self, operands, [x.type()])

    def grad(self, inputs, grads):
        """
        Symbolic gradients with respect to all five inputs.

        The gradients are written as plain Theano expressions of
        ``out = (x - mean) * scale / sqrt(var + epsilon) + bias``.
        """
        x, scale, bias, est_mean, est_var = inputs
        dy = grads[0]

        # axes along which the parameters are shared
        if self.mode == "per-activation":
            axes = (0,)
        elif self.mode == "spatial":
            axes = (0, 2, 3)

        scale, bias, est_mean, est_var = [
            theano.tensor.addbroadcast(param, *axes)
            for param in (scale, bias, est_mean, est_var)]

        # helper expressions
        var_eps = est_var + self.epsilon
        std = theano.tensor.sqrt(var_eps)
        two = theano.tensor.constant(2.)
        dy_total = dy.sum(axes, keepdims=True)
        centered_total = (dy * (x - est_mean)).sum(axes, keepdims=True)

        # gradients, in the order of the inputs
        dx = dy * (scale / std)
        dscale = centered_total / std
        dbias = dy_total
        dmean = -dy_total * (scale / std)
        dvar = -centered_total * (scale / (two * var_eps * std))
        return [dx, dscale, dbias, dmean, dvar]
class GpuDnnBatchNormGrad(DnnBase):
    """
    Op computing the gradients of cuDNN batch normalization.

    A node takes ``(x, dy, scale, x_mean, x_invstd)``, all 4-dimensional,
    and has three outputs: one typed like `x` and two typed like `scale`.

    Parameters
    ----------
    mode : {'per-activation', 'spatial'}
        Normalization mode; forwarded to the C code as the ``MODE`` macro.
    epsilon
        Epsilon value used in the batch normalization formula. Must be at
        least 1e-5.

    """

    __props__ = ('mode', 'epsilon')

    def __init__(self, mode='per-activation', epsilon=1e-4):
        c_files = ['dnn_batchnorm_base.c', 'dnn_batchnorm_grad.c']
        DnnBase.__init__(self, c_files, 'dnn_batchnorm_grad')

        if version() < 5000:
            raise RuntimeError("cuDNN Batch Normalization requires cuDNN v5 or later")

        assert mode in ('per-activation', 'spatial')
        self.mode = mode
        assert epsilon >= 1e-5
        self.epsilon = epsilon

    def get_op_params(self):
        """Return the ``(name, value)`` macro pairs used by the C code."""
        if self.mode == "spatial":
            mode_macro = "CUDNN_BATCHNORM_SPATIAL"
        else:
            mode_macro = "CUDNN_BATCHNORM_PER_ACTIVATION"
        return [('MODE', mode_macro), ('EPSILON', str(self.epsilon))]

    def make_node(self, x, dy, scale, x_mean, x_invstd):
        """Build the Apply node after moving all inputs to one context."""
        ctx_name = infer_context_name(x, dy, scale, x_mean, x_invstd)
        operands = [as_gpuarray_variable(var, ctx_name)
                    for var in (x, dy, scale, x_mean, x_invstd)]
        assert all(var.ndim == 4 for var in operands)

        x, scale = operands[0], operands[2]
        out_types = [x.type(), scale.type(), scale.type()]
        return Apply(self, operands, out_types)

    def infer_shape(self, node, shape):
        """First output is shaped like `x`, the other two like `scale`."""
        x_shape, param_shape = shape[0], shape[2]
        return [x_shape, param_shape, param_shape]
def dnn_batch_normalization_train(inputs, gamma, beta, mode='per-activation',
                                  epsilon=1e-4):
    """
    Performs batch normalization of the given inputs, using the mean and
    variance of the inputs.

    Parameters
    ----------
    inputs : tensor
        Tensor to normalize, with at most 4 dimensions. Tensors with fewer
        dimensions are padded on the right up to 4 dimensions before the
        cuDNN Op is applied, and the results are flattened back to the
        original number of dimensions.
    gamma : tensor
        Learnable scale factors. Must match the dimensionality of `inputs`,
        but have sizes of `1` for all axes normalized over (i.e., in the
        first dimension for ``mode='per-activation'``, and additionally in
        all dimensions past the second for ``mode='spatial'``).
    beta : tensor
        Learnable biases. Must match the tensor layout of `gamma`.
    mode : {'per-activation', 'spatial'}
        Whether to normalize per activation or share normalization factors
        across spatial dimensions (i.e., all dimensions past the second).
    epsilon : float
        Epsilon value used in the batch normalization formula. Minimum
        allowed value is 1e-5 (imposed by cuDNN).

    Returns
    -------
    out : tensor
        Batch-normalized inputs.
    mean : tensor
        Means of `inputs` across the normalization axes.
    stdinv : tensor
        Inverse standard deviations of `inputs` across the normalization
        axes.

    Raises
    ------
    ValueError
        If `inputs` has more than 4 dimensions, if `gamma` or `beta` does
        not have as many dimensions as `inputs`, or if `epsilon` is below
        1e-5.

    Notes
    -----
    Requires cuDNN 5 and Theano 0.9dev2 or more recent.

    For 4d tensors, returned values are equivalent to:

    .. code-block:: python

        axes = 0 if mode == 'per-activation' else (0, 2, 3)
        mean = inputs.mean(axes, keepdims=True)
        stdinv = T.inv(T.sqrt(inputs.var(axes, keepdims=True) + epsilon))
        out = (inputs - mean) * gamma * stdinv + beta
    """
    ndim = inputs.ndim
    if ndim > 4:
        raise ValueError("dnn_batch_normalization_train currently supports "
                         "up to 4-dimensional tensors only, got %d" % ndim)
    if gamma.ndim != ndim or beta.ndim != ndim:
        raise ValueError("gamma and beta must be of the same dimensionality "
                         "as inputs; got %d and %d instead of %d" %
                         (gamma.ndim, beta.ndim, ndim))
    if epsilon < 1e-5:
        # %g rather than %f: %f prints every value below 5e-7 as
        # "0.000000", which hides the value that was actually passed.
        raise ValueError("epsilon must be at least 1e-5, got %g" % epsilon)

    if ndim < 4:
        # the cuDNN Op only accepts 4d tensors
        inputs = theano.tensor.shape_padright(inputs, 4 - ndim)
        gamma = theano.tensor.shape_padright(gamma, 4 - ndim)
        beta = theano.tensor.shape_padright(beta, 4 - ndim)
    batchnorm_op = GpuDnnBatchNorm(mode=mode, epsilon=epsilon)
    result = tuple(batchnorm_op(gpu_contiguous(inputs), gpu_contiguous(gamma),
                                gpu_contiguous(beta)))
    if ndim < 4:
        # drop the padding again so outputs match the caller's layout
        result = tuple(theano.tensor.flatten(r, ndim) for r in result)
    return result
def dnn_batch_normalization_test(inputs, gamma, beta, mean, var,
                                 mode='per-activation', epsilon=1e-4):
    """
    Performs batch normalization of the given inputs, using the given mean and
    variance.

    Parameters
    ----------
    inputs : tensor
        Tensor to normalize, with at most 4 dimensions. Tensors with fewer
        dimensions are padded on the right up to 4 dimensions before the
        cuDNN Op is applied, and the result is flattened back to the
        original number of dimensions.
    gamma : tensor
        Scale factors. Must match the dimensionality of `inputs`, but have
        sizes of `1` for all axes normalized over (i.e., in the first
        dimension for ``mode='per-activation'``, and additionally in all
        dimensions past the second for ``mode='spatial'``).
    beta : tensor
        Biases. Must match the tensor layout of `gamma`.
    mean : tensor
        Means. Usually these are running averages computed during training.
        Must match the tensor layout of `gamma`.
    var : tensor
        Variances. Usually these are running averages computed during
        training. Must match the tensor layout of `gamma`.
    mode : {'per-activation', 'spatial'}
        Whether to normalize per activation or share normalization factors
        across spatial dimensions (i.e., all dimensions past the second).
    epsilon : float
        Epsilon value used in the batch normalization formula. Minimum
        allowed value is 1e-5 (imposed by cuDNN).

    Returns
    -------
    out : tensor
        Batch-normalized inputs.

    Raises
    ------
    ValueError
        If `inputs` has more than 4 dimensions, if any of `gamma`, `beta`,
        `mean` or `var` does not have as many dimensions as `inputs`, or
        if `epsilon` is below 1e-5.

    Notes
    -----
    Requires cuDNN 5 and Theano 0.9dev2 or more recent.

    For 4d tensors, the returned value is equivalent to:

    .. code-block:: python

        axes = (0,) if mode == 'per-activation' else (0, 2, 3)
        gamma, beta, mean, var = (T.addbroadcast(t, *axes)
                                  for t in (gamma, beta, mean, var))
        out = (inputs - mean) * gamma / T.sqrt(var + epsilon) + beta
    """
    ndim = inputs.ndim
    if ndim > 4:
        raise ValueError("dnn_batch_normalization_test currently supports "
                         "up to 4-dimensional tensors only, got %d" % ndim)
    if gamma.ndim != ndim or beta.ndim != ndim:
        raise ValueError("gamma and beta must be of the same dimensionality "
                         "as inputs; got %d and %d instead of %d" %
                         (gamma.ndim, beta.ndim, ndim))
    if mean.ndim != ndim or var.ndim != ndim:
        raise ValueError("mean and var must be of the same dimensionality "
                         "as inputs; got %d and %d instead of %d" %
                         (mean.ndim, var.ndim, ndim))
    if epsilon < 1e-5:
        # %g rather than %f: %f prints every value below 5e-7 as
        # "0.000000", which hides the value that was actually passed.
        raise ValueError("epsilon must be at least 1e-5, got %g" % epsilon)

    if ndim < 4:
        # the cuDNN Op only accepts 4d tensors
        inputs = theano.tensor.shape_padright(inputs, 4 - ndim)
        gamma = theano.tensor.shape_padright(gamma, 4 - ndim)
        beta = theano.tensor.shape_padright(beta, 4 - ndim)
        mean = theano.tensor.shape_padright(mean, 4 - ndim)
        var = theano.tensor.shape_padright(var, 4 - ndim)
    batchnorm_op = GpuDnnBatchNormInference(mode=mode, epsilon=epsilon)
    result = batchnorm_op(gpu_contiguous(inputs), gpu_contiguous(gamma),
                          gpu_contiguous(beta), gpu_contiguous(mean),
                          gpu_contiguous(var))
    if ndim < 4:
        # drop the padding again so the output matches the caller's layout
        result = theano.tensor.flatten(result, ndim)
    return result
@register_opt2([AbstractConv2d, AbstractConv2d_gradWeights,
AbstractConv2d_gradInputs], 'fast_compile', 'conv_dnn', 'cudnn')
def local_abstractconv_cudnn_graph(op, context_name, inputs, outputs):
......
#section support_code_struct
/*
 * Batch normalization forward pass computing its statistics from `inp`.
 *
 * Fills *outp (same dimensions and typecode as inp) and *x_mean / *x_invstd
 * (same dimensions and typecode as scale) through
 * cudnnBatchNormalizationForwardTraining.
 *
 * Returns 0 on success, 1 on failure; a Python exception is set here when
 * the cuDNN call itself fails.
 */
int dnn_batchnorm_op(PyGpuArrayObject *inp, PyGpuArrayObject *scale,
                     PyGpuArrayObject *bias, PyGpuArrayObject **outp,
                     PyGpuArrayObject **x_mean, PyGpuArrayObject **x_invstd,
                     PyGpuContextObject *c) {
  /* alpha = 1 and beta = 0, available in both precisions */
  const float one_f = 1.;
  const float zero_f = 0.;
  const double one_d = 1.;
  const double zero_d = 0.;
  const int is_double = (inp->ga.typecode == GA_DOUBLE);
  void *alpha = is_double ? (void *)&one_d : (void *)&one_f;
  void *beta = is_double ? (void *)&zero_d : (void *)&zero_f;
  cudnnStatus_t status;

  /* descriptors for the input and for the parameter tensors */
  if (c_set_tensorNd(inp, bn_input) != 0 ||
      c_set_tensorNd(scale, bn_params) != 0)
    return 1;

  /* outputs: one like inp, two like scale */
  if (theano_prep_output(outp, inp->ga.nd, inp->ga.dimensions,
                         inp->ga.typecode, GA_C_ORDER, c) != 0)
    return 1;
  if (theano_prep_output(x_mean, scale->ga.nd, scale->ga.dimensions,
                         scale->ga.typecode, GA_C_ORDER, c) != 0)
    return 1;
  if (theano_prep_output(x_invstd, scale->ga.nd, scale->ga.dimensions,
                         scale->ga.typecode, GA_C_ORDER, c) != 0)
    return 1;

  if (c_set_tensorNd(*outp, bn_output) != 0)
    return 1;

  status = cudnnBatchNormalizationForwardTraining(
    APPLY_SPECIFIC(_handle),
    MODE,
    alpha,
    beta,
    bn_input,
    PyGpuArray_DEV_DATA(inp),
    bn_output,
    PyGpuArray_DEV_DATA(*outp),
    bn_params,
    PyGpuArray_DEV_DATA(scale),
    PyGpuArray_DEV_DATA(bias),
    0,     /* averaging factor for the (unused) running statistics */
    NULL,  /* running mean, deliberately unused */
    NULL,  /* running var, deliberately unused */
    EPSILON,
    PyGpuArray_DEV_DATA(*x_mean),
    PyGpuArray_DEV_DATA(*x_invstd)
  );
  if (status != CUDNN_STATUS_SUCCESS) {
    PyErr_Format(PyExc_RuntimeError, "Error during batchnorm: %s\n",
                 cudnnGetErrorString(status));
    return 1;
  }
  return 0;
}
#section init_code_struct

{
  cudnnStatus_t status;

  /* Start from NULL so the cleanup section only destroys descriptors
     that were actually created. */
  bn_input = NULL;
  bn_params = NULL;
  bn_output = NULL;

  status = cudnnCreateTensorDescriptor(&bn_input);
  if (status != CUDNN_STATUS_SUCCESS) {
    PyErr_Format(PyExc_MemoryError, "could not allocate tensor descriptor "
                 "(bn_input): %s", cudnnGetErrorString(status));
    FAIL;
  }

  status = cudnnCreateTensorDescriptor(&bn_params);
  if (status != CUDNN_STATUS_SUCCESS) {
    PyErr_Format(PyExc_MemoryError, "could not allocate tensor descriptor "
                 "(bn_params): %s", cudnnGetErrorString(status));
    FAIL;
  }

  status = cudnnCreateTensorDescriptor(&bn_output);
  if (status != CUDNN_STATUS_SUCCESS) {
    PyErr_Format(PyExc_MemoryError, "could not allocate tensor descriptor "
                 "(bn_output): %s", cudnnGetErrorString(status));
    FAIL;
  }
}

#section cleanup_code_struct

/* Release whichever descriptors the init section managed to create. */
if (bn_input != NULL) {
  cudnnDestroyTensorDescriptor(bn_input);
}
if (bn_params != NULL) {
  cudnnDestroyTensorDescriptor(bn_params);
}
if (bn_output != NULL) {
  cudnnDestroyTensorDescriptor(bn_output);
}

#section support_code_struct

/* Tensor descriptors shared by the batch normalization functions:
   input, scale/bias-like parameters, and output. */
cudnnTensorDescriptor_t bn_input;
cudnnTensorDescriptor_t bn_params;
cudnnTensorDescriptor_t bn_output;
#section init_code_struct

{
  cudnnStatus_t status;

  /* Start from NULL so the cleanup section only destroys the descriptor
     if it was actually created. */
  bn_doutput = NULL;

  status = cudnnCreateTensorDescriptor(&bn_doutput);
  if (status != CUDNN_STATUS_SUCCESS) {
    PyErr_Format(PyExc_MemoryError, "could not allocate tensor descriptor "
                 "(bn_doutput): %s", cudnnGetErrorString(status));
    FAIL;
  }
}

#section cleanup_code_struct

if (bn_doutput != NULL) {
  cudnnDestroyTensorDescriptor(bn_doutput);
}

#section support_code_struct

/* Tensor descriptor for the gradient with respect to the output. */
cudnnTensorDescriptor_t bn_doutput;
/*
 * Batch normalization backward pass.
 *
 * Fills *dinp (same dimensions and typecode as inp) and *dscale / *dbias
 * (same dimensions and typecode as scale) through
 * cudnnBatchNormalizationBackward, using the x_mean and x_invstd supplied
 * by the caller.
 *
 * Returns 0 on success, 1 on failure; a Python exception is set here when
 * the cuDNN call itself fails.
 */
int dnn_batchnorm_grad(PyGpuArrayObject *inp, PyGpuArrayObject *doutp,
                       PyGpuArrayObject *scale, PyGpuArrayObject *x_mean,
                       PyGpuArrayObject *x_invstd, PyGpuArrayObject **dinp,
                       PyGpuArrayObject **dscale, PyGpuArrayObject **dbias,
                       PyGpuContextObject *c) {
  /* alpha = 1 and beta = 0, available in both precisions */
  const float one_f = 1.;
  const float zero_f = 0.;
  const double one_d = 1.;
  const double zero_d = 0.;
  const int is_double = (inp->ga.typecode == GA_DOUBLE);
  /* the same factors are used for the data and the parameter gradients */
  void *alphaData = is_double ? (void *)&one_d : (void *)&one_f;
  void *betaData = is_double ? (void *)&zero_d : (void *)&zero_f;
  void *alphaParam = is_double ? (void *)&one_d : (void *)&one_f;
  void *betaParam = is_double ? (void *)&zero_d : (void *)&zero_f;
  cudnnStatus_t status;

  /* descriptors for the input, the output gradient and the parameters */
  if (c_set_tensorNd(inp, bn_input) != 0 ||
      c_set_tensorNd(doutp, bn_doutput) != 0 ||
      c_set_tensorNd(scale, bn_params) != 0)
    return 1;

  /* outputs: one like inp, two like scale */
  if (theano_prep_output(dinp, inp->ga.nd, inp->ga.dimensions,
                         inp->ga.typecode, GA_C_ORDER, c) != 0)
    return 1;
  if (theano_prep_output(dscale, scale->ga.nd, scale->ga.dimensions,
                         scale->ga.typecode, GA_C_ORDER, c) != 0)
    return 1;
  if (theano_prep_output(dbias, scale->ga.nd, scale->ga.dimensions,
                         scale->ga.typecode, GA_C_ORDER, c) != 0)
    return 1;

  if (c_set_tensorNd(*dinp, bn_output) != 0)
    return 1;

  status = cudnnBatchNormalizationBackward(
    APPLY_SPECIFIC(_handle),
    MODE,
    alphaData,
    betaData,
    alphaParam,
    betaParam,
    bn_input,
    PyGpuArray_DEV_DATA(inp),
    bn_doutput,
    PyGpuArray_DEV_DATA(doutp),
    bn_output,
    PyGpuArray_DEV_DATA(*dinp),
    bn_params,
    PyGpuArray_DEV_DATA(scale),
    PyGpuArray_DEV_DATA(*dscale),
    PyGpuArray_DEV_DATA(*dbias),
    EPSILON,
    PyGpuArray_DEV_DATA(x_mean),
    PyGpuArray_DEV_DATA(x_invstd)
  );
  if (status != CUDNN_STATUS_SUCCESS) {
    PyErr_Format(PyExc_RuntimeError, "Error during batchnorm: %s\n",
                 cudnnGetErrorString(status));
    return 1;
  }
  return 0;
}
#section support_code_struct
/*
 * Batch normalization forward pass using caller-supplied statistics.
 *
 * Fills *outp (same dimensions and typecode as inp) through
 * cudnnBatchNormalizationForwardInference, normalizing with est_mean and
 * est_var.
 *
 * Returns 0 on success, 1 on failure; a Python exception is set here when
 * the cuDNN call itself fails.
 */
int dnn_batchnorm_op(PyGpuArrayObject *inp, PyGpuArrayObject *scale,
                     PyGpuArrayObject *bias, PyGpuArrayObject *est_mean,
                     PyGpuArrayObject *est_var, PyGpuArrayObject **outp,
                     PyGpuContextObject *c) {
  /* alpha = 1 and beta = 0, available in both precisions */
  const float one_f = 1.;
  const float zero_f = 0.;
  const double one_d = 1.;
  const double zero_d = 0.;
  const int is_double = (inp->ga.typecode == GA_DOUBLE);
  void *alpha = is_double ? (void *)&one_d : (void *)&one_f;
  void *beta = is_double ? (void *)&zero_d : (void *)&zero_f;
  cudnnStatus_t status;

  /* descriptors for the input and for the parameter tensors */
  if (c_set_tensorNd(inp, bn_input) != 0 ||
      c_set_tensorNd(scale, bn_params) != 0)
    return 1;

  /* the single output is laid out like inp */
  if (theano_prep_output(outp, inp->ga.nd, inp->ga.dimensions,
                         inp->ga.typecode, GA_C_ORDER, c) != 0)
    return 1;

  if (c_set_tensorNd(*outp, bn_output) != 0)
    return 1;

  status = cudnnBatchNormalizationForwardInference(
    APPLY_SPECIFIC(_handle),
    MODE,
    alpha,
    beta,
    bn_input,
    PyGpuArray_DEV_DATA(inp),
    bn_output,
    PyGpuArray_DEV_DATA(*outp),
    bn_params,
    PyGpuArray_DEV_DATA(scale),
    PyGpuArray_DEV_DATA(bias),
    PyGpuArray_DEV_DATA(est_mean),
    PyGpuArray_DEV_DATA(est_var),
    EPSILON
  );
  if (status != CUDNN_STATUS_SUCCESS) {
    PyErr_Format(PyExc_RuntimeError, "Error during batchnorm: %s\n",
                 cudnnGetErrorString(status));
    return 1;
  }
  return 0;
}
......@@ -973,3 +973,114 @@ class test_SoftMax(test_nnet.test_SoftMax):
# Compare the output of the function with the reference function
inp = numpy.random.normal(0, 1, (5, 6)).astype("float32")
utt.assert_allclose(f(inp), f_ref(inp))
def test_dnn_batchnorm_train():
    """
    Compare `dnn.dnn_batch_normalization_train` and its gradients with a
    reference written in plain Theano expressions, for both modes and for
    1d to 4d float32 inputs.
    """
    if not dnn.dnn_available(test_ctx_name):
        raise SkipTest(dnn.dnn_available.msg)
    if dnn.version(raises=False) < 5000:
        raise SkipTest("batch normalization requires cudnn v5+")
    utt.seed_rng()

    tensor_types = (T.ftensor4, T.ftensor3, T.fmatrix, T.fvector)
    full_shapes = ((10, 20, 30, 40), (4, 3, 1, 1), (1, 1, 5, 5))

    for mode in ('per-activation', 'spatial'):
        for vartype in tensor_types:
            x = vartype('x')
            scale = vartype('scale')
            bias = vartype('bias')
            ndim = x.ndim
            eps = 5e-3  # some non-standard value to test if it's used

            # cuDNN forward pass
            out, x_mean, x_invstd = dnn.dnn_batch_normalization_train(
                x, scale, bias, mode, eps)

            # reference forward pass
            if mode == 'per-activation':
                axes = (0,)
            else:
                axes = (0,) + tuple(range(2, ndim))
            ref_mean = x.mean(axis=axes, keepdims=True)
            ref_invstd = T.inv(T.sqrt(x.var(axis=axes, keepdims=True) + eps))
            bc_scale = T.addbroadcast(scale, *axes)
            bc_bias = T.addbroadcast(bias, *axes)
            ref_out = (x - ref_mean) * (bc_scale * ref_invstd) + bc_bias

            # backward passes: cuDNN first, then the reference
            dy = vartype('dy')
            wrt = [x, scale, bias]
            grads = T.grad(None, wrt=wrt, known_grads={out: dy})
            ref_grads = T.grad(None, wrt=wrt, known_grads={ref_out: dy})

            # compile
            fwd_outputs = [out, x_mean, x_invstd,
                           ref_out, ref_mean, ref_invstd]
            f = theano.function([x, scale, bias, dy],
                                fwd_outputs + grads + ref_grads,
                                mode=mode_with_gpu)

            # run
            for full_shape in full_shapes:
                data_shape = full_shape[:ndim]
                param_shape = tuple(1 if axis in axes else size
                                    for axis, size in enumerate(data_shape))
                X = 4 + 3 * numpy.random.randn(*data_shape).astype('float32')
                Dy = -1 + 2 * numpy.random.randn(*data_shape).astype('float32')
                Scale = numpy.random.randn(*param_shape).astype('float32')
                Bias = numpy.random.randn(*param_shape).astype('float32')

                (out_v, mean_v, invstd_v,
                 out_r, mean_r, invstd_r,
                 dx_v, dscale_v, dbias_v,
                 dx_r, dscale_r, dbias_r) = f(X, Scale, Bias, Dy)

                # compare outputs
                utt.assert_allclose(out_v, out_r)
                utt.assert_allclose(mean_v, mean_r)
                utt.assert_allclose(invstd_v, invstd_r)
                # compare gradients
                utt.assert_allclose(dx_v, dx_r)
                utt.assert_allclose(dscale_v, dscale_r, rtol=3e-3)
                utt.assert_allclose(dbias_v, dbias_r)
def test_batchnorm_inference():
    """
    Compare `dnn.dnn_batch_normalization_test` and its gradients with a
    reference written in plain Theano expressions, for both modes and for
    1d to 4d float32 inputs.
    """
    if not dnn.dnn_available(test_ctx_name):
        raise SkipTest(dnn.dnn_available.msg)
    if dnn.version(raises=False) < 5000:
        raise SkipTest("batch normalization requires cudnn v5+")
    utt.seed_rng()

    tensor_types = (T.ftensor4, T.ftensor3, T.fmatrix, T.fvector)
    full_shapes = ((10, 20, 30, 40), (4, 3, 1, 1), (1, 1, 5, 5))

    for mode in ('per-activation', 'spatial'):
        for vartype in tensor_types:
            x = vartype('x')
            scale = vartype('scale')
            bias = vartype('bias')
            mean = vartype('mean')
            var = vartype('var')
            ndim = x.ndim
            eps = 5e-3  # some non-standard value to test if it's used

            # cuDNN forward pass
            out = dnn.dnn_batch_normalization_test(x, scale, bias, mean,
                                                   var, mode, eps)

            # reference forward pass
            if mode == 'per-activation':
                axes = (0,)
            else:
                axes = (0,) + tuple(range(2, ndim))
            bc_scale, bc_bias, bc_mean, bc_var = [
                T.addbroadcast(param, *axes)
                for param in (scale, bias, mean, var)]
            ref_out = ((x - bc_mean) * (bc_scale / T.sqrt(bc_var + eps)) +
                       bc_bias)

            # backward passes: cuDNN first, then the reference
            dy = vartype('dy')
            wrt = [x, scale, bias, mean, var]
            grads = T.grad(None, wrt=wrt, known_grads={out: dy})
            ref_grads = T.grad(None, wrt=wrt, known_grads={ref_out: dy})

            # compile
            f = theano.function([x, scale, bias, mean, var, dy],
                                [out, ref_out] + grads + ref_grads,
                                mode=mode_with_gpu)

            # run
            for full_shape in full_shapes:
                data_shape = full_shape[:ndim]
                param_shape = tuple(1 if axis in axes else size
                                    for axis, size in enumerate(data_shape))
                X = 4 + 3 * numpy.random.randn(*data_shape).astype('float32')
                Dy = -1 + 2 * numpy.random.randn(*data_shape).astype('float32')
                Scale = numpy.random.randn(*param_shape).astype('float32')
                Bias = numpy.random.randn(*param_shape).astype('float32')
                Mean = numpy.random.randn(*param_shape).astype('float32')
                Var = numpy.random.rand(*param_shape).astype('float32')

                (out_v, out_r,
                 dx_v, dscale_v, dbias_v, dmean_v, dvar_v,
                 dx_r, dscale_r, dbias_r, dmean_r, dvar_r) = f(
                    X, Scale, Bias, Mean, Var, Dy)

                # compare outputs
                utt.assert_allclose(out_v, out_r)
                # compare gradients
                utt.assert_allclose(dx_v, dx_r)
                utt.assert_allclose(dscale_v, dscale_r)
                utt.assert_allclose(dbias_v, dbias_r)
                utt.assert_allclose(dmean_v, dmean_r)
                utt.assert_allclose(dvar_v, dvar_r, atol=2e-5)
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论