提交 8682627a authored 作者: Frédéric Bastien's avatar Frédéric Bastien 提交者: GitHub

Merge pull request #5028 from slefrancois/dnn_epsilon_to_in

dnn_batchnorm epsilon to input
...@@ -1420,9 +1420,9 @@ class GpuDnnBatchNorm(DnnBase): ...@@ -1420,9 +1420,9 @@ class GpuDnnBatchNorm(DnnBase):
value is 1e-5 (imposed by cuDNN). value is 1e-5 (imposed by cuDNN).
""" """
__props__ = ('mode', 'epsilon') __props__ = ('mode',)
def __init__(self, mode='per-activation', epsilon=1e-4): def __init__(self, mode='per-activation'):
DnnBase.__init__(self, ['dnn_batchnorm_base.c', 'dnn_batchnorm.c'], DnnBase.__init__(self, ['dnn_batchnorm_base.c', 'dnn_batchnorm.c'],
'dnn_batchnorm_op') 'dnn_batchnorm_op')
...@@ -1431,36 +1431,38 @@ class GpuDnnBatchNorm(DnnBase): ...@@ -1431,36 +1431,38 @@ class GpuDnnBatchNorm(DnnBase):
assert (mode in ('per-activation', 'spatial')) assert (mode in ('per-activation', 'spatial'))
self.mode = mode self.mode = mode
assert (epsilon >= 1e-5)
self.epsilon = epsilon
def get_op_params(self): def get_op_params(self):
params = [] params = []
params.append(('MODE', ("CUDNN_BATCHNORM_SPATIAL" params.append(('MODE', ("CUDNN_BATCHNORM_SPATIAL"
if self.mode == "spatial" if self.mode == "spatial"
else "CUDNN_BATCHNORM_PER_ACTIVATION"))) else "CUDNN_BATCHNORM_PER_ACTIVATION")))
params.append(('EPSILON', str(self.epsilon)))
return params return params
def infer_shape(self, node, shape): def infer_shape(self, node, shape):
return [shape[0], shape[1], shape[1]] return [shape[0], shape[1], shape[1]]
def make_node(self, x, scale, bias): def make_node(self, x, scale, bias, epsilon=1e-4):
ctx_name = infer_context_name(x, scale, bias) ctx_name = infer_context_name(x, scale, bias)
x = as_gpuarray_variable(x, ctx_name) x = as_gpuarray_variable(x, ctx_name)
scale = as_gpuarray_variable(scale, ctx_name) scale = as_gpuarray_variable(scale, ctx_name)
bias = as_gpuarray_variable(bias, ctx_name) bias = as_gpuarray_variable(bias, ctx_name)
epsilon = as_scalar(epsilon).astype('float64')
assert x.ndim == 4 assert x.ndim == 4
assert scale.ndim == 4 assert scale.ndim == 4
assert bias.ndim == 4 assert bias.ndim == 4
return Apply(self, [x, scale, bias], [x.type(), scale.type(), scale.type()]) return Apply(self, [x, scale, bias, epsilon], [x.type(), scale.type(), scale.type()])
def grad(self, inputs, grads): def grad(self, inputs, grads):
x, scale, bias = inputs x, scale, bias, epsilon = inputs
dy = grads[0] dy = grads[0]
_, x_mean, x_invstd = self.make_node(x, scale, bias).outputs _, x_mean, x_invstd = self.make_node(x, scale, bias, epsilon).outputs
return GpuDnnBatchNormGrad(self.mode, self.epsilon)(x, dy, scale, return GpuDnnBatchNormGrad(self.mode)(x, dy, scale, x_mean,
x_mean, x_invstd) x_invstd, epsilon) + [DisconnectedType()()]
def connection_pattern(self, node):
        # Specify that epsilon is not connected to outputs.
return [[True, True, True], [True, True, True], [True, True, True],
[False, False, False]]
class GpuDnnBatchNormInference(DnnBase): class GpuDnnBatchNormInference(DnnBase):
...@@ -1479,9 +1481,9 @@ class GpuDnnBatchNormInference(DnnBase): ...@@ -1479,9 +1481,9 @@ class GpuDnnBatchNormInference(DnnBase):
value is 1e-5 (imposed by cuDNN). value is 1e-5 (imposed by cuDNN).
""" """
__props__ = ('mode', 'epsilon') __props__ = ('mode',)
def __init__(self, mode='per-activation', epsilon=1e-4): def __init__(self, mode='per-activation'):
DnnBase.__init__(self, ['dnn_batchnorm_base.c', 'dnn_batchnorm_inf.c'], DnnBase.__init__(self, ['dnn_batchnorm_base.c', 'dnn_batchnorm_inf.c'],
'dnn_batchnorm_op') 'dnn_batchnorm_op')
...@@ -1490,21 +1492,17 @@ class GpuDnnBatchNormInference(DnnBase): ...@@ -1490,21 +1492,17 @@ class GpuDnnBatchNormInference(DnnBase):
assert (mode in ('per-activation', 'spatial')) assert (mode in ('per-activation', 'spatial'))
self.mode = mode self.mode = mode
assert (epsilon >= 1e-5)
self.epsilon = epsilon
def get_op_params(self): def get_op_params(self):
params = [] params = []
params.append(('MODE', ("CUDNN_BATCHNORM_SPATIAL" params.append(('MODE', ("CUDNN_BATCHNORM_SPATIAL"
if self.mode == "spatial" if self.mode == "spatial"
else "CUDNN_BATCHNORM_PER_ACTIVATION"))) else "CUDNN_BATCHNORM_PER_ACTIVATION")))
params.append(('EPSILON', str(self.epsilon)))
return params return params
def infer_shape(self, node, shape): def infer_shape(self, node, shape):
return [shape[0]] return [shape[0]]
def make_node(self, x, scale, bias, estimated_mean, estimated_variance): def make_node(self, x, scale, bias, estimated_mean, estimated_variance, epsilon=1e-4):
ctx_name = infer_context_name(x, scale, bias, estimated_mean, ctx_name = infer_context_name(x, scale, bias, estimated_mean,
estimated_variance) estimated_variance)
x = as_gpuarray_variable(x, ctx_name) x = as_gpuarray_variable(x, ctx_name)
...@@ -1512,15 +1510,16 @@ class GpuDnnBatchNormInference(DnnBase): ...@@ -1512,15 +1510,16 @@ class GpuDnnBatchNormInference(DnnBase):
bias = as_gpuarray_variable(bias, ctx_name) bias = as_gpuarray_variable(bias, ctx_name)
estimated_mean = as_gpuarray_variable(estimated_mean, ctx_name) estimated_mean = as_gpuarray_variable(estimated_mean, ctx_name)
estimated_variance = as_gpuarray_variable(estimated_variance, ctx_name) estimated_variance = as_gpuarray_variable(estimated_variance, ctx_name)
epsilon = as_scalar(epsilon).astype('float64')
assert x.ndim == 4 assert x.ndim == 4
assert scale.ndim == 4 assert scale.ndim == 4
assert bias.ndim == 4 assert bias.ndim == 4
assert estimated_mean.ndim == 4 assert estimated_mean.ndim == 4
assert estimated_variance.ndim == 4 assert estimated_variance.ndim == 4
return Apply(self, [x, scale, bias, estimated_mean, estimated_variance], [x.type()]) return Apply(self, [x, scale, bias, estimated_mean, estimated_variance, epsilon], [x.type()])
def grad(self, inputs, grads): def grad(self, inputs, grads):
x, scale, bias, est_mean, est_var = inputs x, scale, bias, est_mean, est_var, epsilon = inputs
dy = grads[0] dy = grads[0]
if self.mode == "per-activation": if self.mode == "per-activation":
...@@ -1531,7 +1530,7 @@ class GpuDnnBatchNormInference(DnnBase): ...@@ -1531,7 +1530,7 @@ class GpuDnnBatchNormInference(DnnBase):
for t in (scale, bias, est_mean, est_var)) for t in (scale, bias, est_mean, est_var))
# define helper expressions # define helper expressions
est_var_eps = est_var + self.epsilon est_var_eps = est_var + epsilon
est_std = theano.tensor.sqrt(est_var_eps) est_std = theano.tensor.sqrt(est_var_eps)
two = theano.tensor.constant(2.) two = theano.tensor.constant(2.)
...@@ -1541,13 +1540,17 @@ class GpuDnnBatchNormInference(DnnBase): ...@@ -1541,13 +1540,17 @@ class GpuDnnBatchNormInference(DnnBase):
dbias = dy.sum(axes, keepdims=True) dbias = dy.sum(axes, keepdims=True)
dmean = -dy.sum(axes, keepdims=True) * (scale / est_std) dmean = -dy.sum(axes, keepdims=True) * (scale / est_std)
dvar = -(dy * (x - est_mean)).sum(axes, keepdims=True) * (scale / (two * est_var_eps * est_std)) dvar = -(dy * (x - est_mean)).sum(axes, keepdims=True) * (scale / (two * est_var_eps * est_std))
return [dx, dscale, dbias, dmean, dvar] return [dx, dscale, dbias, dmean, dvar, DisconnectedType()()]
def connection_pattern(self, node):
        # Specify that epsilon is not connected to outputs.
return [[True], [True], [True], [True], [True], [False]]
class GpuDnnBatchNormGrad(DnnBase): class GpuDnnBatchNormGrad(DnnBase):
__props__ = ('mode', 'epsilon') __props__ = ('mode',)
def __init__(self, mode='per-activation', epsilon=1e-4): def __init__(self, mode='per-activation'):
DnnBase.__init__(self, ['dnn_batchnorm_base.c', 'dnn_batchnorm_grad.c'], DnnBase.__init__(self, ['dnn_batchnorm_base.c', 'dnn_batchnorm_grad.c'],
'dnn_batchnorm_grad') 'dnn_batchnorm_grad')
...@@ -1556,26 +1559,23 @@ class GpuDnnBatchNormGrad(DnnBase): ...@@ -1556,26 +1559,23 @@ class GpuDnnBatchNormGrad(DnnBase):
assert (mode in ('per-activation', 'spatial')) assert (mode in ('per-activation', 'spatial'))
self.mode = mode self.mode = mode
assert (epsilon >= 1e-5)
self.epsilon = epsilon
def get_op_params(self): def get_op_params(self):
params = [] params = []
params.append(('MODE', ("CUDNN_BATCHNORM_SPATIAL" params.append(('MODE', ("CUDNN_BATCHNORM_SPATIAL"
if self.mode == "spatial" if self.mode == "spatial"
else "CUDNN_BATCHNORM_PER_ACTIVATION"))) else "CUDNN_BATCHNORM_PER_ACTIVATION")))
params.append(('EPSILON', str(self.epsilon)))
return params return params
def make_node(self, x, dy, scale, x_mean, x_invstd): def make_node(self, x, dy, scale, x_mean, x_invstd, epsilon=1e-4):
ctx_name = infer_context_name(x, dy, scale, x_mean, x_invstd) ctx_name = infer_context_name(x, dy, scale, x_mean, x_invstd)
x = as_gpuarray_variable(x, ctx_name) x = as_gpuarray_variable(x, ctx_name)
dy = as_gpuarray_variable(dy, ctx_name) dy = as_gpuarray_variable(dy, ctx_name)
scale = as_gpuarray_variable(scale, ctx_name) scale = as_gpuarray_variable(scale, ctx_name)
x_mean = as_gpuarray_variable(x_mean, ctx_name) x_mean = as_gpuarray_variable(x_mean, ctx_name)
x_invstd = as_gpuarray_variable(x_invstd, ctx_name) x_invstd = as_gpuarray_variable(x_invstd, ctx_name)
epsilon = as_scalar(epsilon).astype('float64')
assert x.ndim == 4 and dy.ndim == 4 and scale.ndim == 4 and x_mean.ndim == 4 and x_invstd.ndim == 4 assert x.ndim == 4 and dy.ndim == 4 and scale.ndim == 4 and x_mean.ndim == 4 and x_invstd.ndim == 4
return Apply(self, [x, dy, scale, x_mean, x_invstd], [x.type(), scale.type(), scale.type()]) return Apply(self, [x, dy, scale, x_mean, x_invstd, epsilon], [x.type(), scale.type(), scale.type()])
def infer_shape(self, node, shape): def infer_shape(self, node, shape):
return [shape[0], shape[2], shape[2]] return [shape[0], shape[2], shape[2]]
...@@ -1640,9 +1640,9 @@ def dnn_batch_normalization_train(inputs, gamma, beta, mode='per-activation', ...@@ -1640,9 +1640,9 @@ def dnn_batch_normalization_train(inputs, gamma, beta, mode='per-activation',
inputs = theano.tensor.shape_padright(inputs, 4 - ndim) inputs = theano.tensor.shape_padright(inputs, 4 - ndim)
gamma = theano.tensor.shape_padright(gamma, 4 - ndim) gamma = theano.tensor.shape_padright(gamma, 4 - ndim)
beta = theano.tensor.shape_padright(beta, 4 - ndim) beta = theano.tensor.shape_padright(beta, 4 - ndim)
batchnorm_op = GpuDnnBatchNorm(mode=mode, epsilon=epsilon) batchnorm_op = GpuDnnBatchNorm(mode=mode)
result = tuple(batchnorm_op(gpu_contiguous(inputs), gpu_contiguous(gamma), result = tuple(batchnorm_op(gpu_contiguous(inputs), gpu_contiguous(gamma),
gpu_contiguous(beta))) gpu_contiguous(beta), epsilon=epsilon))
if ndim < 4: if ndim < 4:
result = tuple(theano.tensor.flatten(r, ndim) for r in result) result = tuple(theano.tensor.flatten(r, ndim) for r in result)
return result return result
...@@ -1715,10 +1715,10 @@ def dnn_batch_normalization_test(inputs, gamma, beta, mean, var, ...@@ -1715,10 +1715,10 @@ def dnn_batch_normalization_test(inputs, gamma, beta, mean, var,
beta = theano.tensor.shape_padright(beta, 4 - ndim) beta = theano.tensor.shape_padright(beta, 4 - ndim)
mean = theano.tensor.shape_padright(mean, 4 - ndim) mean = theano.tensor.shape_padright(mean, 4 - ndim)
var = theano.tensor.shape_padright(var, 4 - ndim) var = theano.tensor.shape_padright(var, 4 - ndim)
batchnorm_op = GpuDnnBatchNormInference(mode=mode, epsilon=epsilon) batchnorm_op = GpuDnnBatchNormInference(mode=mode)
result = batchnorm_op(gpu_contiguous(inputs), gpu_contiguous(gamma), result = batchnorm_op(gpu_contiguous(inputs), gpu_contiguous(gamma),
gpu_contiguous(beta), gpu_contiguous(mean), gpu_contiguous(beta), gpu_contiguous(mean),
gpu_contiguous(var)) gpu_contiguous(var), epsilon=epsilon)
if ndim < 4: if ndim < 4:
result = theano.tensor.flatten(result, ndim) result = theano.tensor.flatten(result, ndim)
return result return result
......
#section support_code_struct #section support_code_struct
int dnn_batchnorm_op(PyGpuArrayObject *inp, PyGpuArrayObject *scale, int dnn_batchnorm_op(PyGpuArrayObject *inp, PyGpuArrayObject *scale,
PyGpuArrayObject *bias, PyGpuArrayObject **outp, PyGpuArrayObject *bias, npy_float64 epsilon,
PyGpuArrayObject **x_mean, PyGpuArrayObject **x_invstd, PyGpuArrayObject **outp, PyGpuArrayObject **x_mean,
PyGpuContextObject *c) { PyGpuArrayObject **x_invstd, PyGpuContextObject *c) {
if (c_set_tensorNd(inp, bn_input) != 0) if (c_set_tensorNd(inp, bn_input) != 0)
return 1; return 1;
if (c_set_tensorNd(scale, bn_params) != 0) if (c_set_tensorNd(scale, bn_params) != 0)
return 1; return 1;
if (epsilon < 1e-5)
return 1;
if (theano_prep_output(outp, inp->ga.nd, inp->ga.dimensions, inp->ga.typecode, GA_C_ORDER, c) != 0) if (theano_prep_output(outp, inp->ga.nd, inp->ga.dimensions, inp->ga.typecode, GA_C_ORDER, c) != 0)
return 1; return 1;
if (theano_prep_output(x_mean, scale->ga.nd, scale->ga.dimensions, scale->ga.typecode, GA_C_ORDER, c) != 0) if (theano_prep_output(x_mean, scale->ga.nd, scale->ga.dimensions, scale->ga.typecode, GA_C_ORDER, c) != 0)
...@@ -48,7 +51,7 @@ int dnn_batchnorm_op(PyGpuArrayObject *inp, PyGpuArrayObject *scale, ...@@ -48,7 +51,7 @@ int dnn_batchnorm_op(PyGpuArrayObject *inp, PyGpuArrayObject *scale,
0, 0,
NULL, // running mean, deliberately unused NULL, // running mean, deliberately unused
NULL, // running var, deliberately unused NULL, // running var, deliberately unused
EPSILON, epsilon,
PyGpuArray_DEV_DATA(*x_mean), PyGpuArray_DEV_DATA(*x_mean),
PyGpuArray_DEV_DATA(*x_invstd) PyGpuArray_DEV_DATA(*x_invstd)
); );
......
...@@ -22,9 +22,9 @@ cudnnTensorDescriptor_t bn_doutput; ...@@ -22,9 +22,9 @@ cudnnTensorDescriptor_t bn_doutput;
int dnn_batchnorm_grad(PyGpuArrayObject *inp, PyGpuArrayObject *doutp, int dnn_batchnorm_grad(PyGpuArrayObject *inp, PyGpuArrayObject *doutp,
PyGpuArrayObject *scale, PyGpuArrayObject *x_mean, PyGpuArrayObject *scale, PyGpuArrayObject *x_mean,
PyGpuArrayObject *x_invstd, PyGpuArrayObject **dinp, PyGpuArrayObject *x_invstd, npy_float64 epsilon,
PyGpuArrayObject **dscale, PyGpuArrayObject **dbias, PyGpuArrayObject **dinp, PyGpuArrayObject **dscale,
PyGpuContextObject *c) { PyGpuArrayObject **dbias, PyGpuContextObject *c) {
if (c_set_tensorNd(inp, bn_input) != 0) if (c_set_tensorNd(inp, bn_input) != 0)
return 1; return 1;
if (c_set_tensorNd(doutp, bn_doutput) != 0) if (c_set_tensorNd(doutp, bn_doutput) != 0)
...@@ -32,6 +32,9 @@ int dnn_batchnorm_grad(PyGpuArrayObject *inp, PyGpuArrayObject *doutp, ...@@ -32,6 +32,9 @@ int dnn_batchnorm_grad(PyGpuArrayObject *inp, PyGpuArrayObject *doutp,
if (c_set_tensorNd(scale, bn_params) != 0) if (c_set_tensorNd(scale, bn_params) != 0)
return 1; return 1;
if (epsilon < 1e-5)
return 1;
if (theano_prep_output(dinp, inp->ga.nd, inp->ga.dimensions, inp->ga.typecode, GA_C_ORDER, c) != 0) if (theano_prep_output(dinp, inp->ga.nd, inp->ga.dimensions, inp->ga.typecode, GA_C_ORDER, c) != 0)
return 1; return 1;
if (theano_prep_output(dscale, scale->ga.nd, scale->ga.dimensions, scale->ga.typecode, GA_C_ORDER, c) != 0) if (theano_prep_output(dscale, scale->ga.nd, scale->ga.dimensions, scale->ga.typecode, GA_C_ORDER, c) != 0)
...@@ -79,7 +82,7 @@ int dnn_batchnorm_grad(PyGpuArrayObject *inp, PyGpuArrayObject *doutp, ...@@ -79,7 +82,7 @@ int dnn_batchnorm_grad(PyGpuArrayObject *inp, PyGpuArrayObject *doutp,
PyGpuArray_DEV_DATA(scale), PyGpuArray_DEV_DATA(scale),
PyGpuArray_DEV_DATA(*dscale), PyGpuArray_DEV_DATA(*dscale),
PyGpuArray_DEV_DATA(*dbias), PyGpuArray_DEV_DATA(*dbias),
EPSILON, epsilon,
PyGpuArray_DEV_DATA(x_mean), PyGpuArray_DEV_DATA(x_mean),
PyGpuArray_DEV_DATA(x_invstd) PyGpuArray_DEV_DATA(x_invstd)
); );
......
...@@ -2,13 +2,16 @@ ...@@ -2,13 +2,16 @@
int dnn_batchnorm_op(PyGpuArrayObject *inp, PyGpuArrayObject *scale, int dnn_batchnorm_op(PyGpuArrayObject *inp, PyGpuArrayObject *scale,
PyGpuArrayObject *bias, PyGpuArrayObject *est_mean, PyGpuArrayObject *bias, PyGpuArrayObject *est_mean,
PyGpuArrayObject *est_var, PyGpuArrayObject **outp, PyGpuArrayObject *est_var, npy_float64 epsilon,
PyGpuContextObject *c) { PyGpuArrayObject **outp, PyGpuContextObject *c) {
if (c_set_tensorNd(inp, bn_input) != 0) if (c_set_tensorNd(inp, bn_input) != 0)
return 1; return 1;
if (c_set_tensorNd(scale, bn_params) != 0) if (c_set_tensorNd(scale, bn_params) != 0)
return 1; return 1;
if (epsilon < 1e-5)
return 1;
if (theano_prep_output(outp, inp->ga.nd, inp->ga.dimensions, inp->ga.typecode, GA_C_ORDER, c) != 0) if (theano_prep_output(outp, inp->ga.nd, inp->ga.dimensions, inp->ga.typecode, GA_C_ORDER, c) != 0)
return 1; return 1;
...@@ -43,7 +46,7 @@ int dnn_batchnorm_op(PyGpuArrayObject *inp, PyGpuArrayObject *scale, ...@@ -43,7 +46,7 @@ int dnn_batchnorm_op(PyGpuArrayObject *inp, PyGpuArrayObject *scale,
PyGpuArray_DEV_DATA(bias), PyGpuArray_DEV_DATA(bias),
PyGpuArray_DEV_DATA(est_mean), PyGpuArray_DEV_DATA(est_mean),
PyGpuArray_DEV_DATA(est_var), PyGpuArray_DEV_DATA(est_var),
EPSILON epsilon
); );
if (err != CUDNN_STATUS_SUCCESS) { if (err != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError, "Error during batchnorm: %s\n", PyErr_Format(PyExc_RuntimeError, "Error during batchnorm: %s\n",
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论