提交 36de8dd2 authored 作者: Nicolas Ballas's avatar Nicolas Ballas

Minor updates

上级 6ad3fded
......@@ -8,49 +8,49 @@ class BNComposite(Composite):
def __init__(self, dtype):
x = theano.scalar.Scalar(dtype=dtype).make_variable()
mean = theano.scalar.Scalar(dtype=dtype).make_variable()
var = theano.scalar.Scalar(dtype=dtype).make_variable()
std = theano.scalar.Scalar(dtype=dtype).make_variable()
gamma = theano.scalar.Scalar(dtype=dtype).make_variable()
beta = theano.scalar.Scalar(dtype=dtype).make_variable()
o = add(mul(true_div(sub(x, mean), var), gamma), beta)
inputs = [x, mean, var, gamma, beta]
outputs= [o]
o = add(mul(true_div(sub(x, mean), std), gamma), beta)
inputs = [x, mean, std, gamma, beta]
outputs = [o]
super(BNComposite, self).__init__(inputs, outputs)
def grad(self, inps, grads):
x, mean, var, gamma, beta = inps
x, mean, std, gamma, beta = inps
top, = grads
dx = (top*gamma) / var
dmean = -(top*gamma) / var
dvar = -(top * gamma * (x - mean)) / (var*var)
dgamma = top*(x - mean) / var
return [dx, dmean, dvar, dgamma, top]
dx = (top * gamma) / std
dmean = -(top * gamma) / std
dstd = -(top * gamma * (x - mean)) / (std * std)
dgamma = top * (x - mean) / std
return [dx, dmean, dstd, dgamma, top]
def batch_normalization(inputs, gamma, beta, mean, variance, axis=0):
def batch_normalization(inputs, gamma, beta, mean, std):
"""
This function will build the symbolic graph for applying batch normalization
to a set of activations.
to a set of activations. As no intermediate representations are stored for the
back-propagation, this implementation lowers the memory usage; however,
it is 5-10% slower than a naive Theano implementation, as it redoes
some forward computations for the backprop.
Parameters
----------
inputs : symbolic tensor
Mini-batch of examples
gamma: symbolic vector
BN scale parameter, must be of same dimension that
the number of inputs channel
beta: symbolic vector
BN shift parameter, must be of same dimension that
the number of inputs channel
Mini-batch of activations
gamma: symbolic tensor
BN scale parameter, must be of same dimensionality as
inputs and broadcastable against it
beta: symbolic tensor
BN shift parameter, must be of same dimensionality as
inputs and broadcastable against it
mean: symbolic tensor
inputs means
variance: symbolic tensor
inputs variance
axis: int
channel axis
inputs means, must be of same dimensionality as
inputs and broadcastable against it
std: symbolic tensor
inputs standard deviation, must be of same dimensionality as
inputs and broadcastable against it
"""
elm_bn = theano.tensor.elemwise.Elemwise(scalar_op=BNComposite(dtype=inputs.dtype))
rval = elm_bn(inputs, mean, variance, gamma, beta)
rval = elm_bn(inputs, mean, std, gamma, beta)
return rval
......@@ -8,8 +8,8 @@ from theano.tensor.nnet.bn import batch_normalization
def test_bn():
def bn_ref(x, G, B, M, V):
n = (x-M)/V
return n*G+B
n = (x - M) / V
return n * G + B
numpy.random.seed(1234)
X = 1 + numpy.random.random([10, 20]).astype('float32')
......@@ -26,28 +26,28 @@ def test_bn():
bn_op = batch_normalization(x, g, b, m, v)
bn_ref_op = bn_ref(x, g, b, m, v)
f = theano.function([x, b, g, m ,v], [bn_op])
f_ref = theano.function([x, b, g, m ,v], [bn_ref_op])
f = theano.function([x, b, g, m, v], [bn_op])
f_ref = theano.function([x, b, g, m, v], [bn_ref_op])
res = f(X, G, B, M, V)
res_ref = f_ref(X, G, B, M, V)
utt.assert_allclose(res_ref, res)
utt.verify_grad(batch_normalization, [X, G, B, M, V])
bn_op = batch_normalization(x, g, b, x.mean(axis=0, keepdims=True), x.var(axis=0, keepdims=True))
bn_op = batch_normalization(x, g, b, x.mean(axis=0, keepdims=True), x.std(axis=0, keepdims=True))
bn_ref_op = bn_ref(x, g, b, x.mean(axis=0, keepdims=True), x.var(axis=0, keepdims=True))
f = theano.function([x, b, g], [bn_op])
f_ref = theano.function([x, b, g], [bn_ref_op])
res = f(X, G, B)
res_ref = f_ref(X, G, B)
utt.assert_allclose(res_ref, res)
utt.verify_grad(batch_normalization, [X, G, B, X.mean(axis=0, keepdims=True), X.var(axis=0, keepdims=True)])
utt.verify_grad(batch_normalization, [X, G, B, X.mean(axis=0, keepdims=True), X.std(axis=0, keepdims=True)])
def test_bn_feature_maps():
def bn_ref(x, G, B, M, V):
n = (x-M)/V
return n*G+B
n = (x - M) / V
return n * G + B
numpy.random.seed(1234)
X = 1 + numpy.random.random([10, 20, 4, 4]).astype('float32')
......@@ -62,7 +62,6 @@ def test_bn_feature_maps():
m = theano.tensor.vector('m')
v = theano.tensor.vector('v')
### Provide mean/var
bn_op = batch_normalization(x,
g.dimshuffle('x', 0, 'x', 'x'),
b.dimshuffle('x', 0, 'x', 'x'),
......@@ -73,8 +72,8 @@ def test_bn_feature_maps():
b.dimshuffle('x', 0, 'x', 'x'),
m.dimshuffle('x', 0, 'x', 'x'),
v.dimshuffle('x', 0, 'x', 'x'))
f = theano.function([x, b, g, m ,v], [bn_op])
f_ref = theano.function([x, b, g, m ,v], [bn_ref_op])
f = theano.function([x, b, g, m, v], [bn_op])
f_ref = theano.function([x, b, g, m, v], [bn_ref_op])
res = f(X, G, B, M, V)
res_ref = f_ref(X, G, B, M, V)
utt.assert_allclose(res_ref, res)
......@@ -87,4 +86,3 @@ def test_bn_feature_maps():
variance.dimshuffle('x', 0, 'x', 'x'),
axis=1)
utt.verify_grad(conv_bn, [X, G, B, M, V])
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论