提交 292383dc 作者: Nicolas Ballas

add batch normalization op

上级 88eac16c
import theano
from theano.scalar import Composite
from theano.scalar import add, sub, true_div, mul
class BNComposite(Composite):
    """Scalar composite op computing ``(x - mean) / var * gamma + beta``.

    The five scalar inputs are, in order: ``x``, ``mean``, ``var``,
    ``gamma`` and ``beta``. ``var`` is used directly as the divisor; no
    square root is taken and no epsilon is added.
    """

    def __init__(self, dtype):
        """Build the scalar graph with every input of scalar type ``dtype``."""
        # One fresh scalar variable per input, all sharing the same dtype.
        x, mean, var, gamma, beta = [
            theano.scalar.Scalar(dtype=dtype).make_variable()
            for _ in range(5)
        ]
        centered = sub(x, mean)
        normalized = true_div(centered, var)
        result = add(mul(normalized, gamma), beta)
        super(BNComposite, self).__init__(
            [x, mean, var, gamma, beta], [result])

    def grad(self, inps, grads):
        """Return the gradients w.r.t. ``x, mean, var, gamma, beta``.

        ``inps`` holds the five inputs in constructor order and ``grads``
        holds the single gradient flowing into the output.
        """
        x, mean, var, gamma, beta = inps
        top, = grads
        top_gamma = top * gamma
        centered = x - mean
        d_x = top_gamma / var
        d_mean = -top_gamma / var
        d_var = -(top_gamma * centered) / (var * var)
        d_gamma = top * centered / var
        # beta is added unscaled, so its gradient is the incoming one.
        return [d_x, d_mean, d_var, d_gamma, top]
def batch_normalization(inputs, gamma, beta, mean, variance, axis=0):
    """
    Build the symbolic graph applying batch normalization to activations.

    The output is computed elementwise as
    ``(inputs - mean) / variance * gamma + beta`` by a single ``Elemwise``
    op wrapping ``BNComposite``.

    Parameters
    ----------
    inputs : symbolic tensor
        Mini-batch of examples. Its dtype is the dtype used to build the
        underlying scalar op.
    gamma : symbolic tensor
        BN scale parameter. It is combined elementwise with ``inputs``
        and is not reshaped here, so the caller must supply it in a form
        that lines up with ``inputs`` (e.g.
        ``gamma.dimshuffle('x', 0, 'x', 'x')`` for 4D inputs whose
        channels are on axis 1).
    beta : symbolic tensor
        BN shift parameter, same layout requirement as ``gamma``.
    mean : symbolic tensor
        Value subtracted from ``inputs``, same layout requirement as
        ``gamma``.
    variance : symbolic tensor
        Value ``inputs - mean`` is divided by, same layout requirement as
        ``gamma``. It is used as-is: no square root is taken and no
        epsilon is added by this function.
    axis : int
        Channel axis. Currently unused: no argument is reshaped based on
        it. It is accepted only so existing callers keep working.

    Returns
    -------
    symbolic tensor
        The batch-normalized activations.
    """
    # Argument order below follows BNComposite's inputs:
    # (x, mean, var, gamma, beta).
    scalar_bn = BNComposite(dtype=inputs.dtype)
    elm_bn = theano.tensor.elemwise.Elemwise(scalar_op=scalar_bn)
    return elm_bn(inputs, mean, variance, gamma, beta)
import theano
from theano.tests import unittest_tools as utt
import numpy
from theano.tensor.nnet.bn import batch_normalization
def test_bn():
    """Compare batch_normalization with a reference graph on 2D inputs.

    Covers both externally supplied mean/variance and statistics computed
    from the batch itself, and numerically verifies the gradient in each
    case.
    """

    def bn_ref(x, G, B, M, V):
        # Reference implementation written with ordinary tensor operators.
        n = (x - M) / V
        return n * G + B

    numpy.random.seed(1234)
    # All values are shifted into [1, 2) so the divisor is never near zero.
    X = 1 + numpy.random.random([10, 20]).astype('float32')
    B = 1 + numpy.random.random([20]).astype('float32')
    G = 1 + numpy.random.random([20]).astype('float32')
    M = 1 + numpy.random.random([20]).astype('float32')
    V = 1 + numpy.random.random([20]).astype('float32')

    x = theano.tensor.matrix('x')
    b = theano.tensor.vector('b')
    g = theano.tensor.vector('g')
    m = theano.tensor.vector('m')
    v = theano.tensor.vector('v')

    # Case 1: mean and variance are provided as inputs.
    bn_op = batch_normalization(x, g, b, m, v)
    bn_ref_op = bn_ref(x, g, b, m, v)
    # Declare the inputs in the same order as the call arguments so that
    # G feeds gamma and B feeds beta.
    f = theano.function([x, g, b, m, v], [bn_op])
    f_ref = theano.function([x, g, b, m, v], [bn_ref_op])
    res = f(X, G, B, M, V)
    res_ref = f_ref(X, G, B, M, V)
    utt.assert_allclose(res_ref, res)
    utt.verify_grad(batch_normalization, [X, G, B, M, V])

    # Case 2: mean and variance are computed from the batch.
    batch_mean = x.mean(axis=0, keepdims=True)
    batch_var = x.var(axis=0, keepdims=True)
    bn_op = batch_normalization(x, g, b, batch_mean, batch_var)
    bn_ref_op = bn_ref(x, g, b, batch_mean, batch_var)
    f = theano.function([x, g, b], [bn_op])
    f_ref = theano.function([x, g, b], [bn_ref_op])
    res = f(X, G, B)
    res_ref = f_ref(X, G, B)
    utt.assert_allclose(res_ref, res)
    utt.verify_grad(
        batch_normalization,
        [X, G, B,
         X.mean(axis=0, keepdims=True),
         X.var(axis=0, keepdims=True)])
def test_bn_feature_maps():
    """Compare batch_normalization with a reference graph on 4D inputs.

    The per-channel parameters are vectors that are dimshuffled to
    ``('x', 0, 'x', 'x')`` before being handed to the op, and the
    gradient is verified numerically through the same wrapping.
    """

    def bn_ref(x, G, B, M, V):
        # Reference implementation written with ordinary tensor operators.
        n = (x - M) / V
        return n * G + B

    def per_channel(vec):
        # Lay a vector out along axis 1 of a 4D tensor.
        return vec.dimshuffle('x', 0, 'x', 'x')

    numpy.random.seed(1234)
    # All values are shifted into [1, 2) so the divisor is never near zero.
    X = 1 + numpy.random.random([10, 20, 4, 4]).astype('float32')
    B = 1 + numpy.random.random([20]).astype('float32')
    G = 1 + numpy.random.random([20]).astype('float32')
    M = 1 + numpy.random.random([20]).astype('float32')
    V = 1 + numpy.random.random([20]).astype('float32')

    x = theano.tensor.tensor4('x')
    b = theano.tensor.vector('b')
    g = theano.tensor.vector('g')
    m = theano.tensor.vector('m')
    v = theano.tensor.vector('v')

    # Mean and variance are provided as inputs.
    bn_op = batch_normalization(x,
                                per_channel(g),
                                per_channel(b),
                                per_channel(m),
                                per_channel(v),
                                axis=1)
    bn_ref_op = bn_ref(x,
                       per_channel(g),
                       per_channel(b),
                       per_channel(m),
                       per_channel(v))
    # Declare the inputs in the same order as the call arguments so that
    # G feeds gamma and B feeds beta.
    f = theano.function([x, g, b, m, v], [bn_op])
    f_ref = theano.function([x, g, b, m, v], [bn_ref_op])
    res = f(X, G, B, M, V)
    res_ref = f_ref(X, G, B, M, V)
    utt.assert_allclose(res_ref, res)

    def conv_bn(inputs, gamma, beta, mean, variance):
        # Wrapper so verify_grad can perturb the raw vectors directly.
        return batch_normalization(inputs,
                                   per_channel(gamma),
                                   per_channel(beta),
                                   per_channel(mean),
                                   per_channel(variance),
                                   axis=1)

    utt.verify_grad(conv_bn, [X, G, B, M, V])
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论