add batch normalization op

292383dc · Nicolas Ballas · 88eac16c · 292383dc · 292383dc
--- a/theano/tensor/nnet/bn.py
+++ b/theano/tensor/nnet/bn.py
+import theano
+from theano.scalar import Composite
+from theano.scalar import add, sub, true_div, mul
+
+
+class BNComposite(Composite):
+    """
+    Scalar composite op computing ``(x - mean) / var * gamma + beta``.
+
+    The op takes five scalar inputs, all built with the same dtype, in the
+    order ``x, mean, var, gamma, beta`` and produces a single output.
+    The third input is used directly as the divisor: no square root and no
+    epsilon term are applied by this op.
+
+    Parameters
+    ----------
+    dtype : str
+        dtype used to create the five scalar input variables.
+    """
+
+    def __init__(self, dtype):
+        # One fresh scalar variable per input, all sharing the same dtype.
+        x, mean, var, gamma, beta = [
+            theano.scalar.Scalar(dtype=dtype).make_variable()
+            for _ in range(5)
+        ]
+        o = add(mul(true_div(sub(x, mean), var), gamma), beta)
+        inputs = [x, mean, var, gamma, beta]
+        outputs = [o]
+        super(BNComposite, self).__init__(inputs, outputs)
+
+    def grad(self, inps, grads):
+        """
+        Return the gradient expressions for the five inputs.
+
+        Parameters
+        ----------
+        inps : sequence
+            The inputs ``x, mean, var, gamma, beta``, in that order.
+        grads : sequence
+            A single-element sequence holding the gradient with respect
+            to the output.
+
+        Returns
+        -------
+        list
+            ``[dx, dmean, dvar, dgamma, dbeta]``, in the same order as
+            the inputs.
+        """
+        x, mean, var, gamma, beta = inps
+        top, = grads
+        # Sub-expressions shared by several of the gradients below.
+        top_gamma = top * gamma
+        x_mean = x - mean
+        dx = top_gamma / var
+        dmean = -top_gamma / var
+        dvar = -(top_gamma * x_mean) / (var * var)
+        dgamma = top * x_mean / var
+        # beta is a plain additive term, so its gradient is ``top`` itself.
+        return [dx, dmean, dvar, dgamma, top]
+
+
+def batch_normalization(inputs, gamma, beta, mean, variance, axis=0):
+    """
+    Build the symbolic graph applying batch normalization to activations.
+
+    The result is the elementwise expression
+    ``(inputs - mean) / variance * gamma + beta``, evaluated by a
+    ``BNComposite`` scalar op wrapped in an ``Elemwise``. ``variance`` is
+    used directly as the divisor.
+
+    Parameters
+    ----------
+    inputs : symbolic tensor
+        Mini-batch of examples. Its dtype is the one used to build the
+        scalar op.
+    gamma : symbolic tensor
+        BN scale parameter; expected to have as many entries as there
+        are input channels.
+    beta : symbolic tensor
+        BN shift parameter; expected to have as many entries as there
+        are input channels.
+    mean : symbolic tensor
+        Means of the inputs.
+    variance : symbolic tensor
+        Variances of the inputs.
+    axis : int
+        Channel axis. NOTE: this argument is accepted but is not read by
+        the current implementation.
+
+    Returns
+    -------
+    symbolic tensor
+        The output of the elementwise batch normalization op.
+    """
+    bn_scalar_op = BNComposite(dtype=inputs.dtype)
+    elemwise_bn = theano.tensor.elemwise.Elemwise(scalar_op=bn_scalar_op)
+    return elemwise_bn(inputs, mean, variance, gamma, beta)
+
+
+
--- a/theano/tensor/nnet/tests/test_bn.py
+++ b/theano/tensor/nnet/tests/test_bn.py
+import theano
+from theano.tests import unittest_tools as utt
+import numpy
+
+from theano.tensor.nnet.bn import batch_normalization
+
+
+def test_bn():
+    """
+    Compare batch_normalization with a reference expression on a matrix.
+
+    Two configurations are checked, each for forward values and for
+    gradients: mean/variance supplied as separate vectors, and
+    mean/variance computed symbolically from the input along axis 0.
+    """
+
+    def bn_ref(x, G, B, M, V):
+        # Reference formula written with plain tensor operators.
+        n = (x - M) / V
+        return n * G + B
+
+    numpy.random.seed(1234)
+    X = 1 + numpy.random.random([10, 20]).astype('float32')
+    B = 1 + numpy.random.random([20]).astype('float32')
+    G = 1 + numpy.random.random([20]).astype('float32')
+    M = 1 + numpy.random.random([20]).astype('float32')
+    V = 1 + numpy.random.random([20]).astype('float32')
+
+    x = theano.tensor.matrix('x')
+    b = theano.tensor.vector('b')
+    g = theano.tensor.vector('g')
+    m = theano.tensor.vector('m')
+    v = theano.tensor.vector('v')
+
+    # Mean and variance provided explicitly.
+    bn_op = batch_normalization(x, g, b, m, v)
+    bn_ref_op = bn_ref(x, g, b, m, v)
+    # The input list follows the order of the values passed at call time
+    # (x, gamma, beta, mean, variance) so that G is bound to g and B to b.
+    f = theano.function([x, g, b, m, v], [bn_op])
+    f_ref = theano.function([x, g, b, m, v], [bn_ref_op])
+    res = f(X, G, B, M, V)
+    res_ref = f_ref(X, G, B, M, V)
+    utt.assert_allclose(res_ref, res)
+    utt.verify_grad(batch_normalization, [X, G, B, M, V])
+
+    # Mean and variance computed from the mini-batch itself.
+    batch_mean = x.mean(axis=0, keepdims=True)
+    batch_var = x.var(axis=0, keepdims=True)
+    bn_op = batch_normalization(x, g, b, batch_mean, batch_var)
+    bn_ref_op = bn_ref(x, g, b, batch_mean, batch_var)
+    f = theano.function([x, g, b], [bn_op])
+    f_ref = theano.function([x, g, b], [bn_ref_op])
+    res = f(X, G, B)
+    res_ref = f_ref(X, G, B)
+    utt.assert_allclose(res_ref, res)
+    utt.verify_grad(batch_normalization,
+                    [X, G, B,
+                     X.mean(axis=0, keepdims=True),
+                     X.var(axis=0, keepdims=True)])
+
+
+def test_bn_feature_maps():
+    """
+    Compare batch_normalization with a reference expression on a 4D input.
+
+    The per-channel vectors are reshaped with ``dimshuffle`` so that they
+    line up with axis 1 of the 4D input. Forward values and gradients are
+    both checked.
+    """
+
+    def bn_ref(x, G, B, M, V):
+        # Reference formula written with plain tensor operators.
+        n = (x - M) / V
+        return n * G + B
+
+    def per_channel(vec):
+        # Turn a vector into a 4D tensor whose only non-broadcastable
+        # axis is axis 1.
+        return vec.dimshuffle('x', 0, 'x', 'x')
+
+    numpy.random.seed(1234)
+    X = 1 + numpy.random.random([10, 20, 4, 4]).astype('float32')
+    B = 1 + numpy.random.random([20]).astype('float32')
+    G = 1 + numpy.random.random([20]).astype('float32')
+    M = 1 + numpy.random.random([20]).astype('float32')
+    V = 1 + numpy.random.random([20]).astype('float32')
+
+    x = theano.tensor.tensor4('x')
+    b = theano.tensor.vector('b')
+    g = theano.tensor.vector('g')
+    m = theano.tensor.vector('m')
+    v = theano.tensor.vector('v')
+
+    # Provide mean/var
+    bn_op = batch_normalization(x,
+                                per_channel(g),
+                                per_channel(b),
+                                per_channel(m),
+                                per_channel(v), axis=1)
+    bn_ref_op = bn_ref(x,
+                       per_channel(g),
+                       per_channel(b),
+                       per_channel(m),
+                       per_channel(v))
+    # The input list follows the order of the values passed at call time
+    # (x, gamma, beta, mean, variance) so that G is bound to g and B to b.
+    f = theano.function([x, g, b, m, v], [bn_op])
+    f_ref = theano.function([x, g, b, m, v], [bn_ref_op])
+    res = f(X, G, B, M, V)
+    res_ref = f_ref(X, G, B, M, V)
+    utt.assert_allclose(res_ref, res)
+
+    def conv_bn(inputs, gamma, beta, mean, variance):
+        # Wrapper taking plain vectors so verify_grad can perturb them.
+        return batch_normalization(inputs,
+                                   per_channel(gamma),
+                                   per_channel(beta),
+                                   per_channel(mean),
+                                   per_channel(variance),
+                                   axis=1)
+    utt.verify_grad(conv_bn, [X, G, B, M, V])
+