提交 9a811974 authored 作者: Pascal Lamblin's avatar Pascal Lamblin

Merge pull request #3536 from nouiz/mixed5

[ENH] Speed up bn high_mem, infer_shape, track more stack traces.
...@@ -449,10 +449,14 @@ AddConfigVar( ...@@ -449,10 +449,14 @@ AddConfigVar(
AddConfigVar( AddConfigVar(
'traceback.limit', 'traceback.limit',
"The number of stack to trace. -1 mean all.", "The number of stack to trace. -1 mean all.",
# We default to 6 to be able to know where v1 + v2 is created in the # We default to a number to be able to know where v1 + v2 is created in the
# user script. The bigger this number is, the more run time it takes. # user script. The bigger this number is, the more run time it takes.
# We need to default to 7 to support theano.tensor.tensor(...). # We need to default to 8 to support theano.tensor.tensor(...).
IntParam(7), # import theano, numpy
# X = theano.tensor.matrix()
# y = X.reshape((5,3,1))
# assert y.tag.trace
IntParam(8),
in_c_key=False) in_c_key=False)
AddConfigVar('experimental.mrg', AddConfigVar('experimental.mrg',
......
...@@ -66,8 +66,7 @@ def batch_normalization(inputs, gamma, beta, mean, std, ...@@ -66,8 +66,7 @@ def batch_normalization(inputs, gamma, beta, mean, std,
elm_bn = theano.tensor.elemwise.Elemwise(scalar_op=BNComposite(dtype=inputs.dtype)) elm_bn = theano.tensor.elemwise.Elemwise(scalar_op=BNComposite(dtype=inputs.dtype))
rval = elm_bn(inputs, mean, std, gamma, beta) rval = elm_bn(inputs, mean, std, gamma, beta)
elif mode == 'high_mem': elif mode == 'high_mem':
rval = (inputs - mean) / std rval = (inputs - mean) * (gamma / std) + beta
rval = rval * gamma + beta
else: else:
raise ValueError( raise ValueError(
'mode must be either "low_mem", "high_mem"') 'mode must be either "low_mem", "high_mem"')
......
...@@ -1248,16 +1248,8 @@ class CrossentropyCategorical1Hot(gof.Op): ...@@ -1248,16 +1248,8 @@ class CrossentropyCategorical1Hot(gof.Op):
y[i] = -numpy.log(coding[i, one_of_n[i]]) y[i] = -numpy.log(coding[i, one_of_n[i]])
y_out[0] = y y_out[0] = y
# Enabling this infer_shape method make 2 tests fail: def infer_shape(self, node, in_shapes):
# theano/tensor/nnet/tests/test_nnet.py:T_CrossentropyCategorical1Hot. return [(in_shapes[0][0],)]
# {test_softmax_grad_optimizations,test_softmax_grad_optimizations_vector}
# This is caused by the local_fill_to_alloc that call broadcast_like
# that look into the shape feature and return a Rebroadcast instead of an alloc.
# I disable this infer_shape until we fix the optimizations or determine that
# this is not needed anymore and we update the tests.
# see issue gh-788
# def infer_shape(self, node, in_shapes):
# return [(in_shapes[0][0],)]
def grad(self, inp, grads): def grad(self, inp, grads):
coding, one_of_n = inp coding, one_of_n = inp
......
...@@ -380,8 +380,7 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester): ...@@ -380,8 +380,7 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
tensor.verify_grad(oplike, [x_val], rng=numpy.random) tensor.verify_grad(oplike, [x_val], rng=numpy.random)
# see issue gh-788 def test_infer_shape(self):
def est_infer_shape(self):
admat = matrix() admat = matrix()
alvec = lvector() alvec = lvector()
rng = numpy.random.RandomState(utt.fetch_seed()) rng = numpy.random.RandomState(utt.fetch_seed())
...@@ -535,8 +534,6 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester): ...@@ -535,8 +534,6 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
# for node in fgraph.toposort(): # for node in fgraph.toposort():
# print node.op, node.inputs # print node.op, node.inputs
# the function has 9 ops because the dimshuffle and lemwise{second}
# aren't getting cleaned up as well as we'd like.
has_cx1hot = False has_cx1hot = False
has_cx1hotdx = False has_cx1hotdx = False
has_softmax = False has_softmax = False
...@@ -550,9 +547,9 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester): ...@@ -550,9 +547,9 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
has_softmax = True has_softmax = True
if node.op == softmax_grad: if node.op == softmax_grad:
has_softmaxdx = True has_softmaxdx = True
assert has_cx1hot assert not has_cx1hot
assert has_cx1hotdx assert has_cx1hotdx
assert not has_softmax assert has_softmax
assert not has_softmaxdx assert not has_softmaxdx
def test_softmax_grad_optimizations_vector(self): def test_softmax_grad_optimizations_vector(self):
...@@ -577,8 +574,6 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester): ...@@ -577,8 +574,6 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
# for node in fgraph.toposort(): # for node in fgraph.toposort():
# print node.op, node.inputs # print node.op, node.inputs
# the function has 9 ops because the dimshuffle and elemwise{second}
# aren't getting cleaned up as well as we'd like.
has_cx1hot = False has_cx1hot = False
has_cx1hotdx = False has_cx1hotdx = False
has_softmax = False has_softmax = False
...@@ -592,9 +587,9 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester): ...@@ -592,9 +587,9 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
has_softmax = True has_softmax = True
if node.op == softmax_grad: if node.op == softmax_grad:
has_softmaxdx = True has_softmaxdx = True
assert has_cx1hot assert not has_cx1hot
assert has_cx1hotdx assert has_cx1hotdx
assert not has_softmax assert has_softmax
assert not has_softmaxdx assert not has_softmaxdx
def test_get_rid_of_advanced_indexing_version_of_xent(self): def test_get_rid_of_advanced_indexing_version_of_xent(self):
...@@ -1129,10 +1124,10 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester): ...@@ -1129,10 +1124,10 @@ class T_CrossentropyCategorical1Hot(utt.InferShapeTester):
def test_argmax_pushdown(): def test_argmax_pushdown():
x = tensor.matrix() x = tensor.matrix()
for softmax in [softmax_graph, softmax_op]: for sm in [softmax_graph, softmax_op]:
# test that the max_and_argmax is pushed down if the max is not used # test that the max_and_argmax is pushed down if the max is not used
out = tensor.max_and_argmax( out = tensor.max_and_argmax(
softmax(tensor.exp(tensor.tanh(sigmoid(x)))), sm(tensor.exp(tensor.tanh(sigmoid(x)))),
axis=-1)[1] axis=-1)[1]
fgraph = gof.FunctionGraph( fgraph = gof.FunctionGraph(
[x], [x],
...@@ -1149,7 +1144,7 @@ def test_argmax_pushdown(): ...@@ -1149,7 +1144,7 @@ def test_argmax_pushdown():
x = tensor.matrix() x = tensor.matrix()
# test that the max_and_argmax is not pushed down if the max is used # test that the max_and_argmax is not pushed down if the max is used
out = tensor.max_and_argmax( out = tensor.max_and_argmax(
softmax(tensor.exp(tensor.tanh(sigmoid(x)))), sm(tensor.exp(tensor.tanh(sigmoid(x)))),
axis=-1)[0] axis=-1)[0]
fgraph = gof.FunctionGraph( fgraph = gof.FunctionGraph(
[x], [x],
...@@ -1425,12 +1420,12 @@ def test_relu(): ...@@ -1425,12 +1420,12 @@ def test_relu():
X = rng.randn(20, 30).astype(config.floatX) X = rng.randn(20, 30).astype(config.floatX)
# test the base case, without custom alpha value # test the base case, without custom alpha value
y = theano.tensor.nnet.relu(x).eval({x: X}) y = relu(x).eval({x: X})
assert numpy.allclose(y, numpy.maximum(X, 0)) assert numpy.allclose(y, numpy.maximum(X, 0))
# test for different constant alpha values (also outside of [0, 1]) # test for different constant alpha values (also outside of [0, 1])
for alpha in 0, 0.3, 1, 2, -0.3, -1, -2: for alpha in 0, 0.3, 1, 2, -0.3, -1, -2:
y = theano.tensor.nnet.relu(x, alpha).eval({x: X}) y = relu(x, alpha).eval({x: X})
assert numpy.allclose(y, numpy.where(X > 0, X, alpha * X)) assert numpy.allclose(y, numpy.where(X > 0, X, alpha * X))
# test for variable alpha (scalar, vector and matrix) # test for variable alpha (scalar, vector and matrix)
...@@ -1438,7 +1433,7 @@ def test_relu(): ...@@ -1438,7 +1433,7 @@ def test_relu():
# create value for alpha (correct ndim and broadcastable against X) # create value for alpha (correct ndim and broadcastable against X)
A = numpy.array(rng.randn(*X.shape[::-1][:alpha.ndim][::-1]), A = numpy.array(rng.randn(*X.shape[::-1][:alpha.ndim][::-1]),
dtype=config.floatX) dtype=config.floatX)
y = theano.tensor.nnet.relu(x, alpha).eval({x: X, alpha: A}) y = relu(x, alpha).eval({x: X, alpha: A})
assert numpy.allclose(y, numpy.where(X > 0, X, A * X), rtol=3e-5) assert numpy.allclose(y, numpy.where(X > 0, X, A * X), rtol=3e-5)
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论