Commit 28a095d6, authored by nouiz

Merge pull request #730 from larseeri/shape_tensor_nnet

Better infer_shape tests for Softmax, SoftmaxWithBias, SoftmaxGrad, CrossentropySoftmaxArgmax1HotWithBias, ConvOp, Conv3D, ConvTransp3D and ConvGrad3D. Added and tested infer_shape for CrossentropySoftmax1HotWithBiasDx, Prepend_scalar_constant_to_each_row, Prepend_scalar_to_each_row and CrossentropyCategorical1HotGrad. Added a disabled CrossentropyCategorical1Hot.infer_shape; see gh-788.
...@@ -857,6 +857,9 @@ class CrossentropySoftmax1HotWithBiasDx (gof.Op): ...@@ -857,6 +857,9 @@ class CrossentropySoftmax1HotWithBiasDx (gof.Op):
dx[i, y_idx[i]] -= dy[i] # scalar decrement dx[i, y_idx[i]] -= dy[i] # scalar decrement
output_storage[0][0] = dx output_storage[0][0] = dx
def infer_shape(self, node, shapes):
    """The gradient dx has the same shape as the softmax output sm (input 1)."""
    dy_shape, sm_shape, y_idx_shape = shapes
    return [sm_shape]
def grad(self, inp, grads): def grad(self, inp, grads):
dy, sm, y_idx = inp dy, sm, y_idx = inp
g_dx, = grads g_dx, = grads
...@@ -1031,8 +1034,11 @@ class CrossentropyCategorical1HotGrad(gof.Op): ...@@ -1031,8 +1034,11 @@ class CrossentropyCategorical1HotGrad(gof.Op):
for i in xrange(len(g_y)): for i in xrange(len(g_y)):
g_coding[i, true_one_of_n[i]] = -g_y[i] / coding_dist[i, g_coding[i, true_one_of_n[i]] = -g_y[i] / coding_dist[i,
true_one_of_n[i]] true_one_of_n[i]]
g_coding_strg[0] = g_coding g_coding_strg[0] = g_coding
def infer_shape(self, node, in_shapes):
    """g_coding matches the shape of coding_dist (input 1)."""
    # Slicing yields a one-element list, equivalent to [in_shapes[1]].
    return list(in_shapes[1:2])
crossentropy_categorical_1hot_grad = CrossentropyCategorical1HotGrad() crossentropy_categorical_1hot_grad = CrossentropyCategorical1HotGrad()
...@@ -1091,6 +1097,17 @@ class CrossentropyCategorical1Hot(gof.Op): ...@@ -1091,6 +1097,17 @@ class CrossentropyCategorical1Hot(gof.Op):
y[i] = -numpy.log(coding[i, one_of_n[i]]) y[i] = -numpy.log(coding[i, one_of_n[i]])
y_out[0] = y y_out[0] = y
# Enabling this infer_shape method makes 2 tests fail:
# theano/tensor/nnet/tests/test_nnet.py:T_CrossentropyCategorical1Hot.
#     {test_softmax_grad_optimizations,test_softmax_grad_optimizations_vector}
# This is caused by local_fill_to_alloc, which calls broadcast_like;
# that looks into the shape feature and returns a Rebroadcast instead of
# an Alloc. This infer_shape is disabled until we fix the optimizations
# or determine that it is no longer needed and update the tests.
# See issue gh-788.
# def infer_shape(self, node, in_shapes):
# return [(in_shapes[0][0],)]
def grad(self, inp, grads): def grad(self, inp, grads):
coding, one_of_n = inp coding, one_of_n = inp
g_y, = grads g_y, = grads
...@@ -1121,7 +1138,7 @@ def crossentropy_to_crossentropy_with_softmax_with_bias(fgraph): ...@@ -1121,7 +1138,7 @@ def crossentropy_to_crossentropy_with_softmax_with_bias(fgraph):
new_nll, new_sm, new_am = crossentropy_softmax_argmax_1hot_with_bias(x, b, new_nll, new_sm, new_am = crossentropy_softmax_argmax_1hot_with_bias(x, b,
one_of_n) one_of_n)
fgraph.replace_all_validate([(nll, new_nll), (sm, new_sm)], fgraph.replace_all_validate([(nll, new_nll), (sm, new_sm)],
reason="crossentropy_to_crossentropy_with_softmax") reason="crossentropy_to_crossentropy_with_softmax_with_bias")
return True return True
return False return False
...@@ -1645,6 +1662,11 @@ class Prepend_scalar_constant_to_each_row(gof.Op): ...@@ -1645,6 +1662,11 @@ class Prepend_scalar_constant_to_each_row(gof.Op):
out[:, 0].fill(self.val.data) out[:, 0].fill(self.val.data)
out[:, 1:] = mat out[:, 1:] = mat
def infer_shape(self, node, in_shapes):
    """Output keeps the row count of the input matrix and gains one column."""
    n_rows, n_cols = in_shapes[0]
    return [(n_rows, n_cols + 1)]
def grad(self, inp, grads): def grad(self, inp, grads):
mat, = inp mat, = inp
goutput, = grads goutput, = grads
...@@ -1694,6 +1716,10 @@ class Prepend_scalar_to_each_row(gof.Op): ...@@ -1694,6 +1716,10 @@ class Prepend_scalar_to_each_row(gof.Op):
out[:, 0].fill(val) out[:, 0].fill(val)
out[:, 1:] = mat out[:, 1:] = mat
def infer_shape(self, node, in_shapes):
    """Output keeps the row count of the matrix (input 1) and gains one column."""
    n_rows, n_cols = in_shapes[1]
    return [(n_rows, n_cols + 1)]
def grad(self, inp, grads): def grad(self, inp, grads):
val, mat = inp val, mat = inp
goutput, = grads goutput, = grads
......
...@@ -13,10 +13,10 @@ from theano.tensor.nnet import conv ...@@ -13,10 +13,10 @@ from theano.tensor.nnet import conv
from theano.tensor.basic import _allclose from theano.tensor.basic import _allclose
class TestConv2D(unittest.TestCase): class TestConv2D(utt.InferShapeTester):
def setUp(self): def setUp(self):
utt.seed_rng() super (TestConv2D, self).setUp()
self.input = T.dtensor4('input') self.input = T.dtensor4('input')
self.filters = T.dtensor4('filters') self.filters = T.dtensor4('filters')
...@@ -368,8 +368,7 @@ class TestConv2D(unittest.TestCase): ...@@ -368,8 +368,7 @@ class TestConv2D(unittest.TestCase):
gcc bug. So it should not crash anymore gcc bug. So it should not crash anymore
""" """
self.validate((1, 10, 213, 129), (46, 10, 212, 1), 'valid', self.validate((1, 10, 213, 129), (46, 10, 212, 1), 'valid',
verify_grad=False) verify_grad=False)
self.validate((1, 10, 213, 129), (46, 10, 212, 1), 'valid', verify_grad=False)
def speed(self): def speed(self):
n_calls = 20000 n_calls = 20000
...@@ -407,3 +406,100 @@ class TestConv2D(unittest.TestCase): ...@@ -407,3 +406,100 @@ class TestConv2D(unittest.TestCase):
t2 = time.time() t2 = time.time()
print t2 - t1, print t2 - t1,
print print
def test_infer_shape(self):
    """Check ConvOp.infer_shape on several (input, filter) shape pairs.

    Note: infer_shape for ConvOp is incomplete, so the input and filter
    shapes must be provided explicitly to ``conv2d``.  Each shape pair
    is exercised with both 'valid' and 'full' border modes, replacing
    the ten copy-pasted stanzas of the original with one data-driven
    loop.
    """
    def rand(*shape):
        # Uniform random values in [-1, 1).
        r = numpy.asarray(numpy.random.rand(*shape), dtype='float64')
        return r * 2 - 1

    adtens = T.dtensor4()
    bdtens = T.dtensor4()

    # (input shape, filter shape) pairs; each runs in both border modes.
    shape_pairs = [
        ([2, 2, 3, 3], [2, 2, 2, 2]),
        ([3, 2, 8, 8], [4, 2, 5, 5]),
        ([3, 2, 7, 5], [5, 2, 3, 2]),
        ([3, 2, 7, 5], [5, 2, 2, 3]),
        ([3, 2, 3, 3], [4, 2, 3, 3]),
    ]
    for aivec_val, bivec_val in shape_pairs:
        for border_mode in ('valid', 'full'):
            # Fresh random data per case, mirroring the unrolled original.
            adtens_val = rand(*aivec_val)
            bdtens_val = rand(*bivec_val)
            self._compile_and_check(
                [adtens, bdtens],
                [conv.conv2d(adtens, bdtens, aivec_val, bivec_val,
                             border_mode=border_mode)],
                [adtens_val, bdtens_val], conv.ConvOp)
if __name__ == '__main__':
    # When run as a script, execute only the infer_shape test.
    # TestConv2D('setUp') names an existing method so TestCase.__init__
    # accepts it; the test itself is then driven manually.
    t = TestConv2D('setUp')
    t.setUp()
    t.test_infer_shape()
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论