Merge pull request #446 from lamblin/fix_do_constant_folding

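Do not constant-fold Alloc (or GpuAlloc) when its result is directly an
output of the compiled function: a constant output would have to be
deep-copied every time the function is called.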

Merge pull request #446 from lamblin/fix_do_constant_folding
06aedd59 · Olivier Delalleau · 1fc6b692 · 0976b0e6 · 06aedd59 · 06aedd59
--- a/theano/sandbox/cuda/basic_ops.py
+++ b/theano/sandbox/cuda/basic_ops.py
@@ -2005,14 +2005,19 @@ class GpuAlloc(Op):
        return (3,)

    def do_constant_folding(self, node):
-        if any([isinstance(client[0].op, (
+        for client in node.outputs[0].clients:
+            if client[0] == 'output':
+                # If the output is a constant, it will have to be deepcopied
+                # each time the function is called.  So we do not fold.
+                return False
+            elif (not isinstance(client[0], basestring)
+                    and isinstance(client[0].op, (
                        tensor.IncSubtensor,
                        tensor.AdvancedIncSubtensor1,
                        GpuIncSubtensor,
                        GpuAdvancedIncSubtensor1
-                        ))
-                for client in node.outputs[0].clients]):
-            return False
+                        ))):
+                return False
        return True

 gpu_alloc = GpuAlloc()

--- a/theano/tensor/basic.py
+++ b/theano/tensor/basic.py
@@ -2617,12 +2617,18 @@ class Alloc(gof.Op):
        return self.make_node(eval_points[0], *inputs[1:]).outputs

    def do_constant_folding(self, node):
-        if python_any([isinstance(client[0].op, (IncSubtensor,
-                                                 AdvancedIncSubtensor1,
-                                                 AdvancedIncSubtensor,
-                                                 ))
-                       for client in node.outputs[0].clients]):
-            return False
+        for client in node.outputs[0].clients:
+            if client[0] == 'output':
+                # If the output is a constant, it will have to be deepcopied
+                # each time the function is called.  So we do not fold.
+                return False
+            elif (not isinstance(client[0], basestring)
+                    and isinstance(client[0].op, (
+                        IncSubtensor,
+                        AdvancedIncSubtensor1,
+                        AdvancedIncSubtensor,
+                        ))):
+                return False
        return True



--- a/theano/tensor/tests/test_basic.py
+++ b/theano/tensor/tests/test_basic.py
@@ -1278,8 +1278,11 @@ class TestAlloc(unittest.TestCase):
    shared = staticmethod(theano.shared)
    allocs = [tensor.Alloc] * 3

+    def setUp(self):
+        self.rng = numpy.random.RandomState(seed=utt.fetch_seed())
+
    def test_alloc_constant_folding(self):
-        test_params = numpy.asarray(numpy.random.randn(50 * 60),
+        test_params = numpy.asarray(self.rng.randn(50 * 60),
                                    self.dtype)

        some_vector = vector('some_vector', dtype=self.dtype)
@@ -1312,6 +1315,20 @@ class TestAlloc(unittest.TestCase):
            fobj(test_params)
            fgrad(test_params)

+    def test_alloc_output(self):
+        val = tensor.constant(self.rng.randn(1,1), dtype=self.dtype)
+        for alloc in self.allocs:
+            # The output is the result of the alloc operation,
+            # we do not want it to be constant-folded
+            out = alloc()(val, 50, 60)
+
+            f = theano.function([], out)
+            topo = f.maker.env.toposort()
+            assert numpy.sum([isinstance(node.op, alloc)
+                              for node in topo]) == 1
+            assert not isinstance(topo[0].op,
+                    theano.compile.function_module.DeepCopyOp)
+

 def test_eye():
    def check(dtype, N, M_=None, k=0):