Merge pull request #4388 from mohammadpz/useless_alloc

Useless alloc

Merge pull request #4388 from mohammadpz/useless_alloc
1035c776 · Pascal Lamblin · GitHub · 9ea0f3f2 · 818e6c22 · 1035c776
--- a/theano/tensor/opt.py
+++ b/theano/tensor/opt.py
@@ -47,7 +47,8 @@ from theano.tensor.type import (values_eq_approx_remove_inf,
 from theano.gof.opt import (Optimizer, pre_constant_merge,
                            pre_greedy_local_optimizer)
 from theano.gof import toolbox
-from theano.tensor.basic import get_scalar_constant_value, ShapeError, NotScalarConstantError
+from theano.tensor.basic import (Alloc, get_scalar_constant_value, ShapeError,
+                                 extract_constant, NotScalarConstantError)
 from six import StringIO
 _logger = logging.getLogger('theano.tensor.opt')
@@ -1746,10 +1747,32 @@ def local_useless_alloc(node):
    of the input. This is not needed.
    """
-    if node.op == T.alloc:
+    op = node.op
-        if node.inputs[0].type == node.outputs[0].type:
+    if not isinstance(op, Alloc):
-            # We don't need to copy over any stack traces here
+        return False
-            return [node.inputs[0]]
+    input = node.inputs[0]
+    output = node.outputs[0]
+    # Check if dtype and broadcast remain the same.
+    if input.type == output.type:
+        # We don't need to copy over any stack traces here
+        return [input]
+    # Check if alloc adds a broadcastable dimension with shape 1.
+    output_shape = node.inputs[1:]
+    num_dims_with_size_1_added_to_left = 0
+    for i in range(len(output_shape) - input.ndim):
+        if extract_constant(output_shape[i], only_process_constants=True) == 1:
+            num_dims_with_size_1_added_to_left += 1
+        else:
+            break
+    new_output_shape = output_shape[num_dims_with_size_1_added_to_left:]
+    if num_dims_with_size_1_added_to_left > 0 and len(new_output_shape) >= input.ndim:
+        inner = op(*([input] + new_output_shape))
+        dimshuffle_new_order = (['x'] * num_dims_with_size_1_added_to_left +
+                                list(xrange(len(new_output_shape))))
+        return [DimShuffle(inner.type.broadcastable, dimshuffle_new_order)(inner)]
 # Don't register by default.

--- a/theano/tensor/tests/test_opt.py
+++ b/theano/tensor/tests/test_opt.py
@@ -21,6 +21,7 @@ import theano.scalar as scal
 from six import PY3, StringIO
 from theano import compile
 from theano.compile import deep_copy_op, DeepCopyOp
+from theano.compile import get_mode
 from theano import config
 from theano import function
 from theano import gof
@@ -31,6 +32,7 @@ import theano.tensor.opt as opt
 from theano.tensor.opt import (
        local_add_specialize,
        local_dimshuffle_lift,
+        local_useless_alloc,
        local_greedy_distributor,
        mul_canonizer,
        out2in,
@@ -3089,8 +3091,8 @@ class Test_local_elemwise_alloc(unittest.TestCase):
    dtype = config.floatX
    def setUp(self):
-        self.fast_compile_mode = 'FAST_COMPILE'
+        self.fast_compile_mode = get_mode('FAST_COMPILE')
-        self.fast_run_mode = 'FAST_RUN'
+        self.fast_run_mode = get_mode('FAST_RUN')
        self.vec = T.vector('vec', dtype=self.dtype)
        self.mat = T.matrix('mat', dtype=self.dtype)
@@ -3130,6 +3132,10 @@ class Test_local_elemwise_alloc(unittest.TestCase):
        )
    def test_remove_alloc_wo_dimshuffle(self):
+        # Exclude local_useless_alloc, since it does not introduce
+        # assert in all the same cases.
+        self.fast_run_mode = self.fast_run_mode.excluding(
+            'local_useless_alloc')
        # No optimization on alloc
        func = function(
            [self.vec, self.mat],
@@ -3672,6 +3678,57 @@ class Test_local_useless_alloc(unittest.TestCase):
        assert tensor.Alloc in op_classes
        # The correct opt removes nodes, no need for check_stack_trace
+    def test_useless_alloc_with_shape_one(self):
+        alloc_lift = out2in(local_useless_alloc)
+        x = shared(self.rng.randn(2,))
+        y = shared(self.rng.randn())
+        z = shared(self.rng.randn(1, 1))
+        w = shared(self.rng.randn(1, 1))
+        alloc_x = tensor.alloc(x, 1, 3, 2)
+        alloc_y = tensor.alloc(y, 1, 1)
+        alloc_z = tensor.alloc(z, 1, 1, 2)
+        alloc_w = tensor.alloc(w, 1, 2)
+        g = FunctionGraph([x, y, z, w], [alloc_x, alloc_y, alloc_z, alloc_w])
+        self.assertTrue(str(g) == ("[Alloc(<TensorType(float64, vector)>, "
+                                   "TensorConstant{1}, "
+                                   "TensorConstant{3}, "
+                                   "TensorConstant{2}), "
+                                   "Alloc(<TensorType(float64, scalar)>, "
+                                   "TensorConstant{1}, "
+                                   "TensorConstant{1}), "
+                                   "Alloc(<TensorType(float64, matrix)>, "
+                                   "TensorConstant{1}, "
+                                   "TensorConstant{1}, "
+                                   "TensorConstant{2}), "
+                                   "Alloc(<TensorType(float64, matrix)>, "
+                                   "TensorConstant{1}, "
+                                   "TensorConstant{2})]"))
+        alloc_lift.optimize(g)
+        self.assertTrue(str(g) == "[DimShuffle{x,0,1}"
+                                  "(Alloc(<TensorType(float64, vector)>, "
+                                  "TensorConstant{3}, "
+                                  "TensorConstant{2})), "
+                                  "DimShuffle{x,x}"
+                                  "(<TensorType(float64, scalar)>), "
+                                  "DimShuffle{x,0,1}"
+                                  "(Alloc(<TensorType(float64, matrix)>, "
+                                  "TensorConstant{1}, "
+                                  "TensorConstant{2})), "
+                                  "Alloc(<TensorType(float64, matrix)>, "
+                                  "TensorConstant{1}, "
+                                  "TensorConstant{2})]")
+        # Check stacktrace was copied over correctly after opt was applied
+        self.assertTrue(check_stack_trace(g, ops_to_check='all'))
 class Test_local_useless_inc_subtensor_alloc(unittest.TestCase):
    opt_name = 'local_useless_inc_subtensor_alloc'