Commit ad4e2a09, authored by Frédéric Bastien

Merge pull request #3521 from carriepl/scan_speedup_cgt

Scan replace Allocs with AllocEmpty
......@@ -627,7 +627,7 @@ def scan(fn,
# the initial state over. We do this using the expand function
# defined in scan utils
sit_sot_scan_inputs.append(
scan_utils.expand(
scan_utils.expand_empty(
tensor.unbroadcast(
tensor.shape_padleft(actual_arg), 0),
actual_n_steps
......@@ -653,7 +653,7 @@ def scan(fn,
idx_offset = abs(numpy.min(init_out['taps']))
# Sequence
mit_sot_scan_inputs.append(
scan_utils.expand(init_out['initial'][:mintap],
scan_utils.expand_empty(init_out['initial'][:mintap],
actual_n_steps))
if i in return_steps:
......@@ -866,7 +866,7 @@ def scan(fn,
if isinstance(new_var.type, ops.expandable_types):
sit_sot_inner_inputs.append(new_var)
sit_sot_scan_inputs.append(
scan_utils.expand(
scan_utils.expand_empty(
tensor.unbroadcast(
tensor.shape_padleft(input.variable), 0),
actual_n_steps))
......
......@@ -1499,7 +1499,7 @@ class ScanSaveMem(gof.Optimizer):
tmp_idx)
tmp = pre_constant_merge([tmp])[0]
nw_input = scan_utils.expand(_nw_input, tmp)
nw_input = scan_utils.expand_empty(_nw_input, tmp)
else:
tmp = tensor.as_tensor_variable(val)
initl = tensor.as_tensor_variable(init_l[i])
......@@ -1550,7 +1550,7 @@ class ScanSaveMem(gof.Optimizer):
nw_inputs[in_idx].owner.op.idx_list[0],
slice))):
_nw_input = nw_inputs[in_idx].owner.inputs[1]
nw_input = scan_utils.expand(_nw_input,
nw_input = scan_utils.expand_empty(_nw_input,
nw_steps)
nw_inputs[in_idx] = nw_input
else:
......
......@@ -607,19 +607,18 @@ def isNaN_or_Inf_or_None(x):
return isNone or isNaN or isInf or isStr
def expand_empty(tensor_var, size):
    """
    Transforms the shape of a tensor from (d1, d2, ...) to (d1 + size, d2, ...)
    by appending `size` rows of *uninitialized* memory at the end.

    Parameters
    ----------
    tensor_var : TensorVariable
        The tensor to grow along its first dimension.
    size : int or symbolic scalar
        Number of extra rows to append along dimension 0.

    Returns
    -------
    TensorVariable
        A tensor whose first ``tensor_var.shape[0]`` rows hold the data of
        `tensor_var`; the trailing `size` rows are uninitialized (arbitrary
        values).  This is cheaper than zero-filling (the old ``expand``
        behavior) when the caller is going to overwrite those rows anyway,
        as Scan does.
    """
    # Corner case that I might use in an optimization
    if size == 0:
        return tensor_var
    shapes = [tensor_var.shape[x] for x in xrange(tensor_var.ndim)]
    new_shape = [size + shapes[0]] + shapes[1:]
    # AllocEmpty allocates without initializing the memory, unlike
    # tensor.zeros which must write zeros over the whole buffer.
    empty = tensor.AllocEmpty(tensor_var.dtype)(*new_shape)
    return tensor.set_subtensor(empty[:shapes[0]], tensor_var)
......
......@@ -6099,3 +6099,12 @@ class AllocEmpty(gof.Op):
def do_constant_folding(self, node):
    """Disable constant folding for this op.

    The op produces uninitialized memory, so folding it into a graph
    constant would freeze arbitrary garbage values into the graph.
    """
    return False
def connection_pattern(self, node):
    """Report the output as disconnected from every input.

    The inputs only give the shape of the uninitialized output, so no
    gradient can flow through any of them.
    """
    return [[False] for i in node.inputs]
def grad(self, inputs, grads):
    """Return a DisconnectedType gradient for each (shape) input,
    consistent with `connection_pattern`."""
    return [DisconnectedType()() for i in inputs]
def R_op(self, inputs, eval_points):
    """Return a zero directional derivative.

    The output has no differentiable dependence on the inputs, so the
    R-operator is a tensor of zeros.
    """
    # NOTE(review): assumes `inputs` holds the output's shape scalars, so
    # zeros(inputs, self.dtype) builds a zero tensor with the output's
    # shape -- confirm against this Op's make_node.
    return [zeros(inputs, self.dtype)]
Markdown formatting is supported
0%
You are adding 0 people to this discussion. Please proceed with caution.
Please finish editing this comment first!
Register or sign in to post a comment