Commit 88c6d32a authored by Frédéric Bastien, committed by GitHub

Merge pull request #5533 from nouiz/scan

Scan compile/grad speed up
@@ -207,10 +207,10 @@ class Scan(PureOp):
         if self.info['gpu'] or self.info['gpua']:
             self._hash_inner_graph = self.info['gpu_hash']
         else:
-            tmp_in, tmp_out = scan_utils.reconstruct_graph(self.inputs,
-                                                           self.outputs)
-            # This is actually required for the line just after.
-            gof.FunctionGraph(tmp_in, tmp_out, clone=False)
+            # Do the missing inputs check here to have the error early.
+            for var in theano.gof.graph.inputs(self.outputs, self.inputs):
+                if var not in self.inputs and not isinstance(var, theano.Constant):
+                    raise theano.gof.MissingInputError("ScanOp is missing an input.")
             self._cmodule_key = gof.CLinker().cmodule_key_variables(self.inputs,
                                                                     self.outputs,
                                                                     [])
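The new branch replaces the graph clone with a direct traversal: theano.gof.graph.inputs walks back from the inner outputs and returns the leaf variables, so any non-constant leaf that is not a declared input is reported right away. A minimal standalone sketch of the same check (the helper name and the toy variables below are illustrative, not part of the patch):

    import theano
    import theano.tensor as tt

    def check_inner_graph(inputs, outputs):
        # Same check as in the patch: every non-constant leaf reachable
        # from the outputs must be one of the declared inputs.
        for var in theano.gof.graph.inputs(outputs, inputs):
            if var not in inputs and not isinstance(var, theano.Constant):
                raise theano.gof.MissingInputError("ScanOp is missing an input.")

    x = tt.scalar('x')
    y = tt.scalar('y')
    check_inner_graph([x, y], [x + y])   # passes: every leaf is declared
    try:
        check_inner_graph([x], [x + y])  # y is reachable but not declared
    except theano.gof.MissingInputError as e:
        print("caught: %s" % e)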
@@ -1987,10 +1987,8 @@ class Scan(PureOp):
         if self.truncate_gradient != -1:
             grad_steps = tensor.minimum(grad_steps, self.truncate_gradient)
-        rval = scan_utils.reconstruct_graph(self.inputs,
-                                            self.outputs)
-        self_inputs = rval[0]
-        self_outputs = rval[1]
+        self_inputs = self.inputs
+        self_outputs = self.outputs
         # differentiable inputs
         diff_inputs = (self.inner_seqs(self_inputs) +
                        self.inner_mitmot(self_inputs) +
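scan_utils.reconstruct_graph returns a cloned copy of the inner inputs and outputs, so using self.inputs and self.outputs directly avoids one full copy of the inner graph every time the gradient of a scan is built. A short example of the kind of code that goes through this path (the scan itself is illustrative, not from the patch):

    import theano
    import theano.tensor as tt

    x0 = tt.dscalar('x0')
    # Inner function doubles the carried state at each step.
    outputs, updates = theano.scan(fn=lambda prev: prev * 2,
                                   outputs_info=x0,
                                   n_steps=5)
    cost = outputs[-1]            # equals x0 * 2**5
    g = theano.grad(cost, x0)     # dispatches to Scan.grad internally
    f = theano.function([x0], g)
    print(f(3.0))                 # 32.0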
@@ -2649,13 +2647,13 @@ class Scan(PureOp):
         return gradients

     def R_op(self, inputs, eval_points):
-        # Step 0. Don't work on the orignal tensor variables
-        rval = scan_utils.reconstruct_graph(self.inputs,
-                                            self.outputs, '_rop')
-        self_inputs = rval[0]
-        rop_of_inputs = rval[0][:self.n_seqs + self.n_outs] + \
-            rval[0][self.n_seqs + self.n_outs + self.n_shared_outs:]
-        self_outputs = rval[1]
+        # Step 0. Prepare some shortcut variable
+        self_inputs = self.inputs
+        rop_of_inputs = (self_inputs[:self.n_seqs + self.n_outs] +
+                         self_inputs[self.n_seqs + self.n_outs +
+                                     self.n_shared_outs:])
+        self_outputs = self.outputs
         # Step 1. Compute the R_op of the inner function
         inner_eval_points = [scan_utils.safe_new(x, '_evalpoint')
                              for x in rop_of_inputs]
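The same substitution happens in R_op: the slices that used to index the cloned rval[0] now index self.inputs directly. A sketch of a call that exercises Scan.R_op (the scan and variable names are illustrative, not from the patch):

    import theano
    import theano.tensor as tt
    from theano.gradient import Rop

    x0 = tt.dscalar('x0')
    v = tt.dscalar('v')
    outputs, updates = theano.scan(fn=lambda prev: prev * 2,
                                   outputs_info=x0,
                                   n_steps=5)
    # Jacobian-vector product of the scan outputs w.r.t. x0, evaluated
    # at v; building this expression calls Scan.R_op.
    jv = Rop(outputs, x0, v)
    f = theano.function([x0, v], jv)
    print(f(3.0, 1.0))            # [  2.   4.   8.  16.  32.]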
@@ -4488,9 +4488,6 @@ class T_Scan(unittest.TestCase):
                                  n_steps=n,
                                  strict=True)
-        f_strict = theano.function([x0_], ret_strict[0][-1])
-        result_strict = f_strict(x0)

     def test_monitor_mode(self):
         # Test that it is possible to pass an instance of MonitorMode
         # to the inner function
@@ -693,8 +693,8 @@ def test_scan_debugprint5():
     for{cpu,scan_fn} [id F] ''
      >Elemwise{mul,no_inplace} [id CR] ''
-     > |<TensorType(float64, vector)> [id CS] -> [id H]
-     > |A_copy [id CT] -> [id P]
+     > |<TensorType(float64, vector)> [id CP] -> [id H]
+     > |A_copy [id CL] -> [id P]
     for{cpu,scan_fn} [id F] ''
      >Elemwise{mul,no_inplace} [id CR] ''