提交 e345e095 authored 作者: Frédéric Bastien's avatar Frédéric Bastien 提交者: GitHub

Merge pull request #5420 from nouiz/L_op

Use Scan.L_op instead of Scan.grad() to help speed up the second derivative computation.
......@@ -81,7 +81,8 @@ def scan(fn,
name=None,
profile=False,
allow_gc=None,
strict=False):
strict=False,
return_list=False):
"""
This function constructs and applies a Scan op to the provided
arguments.
......@@ -333,6 +334,9 @@ def scan(fn,
If true, all the shared variables used in ``fn`` must be provided as a
part of ``non_sequences`` or ``sequences``.
return_list
If True, will always return a list, even if there is only 1 output.
Returns
-------
tuple
......@@ -794,7 +798,8 @@ def scan(fn,
return_steps.get(pos, 0) != 1):
outputs[pos] = tensor.unbroadcast(
tensor.shape_padleft(inner_out), 0)
if len(outputs) == 1:
if return_list is not True and len(outputs) == 1:
outputs = outputs[0]
return (outputs, updates)
......@@ -1134,8 +1139,9 @@ def scan(fn,
# refers to update rule of index -1 - `pos`.
update_map[sit_sot_shared[abs(pos) - 1]] = _scan_out_list[idx][-1]
scan_out_list = [x for x in scan_out_list if x is not None]
if len(scan_out_list) == 1:
if return_list is not True and len(scan_out_list) == 1:
scan_out_list = scan_out_list[0]
elif len(scan_out_list) == 0:
scan_out_list = None
return (scan_out_list, update_map)
......@@ -1931,8 +1931,7 @@ class Scan(PureOp):
return mappings
# GRAD FUNCTION
def grad(self, inputs, dC_douts):
outs = self(*inputs)
def L_op(self, inputs, outs, dC_douts):
if not isinstance(outs, (list, tuple)):
outs = [outs]
# `grad_step` equals the number of steps the original scan node has
......
......@@ -318,12 +318,14 @@ class T_Scan(unittest.TestCase):
state = theano.tensor.scalar('state')
n_steps = theano.tensor.iscalar('nsteps')
# Test return_list at the same time.
output, updates = theano.scan(f_pow2,
[],
state,
[],
n_steps=n_steps,
truncate_gradient=-1,
return_list=True,
go_backwards=False)
my_f = theano.function([state, n_steps],
output,
......@@ -337,7 +339,7 @@ class T_Scan(unittest.TestCase):
numpy_values = numpy.array([state * (2 ** (k + 1)) for k
in xrange(steps)])
theano_values = my_f(state, steps)
utt.assert_allclose(numpy_values, theano_values)
utt.assert_allclose(numpy_values, theano_values[0])
def test_subtensor_multiple_slices(self):
# This addresses a bug reported by Matthias Zoehrer
......@@ -4416,16 +4418,17 @@ class T_Scan(unittest.TestCase):
n_steps=1,
)
return sum_outer + result_inner[-1]
# Also test return_list for that case.
result_outer, _ = theano.scan(
fn=loss_outer,
outputs_info=tensor.as_tensor_variable(
numpy.asarray(0, dtype=numpy.float32)),
non_sequences=[W],
n_steps=n_steps,
return_list=True,
)
cost = result_outer[-1]
cost = result_outer[0][-1]
H = theano.gradient.hessian(cost, W)
print(".", file=sys.stderr)
f = theano.function([W, n_steps], H)
......
......@@ -643,73 +643,70 @@ def test_scan_debugprint5():
| |Subtensor{::int64} [id BL] ''
| | |IncSubtensor{Inc;int64::} [id BM] ''
| | | |Elemwise{second,no_inplace} [id BN] ''
| | | | |for{cpu,scan_fn} [id BO] ''
| | | | | |k [id G]
| | | | | |IncSubtensor{Set;:int64:} [id H] ''
| | | | | |A [id P]
| | | | |InplaceDimShuffle{x,x} [id BP] ''
| | | | |TensorConstant{0.0} [id BQ]
| | | |IncSubtensor{Inc;int64} [id BR] ''
| | | | |Elemwise{second,no_inplace} [id BS] ''
| | | | | |Subtensor{int64::} [id BT] ''
| | | | | | |for{cpu,scan_fn} [id BO] ''
| | | | | | |Constant{1} [id BU]
| | | | | |InplaceDimShuffle{x,x} [id BV] ''
| | | | | |TensorConstant{0.0} [id BQ]
| | | | |Elemwise{second} [id BW] ''
| | | | | |Subtensor{int64} [id BX] ''
| | | | | | |Subtensor{int64::} [id BT] ''
| | | | | | |Constant{-1} [id BY]
| | | | | |InplaceDimShuffle{x} [id BZ] ''
| | | | | |Elemwise{second,no_inplace} [id CA] ''
| | | | | |Sum{acc_dtype=float64} [id CB] ''
| | | | | | |Subtensor{int64} [id BX] ''
| | | | |for{cpu,scan_fn} [id F] ''
| | | | |InplaceDimShuffle{x,x} [id BO] ''
| | | | |TensorConstant{0.0} [id BP]
| | | |IncSubtensor{Inc;int64} [id BQ] ''
| | | | |Elemwise{second,no_inplace} [id BR] ''
| | | | | |Subtensor{int64::} [id BS] ''
| | | | | | |for{cpu,scan_fn} [id F] ''
| | | | | | |Constant{1} [id BT]
| | | | | |InplaceDimShuffle{x,x} [id BU] ''
| | | | | |TensorConstant{0.0} [id BP]
| | | | |Elemwise{second} [id BV] ''
| | | | | |Subtensor{int64} [id BW] ''
| | | | | | |Subtensor{int64::} [id BS] ''
| | | | | | |Constant{-1} [id BX]
| | | | | |InplaceDimShuffle{x} [id BY] ''
| | | | | |Elemwise{second,no_inplace} [id BZ] ''
| | | | | |Sum{acc_dtype=float64} [id CA] ''
| | | | | | |Subtensor{int64} [id BW] ''
| | | | | |TensorConstant{1.0} [id R]
| | | | |Constant{-1} [id BY]
| | | |Constant{1} [id BU]
| | |Constant{-1} [id CC]
| |Alloc [id CD] ''
| | |TensorConstant{0.0} [id BQ]
| | |Elemwise{add,no_inplace} [id CE] ''
| | | | |Constant{-1} [id BX]
| | | |Constant{1} [id BT]
| | |Constant{-1} [id CB]
| |Alloc [id CC] ''
| | |TensorConstant{0.0} [id BP]
| | |Elemwise{add,no_inplace} [id CD] ''
| | | |Elemwise{sub,no_inplace} [id C] ''
| | | |TensorConstant{1} [id Y]
| | |Subtensor{int64} [id CF] ''
| | |Shape [id CG] ''
| | |Subtensor{int64} [id CE] ''
| | |Shape [id CF] ''
| | | |A [id P]
| | |Constant{0} [id CH]
| | |Constant{0} [id CG]
| |A [id P]
|Constant{-1} [id CI]
|Constant{-1} [id CH]
Inner graphs of the scan ops:
for{cpu,grad_of_scan_fn}.1 [id B] ''
>Elemwise{add,no_inplace} [id CJ] ''
> |Elemwise{mul} [id CK] ''
> | |<TensorType(float64, vector)> [id CL] -> [id BL]
> | |A_copy [id CM] -> [id P]
> |<TensorType(float64, vector)> [id CN] -> [id BL]
>Elemwise{add,no_inplace} [id CO] ''
> |Elemwise{mul} [id CP] ''
> | |<TensorType(float64, vector)> [id CL] -> [id BL]
> | |<TensorType(float64, vector)> [id CQ] -> [id Z]
> |<TensorType(float64, vector)> [id CR] -> [id CD]
>Elemwise{add,no_inplace} [id CI] ''
> |Elemwise{mul} [id CJ] ''
> | |<TensorType(float64, vector)> [id CK] -> [id BL]
> | |A_copy [id CL] -> [id P]
> |<TensorType(float64, vector)> [id CM] -> [id BL]
>Elemwise{add,no_inplace} [id CN] ''
> |Elemwise{mul} [id CO] ''
> | |<TensorType(float64, vector)> [id CK] -> [id BL]
> | |<TensorType(float64, vector)> [id CP] -> [id Z]
> |<TensorType(float64, vector)> [id CQ] -> [id CC]
for{cpu,scan_fn} [id F] ''
>Elemwise{mul,no_inplace} [id CS] ''
> |<TensorType(float64, vector)> [id CT] -> [id H]
> |A_copy [id CU] -> [id P]
>Elemwise{mul,no_inplace} [id CR] ''
> |<TensorType(float64, vector)> [id CS] -> [id H]
> |A_copy [id CT] -> [id P]
for{cpu,scan_fn} [id F] ''
>Elemwise{mul,no_inplace} [id CS] ''
>Elemwise{mul,no_inplace} [id CR] ''
for{cpu,scan_fn} [id F] ''
>Elemwise{mul,no_inplace} [id CS] ''
>Elemwise{mul,no_inplace} [id CR] ''
for{cpu,scan_fn} [id BO] ''
>Elemwise{mul,no_inplace} [id CS] ''
for{cpu,scan_fn} [id F] ''
>Elemwise{mul,no_inplace} [id CR] ''
for{cpu,scan_fn} [id BO] ''
>Elemwise{mul,no_inplace} [id CS] ''"""
for{cpu,scan_fn} [id F] ''
>Elemwise{mul,no_inplace} [id CR] ''"""
for truth, out in zip(expected_output.split("\n"), lines):
assert truth.strip() == out.strip()
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论