Merge pull request #3367 from carriepl/scan_mitmot_prealloc

Scan mitmot prealloc

Merge pull request #3367 from carriepl/scan_mitmot_prealloc
da3c8070 · Frédéric Bastien · 5bab81bb · 31ad3e30 · da3c8070 · da3c8070
--- a/theano/compile/function.py
+++ b/theano/compile/function.py
@@ -268,7 +268,6 @@ def function(inputs, outputs=None, mode=None, updates=None, givens=None,
                        "input.")
    # compute some features of the arguments:
-    uses_In = any([isinstance(i, In) for i in inputs])
    uses_tuple = any([isinstance(i, (list, tuple)) for i in inputs])
    uses_updates = bool(updates)
    uses_givens = bool(givens)
@@ -280,7 +279,7 @@ def function(inputs, outputs=None, mode=None, updates=None, givens=None,
                                   (hasattr(i, 'mutable') and i.mutable))):
            check_for_aliased_inputs = True
-    if uses_In or uses_tuple:
+    if uses_tuple:
        # we must use old semantics in this case.
        if profile:
            raise NotImplementedError("profiling not supported in old-style "

--- a/theano/compile/function_module.py
+++ b/theano/compile/function_module.py
@@ -159,10 +159,22 @@ def std_fgraph(input_specs, output_specs, accept_inplace=False):
    """
    orig_inputs = [spec.variable for spec in input_specs]
-    updates = [spec.update for spec in input_specs if spec.update]
+    # Extract the updates and the mapping between update outputs and
+    # the updated inputs.
+    updates = []
+    update_mapping = {}
+    out_idx = len(output_specs)
+    for inp_idx in range(len(input_specs)):
+        if input_specs[inp_idx].update:
+            updates.append(input_specs[inp_idx].update)
+            update_mapping[out_idx] = inp_idx
+            out_idx += 1
    orig_outputs = [spec.variable for spec in output_specs] + updates
-    fgraph = gof.fg.FunctionGraph(orig_inputs, orig_outputs)
+    fgraph = gof.fg.FunctionGraph(orig_inputs, orig_outputs,
+                                  update_mapping=update_mapping)
    for node in fgraph.apply_nodes:
        if getattr(node.op, 'destroy_map', None):

--- a/theano/compile/io.py
+++ b/theano/compile/io.py
@@ -69,6 +69,13 @@ class SymbolicInput(object):
        if self.name is not None and not isinstance(self.name, string_types):
            raise TypeError("name must be a string! (got: %s)" % self.name)
        self.update = update
+        if update is not None:
+            if not variable.type == update.type:
+                raise TypeError("Variable '%s' has type %s but an update of "
+                                "type %s. The type of the update should be "
+                                "the same as the type of the variable" %
+                                (variable, variable.type, update.type))
        if (mutable is not None):
            self.mutable = mutable
        else:

--- a/theano/compile/mode.py
+++ b/theano/compile/mode.py
@@ -161,18 +161,17 @@ class AddDestroyHandler(gof.Optimizer):
        fgraph.attach_feature(gof.DestroyHandler())
-class AddNoOutputFromInplace(gof.Optimizer):
+class AddFeatureOptimizer(gof.Optimizer):
    """
-    This optimizer adds to the fgraph a feature that will prevent outputs
+    This optimizer adds a provided feature to the function graph.
-    of a fgraph to be created by performing inplace operations on intermediary
-    variables. This is useful when the outputs of the fgraph are preallocated
-    to prevent useless copying of the data. Currently, scan preallocates its
-    outputs
    """
+    def __init__(self, feature):
+        self.feature = feature
    def add_requirements(self, fgraph):
-        super(AddNoOutputFromInplace, self).add_requirements(fgraph)
+        super(AddFeatureOptimizer, self).add_requirements(fgraph)
-        fgraph.attach_feature(gof.NoOutputFromInplace())
+        fgraph.attach_feature(self.feature)
 class PrintCurrentFunctionGraph(gof.Optimizer):
@@ -229,9 +228,6 @@ optdb.register('specialize_device', gof.EquilibriumDB(),
 optdb.register('merge2', gof.MergeOptimizer(),
               49, 'fast_run', 'merge')
-optdb.register('add_no_output_from_inplace', AddNoOutputFromInplace(),
-               49.4)
 optdb.register('add_destroy_handler', AddDestroyHandler(),
               49.5, 'fast_run', 'inplace')
@@ -321,19 +317,44 @@ class Mode(object):
                                              self.provided_optimizer)
        # N.B. opt might be a Query instance, not sure what else it might be...
        #     string? Optimizer? OptDB? who knows???
-        return self.__class__(linker=link, optimizer=opt.including(*tags))
+        return self.clone(optimizer=opt.including(*tags))
+    def register(self, *optimizations):
+        """Adds new optimization instances to a mode.
+        This method adds new optimization instances to a compilation mode. It
+        works like the `including()` method but takes as inputs optimization
+        instances to add instead of tags.
+        Parameters
+        ----------
+        optimizations :
+            Every element of `optimizations` is a tuple containing an
+            optimization instance and a floating point value indicating the
+            position at which to insert the optimization in the mode.
+        Returns
+        -------
+        Mode
+            Copy of the current Mode which includes the provided
+            optimizations.
+        """
+        link, opt = self.get_linker_optimizer(self.provided_linker,
+                                              self.provided_optimizer)
+        return self.clone(optimizer=opt.register(*optimizations))
    def excluding(self, *tags):
        link, opt = self.get_linker_optimizer(self.provided_linker,
                                              self.provided_optimizer)
-        return self.__class__(linker=link, optimizer=opt.excluding(*tags))
+        return self.clone(optimizer=opt.excluding(*tags))
    def requiring(self, *tags):
        link, opt = self.get_linker_optimizer(self.provided_linker,
                                              self.provided_optimizer)
-        return self.__class__(linker=link, optimizer=opt.requiring(*tags))
+        return self.clone(optimizer=opt.requiring(*tags))
-    def clone(self, link_kwargs=None, **kwargs):
+    def clone(self, link_kwargs=None, optimizer="", **kwargs):
        """
        Create a new instance of this Mode.
@@ -342,10 +363,14 @@ class Mode(object):
        arguments.
        """
+        if link_kwargs is None:
+            link_kwargs = {}
        new_linker = self.linker.clone(**link_kwargs)
-        new_optimizer = self.provided_optimizer
+        if optimizer == "":
+            optimizer = self.provided_optimizer
        new_mode = type(self)(linker=new_linker,
-                              optimizer=new_optimizer)
+                              optimizer=optimizer)
        return new_mode

--- a/theano/compile/monitormode.py
+++ b/theano/compile/monitormode.py
@@ -74,25 +74,7 @@ class MonitorMode(Mode):
        if self.post_func is not None:
            self.post_func(i, node, fn)
-    def including(self, *tags):
+    def clone(self, link_kwargs=None, optimizer="", **kwargs):
-        ret = super(MonitorMode, self).including(*tags)
-        ret.pre_func = self.pre_func
-        ret.post_func = self.post_func
-        return ret
-    def excluding(self, *tags):
-        ret = super(MonitorMode, self).excluding(*tags)
-        ret.pre_func = self.pre_func
-        ret.post_func = self.post_func
-        return ret
-    def requiring(self, *tags):
-        ret = super(MonitorMode, self).requiring(*tags)
-        ret.pre_func = self.pre_func
-        ret.post_func = self.post_func
-        return ret
-    def clone(self, link_kwargs=None, **kwargs):
        """
        Create a new instance of this Mode.
@@ -100,10 +82,12 @@ class MonitorMode(Mode):
        ignored, because ProfileMode needs to use its own linker.
        """
+        if optimizer == "":
+            optimizer = self.provided_optimizer
        new_mode = type(self)(pre_func=self.pre_func,
                              post_func=self.post_func,
                              linker=None,
-                              optimizer=self.provided_optimizer)
+                              optimizer=optimizer)
        return new_mode

--- a/theano/compile/pfunc.py
+++ b/theano/compile/pfunc.py
@@ -478,7 +478,19 @@ def pfunc(params, outputs=None, mode=None, updates=None, givens=None,
                'theano.clone(f(x), replace={x: g(x)}))`.'
                % x)
-    output_vars = rebuild_collect_shared(outputs,
+    # Extend the outputs with the updates on input variables so they are also
+    # cloned
+    additional_outputs = [i.update for i in inputs if i.update]
+    if outputs is None:
+        out_list = []
+    else:
+        if isinstance(outputs, (list, tuple)):
+            out_list = list(outputs)
+        else:
+            out_list = [outputs]
+    extended_outputs = out_list + additional_outputs
+    output_vars = rebuild_collect_shared(extended_outputs,
                                         in_variables,
                                         replace=givens,
                                         updates=updates,
@@ -486,12 +498,25 @@ def pfunc(params, outputs=None, mode=None, updates=None, givens=None,
                                         copy_inputs_over=True,
                                         no_default_updates=no_default_updates)
    # extracting the arguments
-    input_variables, cloned_outputs, other_stuff = output_vars
+    input_variables, cloned_extended_outputs, other_stuff = output_vars
    clone_d, update_d, update_expr, shared_inputs = other_stuff
+    # Recover only the clones of the original outputs
+    if outputs is None:
+        cloned_outputs = []
+    else:
+        if isinstance(outputs, (list, tuple)):
+            cloned_outputs = cloned_extended_outputs[:len(outputs)]
+        else:
+            cloned_outputs = cloned_extended_outputs[0]
    for i, iv in zip(inputs, input_variables):
        i.variable = iv
+        # If needed, replace the input's update by its cloned equivalent
+        if i.update:
+            i.update = clone_d[i.update]
    for sv in shared_inputs:
        # pass value of None
        # value will be stored in the resulting functions' defaults
@@ -526,6 +551,8 @@ def _pfunc_param_to_in(param, strict=False, allow_downcast=None):
            borrow=param.borrow,
            allow_downcast=param.allow_downcast,
            implicit=param.implicit)
+    elif isinstance(param, In):
+        return param
    raise TypeError('Unknown parameter type: %s' % type(param))

--- a/theano/compile/tests/test_function.py
+++ b/theano/compile/tests/test_function.py
@@ -2,10 +2,12 @@ import six.moves.cPickle as pickle
 import os
 import shutil
 import tempfile
+import unittest
 import numpy
 import theano
+from theano.compile.io import In
 def test_function_dump():
@@ -26,3 +28,167 @@ def test_function_dump():
    fct2 = theano.function(**l)
    x = [1, 2, 3]
    assert numpy.allclose(fct1(x), fct2(x))
+class TestFunctionIn(unittest.TestCase):
+    def test_in_strict(self):
+        a = theano.tensor.dvector()
+        b = theano.shared(7)
+        out = a + b
+        f = theano.function([In(a, strict=False)], out)
+        # works, rand generates float64 by default
+        f(numpy.random.rand(8))
+        # works, casting is allowed
+        f(numpy.array([1, 2, 3, 4], dtype='int32'))
+        f = theano.function([In(a, strict=True)], out)
+        try:
+            # fails, f expects float64
+            f(numpy.array([1, 2, 3, 4], dtype='int32'))
+        except TypeError:
+            pass
+    def test_explicit_shared_input(self):
+        # This is not a test of the In class per se, but the In class relies
+        # on the fact that shared variables cannot be explicit inputs
+        a = theano.shared(1.0)
+        self.assertRaises(TypeError, theano.function, [a], a + 1)
+    def test_in_shared_variable(self):
+        # Ensure that an error is raised if the In wrapper is used to wrap
+        # a shared variable
+        a = theano.shared(1.0)
+        a_wrapped = In(a, update=a + 1)
+        self.assertRaises(TypeError, theano.function, [a_wrapped])
+    def test_in_mutable(self):
+        a = theano.tensor.dvector()
+        a_out = a * 2  # assuming the op which makes this "in place" triggers
+        # using mutable=True will let f change the value in aval
+        f = theano.function([In(a, mutable=True)], a_out, mode='FAST_RUN')
+        aval = numpy.random.rand(10)
+        aval2 = aval.copy()
+        assert numpy.all(f(aval) == (aval2 * 2))
+        assert not numpy.all(aval == aval2)
+        # using mutable=False should leave the input untouched
+        f = theano.function([In(a, mutable=False)], a_out, mode='FAST_RUN')
+        aval = numpy.random.rand(10)
+        aval2 = aval.copy()
+        assert numpy.all(f(aval) == (aval2 * 2))
+        assert numpy.all(aval == aval2)
+    def test_in_update(self):
+        a = theano.tensor.dscalar('a')
+        f = theano.function([In(a, value=0.0, update=a + 1)], a,
+                            mode='FAST_RUN')
+        # Ensure that, through the executions of the function, the state of the
+        # input is persistent and is updated as it should
+        assert f() == 0.0
+        assert f() == 1.0
+        assert f() == 2.0
+    def test_in_update_wrong_dtype(self):
+        # Ensure that an error is raised if an In-wrapped variable has
+        # an update of a different type
+        a = theano.tensor.dscalar('a')
+        b = theano.tensor.dvector('b')
+        self.assertRaises(TypeError, In, a, update=b)
+    def test_in_update_shared(self):
+        # Test that using both In() with updates and shared variables with
+        # updates in the same function behaves as expected
+        shared_var = theano.shared(1.0)
+        a = theano.tensor.dscalar('a')
+        a_wrapped = In(a, value=0.0, update=shared_var)
+        f = theano.function([a_wrapped], [], updates={shared_var: a},
+                            mode='FAST_RUN')
+        # Ensure that, through the executions of the function, the state of
+        # the input and the shared variable are appropriate (after N execution,
+        # the values have swapped N times). This allows testing that the
+        # changes occur at the same time and one doesn't overwrite the other.
+        for i in range(5):
+            f()
+            assert numpy.allclose(shared_var.get_value(), i % 2)
+    def test_in_allow_downcast_int(self):
+        a = theano.tensor.wvector('a')  # int16
+        b = theano.tensor.bvector('b')  # int8
+        c = theano.tensor.bscalar('c')  # int8
+        f = theano.function([In(a, allow_downcast=True),
+                             In(b, allow_downcast=False),
+                             In(c, allow_downcast=None)],
+                            (a + b + c))
+        # Both values are in range. Since they're not ndarrays (but lists),
+        # they will be converted, and their value checked.
+        assert numpy.all(f([3], [6], 1) == 10)
+        # Values are in range, but a dtype too large has explicitly been given
+        # For performance reasons, no check of the data is explicitly performed
+        # (It might be OK to change this in the future.)
+        self.assertRaises(TypeError, f, [3], numpy.array([6], dtype='int16'),
+                          1)
+        # Value too big for a, silently ignored
+        assert numpy.all(f([2 ** 20], numpy.ones(1, dtype='int8'), 1) == 2)
+        # Value too big for b, raises TypeError
+        self.assertRaises(TypeError, f, [3], [312], 1)
+        # Value too big for c, raises TypeError
+        self.assertRaises(TypeError, f, [3], [6], 806)
+    def test_in_allow_downcast_floatX(self):
+        a = theano.tensor.fscalar('a')
+        b = theano.tensor.fscalar('b')
+        c = theano.tensor.fscalar('c')
+        f = theano.function([In(a, allow_downcast=True),
+                             In(b, allow_downcast=False),
+                             In(c, allow_downcast=None)],
+                            (a + b + c))
+        # If the values can be accurately represented, everything is OK
+        assert numpy.all(f(0, 0, 0) == 0)
+        # If allow_downcast is True, idem
+        assert numpy.allclose(f(0.1, 0, 0), 0.1)
+        # If allow_downcast is False, nope
+        self.assertRaises(TypeError, f, 0, 0.1, 0)
+        # If allow_downcast is None, it should work iff floatX=float32
+        if theano.config.floatX == 'float32':
+            assert numpy.allclose(f(0, 0, 0.1), 0.1)
+        else:
+            self.assertRaises(TypeError, f, 0, 0, 0.1)
+    def test_in_allow_downcast_vector_floatX(self):
+        a = theano.tensor.fvector('a')
+        b = theano.tensor.fvector('b')
+        c = theano.tensor.fvector('c')
+        f = theano.function([In(a, allow_downcast=True),
+                             In(b, allow_downcast=False),
+                             In(c, allow_downcast=None)],
+                            (a + b + c))
+        # If the values can be accurately represented, everything is OK
+        z = [0]
+        assert numpy.all(f(z, z, z) == 0)
+        # If allow_downcast is True, idem
+        assert numpy.allclose(f([0.1], z, z), 0.1)
+        # If allow_downcast is False, nope
+        self.assertRaises(TypeError, f, z, [0.1], z)
+        # If allow_downcast is None, like False
+        self.assertRaises(TypeError, f, z, z, [0.1])
--- a/theano/compile/tests/test_mode.py
+++ b/theano/compile/tests/test_mode.py
 import theano
-from theano.compile.mode import Mode
+from theano.compile.mode import Mode, AddFeatureOptimizer
+from theano.gof.toolbox import NoOutputFromInplace
 import theano.tensor as T
@@ -18,8 +19,8 @@ def test_no_output_from_implace():
    # Ensure that the elemwise op that produces the output is not inplace when
    # using a mode that includes the optimization
-    mode_opt = Mode(linker="cvm", optimizer="fast_run")
+    opt = AddFeatureOptimizer(NoOutputFromInplace())
-    mode_opt = mode_opt.including("add_no_output_from_inplace")
+    mode_opt = Mode(linker="cvm", optimizer="fast_run").register((opt, 49.9))
    fct_opt = theano.function([x, y], b, mode=mode_opt)
    op = fct_opt.maker.fgraph.outputs[0].owner.op

--- a/theano/gof/fg.py
+++ b/theano/gof/fg.py
@@ -109,7 +109,25 @@ class FunctionGraph(utils.object2):
    """
-    def __init__(self, inputs, outputs, features=None, clone=True):
+    def __init__(self, inputs, outputs, features=None, clone=True,
+                 update_mapping=None):
+        """
+        Create a FunctionGraph which operates on the subgraph bound by the
+        inputs and outputs sets.
+        Parameters
+        ----------
+        inputs : list of variables
+            Input nodes of the graph, usually declared by the user
+        outputs : list of variables
+            Output nodes of the graph.
+        clone : boolean
+            If true, we will clone the graph. This is useful to remove the
+            constant cache problem.
+        update_mapping : dictionary
+            Mapping between the inputs with updates and the outputs
+            corresponding to their updates.
+        """
        if clone:
            inputs, outputs = graph.clone(inputs, outputs)
@@ -157,6 +175,7 @@ class FunctionGraph(utils.object2):
        self.node_locks = {}
        self.variable_locks = {}
        self.profile = None
+        self.update_mapping = update_mapping
    def add_input(self, input):
        if input not in self.inputs:

--- a/theano/gof/optdb.py
+++ b/theano/gof/optdb.py
 from __future__ import print_function
+import copy
 import sys
 import numpy
@@ -117,8 +118,12 @@ multiple time in a DB. Tryed to register "%s" again under the new name "%s".
        add = OrderedSet()
        for obj in variables:
            if isinstance(obj, DB):
-                sq = q.subquery.get(obj.name, q)
+                def_sub_query = q
-                if sq:
+                if q.extra_optimizations:
+                    def_sub_query = copy.copy(q)
+                    def_sub_query.extra_optimizations = []
+                sq = q.subquery.get(obj.name, def_sub_query)
                replacement = obj.query(sq)
                replacement.name = obj.name
                remove.add(obj)
@@ -173,12 +178,16 @@ class Query(object):
    """
    def __init__(self, include, require=None, exclude=None,
-                 subquery=None, position_cutoff=None):
+                 subquery=None, position_cutoff=None,
+                 extra_optimizations=None):
        self.include = OrderedSet(include)
        self.require = require or OrderedSet()
        self.exclude = exclude or OrderedSet()
        self.subquery = subquery or {}
        self.position_cutoff = position_cutoff
+        if extra_optimizations is None:
+            extra_optimizations = []
+        self.extra_optimizations = extra_optimizations
        if isinstance(self.require, (list, tuple)):
            self.require = OrderedSet(self.require)
        if isinstance(self.exclude, (list, tuple)):
@@ -186,9 +195,14 @@ class Query(object):
    def __str__(self):
        return ("Query{inc=%s,ex=%s,require=%s,subquery=%s,"
-                "position_cutoff=%d}" %
+                "position_cutoff=%d,extra_opts=%s}" %
                (self.include, self.exclude, self.require, self.subquery,
-                 self.position_cutoff))
+                 self.position_cutoff, self.extra_optimizations))
+    def __setstate__(self, state):
+        self.__dict__.update(state)
+        if not hasattr(self, 'extra_optimizations'):
+            self.extra_optimizations = []
    # add all opt with this tag
    def including(self, *tags):
@@ -196,7 +210,8 @@ class Query(object):
                     self.require,
                     self.exclude,
                     self.subquery,
-                     self.position_cutoff)
+                     self.position_cutoff,
+                     self.extra_optimizations)
    # remove all opt with this tag
    def excluding(self, *tags):
@@ -204,7 +219,8 @@ class Query(object):
                     self.require,
                     self.exclude.union(tags),
                     self.subquery,
-                     self.position_cutoff)
+                     self.position_cutoff,
+                     self.extra_optimizations)
    # keep only opt with this tag.
    def requiring(self, *tags):
@@ -212,7 +228,16 @@ class Query(object):
                     self.require.union(tags),
                     self.exclude,
                     self.subquery,
-                     self.position_cutoff)
+                     self.position_cutoff,
+                     self.extra_optimizations)
+    def register(self, *optimizations):
+        return Query(self.include,
+                     self.require,
+                     self.exclude,
+                     self.subquery,
+                     self.position_cutoff,
+                     self.extra_optimizations + list(optimizations))
 class EquilibriumDB(DB):
@@ -242,8 +267,6 @@ class EquilibriumDB(DB):
        self.__final__ = {}
    def register(self, name, obj, *tags, **kwtags):
-        # if name == 'cut_gpua_constant_transfers':
-        #     import ipdb;ipdb.set_trace()
        if 'final_opt' in kwtags:
            final_opt = kwtags['final_opt']
            kwtags.pop('final_opt', None)
@@ -306,19 +329,33 @@ class SequenceDB(DB):
        position_cutoff = kwtags.pop('position_cutoff',
                                     config.optdb.position_cutoff)
+        position_dict = self.__position__
        if len(tags) >= 1 and isinstance(tags[0], Query):
            # the call to super should have raise an error with a good message
            assert len(tags) == 1
            if getattr(tags[0], 'position_cutoff', None):
                position_cutoff = tags[0].position_cutoff
-        opts = [o for o in opts if self.__position__[o.name] < position_cutoff]
+            # The Query instance might contain extra optimizations which need
-        # We want to sort by position and then if collision by name
+            # to be added to the sequence of optimizations (don't alter the
-        # for deterministic optimization.  Since Python 2.2, sort is
+            # original dictionary)
-        # stable, so sort by name first, then by position. This give
+            if len(tags[0].extra_optimizations) > 0:
-        # the order we want.
+                position_dict = position_dict.copy()
-        opts.sort(key=lambda obj: obj.name)
+                for extra_opt in tags[0].extra_optimizations:
-        opts.sort(key=lambda obj: self.__position__[obj.name])
+                    # Give a name to the extra optimization (include both the
+                    # class name for descriptiveness and id to avoid name
+                    # collisions)
+                    opt, position = extra_opt
+                    opt.name = "%s_%i" % (opt.__class__, id(opt))
+                    # Add the extra optimization to the optimization sequence
+                    if position < position_cutoff:
+                        opts.add(opt)
+                        position_dict[opt.name] = position
+        opts = [o for o in opts if position_dict[o.name] < position_cutoff]
+        opts.sort(key=lambda obj: (position_dict[obj.name], obj.name))
        kwargs = {}
        if self.failure_callback:
            kwargs["failure_callback"] = self.failure_callback

--- a/theano/gof/toolbox.py
+++ b/theano/gof/toolbox.py
@@ -440,10 +440,18 @@ class PreserveNames(Feature):
 class NoOutputFromInplace(Feature):
+    def __init__(self, first_output_idx=0, last_output_idx=None):
+        self.first_idx = first_output_idx
+        self.last_idx = last_output_idx
    def validate(self, fgraph):
        if not hasattr(fgraph, 'destroyers'):
            return True
-        for out in list(fgraph.outputs):
+        outputs_to_validate = list(fgraph.outputs)[self.first_idx:
+                                                   self.last_idx]
+        for out in outputs_to_validate:
            if out.owner is None:
                continue

--- a/theano/scan_module/numpy_api_changes.diff
+++ b/theano/scan_module/numpy_api_changes.diff
-@@ -5808,7 +5808,7 @@
+@@ -6667,7 +6667,7 @@
  *             cdef list stack
  *             cdef int offset
  */
-  __pyx_t_4 = ((PyObject *)__pyx_v_self->descr);
+-  __pyx_t_3 = ((PyObject *)__pyx_v_self->descr);
-+  __pyx_t_4 = ((PyObject *)PyArray_DESCR(__pyx_v_self));
+  __pyx_t_3 = ((PyObject *)PyArray_DESCR(__pyx_v_self));
-   __Pyx_INCREF(__pyx_t_4);
+   __Pyx_INCREF(__pyx_t_3);
-   __pyx_v_descr = ((PyArray_Descr *)__pyx_t_4);
+   __pyx_v_descr = ((PyArray_Descr *)__pyx_t_3);
-   __pyx_t_4 = 0;
+   __pyx_t_3 = 0;
-@@ -7337,7 +7337,7 @@
+@@ -8237,7 +8237,7 @@
  *      arr.base = baseptr
  *
  */
@@ -16,7 +16,7 @@
   /* "numpy.pxd":973
  *          baseptr = <PyObject*>base
-@@ -7346,7 +7346,11 @@
+@@ -8246,7 +8246,11 @@
  *
  * cdef inline object get_array_base(ndarray arr):
  */
@@ -29,7 +29,7 @@
   __Pyx_RefNannyFinishContext();
 }
-@@ -7376,7 +7376,7 @@
+@@ -8285,7 +8285,7 @@
  *         return None
  *     else:
  */
@@ -38,7 +38,7 @@
   if (__pyx_t_1) {
     /* "numpy.pxd":977
-@@ -7400,8 +7404,8 @@
+@@ -8307,8 +8311,8 @@
  *         return <object>arr.base             # <<<<<<<<<<<<<<
  */
     __Pyx_XDECREF(__pyx_r);

--- a/theano/scan_module/scan_op.py
+++ b/theano/scan_module/scan_op.py
--- a/theano/scan_module/scan_perform.c
+++ b/theano/scan_module/scan_perform.c
--- a/theano/scan_module/scan_perform.pyx
+++ b/theano/scan_module/scan_perform.pyx
--- a/theano/scan_module/scan_perform_ext.py
+++ b/theano/scan_module/scan_perform_ext.py
--- a/theano/scan_module/tests/test_scan.py
+++ b/theano/scan_module/tests/test_scan.py
--- a/theano/tensor/opt.py
+++ b/theano/tensor/opt.py