Merge pull request #1470 from nouiz/split_file

Split file

Merge pull request #1470 from nouiz/split_file
fc66c3fd · lamblin · faf7fba3 · c4185b6b · fc66c3fd · fc66c3fd
--- a/theano/compile/function_module.py
+++ b/theano/compile/function_module.py
@@ -727,12 +727,19 @@ copy_reg.pickle(Function, _pickle_Function)
 ###

 class SanityCheckFunction(Function):
+    """Deprecated. It is not used and not tested anywhere in Theano!
+
+    Also, we should remove the check_equal and related function in
+    this file, and use Type.values_equals() instead.
+
+    """

    def __init__(self, others, check_equal, *args, **kwargs):
        super(SanityCheckFunction, self).__init__(*args, **kwargs)
        self.others = others
        self.check_equal = check_equal
        # DEPRECATED?  Is this just for DualLinker?
+        warnings.warn("SanityCheckFunction is deprecated")

    def __setitem__(self, item, value):
        super(SanityCheckFunction, self).__setitem__(item, value)

--- a/theano/gof/__init__.py
+++ b/theano/gof/__init__.py
@@ -78,4 +78,4 @@ from theano.gof.type import \
    Type, Generic, generic

 from theano.gof.utils import \
-    object2, MethodNotDefined
+    hashtype, object2, MethodNotDefined
--- a/theano/gof/utils.py
+++ b/theano/gof/utils.py
@@ -22,6 +22,11 @@ def hashgen():
 hashgen.next = 0


+def hashtype(self):
+    t = type(self)  # only the type is hashed; the state of `self` is never read
+    return hash(t.__name__) ^ hash(t.__module__)  # XOR of class-name and module-name hashes
+
+
 class MethodNotDefined(Exception):
    """
    To be raised by functions defined as part of an interface.

--- a/theano/ifelse.py
+++ b/theano/ifelse.py
@@ -437,8 +437,8 @@ acceptable_ops = (theano.tensor.basic.Dot,
                  theano.tensor.basic.Shape,
                  theano.tensor.basic.SpecifyShape,
                  theano.tensor.basic.MaxAndArgmax,
-                  theano.tensor.basic.Subtensor,
-                  theano.tensor.basic.IncSubtensor,
+                  theano.tensor.Subtensor,
+                  theano.tensor.IncSubtensor,
                  theano.tensor.basic.Rebroadcast,
                  theano.tensor.basic.Alloc,
                  theano.tensor.elemwise.Elemwise,

--- a/theano/sandbox/cuda/cuda_ndarray.cu
+++ b/theano/sandbox/cuda/cuda_ndarray.cu
@@ -798,7 +798,7 @@ __global__ void k_take_3(const int d0, const int d1, const int d2,
 // This prevent us from setting it to 0 before each use
 static int* err_var = NULL;

-// We try to be similat to the PyArray_TakeFrom function
+// We try to be similar to the PyArray_TakeFrom function
 //http://docs.scipy.org/doc/numpy/reference/c-api.array.html
 //TODO: support other clip mode then raise(clip, wrap)
 //self is the input that we copy data from.

--- a/theano/sandbox/cuda/tests/test_basic_ops.py
+++ b/theano/sandbox/cuda/tests/test_basic_ops.py
@@ -912,8 +912,9 @@ class T_Join_and_Split(theano.tensor.tests.test_basic.T_Join_and_Split):
        self.shared = cuda.shared_constructor


+import theano.tensor.tests.test_subtensor
 # This is to don't duplicate test.
-class T_subtensor(theano.tensor.tests.test_basic.T_subtensor):
+class T_subtensor(theano.tensor.tests.test_subtensor.T_subtensor):

    # This prevents nose from printing method docstrings instead of method
    # names
@@ -933,7 +934,7 @@ class T_subtensor(theano.tensor.tests.test_basic.T_subtensor):
           cuda.GpuAdvancedSubtensor1, cuda.GpuAdvancedIncSubtensor1)

    def __init__(self, name):
-        return super(theano.tensor.tests.test_basic.T_subtensor,
+        return super(theano.tensor.tests.test_subtensor.T_subtensor,
                     self).__init__(name)

    def test_adv_sub1_fast(self):

--- a/theano/sandbox/linalg/ops.py
+++ b/theano/sandbox/linalg/ops.py
@@ -831,6 +831,8 @@ det = Det()
 def trace(X):
    """
    Returns the sum of diagonal elements of matrix X.
+
+    :note: works on GPU since 0.6rc4.
    """
    return extract_diag(X).sum()


--- a/theano/scan_module/scan_opt.py
+++ b/theano/scan_module/scan_opt.py
@@ -691,7 +691,7 @@ class ScanSaveMem(gof.Optimizer):
                    break
                # 2.2 non-subtensor nodes
                #=> output needs all its intermediate values
-                elif not isinstance(cl.op, tensor.basic.Subtensor):
+                elif not isinstance(cl.op, tensor.Subtensor):
                    global_nsteps = None
                    slices[i] = None
                    break
@@ -699,7 +699,7 @@ class ScanSaveMem(gof.Optimizer):
                #=> output might need to store just a subset of its values
                else:
                    # 2.3.1 extract idx list of subtensor
-                    this_slice = tensor.basic.get_idx_list(cl.inputs,
+                    this_slice = tensor.get_idx_list(cl.inputs,
                                                     cl.op.idx_list)
                    if this_slice is None:
                        # if unable to extract idx_list
@@ -719,7 +719,7 @@ class ScanSaveMem(gof.Optimizer):
                            length = shape_of[out][0]
                        except KeyError:
                            length = out.shape[0]
-                    cf_slice = tensor.basic.get_canonical_form_slice(
+                    cf_slice = tensor.get_canonical_form_slice(
                                                    this_slice[0], length)
                    slices[i] += [(cf_slice, this_slice)]

@@ -795,12 +795,12 @@ class ScanSaveMem(gof.Optimizer):
                if type(cl) == str:
                    store_steps[i] = 0
                    break
-                elif not isinstance(cl.op, tensor.basic.Subtensor):
+                elif not isinstance(cl.op, tensor.Subtensor):
                    store_steps[i] = 0
                    break
                else:
-                    this_slice = tensor.basic.get_idx_list(cl.inputs,
-                                                         cl.op.idx_list)
+                    this_slice = tensor.get_idx_list(cl.inputs,
+                                                     cl.op.idx_list)
                    if this_slice is None:
                        store_steps[i] = 0
                        break
@@ -817,8 +817,8 @@ class ScanSaveMem(gof.Optimizer):
                            length = shape_of[out][0]
                        except KeyError:
                            length = out.shape[0]
-                    cf_slice = tensor.basic.get_canonical_form_slice(
-                                                    this_slice[0], length)
+                    cf_slice = tensor.get_canonical_form_slice(
+                        this_slice[0], length)

                    if isinstance(cf_slice[0], slice):
                        start = tensor.basic.extract_constant(
@@ -973,9 +973,9 @@ class ScanSaveMem(gof.Optimizer):
                        nw_slice = (fslice,) + tuple(old_slices[1:])
                        nw_pos = inv_compress_map[idx]

-                        subtens = tensor.basic.Subtensor(nw_slice)
+                        subtens = tensor.Subtensor(nw_slice)
                        # slice inputs
-                        sl_ins = tensor.basic.Subtensor.collapse(
+                        sl_ins = tensor.Subtensor.collapse(
                            nw_slice,
                            lambda entry: isinstance(entry,
                                                    tensor.Variable))
@@ -1014,8 +1014,8 @@ class ScanSaveMem(gof.Optimizer):
                            nw_slice = (sanitize(position),) + \
                                    tuple(old_slices[1:])

-                        subtens = tensor.basic.Subtensor(nw_slice)
-                        sl_ins = tensor.basic.Subtensor.collapse(
+                        subtens = tensor.Subtensor(nw_slice)
+                        sl_ins = tensor.Subtensor.collapse(
                            nw_slice,
                            lambda entry: isinstance(entry,
                                                     tensor.Variable))

--- a/theano/tensor/__init__.py
+++ b/theano/tensor/__init__.py
@@ -4,6 +4,8 @@ __docformat__ = "restructuredtext en"
 import warnings

 from theano.tensor.basic import *
+from theano.tensor.subtensor import *
+from theano.tensor.type_other import *

 from theano.tensor import opt
 from theano.tensor import opt_uncanonicalize

--- a/theano/tensor/basic.py
+++ b/theano/tensor/basic.py
--- a/theano/tensor/nnet/nnet.py
+++ b/theano/tensor/nnet/nnet.py
@@ -8,6 +8,7 @@ import numpy
 import theano
 from theano import gof
 from theano.tensor import basic as tensor
+from theano.tensor import subtensor
 from theano.tensor import elemwise, dmatrix, fmatrix, dvector, fvector
 from theano.tensor import opt
 from theano.compile import optdb
@@ -1004,7 +1005,7 @@ class CrossentropySoftmax1HotWithBiasDx (gof.Op):
        # typically we should not need the gradient w.r.t. dy).
        y_idx_range = tensor.arange(y_idx.shape[0])
        g_dy = tensor.sum(
-                g_dx * tensor.AdvancedIncSubtensor()(
+                g_dx * subtensor.AdvancedIncSubtensor()(
                    sm, tensor.fill(dy, -1), y_idx_range, y_idx),
                axis=1)
        g_sm = dy.dimshuffle(0, 'x') * g_dx
@@ -1396,7 +1397,7 @@ def _check_rows_is_arange_len_labels(rows, labels):

        # Not sure if that case happens any more after the introduction of
        # ShapeOptimizer, but we keep it if ShapeOptimizer is not present
-        if isinstance(stop.owner.op, tensor.Subtensor):
+        if isinstance(stop.owner.op, subtensor.Subtensor):
            shape_subtensor = stop.owner
            if list(shape_subtensor.op.idx_list) == [0]:
                shape_var, = shape_subtensor.inputs
@@ -1424,7 +1425,7 @@ def local_advanced_indexing_crossentropy_onehot(node):
    log = None
    sm = None
    # First case: log(softmax(x))[rows, labels]
-    if isinstance(node.op, tensor.AdvancedSubtensor):
+    if isinstance(node.op, subtensor.AdvancedSubtensor):
        try:
            log, rows, labels = node.inputs
        except Exception:
@@ -1435,7 +1436,7 @@ def local_advanced_indexing_crossentropy_onehot(node):
    # Second case: log(softmax(x)[rows, labels])
    if node.op == tensor.log:
        pre_log = node.inputs[0].owner
-        if pre_log and isinstance(pre_log.op, tensor.AdvancedSubtensor):
+        if pre_log and isinstance(pre_log.op, subtensor.AdvancedSubtensor):
            try:
                sm, rows, labels = pre_log.inputs
            except Exception:
@@ -1524,7 +1525,7 @@ def local_advanced_indexing_crossentropy_onehot_grad(node):
    # After the check for AdvancedIncSubtensor, if anything does not fit with
    # the formula above, there's no way to fit it with the the second case,
    # so we return immediately.
-    if d_sm.owner and isinstance(d_sm.owner.op, tensor.AdvancedIncSubtensor):
+    if d_sm.owner and isinstance(d_sm.owner.op, subtensor.AdvancedIncSubtensor):
        try:
            z, incr, rows, labels = d_sm.owner.inputs
        except Exception:
@@ -1566,7 +1567,7 @@ def local_advanced_indexing_crossentropy_onehot_grad(node):
            if not denom.owner:
                return

-            if isinstance(denom.owner.op, tensor.AdvancedSubtensor):
+            if isinstance(denom.owner.op, subtensor.AdvancedSubtensor):
                # Base case
                adv_subtensor = denom
                #out_grad /= 1.
@@ -1575,7 +1576,7 @@ def local_advanced_indexing_crossentropy_onehot_grad(node):
                # and the output gradient
                for i, input in enumerate(denom.owner.inputs):
                    if input.owner and isinstance(input.owner.op,
-                                                  tensor.AdvancedSubtensor):
+                                                  subtensor.AdvancedSubtensor):
                        other_inputs = [in_ for (j,
                             in_) in enumerate(denom.owner.inputs) if j != i]
                        if len(other_inputs) == 1:
@@ -1630,7 +1631,7 @@ def local_advanced_indexing_crossentropy_onehot_grad(node):
            return

        # Check the numerator (AdvancedIncSubtensor)
-        if num.owner and isinstance(num.owner.op, tensor.AdvancedIncSubtensor):
+        if num.owner and isinstance(num.owner.op, subtensor.AdvancedIncSubtensor):
            try:
                z, incr, rows, labels = num.owner.inputs
            except Exception:

--- a/theano/tensor/opt.py
+++ b/theano/tensor/opt.py
@@ -24,6 +24,8 @@ from theano.gof.python25 import maxsize
 from theano.gof.utils import MethodNotDefined
 from theano.configparser import config
 from theano.tensor.elemwise import Elemwise, DimShuffle
+from theano.tensor.subtensor import (get_idx_list, get_canonical_form_slice,
+                                     Subtensor, IncSubtensor, AdvancedIncSubtensor1)
 from theano import scalar
 from theano.tensor import basic as T
 from theano import compile  # to register the optimizer built by this file
@@ -1217,13 +1219,13 @@ def local_track_shape_i(node):

 @register_specialize
 @register_canonicalize
-@gof.local_optimizer([T.Subtensor])
+@gof.local_optimizer([Subtensor])
 def local_subtensor_make_vector(node):
    # replace all subtensor(make_vector) like:
    # [a,b,c][0] -> a
    # [a,b,c][0:2] -> [a,b]
    # we can do this for constant indexes
-    if isinstance(node.op, T.Subtensor):
+    if isinstance(node.op, Subtensor):
        # This optimization needs ShapeOpt and fgraph.shape_feature
        x = node.inputs[0]
        if x.owner and x.owner.op == make_vector:
@@ -1591,12 +1593,12 @@ def local_upcast_elemwise_constant_inputs(node):

 @register_canonicalize
 @register_specialize
-@gof.local_optimizer([T.Subtensor])
+@gof.local_optimizer([Subtensor])
 def local_useless_subtensor(node):
    """
    Remove Subtensor if it takes the full input
    """
-    if isinstance(node.op, T.Subtensor):
+    if isinstance(node.op, Subtensor):
        # This optimization needs ShapeOpt and fgraph.shape_feature
        if not hasattr(node.fgraph, 'shape_feature'):
            return
@@ -1677,7 +1679,7 @@ def local_subtensor_lift(node):
      when x,... are broadcasted scalar or not broadcasted at all
    rebroadcast(x)[idx] => rebroadcast(x[idx])
    """
-    if isinstance(node.op, T.Subtensor):
+    if isinstance(node.op, Subtensor):
        u = node.inputs[0]
        if not u.owner or len(u.clients) > 1:
            return False
@@ -1736,7 +1738,7 @@ def local_subtensor_lift(node):
                new_axis += [(j, u.broadcastable[i])]
                j += 1

-            subt_x = T.Subtensor(node.op.idx_list)(u.owner.inputs[0])
+            subt_x = Subtensor(node.op.idx_list)(u.owner.inputs[0])
            rbcast_subt_x = T.Rebroadcast(*new_axis)(subt_x)

            return [rbcast_subt_x]
@@ -1764,8 +1766,8 @@ def merge_two_slices(slice1, len1, slice2, len2):
    if type(slice1) is not slice:
        raise ValueError(('First provided slice should actually be of type'
                         'slice and not an index !'), slice1)
-    sl1, reverse1 = T.get_canonical_form_slice(slice1, len1)
-    sl2, reverse2 = T.get_canonical_form_slice(slice2, len2)
+    sl1, reverse1 = get_canonical_form_slice(slice1, len1)
+    sl2, reverse2 = get_canonical_form_slice(slice2, len2)

    if type(sl2) is not slice:
        if reverse1 is None:
@@ -1885,15 +1887,15 @@ def local_subtensor_merge(node):
    expresses all slices in a canonical form, and then merges them together.
    """

-    if isinstance(node.op, T.Subtensor):
+    if isinstance(node.op, Subtensor):
        u = node.inputs[0]
-        if u.owner and isinstance(u.owner.op, T.Subtensor):
+        if u.owner and isinstance(u.owner.op, Subtensor):
            # We can merge :)
            # x actual tensor on which we are picking slices
            x = u.owner.inputs[0]
            # slices of the first applied subtensor
-            slices1 = T.get_idx_list(u.owner.inputs, u.owner.op.idx_list)
-            slices2 = T.get_idx_list(node.inputs, node.op.idx_list)
+            slices1 = get_idx_list(u.owner.inputs, u.owner.op.idx_list)
+            slices2 = get_idx_list(node.inputs, node.op.idx_list)
            # Get the shapes of the vectors !
            try:
                # try not to introduce new shape into the graph
@@ -1927,8 +1929,8 @@ def local_subtensor_merge(node):
            else:
                merged_slices += slices1[pos_1:]

-            subtens = T.Subtensor(merged_slices)
-            sl_ins = T.Subtensor.collapse(
+            subtens = Subtensor(merged_slices)
+            sl_ins = Subtensor.collapse(
                merged_slices,
                lambda x: isinstance(x, T.Variable))
            out = subtens.make_node(x, *sl_ins).outputs[0]
@@ -1941,14 +1943,14 @@ def local_subtensor_merge(node):
 @gof.local_optimizer([])
 def local_subtensor_of_alloc(node):
    """alloc[x:y] -> alloc"""
-    if not isinstance(node.op, T.Subtensor):
+    if not isinstance(node.op, Subtensor):
        return False
    u = node.inputs[0]
    if u.owner is None:
        return False
    if not isinstance(u.owner.op, T.Alloc):
        return False
-    slices = T.get_idx_list(node.inputs, node.op.idx_list)
+    slices = get_idx_list(node.inputs, node.op.idx_list)
    val = u.owner.inputs[0]
    dims = u.owner.inputs[1:]
    assert len(slices) <= len(dims)
@@ -1972,7 +1974,7 @@ def local_subtensor_of_alloc(node):
            else:
                val_slices.append(sl)

-        csl, _ = T.get_canonical_form_slice(sl, dim)
+        csl, _ = get_canonical_form_slice(sl, dim)
        if type(csl) is not slice:
            # That dimension is removed.
            pass
@@ -2026,7 +2028,7 @@ def local_IncSubtensor_serialize(node):
    def movable(i):
        # Return True iff this is a incsubtensor that we can move
        return i.owner \
-                and isinstance(i.owner.op, T.IncSubtensor) \
+                and isinstance(i.owner.op, IncSubtensor) \
                and i.type == o_type \
                and len(i.clients) == 1 \
                and not i.owner.op.set_instead_of_inc
@@ -2060,7 +2062,7 @@ def local_inplace_setsubtensor(node):
    """
    Also work for GpuIncSubtensor
    """
-    if isinstance(node.op, T.IncSubtensor) and not node.op.inplace:
+    if isinstance(node.op, IncSubtensor) and not node.op.inplace:
        new_op = node.op.__class__(
       node.op.idx_list, inplace=True,
       set_instead_of_inc=node.op.set_instead_of_inc,
@@ -2077,7 +2079,7 @@ compile.optdb.register('inplace_setsubtensor',
 @gof.local_optimizer([None])
 def local_inplace_incsubtensor1(node):
    """ also work for GpuAdvancedIncSubtensor1 """
-    if isinstance(node.op, T.AdvancedIncSubtensor1) and not node.op.inplace:
+    if isinstance(node.op, AdvancedIncSubtensor1) and not node.op.inplace:
        new_op = node.op.__class__(
            inplace=True, set_instead_of_inc=node.op.set_instead_of_inc)
        new_node = new_op(*node.inputs)
@@ -2097,7 +2099,7 @@ def local_incsubtensor_of_allocs(node):
    """
    IncSubtensor(x, zeros, idx) -> x
    """
-    if isinstance(node.op, T.IncSubtensor) and not node.op.set_instead_of_inc:
+    if isinstance(node.op, IncSubtensor) and not node.op.set_instead_of_inc:
        x = node.inputs[0]
        y = node.inputs[1]
        replace = False
@@ -2122,7 +2124,7 @@ def local_setsubtensor_of_allocs(node):

    when x is constant or alloc.
    """
-    if isinstance(node.op, T.IncSubtensor) and node.op.set_instead_of_inc:
+    if isinstance(node.op, IncSubtensor) and node.op.set_instead_of_inc:
        x = node.inputs[0]
        y = node.inputs[1]
        replace_x = None

--- a/theano/tensor/subtensor.py
+++ b/theano/tensor/subtensor.py
+from copy import copy
+from itertools import izip
+import sys
+from textwrap import dedent
+import warnings
+import logging
+_logger = logging.getLogger("theano.tensor.subtensor")
+
+import numpy
+
+import theano
+from theano.compat.six import StringIO
+from theano.gradient import DisconnectedType
+from theano import gof
+from theano.gof import Apply, Constant, hashtype, Op, Type, MethodNotDefined
+from theano.gof.python25 import maxsize
+from theano.printing import pprint
+from theano import scalar as scal
+from theano.tensor.basic import (addbroadcast, clip, sum, exp,
+                                 ARange, TensorType)
+from theano.tensor.elemwise import DimShuffle
+from theano.tensor.type_other import NoneConst, SliceType, make_slice
+from theano import config
+
+# Optional helper imported from the compiled `cutils_ext` extension.
+# The name stays bound to None when `config.cxx` is falsy or when the
+# import below fails.
+inplace_increment = None
+if config.cxx:
+    import theano.gof.cutils  # needed to import cutils_ext
+    try:
+        from cutils_ext.cutils_ext import inplace_increment
+    except ImportError:
+        # Best effort: keep the None default assigned above.
+        pass
+
+
+# Do a lazy import of the sparse module
+sparse_module_ref = None
+
+
+class AdvancedIndexingError(TypeError):
+    """
+    Raised when Subtensor is asked to perform advanced indexing.
+
+    It subclasses TypeError, so a handler written for TypeError also
+    catches it.
+    """
+
+    def __init__(self, *args):
+        # All arguments are forwarded unchanged to TypeError.
+        TypeError.__init__(self, *args)
+
+
+##########
+# Helpful functions to deal with Subtensor and IncSubtensor
+##########
+
+def get_idx_list(inputs, idx_list):
+    '''
+    Given a list of inputs to the subtensor and its idx_list reorders
+    the inputs according to the idx list to get the right values
+
+    :param inputs: sequence whose first element is not used here; the
+        remaining elements are substituted, in order, for the
+        ``gof.Type`` entries found in ``idx_list``.
+    :param idx_list: sequence of index entries. ``gof.Type`` instances
+        (also when they appear as the start, stop or step of a slice)
+        act as placeholders; any other entry is kept as is.
+
+    :returns: a tuple with one entry per element of ``idx_list``. When
+        ``inputs`` has a single element, this is just
+        ``tuple(idx_list)``.
+    '''
+
+    # The subtensor (or idx_list) does not depend on the inputs.
+    if len(inputs) == 1:
+        return tuple(idx_list)
+    # Reversed so that pop() below hands the inputs out in their
+    # original order.
+    indices = list(reversed(list(inputs[1:])))
+
+    # General case
+    def convert(entry):
+        # A Type entry is a placeholder: replace it by the next unused
+        # input.
+        if isinstance(entry, gof.Type):
+            return indices.pop()
+        elif isinstance(entry, slice):
+            # start, stop and step are resolved in that order.
+            return slice(convert(entry.start),
+                     convert(entry.stop),
+                     convert(entry.step))
+        else:
+            return entry
+    cdata = tuple(map(convert, idx_list))
+    return cdata
+
+
+def get_canonical_form_slice(theslice, length):
+    '''
+    Given a slice [start:stop:step] transform it into a canonical form
+    that respects the conventions imposed by python and numpy.
+
+    In a canonical form a slice is represented by a canonical form slice,
+    in which 0 <= start <= stop <= length and step > 0, and a flag which says
+    if the resulting set of numbers needs to be reversed or not.
+
+    :param theslice: either a ``slice`` object or a single index. The
+        members of the slice may be None.
+    :param length: length of the dimension being indexed. It is handled
+        both when a constant value can be extracted from it and when it
+        cannot.
+
+    :returns: a pair. For a ``slice`` input it is
+        ``(slice(start, stop, step), flag)`` where ``flag`` is 1 when
+        the step equals 1 and the sign of the step otherwise (the
+        result of ``sgn(step)`` when the step is not constant). For any
+        other input it is ``(index, 1)``, where ``length`` has been
+        added to the index if it was negative.
+    '''
+    # Function-level import -- presumably to avoid a circular import with
+    # theano.tensor; TODO confirm.
+    from theano.tensor import switch, lt, ge, sgn
+    if isinstance(theslice, slice):
+
+        def analyze(x):
+            # Returns (value, is_constant): the scalar constant behind `x`
+            # when one can be extracted, otherwise the result of
+            # extract_constant(x) with the flag set to False.
+            try:
+                x_constant = theano.tensor.get_scalar_constant_value(x)
+                is_constant = True
+            except theano.tensor.NotScalarConstantError:
+                x_constant = theano.tensor.extract_constant(x)
+                is_constant = False
+            return x_constant, is_constant
+
+        start, is_start_constant = analyze(theslice.start)
+        stop, is_stop_constant = analyze(theslice.stop)
+        step, is_step_constant = analyze(theslice.step)
+        # Note: `length` is rebound to its analyzed value from here on.
+        length, is_length_constant = analyze(length)
+
+        if step is None:
+            step = 1
+
+        # First handle the easier and common case where `step` is 1 and
+        # either `start` or `stop` is a range boundary. More specializations
+        # could be added later. This makes the resulting graph smaller than
+        # in the generic case below.
+        if step == 1:
+            is_start_0 = (
+                    start in [None, 0] or
+                    (is_start_constant and is_length_constant and
+                     start < 0 and start + length <= 0))
+            is_stop_length = (
+                    stop in [None, length, maxsize] or
+                    (is_stop_constant and is_length_constant and
+                     stop >= length))
+            if is_start_0:
+                # 0:stop:1
+                if is_stop_length:
+                    # Full slice.
+                    return slice(0, length, 1), 1
+                if is_stop_constant and stop >= 0:
+                    return (slice(0, switch(lt(stop, length), stop, length),
+                                  1), 1)
+                stop_plus_len = stop + length
+                stop = switch(
+                        lt(stop, 0),
+                        # stop < 0
+                        switch(
+                            lt(stop_plus_len, 0),
+                            # stop + len < 0
+                            0,
+                            # stop + len >= 0
+                            stop_plus_len),
+                        # stop >= 0: use min(stop, length)
+                        switch(lt(stop, length), stop, length))
+                return slice(0, stop, 1), 1
+            elif is_stop_length:
+                # start:length:1
+                if is_start_constant and start >= 0:
+                    return slice(switch(lt(start, length), start, length),
+                                 length, 1), 1
+                start_plus_len = start + length
+                start = switch(
+                        lt(start, 0),
+                        # start < 0
+                        switch(
+                            lt(start_plus_len, 0),
+                            # start + len < 0
+                            0,
+                            # start + len >= 0
+                            start_plus_len),
+                        # start >= 0: use min(start, length)
+                        switch(lt(start, length), start, length))
+                return slice(start, length, 1), 1
+
+        # This is the generic case.
+
+        # switch_neg_step(a, b) selects `a` for a negative step and `b`
+        # otherwise; it is resolved in Python when the step is constant
+        # and through `switch` when it is not.
+        if is_step_constant:
+            # When we know the sign of `step`, the graph can be made simpler.
+            assert step != 0
+            if step > 0:
+                def switch_neg_step(a, b):
+                    return b
+                abs_step = step
+                sgn_step = 1
+            else:
+                def switch_neg_step(a, b):
+                    return a
+                abs_step = -step
+                sgn_step = -1
+        else:
+            is_step_neg = lt(step, 0)
+
+            def switch_neg_step(a, b):
+                return switch(is_step_neg, a, b)
+            abs_step = abs(step)
+            sgn_step = sgn(step)
+
+        # Values used when start/stop are omitted: the whole range, walked
+        # in the direction given by the sign of the step.
+        defstart = switch_neg_step(length - 1, 0)
+        defstop = switch_neg_step(-1, length)
+        if start is None:
+            start = defstart
+        else:
+            # Shift a negative start by `length`, then clamp what is still
+            # out of range on either side.
+            start = switch(lt(start, 0), start + length, start)
+            start = switch(lt(start, 0), switch_neg_step(-1, 0), start)
+            start = switch(ge(start, length),
+                           switch_neg_step(length - 1, length),
+                           start)
+        if stop in [None, maxsize]:
+            # The special "maxsize" case is probably not needed here,
+            # as slices containing maxsize are not generated by
+            # __getslice__ anymore.
+            stop = defstop
+        else:
+            # Same treatment as `start`: shift if negative, then clamp
+            # into [-1, length].
+            stop = switch(lt(stop, 0), stop + length, stop)
+            stop = switch(lt(stop, 0), -1, stop)
+            stop = switch(ge(stop, length), length, stop)
+
+        # For a negative step the range is re-expressed with the positive
+        # step `abs_step`: it ends just after `start` and begins
+        # (slice_len - 1) * abs_step positions before `start`. For a
+        # positive step, start and stop are used directly.
+        nw_stop = switch_neg_step(start + 1, stop)
+        slice_len = (start - stop - 1) // abs_step + 1
+        slice_len = switch(lt(slice_len, 0), 0, slice_len)
+        neg_start = nw_stop - (slice_len - 1) * abs_step - 1
+        neg_start = switch(lt(neg_start, 0), (nw_stop - 1), neg_start)
+        nw_start = switch_neg_step(neg_start, start)
+        nw_start = switch(lt(nw_start, 0), 0, nw_start)
+        nw_stop = switch(lt(nw_stop, 0), 0, nw_stop)
+        # Ensure start <= stop.
+        nw_start = switch(lt(nw_start, nw_stop), nw_start, nw_stop)
+
+        nw_step = abs_step
+        # The flag is the sign of the step, except that a step of 1 always
+        # yields the plain integer 1.
+        if step != 1:
+            reverse = sgn_step
+            return slice(nw_start, nw_stop, nw_step), reverse
+        else:
+            return slice(nw_start, nw_stop, nw_step), 1
+    else:
+        # Single index: only a negative value is adjusted, by adding
+        # `length`; no other bound check is made here.
+        value = theano.tensor.extract_constant(theslice)
+        value = switch(lt(value, 0), (value + length), value)
+
+        return value, 1
+
+
+class Subtensor(Op):
+    """Return a subtensor view
+
+    The inputs array is the tensor x, followed by scalar integer types.
+    TODO: WRITEME: how are the scalar integer variables formatted?
+
+    This class uses a relatively complex internal representation of the inputs
+    to remember how the input tensor x should be sliced.
+
+    idx_list: instance variable TODO: WRITEME: is this a list or a tuple?
+                                        (old docstring gives two conflicting
+                                        descriptions)
+              elements are either integers, theano scalar types, or slices.
+              one element per "explicitly named dimension"
+                TODO: WRITEME: what is an "explicitly named dimension" ?
+
+              if integer:
+                  indexes into the inputs array
+              if slice:
+                  start/stop/step members of each slice are integer indices
+                  into the inputs array or None
+                  integer indices be actual integers or theano scalar types
+
+    Note that the idx_list defines the Op, so two Subtensor instances are
+    considered to be different Ops if they have different idx_list fields.
+    This means that the entries in it are theano Types, not theano Variables.
+
+    @todo: add support for advanced tensor indexing (in Subtensor_dx too).
+
+    """
+    e_invalid = ('The index list is longer (size %d) than the number of '
+                 'dimensions of the tensor(namely %d). You are asking for '
+                 'a dimension of the tensor that does not exist! You might '
+                 'need to use dimshuffle to add extra dimension to your '
+                 'tensor.')
+    e_subslice = 'nested slicing is not supported'
+    e_indextype = "Invalid index type or slice for Subtensor"
+    debug = 0
+
+    view_map = {0: [0]}
+
+    @staticmethod
+    def collapse(idxs, cond):
+        """
+        Collect, in order, the entries of idxs that satisfy cond.
+
+        idxs: a list of indices or slices.
+        cond: a callable that returns a bool
+
+        returns: a flat list of the entries for which cond is true.
+                A slice for which cond is false is searched through its
+                start, stop and step (in that order, recursively);
+                if cond is true for an entry, does not flatten it.
+                Any other entry for which cond is false is left out.
+
+        """
+        ret = []
+
+        def helper(entry):
+            # cond is tested first, so a slice accepted by cond is kept
+            # whole instead of being descended into.
+            if cond(entry):
+                ret.append(entry)
+            elif isinstance(entry, slice):
+                helper(entry.start)
+                helper(entry.stop)
+                helper(entry.step)
+
+        for idx in idxs:
+            helper(idx)
+
+        return ret
+
+    @staticmethod
+    def convert(entry, slice_ok=True):
+        """
+        The "idx_list" field is unique to each Subtensor instance.
+        It is not unique to each Apply node, so it should not refer to
+        specific Variables. This method changes references to Variables
+        into references to Types.
+        TODO: WRITEME: This method also accepts "entry" already being a Type;
+            when would that happen?
+
+        entry: a Variable, a Type, a slice, a numpy integer or a Python
+            int/long.
+        slice_ok: whether a slice is accepted. The members of a slice are
+            converted with slice_ok=False, so a slice nested in a slice
+            ends in the final AdvancedIndexingError branch.
+
+        returns: a scalar Type for a Variable or Type listed in
+            scal_types or tensor_types below, a slice whose members were
+            converted the same way, the entry itself for a numpy
+            integer, or a numpy.int64 for a Python int/long.
+
+        raises: TypeError when entry is a Variable whose type is listed
+            in invalid_scal_types or invalid_tensor_types, and
+            AdvancedIndexingError for anything not handled above.
+        """
+        invalid_scal_types = [scal.float64, scal.float32]
+        scal_types = [scal.int64, scal.int32, scal.int16, scal.int8]
+        tensor_types = [theano.tensor.lscalar, theano.tensor.iscalar,
+                        theano.tensor.wscalar, theano.tensor.bscalar]
+        invalid_tensor_types = [theano.tensor.fscalar, theano.tensor.dscalar,
+                                theano.tensor.cscalar, theano.tensor.zscalar]
+        # Reject Variables of the explicitly invalid types first, with a
+        # plain TypeError instead of AdvancedIndexingError.
+        if (isinstance(entry, gof.Variable)
+                and (entry.type in invalid_scal_types
+                     or entry.type in invalid_tensor_types)):
+            raise TypeError("Expected an integer")
+
+        # Integer scalars: keep the scalar Type itself.
+        if isinstance(entry, gof.Variable) and entry.type in scal_types:
+            return entry.type
+        elif isinstance(entry, gof.Type) and entry in scal_types:
+            return entry
+
+        # Entries of the listed tensor types, broadcastable along every
+        # dimension, are replaced by a scalar Type of the same dtype.
+        if (isinstance(entry, gof.Variable)
+                and entry.type in tensor_types
+                and numpy.all(entry.type.broadcastable)):
+            return scal.Scalar(entry.type.dtype)
+        elif (isinstance(entry, gof.Type)
+                and entry in tensor_types
+                and numpy.all(entry.broadcastable)):
+            return scal.Scalar(entry.dtype)
+        elif slice_ok and isinstance(entry, slice):
+            a = entry.start
+            b = entry.stop
+            c = entry.step
+
+            if a is not None:
+                slice_a = Subtensor.convert(a, False)
+            else:
+                slice_a = None
+
+            if b is not None and b != maxsize:
+                # The special "maxsize" case is probably not needed here,
+                # as slices containing maxsize are not generated by
+                # __getslice__ anymore.
+                slice_b = Subtensor.convert(b, False)
+            else:
+                # A stop equal to maxsize is stored as None.
+                slice_b = None
+
+            if c is not None:
+                slice_c = Subtensor.convert(c, False)
+            else:
+                slice_c = None
+
+            return slice(slice_a, slice_b, slice_c)
+        # There is a bug in numpy that results in isinstance(x, int) returning
+        # False for numpy integers.
+        # See <http://projects.scipy.org/numpy/ticket/2235>.
+        elif isinstance(entry, numpy.integer):
+            return entry
+        # On Windows 64-bit, shapes are returned as Python long, as they can
+        # be bigger than what a Python int can hold.
+        # Shapes should always fit in a numpy.int64, and we support them
+        # better.
+        # In Python 3, long replaced int, so the value must also be made to
+        # fit in an int64 there.
+        elif isinstance(entry, (int, long)):
+            entry64 = numpy.int64(entry)
+            return entry64
+        else:
+            raise AdvancedIndexingError(Subtensor.e_indextype, entry)
+
+    def __init__(self, idx_list):
+        """
+        :param idx_list: the index entries; each one is passed through
+            Subtensor.convert and the result is stored as a tuple.
+        """
+        self.idx_list = tuple(self.convert(entry) for entry in idx_list)
+        # Filled in by perform() once the index is known not to depend on
+        # any runtime input.
+        self.perform_cache_cdata = None
+
+    @staticmethod
+    def my_as_scalar(a):
+        """
+        Convert either a TensorVariable or a ScalarVariable to a scalar.
+
+        scal.as_scalar does not know about tensor types (it would create
+        a circular import), so tensor variables are handled here first.
+        """
+        is_tensor_variable = (isinstance(a, gof.Variable)
+                              and isinstance(a.type, TensorType))
+        if not is_tensor_variable:
+            return scal.as_scalar(a)
+        return theano.tensor.scalar_from_tensor(a)
+
+    def make_node(self, x, *inputs):
+        """
+        Build the Apply node computing the subtensor of x.
+
+            x: the tensor to take a subtensor of
+            inputs: a list of theano Scalars, one for each Type entry of
+                idx_list (slice components included), in order
+
+        :raises ValueError: if idx_list has more entries than x has
+            dimensions (the exception carries subtensor_invalid = True)
+        :raises IndexError: if the number of inputs does not match the
+            number of Type entries in idx_list
+        :raises TypeError: if an input does not have the Type recorded in
+            idx_list at its position
+        """
+        x = theano.tensor.as_tensor_variable(x)
+        inputs = tuple(self.my_as_scalar(a) for a in inputs)
+
+        idx_list = list(self.idx_list)
+        if len(idx_list) > x.type.ndim:
+            exception = ValueError(Subtensor.e_invalid % (
+                len(idx_list), x.type.ndim))
+            exception.subtensor_invalid = True
+            raise exception
+
+        # infer the broadcasting pattern: dimensions indexed by an integer
+        # are dropped, the ones indexed by a slice (explicit or implied by
+        # the padding) keep the broadcastable flag of x.
+        padded = (idx_list
+                + [slice(None, None, None)] * (x.type.ndim - len(idx_list)))
+        broadcastable = [bc for p, bc in izip(padded, x.type.broadcastable)
+                if isinstance(p, slice)]
+
+        input_types = Subtensor.collapse(idx_list,
+                lambda entry: isinstance(entry, gof.Type))
+        if len(inputs) != len(input_types):
+            raise IndexError(
+                    "Not enough inputs to fill in the Subtensor template.",
+                    inputs, idx_list)
+        for input, expected_type in izip(inputs, input_types):
+            if input.type != expected_type:
+                # Report the template type as "expected" and the type of
+                # the provided input as "got".
+                raise TypeError(
+                    "Wrong type for Subtensor template. Expected %s, got %s."
+                    % (expected_type, input.type))
+
+        return gof.Apply(self,
+                         (x, ) + inputs,
+                         [theano.tensor.tensor(dtype=x.type.dtype,
+                                 broadcastable=broadcastable)])
+
+    def perform(self, node, inputs, out_):
+        """
+        Store in the output the result of indexing inputs[0].
+
+        When x is the only input, the index does not depend on any runtime
+        value, so it is computed on the first call and cached on the op.
+        """
+        out, = out_
+        x = inputs[0]
+
+        cdata = self.perform_cache_cdata
+        if cdata is None:
+            cdata = get_idx_list(inputs, self.idx_list)
+            if len(cdata) == 1:
+                cdata = cdata[0]
+            # Only cache when there is no index input besides x.
+            if len(inputs) == 1:
+                self.perform_cache_cdata = cdata
+
+        out[0] = numpy.asarray(x.__getitem__(cdata))
+
+    def infer_shape(self, node, shapes):
+        """
+        Compute the shape of the output from the shape of x.
+
+        :param node: the Apply node; its inputs are used to rebuild the
+            actual index list
+        :param shapes: the shapes of the node's inputs; only shapes[0],
+            the shape of x, is used
+
+        :return: a list holding the single output shape, itself a list
+            with one length for each dimension that is kept
+        """
+        xshp = shapes[0]
+        assert len(xshp) == node.inputs[0].ndim
+        outshp = []
+        actual_idx_list = list(get_idx_list(node.inputs, self.idx_list))
+        # Dimensions beyond the end of idx_list are taken whole.
+        padded = (actual_idx_list +
+                  [slice(None, None, None)] * (len(xshp) - len(self.idx_list)))
+        # i counts the dimensions that are kept (those indexed by a slice).
+        i = 0
+        for idx, xl in izip(padded, xshp):
+            if isinstance(idx, slice):
+                # If it is the default (None, None, None) slice, or a variant,
+                # the shape will be xl
+                if ((idx.start in [None, 0])
+                    and (idx.stop in [None, maxsize])
+                    and (idx.step is None or idx.step == 1)):
+                    outshp.append(xl)
+                else:
+                    # NOTE(review): the length formulas below presumably rely
+                    # on the canonical form having start <= stop and a
+                    # positive step -- confirm in get_canonical_form_slice.
+                    cnf = get_canonical_form_slice(idx, xl)[0]
+                    if cnf.step == 1:
+                        length = cnf.stop - cnf.start
+                    else:
+                        length = (cnf.stop - cnf.start - 1) // cnf.step + 1
+                    outshp.append(length)
+                i += 1
+            else:
+                # That dimension is dropped
+                pass
+        assert i == node.outputs[0].ndim
+        assert len(outshp) == node.outputs[0].ndim
+        return [outshp]
+
+    def grad(self, inputs, grads):
+        """
+        Gradient with respect to x; the index inputs are disconnected.
+
+        When the output dtype is an integer one, the gradient on x is a
+        floatX zero tensor shaped like x. Otherwise gz is added into a
+        zero tensor shaped like x at the indexed position.
+        """
+        gz, = grads
+        x = inputs[0]
+        index_inputs = inputs[1:]
+
+        if 'int' in self(*inputs).dtype:
+            gx = x.zeros_like().astype(theano.config.floatX)
+        else:
+            scatter = IncSubtensor(self.idx_list)
+            gx = scatter(x.zeros_like(), gz, *index_inputs)
+
+        # One disconnected gradient entry for each index input.
+        disconnected = [DisconnectedType()()] * len(index_inputs)
+        return [gx] + disconnected
+
+    def connection_pattern(self, node):
+        """
+        Return one row per input of node: the output is connected to the
+        first input (x) only, never to the index inputs.
+        """
+        return [[True]] + [[False] for _ in node.inputs[1:]]
+
+    def __eq__(self, other):
+        """Two ops are equal when they have the same class and idx_list."""
+        if not type(self) == type(other):
+            return False
+        return self.idx_list == other.idx_list
+
+    def __hash__(self):
+        """
+        Hash of idx_list, with each slice replaced by its
+        (start, stop, step) tuple.
+        """
+        # TODO: optimize by cache this hash value
+        def hashable(entry):
+            if isinstance(entry, slice):
+                return (entry.start, entry.stop, entry.step)
+            return entry
+
+        return hash(tuple([hashable(entry) for entry in self.idx_list]))
+
+    @staticmethod
+    def str_from_slice(entry):
+        """
+        Format a slice as "start:stop:step", leaving empty the components
+        that are None.
+        """
+        def render(bound):
+            if bound is None:
+                return ""
+            return str(bound)
+
+        bounds = (entry.start, entry.stop, entry.step)
+        return ":".join([render(bound) for bound in bounds])
+
+    def __str__(self):
+        """Return "ClassName{idx, idx, ...}" for the entries of idx_list."""
+        def render(entry):
+            if isinstance(entry, slice):
+                return self.str_from_slice(entry)
+            return str(entry)
+
+        rendered = [render(entry) for entry in self.idx_list]
+        return "%s{%s}" % (self.__class__.__name__, ", ".join(rendered))
+
+    @staticmethod
+    def default_helper_c_code_args():
+        """
+        Returns a dictionary of default arguments to
+        helper_c_code
+
+        The keys are the names of the optional keyword parameters of
+        helper_c_code. "update_flags" is a template in which
+        %(view_name)s still has to be substituted.
+        """
+        defaults = {}
+        defaults["c_prefix"] = "PyArray"
+        defaults["update_flags"] = (
+                "PyArray_UpdateFlags(%(view_name)s,"
+                " NPY_ARRAY_C_CONTIGUOUS|"
+                "NPY_ARRAY_F_CONTIGUOUS);")
+        defaults["set_data"] = "PyArray_set_data"
+        defaults["set_dim"] = "PyArray_set_dim"
+        defaults["set_stride"] = "PyArray_set_stride"
+        defaults["strides_mul"] = 1
+        defaults["view_name"] = "xview"
+        return defaults
+
+    @staticmethod
+    def helper_c_code(node, name, inputs, outputs, sub, idx_list,
+                      c_prefix=None,
+                      update_flags=None,
+                      set_data=None,
+                      set_dim=None,
+                      set_stride=None,
+                      strides_mul=None,
+                      view_name=None
+                  ):
+        """
+        Return the C code that turns an existing view into the subtensor
+        of x described by idx_list, by updating its data pointer,
+        dimensions and strides.
+
+        The parameters c_prefix, update_flags, set_data, set_dim,
+        set_stride and strides_mul are there to allow reusing this
+        function on PyArray and CudaNdarray object.
+
+        :param node: the Apply node; only node.inputs[0].ndim is read
+        :param name: not used here
+        :param inputs: names of the C variables of the inputs; inputs[0]
+            is x, the following ones are the symbolic indices, in the
+            order of the Type entries of idx_list
+        :param outputs: a sequence holding the name of the single output
+        :param sub: substitution dictionary; sub['fail'] is the code to
+            run on failure
+        :param idx_list: the index template (integers, Types and slices)
+        :param view_name: name of the C variable holding the view to
+            update; it has to be created by the caller. On every failure
+            path the generated code releases it exactly once before
+            running sub['fail'].
+
+        Any optional parameter left to None takes its value from
+        default_helper_c_code_args().
+
+        NOTE: helper_c_code_cache_version has to be increased each time
+        the generated code changes.
+        """
+
+        default_args = Subtensor.default_helper_c_code_args()
+
+        if update_flags is None:
+            update_flags = default_args['update_flags']
+
+        if set_data is None:
+            set_data = default_args['set_data']
+
+        if set_dim is None:
+            set_dim = default_args['set_dim']
+
+        if set_stride is None:
+            set_stride = default_args['set_stride']
+
+        if strides_mul is None:
+            strides_mul = default_args['strides_mul']
+
+        if c_prefix is None:
+            c_prefix = default_args['c_prefix']
+
+        if view_name is None:
+            view_name = default_args['view_name']
+
+        #update_flags may depend on view_name
+        update_flags = update_flags % locals()
+
+        #
+        # two arrays are created in C code:
+        # is_slice: len == ndim, 0 means int, 1 means slice
+        # subtensor_spec: len = n_ints + 3 * n_slices
+        #
+        fail = sub['fail']
+        init_cmds = []  # initialization for subtensor_spec
+        is_slice = []
+        # TODO: change that, it might lead to unexpected results,
+        # see assembla-#767
+        NONE_CODE = maxsize - 1
+
+        # pos[0] is the next free position in subtensor_spec, pos[1] the
+        # position of the next index input to use (inputs[0] is x).
+        pos = [0, 1]  # annoying version of global variable for init_entry
+
+        def inc_spec_pos(amt):
+            pos[0] += amt
+
+        def inc_input_pos(amt):
+            pos[1] += amt
+
+        def spec_pos():
+            return pos[0]
+
+        def input_pos():
+            return pos[1]
+
+        def init_entry(entry, depth=0):
+            # Emit the initialization of subtensor_spec for one entry.
+            # depth is 0 for a top-level entry and 1 for the components
+            # of a slice; only top-level entries are recorded in is_slice.
+            if isinstance(entry, (numpy.integer, int)):
+                init_cmds.append(
+                        "subtensor_spec[%i] = %i;" % (spec_pos(),
+                            entry))
+                inc_spec_pos(1)
+                if depth == 0:
+                    is_slice.append(0)
+            elif isinstance(entry, Type):
+                init_cmds.append(
+                        "subtensor_spec[%i] = %s;" % (spec_pos(),
+                            inputs[input_pos()]))
+                inc_spec_pos(1)
+                inc_input_pos(1)
+                if depth == 0:
+                    is_slice.append(0)
+            elif entry is None:
+                init_cmds.append(
+                        "subtensor_spec[%i] = %i;" % (spec_pos(),
+                            NONE_CODE))
+                inc_spec_pos(1)
+                if depth == 0:
+                    is_slice.append(0)
+            elif depth == 0 and isinstance(entry, slice):
+                init_entry(entry.start, depth + 1)
+                init_entry(entry.stop, depth + 1)
+                init_entry(entry.step, depth + 1)
+                is_slice.append(1)
+            else:
+                assert 0, entry
+
+        for entry in idx_list:
+            init_entry(entry)
+        # make sure we used all inputs
+        assert input_pos() == len(inputs), input_pos()
+        assert len(is_slice) <= node.inputs[0].ndim, node.inputs[0].ndim
+
+        len_is_slice = len(is_slice)
+
+        len_subtensor_spec = spec_pos()
+
+        is_slice_init = ",".join([str(s) for s in is_slice])
+        subtensor_init = "\n".join(init_cmds)
+
+        x, = inputs[:1]
+        z, = outputs
+
+        xview = view_name
+
+        # The template below is filled in from the local variables of this
+        # function, so their names must not be changed.
+        rval = """
+        #define PyArray_set_dim(obj, idx, d) PyArray_DIMS(obj)[idx]=d
+        #define PyArray_set_stride(obj, idx, d) PyArray_STRIDES(obj)[idx]=d
+        #define PyArray_set_data(obj, ptr, base) PyArray_BYTES(obj)=ptr
+
+        // The subtensor is created by iterating over the dimensions
+        // and updating stride, shape, and data pointers
+
+        int is_slice[] = {%(is_slice_init)s};
+        npy_intp subtensor_spec[%(len_subtensor_spec)s];
+        %(subtensor_init)s;
+        int spec_pos = 0; //position in subtensor_spec
+        int inner_ii = 0; // the current dimension of zview
+        int outer_ii = 0; // current dimension of z
+
+        char* ptr = (char*) %(c_prefix)s_BYTES(%(xview)s);
+
+        if ((%(c_prefix)s_DIMS(%(xview)s) == %(c_prefix)s_DIMS(%(x)s))
+            && (%(c_prefix)s_DIMS(%(x)s) != NULL))
+        {
+            PyErr_Format(PyExc_ValueError, "x and %(xview)s"
+                         "(with %%d dims) have the same dimensions"
+                         " pointers: %%p and %%p",
+                         %(c_prefix)s_NDIM(%(x)s),
+                         %(c_prefix)s_DIMS(%(xview)s),
+                         %(c_prefix)s_DIMS(%(x)s));
+            Py_XDECREF(%(xview)s);
+            %(fail)s;
+        }
+        if (%(c_prefix)s_STRIDES(%(xview)s) == %(c_prefix)s_STRIDES(%(x)s)
+            && (%(c_prefix)s_DIMS(%(x)s) != NULL))
+        {
+            PyErr_Format(PyExc_ValueError, "x and %(xview)s"
+                         "(with %%d dims) have the same strides"
+                         " pointers: %%p and %%p",
+                         %(c_prefix)s_NDIM(%(x)s),
+                         %(c_prefix)s_STRIDES(%(xview)s),
+                         %(c_prefix)s_STRIDES(%(x)s));
+            Py_XDECREF(%(xview)s);
+            %(fail)s;
+        }
+
+        for (; outer_ii < %(len_is_slice)s; ++outer_ii)
+        {
+            if (is_slice[outer_ii])
+            {
+                npy_intp length = %(c_prefix)s_DIMS(%(x)s)[outer_ii];
+                npy_intp slicelength;
+                npy_intp start = subtensor_spec[spec_pos+0];
+                npy_intp stop  = subtensor_spec[spec_pos+1];
+                npy_intp step  = subtensor_spec[spec_pos+2];
+                if (step == %(NONE_CODE)s) step = 1;
+
+                npy_intp defstart = step < 0 ? length-1 : 0;
+                npy_intp defstop = step < 0 ? -1 : length;
+
+                // logic adapted from
+                // PySlice_GetIndicesEx in python source
+                if (!step)
+                {
+                    // The view is released once, like on the other
+                    // failure paths.
+                    PyErr_Format(PyExc_ValueError,
+                                 "slice step cannot be zero");
+                    Py_XDECREF(%(xview)s);
+                    %(fail)s;
+                }
+
+                if (start == %(NONE_CODE)s)
+                {
+                    start = defstart;
+                }
+                else
+                {
+                    if (start < 0) start += length;
+                    if (start < 0) start = (step < 0) ? -1 : 0;
+                    if (start >= length)
+                        start = (step < 0) ? length - 1 : length;
+                }
+
+                if (stop == %(NONE_CODE)s)
+                {
+                    stop = defstop;
+                }
+                else
+                {
+                    if (stop < 0) stop += length;
+                    if (stop < 0) stop = (step < 0) ? -1 : 0;
+                    if (stop >= length)
+                        stop = (step < 0) ? length - 1 : length;
+                }
+
+                if ((step < 0 && stop >= start)
+                    || (step > 0 && start >= stop)) {
+                    slicelength = 0;
+                }
+                else if (step < 0) {
+                    slicelength = (stop-start+1)/step+1;
+                }
+                else {
+                    slicelength = (stop-start-1)/step+1;
+                }
+
+                if (0){
+                    fprintf(stdout, "start %%zi\\n", start);
+                    fprintf(stdout, "stop %%zi\\n", stop);
+                    fprintf(stdout, "step %%zi\\n", step);
+                    fprintf(stdout, "length %%zi\\n", length);
+                    fprintf(stdout, "slicelength %%zi\\n", slicelength);
+                }
+
+                assert (slicelength <= length);
+
+                ptr += %(c_prefix)s_STRIDES(%(x)s)[outer_ii] * start *
+                       %(strides_mul)s;
+                %(set_dim)s(%(xview)s, inner_ii, slicelength);
+                %(set_stride)s(%(xview)s, inner_ii,
+                               %(c_prefix)s_STRIDES(%(x)s)[outer_ii] * step);
+
+                inner_ii += 1;
+                spec_pos += 3;
+            }
+            else // tuple coord `outer_ii` is an int
+            {
+                // npy_intp, like subtensor_spec, so that a large index
+                // is not truncated.
+                npy_intp idx = subtensor_spec[spec_pos];
+                if (idx < 0) idx += %(c_prefix)s_DIMS(%(x)s)[outer_ii];
+                if (idx >= 0)
+                {
+                    if (idx < %(c_prefix)s_DIMS(%(x)s)[outer_ii])
+                    {
+                        ptr += %(c_prefix)s_STRIDES(%(x)s)[outer_ii] * idx *
+                               %(strides_mul)s;
+                    }
+                    else
+                    {
+                        PyErr_Format(PyExc_IndexError,"index out of bounds");
+                        Py_XDECREF(%(xview)s);
+                        %(fail)s;
+                    }
+                }
+                else
+                {
+                    PyErr_Format(PyExc_IndexError,"index out of bounds");
+                    Py_XDECREF(%(xview)s);
+                    %(fail)s;
+                }
+
+                spec_pos += 1;
+            }
+        }
+        %(set_data)s(%(xview)s, ptr, (PyObject*)NULL);
+        assert (inner_ii <= %(c_prefix)s_NDIM(%(xview)s));
+        while (inner_ii < %(c_prefix)s_NDIM(%(xview)s))
+        {
+            assert (outer_ii < %(c_prefix)s_NDIM(%(x)s));
+            %(set_dim)s(%(xview)s, inner_ii,
+                        %(c_prefix)s_DIMS(%(x)s)[outer_ii]);
+            %(set_stride)s(%(xview)s, inner_ii,
+                           %(c_prefix)s_STRIDES(%(x)s)[outer_ii]);
+            inner_ii += 1;
+            outer_ii += 1;
+        }
+        %(update_flags)s
+        """ % locals()
+        # print rval
+        return rval
+
+    @staticmethod
+    def helper_c_code_cache_version():
+        """Version of the C code generated by helper_c_code."""
+        # 6: the view is released only once when a slice step is zero, and
+        #    integer indices are stored in a npy_intp.
+        return (6,)
+
+    def c_code(self, node, name, inputs, outputs, sub):  # DEBUG
+        """
+        Return the C code of the op: create a new array object on the
+        data of x, let helper_c_code turn it into the subtensor, then
+        store it in the output with x as its base.
+
+        :raises NotImplementedError: if the first input of node is not of
+            a TensorType
+        """
+        if not isinstance(node.inputs[0].type, theano.tensor.TensorType):
+            raise NotImplementedError()
+
+        x = inputs[0]
+        z, = outputs
+        names = {
+            'x': x,
+            'z': z,
+            'view_ndim': node.outputs[0].ndim,
+            'fail': sub['fail'],
+        }
+
+        build_view = """
+        //TODO: give this Op a second output so that this view can be cached
+        //TODO: alternatively, fix the memory leak on failure
+        Py_INCREF(PyArray_DESCR(%(x)s));
+        PyArrayObject * xview = (PyArrayObject*)PyArray_NewFromDescr(
+                &PyArray_Type,
+                PyArray_DESCR(%(x)s),
+                %(view_ndim)s,
+                PyArray_DIMS(%(x)s),
+                PyArray_STRIDES(%(x)s),
+                PyArray_DATA(%(x)s),
+                %(x)s->flags,
+                NULL);
+        if (!xview)
+        {
+            %(fail)s;
+        }
+        """ % names
+
+        get_xview = self.helper_c_code(node, name, inputs, outputs, sub,
+                self.idx_list)
+
+        finish_view = """
+        if (%(z)s) Py_DECREF(%(z)s);
+        Py_INCREF(py_%(x)s);
+        PyArray_BASE(xview) = py_%(x)s;
+        assert(py_%(x)s == (PyObject*)%(x)s);
+        %(z)s = xview;
+        """ % names
+
+        # The helper code is wrapped in its own C scope.
+        return "".join([build_view, "{", get_xview, "}", finish_view])
+
+    def c_code_cache_version(self):
+        """
+        Version of this op's C code, which includes the version of the
+        code generated by helper_c_code.
+        """
+        helper_version = self.helper_c_code_cache_version()
+        # If `helper_c_code_cache_version` is not versioned we do not want to
+        # have a versioned version of this op's C code.
+        if len(helper_version) != 0:
+            return (2, helper_version)
+        return ()
+
+    def R_op(self, inputs, eval_points):
+        """
+        Apply this op to the evaluation point of x, with the same indices.
+
+        Returns [None] when no evaluation point is given for x.
+        """
+        # Subtensor is not differentiable wrt to its indices, therefore we
+        # do not even need to consider the eval_points provided for those
+        # (they should be defaulted to zeros_like by the global R_op)
+        x_eval_point = eval_points[0]
+        if x_eval_point is not None:
+            return self.make_node(x_eval_point, *inputs[1:]).outputs
+        return [None]
+
+
+class SubtensorPrinter:
+    """Printer rendering the output of a Subtensor as "x[idx, ...]"."""
+
+    def process(self, r, pstate):
+        """
+        Return the string representation of r.
+
+        :param r: a variable that is the output of a Subtensor
+        :param pstate: the printer state; it is cloned with a different
+            precedence to print the indexed variable
+
+        :raises TypeError: if r is not the output of a Subtensor
+        """
+        if r.owner is None or not isinstance(r.owner.op, Subtensor):
+            raise TypeError("Can only print Subtensor.")
+
+        idxs = r.owner.op.idx_list
+        # The inputs of the node are (x, idx_0, idx_1, ...): the indexed
+        # tensor comes first, followed by one input for each Type entry of
+        # idx_list, in order. Both are thus taken from the front.
+        inputs = list(r.owner.inputs)
+        input = inputs.pop(0)
+        inbrack_pstate = pstate.clone(precedence=-1000)
+
+        def next_index():
+            # Print the next symbolic index.
+            return inbrack_pstate.pprinter.process(inputs.pop(0))
+
+        def slice_bound(bound, implicit):
+            # Text of one component of a slice; empty when the component
+            # is missing or equal to its implicit value.
+            if isinstance(bound, scal.Scalar):
+                return next_index()
+            if bound is None or bound == implicit:
+                return ""
+            return str(bound)
+
+        sidxs = []
+        for entry in idxs:
+            # Constant indices are stored as numpy integers, which are not
+            # always instances of int.
+            if isinstance(entry, (int, numpy.integer)):
+                sidxs.append(str(entry))
+            elif isinstance(entry, scal.Scalar):
+                sidxs.append(next_index())
+            elif isinstance(entry, slice):
+                # The components are handled in the order start, stop,
+                # step, which is the order of the corresponding inputs.
+                msg1 = slice_bound(entry.start, 0)
+                msg2 = slice_bound(entry.stop, maxsize)
+                msg3 = slice_bound(entry.step, None)
+                if msg3 != "":
+                    msg3 = ":%s" % msg3
+
+                sidxs.append("%s:%s%s" % (msg1, msg2, msg3))
+
+        return "%s[%s]" % (pstate.pprinter.process(
+            input,
+            pstate.clone(precedence=1000)),
+            ", ".join(sidxs))
+
+# Use SubtensorPrinter for every variable that is the output of a Subtensor.
+pprint.assign(lambda pstate, r: r.owner and isinstance(r.owner.op, Subtensor),
+        SubtensorPrinter())
+
+
+def set_subtensor(x, y, inplace=False,
+        tolerate_inplace_aliasing=False):
+    """Return x with the given subtensor overwritten by y.
+
+    This calls inc_subtensor with set_instead_of_inc=True.
+
+    Example: To replicate the numpy expression "r[10:] = 5", type
+
+    >>> new_r = set_subtensor(r[10:], 5)
+
+    :param x: symbolic variable for the lvalue of = operation
+    :param y: symbolic variable for the rvalue of = operation
+    :param inplace: passed as is to inc_subtensor
+    :param tolerate_inplace_aliasing: see inc_subtensor for documentation.
+    """
+    options = dict(set_instead_of_inc=True,
+                   tolerate_inplace_aliasing=tolerate_inplace_aliasing)
+    return inc_subtensor(x, y, inplace, **options)
+
+
+def inc_subtensor(x, y, inplace=False, set_instead_of_inc=False,
+        tolerate_inplace_aliasing=False):
+    """Return x with the given subtensor incremented by y.
+
+    :param x: the symbolic result of a Subtensor operation.
+    :param y: the amount by which to increment the subtensor in question
+    :param inplace: passed to the op that is built
+    :param set_instead_of_inc: passed to the op that is built; this is
+        what set_subtensor uses to overwrite instead of increment
+    :param tolerate_inplace_aliasing: allow x and y to be views of a single
+        underlying array even while working inplace.  For correct results,
+        x and y must not be overlapping views; if they overlap, the result
+        of this Op will generally be incorrect. This value has no effect if
+        inplace=False.
+
+    :raises TypeError: if y has more dimensions than x, or if x is not
+        the result of one of the supported operations
+
+    Example: To replicate the numpy expression "r[10:] += 5", type
+
+    >>> new_r = inc_subtensor(r[10:], 5)
+    """
+    # First of all, y cannot have a higher dimension than x,
+    # nor have non-broadcastable dimensions where x is broadcastable.
+
+    x = theano.tensor.as_tensor_variable(x)
+    y = theano.tensor.as_tensor_variable(y)
+
+    if y.ndim > x.ndim:
+        raise TypeError(("Trying to increment a %d-dimensional "
+            "subtensor with a %d-dimensional value.") % (x.ndim, y.ndim))
+
+    # The dimensions of y are aligned with the last dimensions of x.
+    dim_offset = x.ndim - y.ndim
+    for dim in range(y.ndim):
+        if (x.broadcastable[dim + dim_offset]
+                and not y.broadcastable[dim]):
+            # It is acceptable to try to increment a subtensor with a
+            # broadcastable dim with a tensor that is not broadcastable
+            # on that dimension. However, its length must then be 1.
+            # We insert a Rebroadcast Op to make sure it is the case.
+            y = addbroadcast(y, dim)
+
+    owner = x.owner
+    if not owner:
+        raise TypeError('x must be the result of a subtensor operation')
+
+    # The op to build depends on the op that produced x.
+    producer = owner.op
+
+    if isinstance(producer, Subtensor):
+        # retrieve idx_list from x.owner
+        if tolerate_inplace_aliasing:
+            destroyhandler_tolerate_aliased = [[0, 1]]
+        else:
+            destroyhandler_tolerate_aliased = []
+        the_op = IncSubtensor(producer.idx_list, inplace, set_instead_of_inc,
+                destroyhandler_tolerate_aliased=destroyhandler_tolerate_aliased
+                )
+        real_x = owner.inputs[0]
+        real_idxargs = owner.inputs[1:]
+        return the_op(real_x, y, *real_idxargs)
+
+    if isinstance(producer, AdvancedSubtensor1):
+        real_x = owner.inputs[0]
+        ilist = owner.inputs[1]
+        the_op = AdvancedIncSubtensor1(inplace,
+                                       set_instead_of_inc=set_instead_of_inc)
+        return the_op(real_x, y, ilist)
+
+    if isinstance(producer, AdvancedSubtensor):
+        real_x = owner.inputs[0]
+        ilist = owner.inputs[1:]
+        the_op = AdvancedIncSubtensor(inplace,
+                                      set_instead_of_inc=set_instead_of_inc)
+        return the_op(real_x, y, *ilist)
+
+    if isinstance(producer, DimShuffle):
+        inner_x = owner.inputs[0]
+        # In the dimshuffle case, there are in fact two dimshuffles:
+        # one to make the indexed dimension the last one,
+        # and one to put it back where it was. So, in the case where we have
+        # inc_subtensor(x[:,i], y), the graph is actually
+        # inc_subtensor((x.T)[i].T, y).
+        # We could get all the way to x, and then get rid of the dimshuffles
+        # completely, but the problem is that advanced_inc_subtensor1 can only
+        # work on the first (outer-most, left-most) dimension of x,
+        # just like advanced_subtensor1.
+        # So we call advanced_inc_subtensor1(x.T, i, y), but then we need to
+        # return something that has the same shape as x, not as x.T (inner_x).
+        # So re-apply the outer dimshuffle on the new inc_subtensor,
+        # and return advanced_inc_subtensor1(x.T, i, y).T.
+        inner_incsubtensor = inc_subtensor(inner_x, y,
+                inplace=inplace,
+                set_instead_of_inc=set_instead_of_inc,
+                tolerate_inplace_aliasing=tolerate_inplace_aliasing)
+        return producer(inner_incsubtensor, *owner.inputs[1:])
+
+    if isinstance(producer, theano.tensor.Reshape):
+        inner_x = owner.inputs[0]
+        # Try to apply inc_subtensor on inner_x.
+        # If it works, there is no need to reshape, as the inc_subtensor
+        # will have the same shape as inner_x, which is what we want.
+        return inc_subtensor(inner_x, y.flatten(),
+                inplace=inplace,
+                set_instead_of_inc=set_instead_of_inc,
+                tolerate_inplace_aliasing=tolerate_inplace_aliasing)
+
+    raise TypeError('x must be the result of a subtensor operation')
+
+
+class IncSubtensor(Op):
+    """Increment a subtensor.
+
+    This is like numpy's
+
+        x[i,j,k] += y
+
+    It is used internally to implement the gradient on Subtensor.
+
+    :param set_instead_of_inc: if True set the subtensor to the value instead
+    of incrementing it by that value.
+    """
+
+    def __init__(self, idx_list, inplace=False, set_instead_of_inc=False,
+            destroyhandler_tolerate_aliased=None):
+        """
+        :param idx_list: the index entries; each one is passed through
+            Subtensor.convert
+        :param inplace: if True, destroy_map is set so that output 0
+            destroys input 0
+        :param set_instead_of_inc: stored on the op (see the class
+            docstring)
+        :param destroyhandler_tolerate_aliased: stored as a new list;
+            None stands for an empty list
+        """
+        if destroyhandler_tolerate_aliased is None:
+            destroyhandler_tolerate_aliased = []
+
+        self.idx_list = [Subtensor.convert(entry) for entry in idx_list]
+        self.inplace = inplace
+        if inplace:
+            self.destroy_map = {0: [0]}
+        # Copied, so that the op does not share the list of the caller.
+        self.destroyhandler_tolerate_aliased = list(
+                destroyhandler_tolerate_aliased)
+        self.set_instead_of_inc = set_instead_of_inc
+
+    def __eq__(self, other):
+        """
+        Two ops are equal when they have the same class, idx_list,
+        inplace and set_instead_of_inc.
+        """
+        if not type(self) == type(other):
+            return False
+        if not self.idx_list == other.idx_list:
+            return False
+        if not self.inplace == other.inplace:
+            return False
+        return self.set_instead_of_inc == other.set_instead_of_inc
+
+    def __hash__(self):
+        """
+        Combine the hash of the class, of idx_list (each slice being
+        replaced by its (start, stop, step) tuple), of inplace and of
+        set_instead_of_inc.
+        """
+        def hashable(entry):
+            if isinstance(entry, slice):
+                return (entry.start, entry.stop, entry.step)
+            return entry
+
+        idx_key = tuple([hashable(entry) for entry in self.idx_list])
+        return (hashtype(self)
+                ^ hash(idx_key)
+                ^ hash(self.inplace)
+                ^ hash(self.set_instead_of_inc))
+
+    def __str__(self):
+        """
+        Return "ClassName{mode;idx, idx, ...}", where mode is "Inc" or
+        "Set", prefixed with "Inplace" for an inplace op.
+        """
+        def render(entry):
+            if isinstance(entry, slice):
+                return Subtensor.str_from_slice(entry)
+            return str(entry)
+
+        mode = ''
+        if self.inplace:
+            mode = 'Inplace'
+        if self.set_instead_of_inc:
+            mode += 'Set'
+        else:
+            mode += 'Inc'
+
+        rendered = [render(entry) for entry in self.idx_list]
+        return  "%s{%s;%s}" % (
+                self.__class__.__name__,
+                mode,
+                ", ".join(rendered))
+
+    def make_node(self, x, y, *inputs):
+        """
+        Build the Apply node; its output has the same type as x.
+
+            x: the tensor to increment
+            y: the value to increment by
+            inputs: the symbolic indices, one for each Type entry of
+                idx_list (slice components included), in order
+
+        :raises ValueError: if y has more dimensions than x, or if
+            idx_list has more entries than x has dimensions (in that
+            case the exception carries subtensor_invalid = True)
+        :raises IndexError: if the number of inputs does not match the
+            number of Type entries in idx_list
+        :raises TypeError: if an input does not have the Type recorded in
+            idx_list at its position
+        """
+        x, y = map(theano.tensor.as_tensor_variable, [x, y])
+        if y.ndim > x.ndim:
+            raise ValueError(("Trying to increment a %d-dimensional "
+                "subtensor with a %d-dimensional value.") % (x.ndim,
+                    y.ndim))
+        inputs = tuple(map(Subtensor.my_as_scalar, inputs))
+
+        idx_list = list(self.idx_list)
+        if len(idx_list) > x.type.ndim:
+            exception = ValueError(
+                    Subtensor.e_invalid % (
+                        len(idx_list),
+                        x.type.ndim))
+            exception.subtensor_invalid = True
+            raise exception
+
+        input_types = Subtensor.collapse(idx_list,
+                lambda entry: isinstance(entry, gof.Type))
+        if len(inputs) != len(input_types):
+            raise IndexError(
+                    "Not enough inputs to fill in the Subtensor template.",
+                    inputs, idx_list)
+        for input, expected_type in izip(inputs, input_types):
+            if input.type != expected_type:
+                # Report the template type as "expected" and the type of
+                # the provided input as "got".
+                raise TypeError(
+                    "Wrong type for Subtensor template. Expected %s, got %s."
+                    % (expected_type, input.type))
+
+        return gof.Apply(self,
+                         (x, y) + inputs,
+                         [x.type()])
+
+    def perform(self, node, inputs, out_):
+        """
+        Store in the output x with its subtensor incremented by y, or
+        set to y when set_instead_of_inc is true.
+
+        inputs is (x, y, idx_0, idx_1, ...). x is copied first unless
+        the op is inplace.
+        """
+        out, = out_
+        x, y = inputs[:2]
+        # Reversed, so that pop() gives the index values in their order.
+        pending = list(reversed(inputs[2:]))
+
+        def fill(entry):
+            # Replace each Type of the template by the next index value.
+            if isinstance(entry, gof.Type):
+                value = pending.pop()
+                if sys.version_info < (2, 5):
+                    # Before Python 2.5, PySlice_GetIndicesEx requires
+                    # Python int to be passed.
+                    as_int = int(value)
+                    if as_int != value:
+                        raise IndexError((
+                            "Invalid value for indexing: %s. "
+                            "That value may be too big.") % value)
+                    return as_int
+                return value
+            if isinstance(entry, slice):
+                return slice(fill(entry.start),
+                             fill(entry.stop),
+                             fill(entry.step))
+            return entry
+
+        cdata = tuple([fill(entry) for entry in self.idx_list])
+        if len(cdata) == 1:
+            cdata = cdata[0]
+        if not self.inplace:
+            x = x.copy()
+        sub_x = x.__getitem__(cdata)
+
+        if self.set_instead_of_inc:
+            x.__setitem__(cdata, y)
+        elif sub_x.shape:
+            # we've sliced out an N-D tensor with N > 0
+            sub_x += y
+        else:
+            # scalar case
+            x.__setitem__(cdata, sub_x + y)
+        out[0] = x
+
+    def c_code(self, node, name, inputs, outputs, sub):
+
+        # This method delegates much of the work to helper
+        # methods. This method implements the main logic
+        # but subclasses may override the helper methods
+        # to change the particulars, e.g. GpuIncSubtensor
+        # turns the view/copy operations on numpy arrays
+        # into the same operations on cuda arrays.
+
+        self.do_type_checking(node)
+
+        if self.inplace:  # convert bool to int
+            inplace = 1
+        else:
+            inplace = 0
+        x = inputs[0]
+        y = inputs[1]
+        z, = outputs
+        if self.set_instead_of_inc:  # convert bool to int
+            op_is_set = 1
+        else:
+            op_is_set = 0
+        fail = sub['fail']
+        view_ndim = (node.inputs[0].ndim -
+                     numpy.sum([not isinstance(idx, slice)
+                                for idx in self.idx_list]))
+
+        copy_of_x = self.copy_of_x(x)
+
+        copy_input_if_necessary = """
+        if (%(inplace)s)
+        {
+            if (%(x)s != %(z)s)
+            {
+                Py_XDECREF(%(z)s);
+                Py_INCREF(%(x)s);
+                %(z)s = %(x)s;
+            }
+        }
+        else
+        {
+            if (%(z)s) Py_DECREF(%(z)s);
+            %(z)s = %(copy_of_x)s;
+        }
+        """ % locals()
+
+        alloc_zview = self.make_view_array(z, view_ndim)
+        # On GPU, it takes two steps to make a view
+        link_zview = self.link_view_array(z, fail)
+
+        #Make a first view on the output, as we will write into it.
+        build_view = """
+        //TODO: give this Op a second output so that this view can be cached
+        //TODO: alternatively, fix the memory leak on failure
+        %(alloc_zview)s;
+        if (!zview)
+        {
+            %(fail)s;
+        }
+        %(link_zview)s;
+        """ % locals()
+        # make zview actually a view of %(z)s
+        helper_args = self.get_helper_c_code_args()
+        helper_args['view_name'] = 'zview'
+        get_zview = self.define_set_data() + \
+                Subtensor.helper_c_code(
+                node=node,
+                name=name,
+                inputs=outputs[:1] + inputs[2:],
+                outputs=outputs,
+                sub=sub,
+                idx_list=self.idx_list,
+                ** helper_args
+                )
+
+        copy_into = self.copy_into("zview", y)
+
+        add_to_zview = self.add_to_zview(y, fail)
+
+        make_modification = """
+        if (%(op_is_set)s)
+        {
+            if (%(copy_into)s) // does broadcasting
+            {
+                Py_DECREF(zview);
+                %(fail)s;
+            }
+        }
+        else
+        {
+            %(add_to_zview)s
+        }
+        """ % locals()
+
+        return (copy_input_if_necessary
+                + build_view
+                + "{" + get_zview + "}"
+                + make_modification
+                + "Py_DECREF(zview);"
+                )
+
+    def do_type_checking(self, node):
+        """ Should raise NotImplementedError if c_code does not support
+        the types involved in this node.
+        """
+
+        if not isinstance(node.inputs[0].type, theano.tensor.TensorType):
+            raise NotImplementedError()
+
+    def c_code_cache_version(self):
+        hv = Subtensor.helper_c_code_cache_version()
+        if hv:
+            return (1, hv)
+        else:
+            return ()
+
+    def copy_of_x(self, x):
+        """
+            :param x: a string giving the name of a C variable
+                pointing to an array
+
+            :return: C code expression to make a copy of x
+
+            Base class uses PyArrayObject *, subclasses may override for
+            different types of arrays.
+        """
+        # Parameters of PyArray_FromAny are:
+        # array
+        # dtype: we pass NULL to say any dtype is acceptable, so the existing
+        #        dtype will be copied
+        # min_depth: we pass 0 to have this parameter ignored
+        # max_depth: we pass 0 to have this parameter ignored
+        # requirements: here we pass NPY_ARRAY_ENSURECOPY to force a copy
+        # context: this is almost always NULL, I'm not sure what it's used for
+        return """(PyArrayObject*)PyArray_FromAny(py_%(x)s, NULL, 0, 0,
+                NPY_ARRAY_ENSURECOPY, NULL)""" % locals()
+
+    def make_view_array(self, x, view_ndim):
+        """
+            :param x: a string identifying an array to be viewed
+            :param view_ndim: a string specifying the number of dimensions
+                to have in the view
+
+            This doesn't need to actually set up the view with the
+            right indexing; we'll do that manually later.
+        """
+
+        return """Py_INCREF(PyArray_DESCR(%(x)s));
+        PyArrayObject * zview =
+                (PyArrayObject*)PyArray_NewFromDescr(
+                &PyArray_Type,
+                PyArray_DESCR(%(x)s),
+                %(view_ndim)s,
+                PyArray_DIMS(%(x)s),
+                PyArray_STRIDES(%(x)s),
+                PyArray_DATA(%(x)s),
+                %(x)s->flags,
+                NULL)""" % locals()
+
+    def get_helper_c_code_args(self):
+        """ Return a dictionary of arguments to pass to helper_c_code."""
+        return Subtensor.default_helper_c_code_args()
+
+    def copy_into(self, view, source):
+        """
+            view: string, C code expression for an array
+            source: string, C code expression for an array
+
+            returns a C code expression to copy source into view, and
+            return 0 on success
+        """
+        return """PyArray_CopyInto(%(view)s, %(source)s)""" % locals()
+
+    def define_set_data(self):
+        """ Returns C code used to define any macros used in the
+        set data argument to the helper C code. """
+        return ""
+
+    def link_view_array(self, x, fail):
+        """ Returns code to complete making zview a view of x"""
+
+        # On CPU there is nothing to do, make_view_array already did this
+        return ""
+
+    def set_view_base(self, x, fail):
+        """ Returns code to make zview be a correct view of x,
+        after helper_c_code is done messing with x"""
+
+        # On CPU there is nothing to do
+        return ""
+
+    def add_to_zview(self, x, fail):
+        """ Return C code to add x to zview. Should DECREF zview if the
+        add fails."""
+
+        return """
+            PyArrayObject * add_rval = (PyArrayObject*)PyNumber_InPlaceAdd(
+                    (PyObject*)zview, py_%(x)s);
+            if (add_rval)
+            {
+                assert (PyArray_Check((PyObject*)add_rval));
+                assert (PyArray_DATA(add_rval) == PyArray_DATA(zview));
+                Py_DECREF(add_rval);
+            }
+            else
+            {
+                Py_DECREF(zview);
+                %(fail)s;
+            }""" % locals()
+
+    def infer_shape(self, node, shapes):
+        return [shapes[0]]
+
+    def R_op(self, inputs, eval_points):
+        if eval_points[0] is None or eval_points[1] is None:
+            return [None]
+        # Again we ignore eval points for indices because incsubtensor is
+        # not differentiable wrt to those
+        return self.make_node(eval_points[0], eval_points[1],
+                            *inputs[2:]).outputs
+
+    def connection_pattern(self, node):
+
+        rval = [[True], [True]]
+
+        for ipt in node.inputs[2:]:
+            rval.append([False])
+
+        return rval
+
+    def grad(self, inputs, grads):
+        g_output, = grads
+        x, y = inputs[:2]
+        idx_list = inputs[2:]
+
+        if self.set_instead_of_inc:
+            gx = set_subtensor(
+                Subtensor(idx_list=self.idx_list)(g_output, *idx_list),
+                theano.tensor.zeros_like(y))
+        else:
+            gx = g_output
+        gy = Subtensor(idx_list=self.idx_list)(g_output, *idx_list)
+
+        return [gx, gy] + [DisconnectedType()()] * len(idx_list)
+
+
+#########################
+# Advanced indexing
+#########################
+#
+# Should reproduce numpy's behaviour, see url:
+# docs.scipy.org/doc/numpy/reference/arrays.indexing.html#advanced-indexing
+
+
+class AdvancedSubtensor1(Op):
+    """Implement x[ilist] where ilist is a vector of integers."""
+
+    def __init__(self, sparse_grad=False):
+        self.sparse_grad = sparse_grad
+
+    def __hash__(self):
+        return hash(type(self))
+
+    def __eq__(self, other):
+        # Deliberately ignore the sparse_grad attribute: it does not
+        # change the output of this op, so we want the merge optimizer
+        # to merge two ops that differ only in their sparse_grad
+        # attribute.
+        return type(self) == type(other)
+
+    def __str__(self):
+        return self.__class__.__name__
+
+    def make_node(self, x, ilist):
+        x_ = theano.tensor.as_tensor_variable(x)
+        ilist_ = theano.tensor.as_tensor_variable(ilist)
+        if ilist_.type.dtype[:3] not in ('int', 'uin'):
+            raise TypeError('index must be integers')
+        if ilist_.type.ndim != 1:
+            raise TypeError('index must be vector')
+        if x_.type.ndim == 0:
+            raise TypeError('cannot index into a scalar')
+        return Apply(self, [x_, ilist_], [x_.type()])
+
+    def perform(self, node, inp, out_):
+        x, i = inp
+        out, = out_
+        # Copy always implied by numpy advanced indexing semantic.
+        if out[0] is not None and out[0].shape == (len(i),) + x.shape[1:]:
+            o = out[0]
+        else:
+            o = None
+
+        # If i.dtype is more precise than numpy.intp (int32 on 32-bit machines,
+        # int64 on 64-bit machines), numpy may raise the following error:
+        # TypeError: array cannot be safely cast to required type.
+        # We need to check if values in i can fit in numpy.intp, because
+        # if they don't, that should be an error (no array can have that
+        # many elements on a 32-bit arch).
+        if i.dtype != numpy.intp:
+            i_ = theano._asarray(i, dtype=numpy.intp)
+            if not numpy.can_cast(i.dtype, numpy.intp):
+                # Check if there was actually an incorrect conversion
+                if numpy.any(i != i_):
+                    raise IndexError('index contains values that are bigger '
+                            'than the maximum array size on this system.', i)
+            i = i_
+
+        out[0] = x.take(i, axis=0, out=o)
+
+    def connection_pattern(self, node):
+        rval = [[True]]
+
+        for ipt in node.inputs[1:]:
+            rval.append([False])
+
+        return rval
+
+    def grad(self, inputs, grads):
+        global sparse_module_ref
+        x, ilist = inputs
+        gz, = grads
+        assert len(inputs) == 2
+        sparse = False
+        if getattr(x.type, 'sparse_grad', False):
+            sparse = True
+            warnings.warn(
+                "DEPRECATION WARNING: AdvancedSubtensor1, you are using"
+                " an old interface to the sparse grad. You should use"
+                " theano.sparse_grad(a_tensor[an_int_vector]). ")
+        if sparse or self.sparse_grad:
+            if x.type.ndim != 2:
+                raise TypeError(
+                    "AdvancedSubtensor1: you can't take the sparse grad"
+                    " from a tensor with ndim != 2. ndim is " +
+                    str(x.type.ndim))
+            if sparse_module_ref is None:
+                import theano.sparse as sparse_module_ref
+
+            rval1 = [sparse_module_ref.construct_sparse_from_list(x, gz,
+                                                                  ilist)]
+        else:
+            rval1 = [advanced_inc_subtensor1(x.zeros_like(), gz, ilist)]
+        return rval1 + [DisconnectedType()()] * (len(inputs) - 1)
+
+    def R_op(self, inputs, eval_points):
+        if eval_points[0] is None:
+            return [None]
+        return self.make_node(eval_points[0], *inputs[1:]).outputs
+
+    def infer_shape(self, node, ishapes):
+        x, ilist = ishapes
+        return [ilist + x[1:]]
+
+    def c_support_code(self):
+        # In some versions of numpy, NPY_MIN_INTP is defined as MIN_LONG,
+        # which is not defined. It should be NPY_MIN_LONG instead in that case.
+        return dedent("""\
+                #ifndef MIN_LONG
+                #define MIN_LONG NPY_MIN_LONG
+                #endif""")
+
+    def c_code(self, node, name, input_names, output_names, sub):
+        if self.__class__ is not AdvancedSubtensor1:
+            raise MethodNotDefined(
+                "c_code defined for AdvancedSubtensor1,"
+                " not for child class", type(self))
+        a_name, i_name = input_names[0], input_names[1]
+        output_name = output_names[0]
+        fail = sub['fail']
+        return """
+            PyObject *indices;
+            int i_type = PyArray_TYPE(%(i_name)s);
+            if (i_type != NPY_INTP) {
+                // Cast %(i_name)s to NPY_INTP (expected by PyArray_TakeFrom),
+                // if all values fit.
+                if (!PyArray_CanCastSafely(i_type, NPY_INTP)) {
+                    npy_int64 min_val, max_val;
+                    PyObject* py_min_val = PyArray_Min(%(i_name)s, NPY_MAXDIMS,
+                                                       NULL);
+                    if (py_min_val == NULL) {
+                        %(fail)s;
+                    }
+                    min_val = PyLong_AsLongLong(py_min_val);
+                    Py_DECREF(py_min_val);
+                    if (min_val == -1 && PyErr_Occurred()) {
+                        %(fail)s;
+                    }
+                    PyObject* py_max_val = PyArray_Max(%(i_name)s, NPY_MAXDIMS,
+                                                       NULL);
+                    if (py_max_val == NULL) {
+                        %(fail)s;
+                    }
+                    max_val = PyLong_AsLongLong(py_max_val);
+                    Py_DECREF(py_max_val);
+                    if (max_val == -1 && PyErr_Occurred()) {
+                        %(fail)s;
+                    }
+                    if (min_val < NPY_MIN_INTP || max_val > NPY_MAX_INTP) {
+                        PyErr_SetString(PyExc_IndexError,
+                                     "Index contains values "
+                                     "that are bigger than the maximum array "
+                                     "size on this system.");
+                        %(fail)s;
+                    }
+                }
+                indices = PyArray_Cast(%(i_name)s, NPY_INTP);
+                if (indices == NULL) {
+                    %(fail)s;
+                }
+            }
+            else {
+                 indices = (PyObject *)%(i_name)s;
+                 Py_INCREF(indices);
+            }
+            if (%(output_name)s != NULL) {
+                npy_intp nd, i, *shape;
+                nd = PyArray_NDIM(%(a_name)s) + PyArray_NDIM(indices) - 1;
+                if (PyArray_NDIM(%(output_name)s) != nd) {
+                    Py_CLEAR(%(output_name)s);
+                }
+                else {
+                    shape = PyArray_DIMS(%(output_name)s);
+                    for (i = 0; i < PyArray_NDIM(indices); i++) {
+                        if (shape[i] != PyArray_DIMS(indices)[i]) {
+                            Py_CLEAR(%(output_name)s);
+                            break;
+                        }
+                    }
+                    if (%(output_name)s != NULL) {
+                        for (; i < nd; i++) {
+                            if (shape[i] != PyArray_DIMS(%(a_name)s)[
+                                                i-PyArray_NDIM(indices)+1]) {
+                                Py_CLEAR(%(output_name)s);
+                                break;
+                            }
+                        }
+                    }
+                }
+            }
+            %(output_name)s = (PyArrayObject*)PyArray_TakeFrom(
+                        %(a_name)s, indices, 0, %(output_name)s, NPY_RAISE);
+            Py_DECREF(indices);
+            if (%(output_name)s == NULL) %(fail)s;
+        """ % locals()
+
+    def c_code_cache_version(self):
+        return (0, 1, 1)
+
+advanced_subtensor1 = AdvancedSubtensor1()
+
+
+class AdvancedIncSubtensor1(Op):
+    """Increments a subtensor using advanced slicing (list of index)"""
+    def __init__(self, inplace=False, set_instead_of_inc=False):
+        self.inplace = inplace
+        self.set_instead_of_inc = set_instead_of_inc
+        if inplace:
+            self.destroy_map = {0: [0]}
+
+    def __hash__(self):
+        return hash((type(self), self.inplace, self.set_instead_of_inc))
+
+    def __eq__(self, other):
+        return (type(self) == type(other)
+                and self.inplace == other.inplace
+                and self.set_instead_of_inc == other.set_instead_of_inc)
+
+    def __str__(self):
+        if self.inplace:
+            msg = "inplace"
+        else:
+            msg = "no_inplace"
+        if self.set_instead_of_inc:
+            msg += ",set"
+        else:
+            msg += ",inc"
+
+        return self.__class__.__name__ + "{%s}" % msg
+
+    def make_node(self, x, y, ilist):
+        """Check x, y and the integer index vector; build the Apply node."""
+        x_ = theano.tensor.as_tensor_variable(x)
+        y_ = theano.tensor.as_tensor_variable(y)
+        ilist_ = theano.tensor.as_tensor_variable(ilist)
+        if ilist_.type.dtype[:3] not in ('int', 'uin'):
+            raise TypeError('index must be integers')
+        if ilist_.type.ndim != 1:
+            raise TypeError('index must be vector')
+        if x_.type.ndim == 0:
+            raise TypeError('cannot index into a scalar')
+        if y_.type.ndim > x_.type.ndim:
+            if self.set_instead_of_inc:
+                opname = 'set'
+            else:
+                opname = 'increment'
+            raise TypeError(
+                'cannot %s x subtensor with ndim=%s by y with ndim=%s' % (
+                    opname, x_.type.ndim, y_.type.ndim))
+
+        return Apply(self, [x_, y_, ilist_], [x_.type()])
+
+    def perform(self, node, inp, out_):
+        # TODO opt to make this inplace
+        x, y, idx = inp
+        out, = out_
+        if not self.inplace:
+            x = x.copy()
+        # In Numpy, x[idx] += y doesn't work if the same index is present
+        # many times: it does it only once. Is it a bug? In any case, for
+        # this reason we implement our own 'inc' iteration.
+        if self.set_instead_of_inc:
+            x[idx] = y
+        else:
+            increment = inplace_increment
+            if increment is None:
+                increment = self.inplace_increment1d_slow
+
+            increment(x, idx, y)
+
+        out[0] = x
+
+    def inplace_increment1d_slow(self, x, idx, y):
+        # If `y` has as many dimensions as `x`, then we want to iterate
+        # jointly on `x` and `y`. Otherwise, it means `y` should be
+        # broadcasted to fill all relevant rows of `x`.
+        assert y.ndim <= x.ndim   # Should be guaranteed by `make_node`
+        if y.ndim == x.ndim:
+            assert len(y) == len(idx)
+            for (j, i) in enumerate(idx):
+                x[i] += y[j]
+        else:
+            for i in idx:
+                x[i] += y
+
+    def infer_shape(self, node, ishapes):
+        x, y, ilist = ishapes
+        return [x]
+
+    def R_op(self, inputs, eval_points):
+        if None in eval_points[:2]:
+            return [None]
+        return self.make_node(eval_points[0], eval_points[1],
+                              *inputs[2:]).outputs
+
+    def connection_pattern(self, node):
+
+        rval = [[True], [True], [False]]
+        return rval
+
+    def grad(self, inputs, grads):
+        g_output, = grads
+        y, idx_list = inputs[1], inputs[2:]
+        gx = g_output
+        if self.set_instead_of_inc:  # overwritten rows of x get no gradient
+            gx = AdvancedIncSubtensor1(set_instead_of_inc=True)(
+                g_output, theano.tensor.zeros_like(y), *idx_list)
+        gy = advanced_subtensor1(g_output, *idx_list)
+        return [gx, gy] + [DisconnectedType()()] * len(idx_list)
+
+advanced_inc_subtensor1 = AdvancedIncSubtensor1()
+
+
+def as_index_variable(idx):
+    if idx is None:
+        return NoneConst
+    if isinstance(idx, slice):
+        return make_slice(idx)
+    idx = theano.tensor.as_tensor_variable(idx)
+    if idx.type.dtype[:3] not in ('int', 'uin'):
+        raise TypeError('index must be integers')
+    return idx
+
+
+def adv_index_broadcastable_pattern(a, idx):
+    """
+    This function is only used to determine the broadcast pattern for
+    AdvancedSubtensor output variable.
+
+    For this, we make a fake ndarray and a fake idx and ask numpy to index
+    it. From the resulting shape, we find the output broadcast pattern.
+    """
+
+    def replace_slice(v):
+        if isinstance(v, gof.Apply):
+            if len(v.outputs) != 1:
+                raise ValueError(
+                    "It is ambiguous which output of a multi-output Op has"
+                    " to be fetched.", v)
+            else:
+                v = v.outputs[0]
+
+        if NoneConst.equals(v):
+            return None
+        if isinstance(v.type, SliceType):
+            return slice(None, None)
+
+        return numpy.zeros((2,) * v.ndim, int)
+
+    newidx = tuple(map(replace_slice, idx))
+
+    # 2 - True = 1; 2 - False = 2: broadcastable dims get length 1
+    fakeshape = [2 - bc for bc in a.broadcastable]
+    retshape = numpy.empty(fakeshape)[newidx].shape
+    return tuple([dim == 1 for dim in retshape])
+
+
+class AdvancedSubtensor(Op):
+    """Return a subtensor copy, using advanced indexing.
+    """
+    # Should be used by __getitem__ and __getslice__, as follows:
+    # AdvancedSubtensor()(self, *args),
+    # if args contains an advanced indexing pattern
+
+    def __eq__(self, other):
+        return self.__class__ == other.__class__
+
+    def __hash__(self):
+        return hash(self.__class__)
+
+    def __str__(self):
+        return self.__class__.__name__
+
+    def make_node(self, x, *index):
+        x = theano.tensor.as_tensor_variable(x)
+
+        index = tuple(map(as_index_variable, index))
+        bcast = adv_index_broadcastable_pattern(x, index)
+        return gof.Apply(self,
+                         (x,) + index,
+                         [theano.tensor.tensor(dtype=x.type.dtype,
+                                 broadcastable=bcast)])
+
+    def R_op(self, inputs, eval_points):
+        if eval_points[0] is None:
+            return [None]
+        return self.make_node(eval_points[0], *inputs[1:]).outputs
+
+    def infer_shape(self, node, ishapes):
+        # Really special case
+        if len(ishapes) == 3:
+            xshp, ind1shp, ind2shp = ishapes
+            if len(xshp) == 2 and len(ind1shp) == 1 and len(ind2shp) == 1:
+                # if the graph is correct, we can assume ind1shp[0] and
+                # ind2shp[0] will have the same value.
+                # Try to return the one closest to the graph input.
+                if node.inputs[2].owner is None:
+                    return [ind2shp]
+                else:
+                    return [ind1shp]
+        # Default case, we don't know
+        return node.fgraph.shape_feature.default_infer_shape(node, ishapes)
+
+    def perform(self, node, inputs, out_):
+        out, = out_
+        # TODO: re-pack the inputs into a valid index, like Subtensor does.
+        out[0] = inputs[0].__getitem__(tuple(inputs[1:]))
+        # Compare numerically: as strings, '1.10' sorts before '1.6.1'.
+        np_version = tuple(int(part) for part in
+                           numpy.__version__.split('.')[:3] if part.isdigit())
+        if (np_version <= (1, 6, 1) and
+                out[0].size != numpy.uint32(out[0].size)):
+            warnings.warn(
+                    'Numpy versions 1.6.1 and below have a bug preventing '
+                    'advanced indexing from correctly filling arrays that '
+                    'are too big (>= 2^32 elements). It is possible that '
+                    'out[0] (%s), with shape %s, is not correctly filled.'
+                    % (out[0], out[0].shape))
+
+    def connection_pattern(self, node):
+
+        rval = [[True]]
+
+        for ipt in node.inputs[1:]:
+            rval.append([False])
+
+        return rval
+
+    def grad(self, inputs, grads):
+        gz, = grads
+        x = inputs[0]
+        rest = inputs[1:]
+        return [advanced_inc_subtensor(theano.tensor.zeros_like(x), gz,
+                                       *rest)] + \
+            [DisconnectedType()()] * len(rest)
+
+
+class AdvancedIncSubtensor(Op):
+    """Increments a subtensor using advanced indexing.
+
+    :note: We need the numpy.inplace_increment() function currently
+        numpy's PR 326 to be able to make an inplace version of this
+        op.
+
+    """
+
+    def __init__(self, inplace=False, set_instead_of_inc=False):
+        self.inplace = inplace
+        self.set_instead_of_inc = set_instead_of_inc
+        # The assert is needed because, in the past, the first argument
+        # was something else that was not used.
+        assert isinstance(inplace, bool)
+        if self.inplace:
+            raise NotImplementedError('In place computation is not'
+                                      ' implemented')
+
+        self.allow_legacy_perform = False
+
+    def __hash__(self):
+        return hash((type(self), self.inplace, self.set_instead_of_inc))
+
+    def __eq__(self, other):
+        return (type(self) == type(other)
+                and self.inplace == other.inplace
+                and self.set_instead_of_inc == other.set_instead_of_inc)
+
+    def __str__(self):
+        return "%s{%s, %s}" % (self.__class__.__name__,
+                "inplace=" + str(self.inplace),
+                " set_instead_of_inc=" + str(self. set_instead_of_inc))
+
+    def make_node(self, x, y, *inputs):
+        x = theano.tensor.as_tensor_variable(x)
+        y = theano.tensor.as_tensor_variable(y)
+
+        op = self
+        # If we are incrementing, but the increment compiled function is not
+        # available, we need to support legacy cases.
+        if not self.set_instead_of_inc and inplace_increment is None:
+            legacy_conditions = False
+            if x.ndim == 2 and y.ndim == 1 and len(inputs) == 2:
+                ind1 = theano.tensor.as_tensor_variable(inputs[0])
+                ind2 = theano.tensor.as_tensor_variable(inputs[1])
+                if ind1.ndim == 1 and ind2.ndim == 1:
+                    if ind1.owner and isinstance(ind1.owner.op, ARange):
+                        legacy_conditions = True
+                    elif isinstance(ind1, Constant):
+                        # Make sure no index is duplicated
+                        val = ind1.value
+                        if numpy.unique(val).size == val.size:
+                            legacy_conditions = True
+                    elif ind2.owner and isinstance(ind2.owner.op, ARange):
+                        legacy_conditions = True
+                    elif isinstance(ind2, Constant):
+                        # Make sure no index is duplicated
+                        val = ind2.value
+                        if numpy.unique(val).size == val.size:
+                            legacy_conditions = True
+            if legacy_conditions:
+                op = copy(self)
+                op.allow_legacy_perform = True
+            else:
+                raise NotImplementedError(
+                        'Could not import inplace_increment, so some advanced '
+                        'indexing features are disabled. They will be '
+                        'available if you update NumPy to version 1.8 or '
+                        'later, or to the latest development version.')
+
+        return gof.Apply(op,
+                        (x, y) + inputs,
+                        [theano.tensor.tensor(dtype=x.type.dtype,
+                            broadcastable=x.type.broadcastable)])
+
+    def perform(self, node, inputs, out_):
+        # TODO: 1. opt to make this in place 2. generalize as described in
+        # AdvancedSubtensor's perform TODO
+        out, = out_
+        index = tuple(inputs[2:])  # explicit tuple: one entry per index input
+        out[0] = inputs[0]
+        if not self.inplace:
+            out[0] = inputs[0].copy()
+        if self.set_instead_of_inc:
+            out[0][index] = inputs[1]
+        elif inplace_increment is not None:
+            inplace_increment(out[0], index, inputs[1])
+        elif self.allow_legacy_perform:
+            out[0][index] += inputs[1]
+        else:
+            raise NotImplementedError(
+                    'Could not import inplace_increment, so some advanced '
+                    'indexing features are disabled. They will be '
+                    'available if you update NumPy to version 1.8 or '
+                    'later, or to the latest development version.')
+        # Compare numerically: as strings, '1.10' sorts before '1.6.1'.
+        np_version = tuple(int(part) for part in
+                           numpy.__version__.split('.')[:3] if part.isdigit())
+        if (np_version <= (1, 6, 1) and
+                out[0].size != numpy.uint32(out[0].size)):
+            warnings.warn(
+                    'Numpy versions 1.6.1 and below have a bug preventing '
+                    'advanced indexing from correctly filling arrays that '
+                    'are too big (>= 2^32 elements). It is possible that '
+                    'out[0] (%s), with shape %s, is not correctly filled.'
+                    % (out[0], out[0].shape))
+
+    def infer_shape(self, node, ishapes):
+        return [ishapes[0]]
+
+    def connection_pattern(self, node):
+
+        rval = [[True], [True]]
+
+        for ipt in node.inputs[2:]:
+            rval.append([False])
+
+        return rval
+
+    def grad(self, inpt, output_gradients):
+        x, y = inpt[:2]
+        idxs = inpt[2:]
+        outgrad, = output_gradients
+        d_x_wrt_C = outgrad
+        d_y_wrt_C = AdvancedSubtensor()(outgrad, *idxs)
+        return [d_x_wrt_C, d_y_wrt_C] + \
+            [DisconnectedType()() for _ in idxs]
+
+    def R_op(self, inputs, eval_points):
+        if None in eval_points[:2]:
+            return [None]
+        return self.make_node(eval_points[0], eval_points[1],
+                              *inputs[2:]).outputs
+advanced_inc_subtensor = AdvancedIncSubtensor()
+
+
+def take(a, indices, axis=None, mode='raise'):
+    """Take elements of `a` along `axis` (of flattened `a` if axis is None)."""
+    a = theano.tensor.as_tensor_variable(a)
+    indices = theano.tensor.as_tensor_variable(indices)
+    if axis is not None and axis < 0:
+        # Make axis non-negative so the a.shape slices below stay valid.
+        axis += a.ndim
+    # Reuse advanced_subtensor1 if indices is a vector
+    if indices.ndim == 1:
+        if axis is None:
+            # Index the flattened tensor; a.shape[None] is not a length.
+            a, axis = a.flatten(), 0
+        assert axis >= 0
+        if mode == 'clip':
+            indices = clip(indices, 0, a.shape[axis] - 1)
+        elif mode == 'wrap':
+            indices = indices % a.shape[axis]
+        if axis == 0:
+            return advanced_subtensor1(a, indices)
+        shuffle = list(range(a.ndim))
+        shuffle[0], shuffle[axis] = axis, 0
+        return advanced_subtensor1(
+            a.dimshuffle(shuffle), indices).dimshuffle(shuffle)
+    if axis is None:
+        shape, ndim = indices.shape, indices.ndim
+    else:
+        shape = theano.tensor.concatenate(
+                        [a.shape[:axis], indices.shape, a.shape[axis + 1:]])
+        ndim = a.ndim + indices.ndim - 1
+    return take(a, indices.flatten(), axis, mode).reshape(shape, ndim)
--- a/theano/tensor/tests/test_basic.py
+++ b/theano/tensor/tests/test_basic.py
@@ -25,11 +25,11 @@ from theano.compile import DeepCopyOp
 from theano.compile.mode import get_default_mode
 from theano.gof.python25 import any, all, combinations
 from theano.tensor import (_shared, wvector, bvector, autocast_float_as,
-        argmin, max_and_argmax, cscalar, Subtensor, ctensor3, join,
+        argmin, max_and_argmax, cscalar, ctensor3, join,
        horizontal_stack, vertical_stack, argmax, get_vector_length,
        fscalar, zeros_like, sum, tensor3, vector, add, addbroadcast,
        alloc, as_tensor_variable, tensor_from_scalar, ARange, autocast_float,
-        clip, constant, default, dot, inc_subtensor, set_subtensor,
+        clip, constant, default, dot,
        dmatrix, dscalar, dvector, eq, eye, fill, flatten, inverse_permutation,
        tensor4, permute_row_elements, Flatten, fmatrix, fscalars, grad,
        inplace, iscalar, matrix, minimum, matrices, maximum, mul, neq,
@@ -38,13 +38,14 @@ from theano.tensor import (_shared, wvector, bvector, autocast_float_as,
        var, Join, shape, MaxAndArgmax, lscalar, zvector, exp,
        get_scalar_constant_value, ivector, reshape, scalar_from_tensor, scal,
        iscalars, arange, dscalars, fvector, imatrix, numeric_grad,
-        opt, ComplexError, lvector, lmatrix, true_div, max, min, Split, roll,
+        opt, lvector, lmatrix, true_div, max, min, Split, roll,
        tile, patternbroadcast, Eye, Shape, Dot, PermuteRowElements,
        ScalarFromTensor, TensorFromScalar, dtensor4, Rebroadcast, Alloc,
-        dtensor3, SpecifyShape, Mean, IncSubtensor, AdvancedIncSubtensor1,
-        itensor3, Tile, AdvancedIncSubtensor, switch, Diagonal, Diag,
-        nonzero, flatnonzero, nonzero_values, inplace_increment,
+        dtensor3, SpecifyShape, Mean,
+        itensor3, Tile, switch, Diagonal, Diag,
+        nonzero, flatnonzero, nonzero_values,
        stacklists)
+
 from theano.tests import unittest_tools as utt


@@ -2806,1114 +2807,6 @@ class T_outer(unittest.TestCase):
            utt.verify_grad(tensor.outer, [data0, data1])


-class T_subtensor(unittest.TestCase, utt.TestOptimizationMixin):
-    """
-    This is build in a way that allow to reuse it to test the
-    equivalent gpu op.
-    """
-    def __init__(self, name, shared=_shared,
-                 sub=tensor.Subtensor,
-                 inc_sub=tensor.IncSubtensor,
-                 adv_sub1=tensor.AdvancedSubtensor1,
-                 adv_incsub1=tensor.AdvancedIncSubtensor1,
-                 mode=None,
-                 dtype=theano.config.floatX,
-                 ignore_topo=DeepCopyOp):
-        self.shared = shared
-        self.sub = sub
-        self.inc_sub = inc_sub
-        self.adv_sub1 = adv_sub1
-        self.adv_incsub1 = adv_incsub1
-        if mode is None:
-            mode = theano.compile.mode.get_default_mode()
-        self.mode = mode
-        self.dtype = dtype
-        self.ignore_topo = ignore_topo
-        self.fast_compile = theano.config.mode == 'FAST_COMPILE'
-        self.ops = (sub, inc_sub, adv_sub1, adv_incsub1)
-        return super(T_subtensor, self).__init__(name)
-
-    def function(self, inputs, outputs, accept_inplace=False,
-                 op=None, mode=None, N=1, N_fast=None):
-        """ wrapper around theano.function that also check the output
-
-        :param N: the number of op expected in the toposort
-                  if tuple of length 2, (expected if fast_compile,
-                                         if not fast_compile)
-        """
-        if self.fast_compile and N_fast is not None:
-            N = N_fast
-        if mode is None:
-            mode = self.mode
-        if op is None:
-            op = self.sub
-
-        f = theano.function(inputs, outputs, mode=mode,
-                            accept_inplace=accept_inplace)
-        self.assertFunctionContainsClassN(f, op, N)
-        return f
-
-    def setUp(self):
-        Subtensor.debug = False
-        utt.seed_rng()
-
-    def eval_output_and_check(self, t, list=False):
-        f = inplace_func([], t, mode=self.mode)
-        topo = f.maker.fgraph.toposort()
-        topo_ = [node for node in topo if not isinstance(node.op,
-             self.ignore_topo)]
-        assert len(topo_) == 1
-        if not list:
-            assert isinstance(topo_[0].op, self.sub)
-        else:
-            assert isinstance(topo_[0].op, self.adv_sub1)
-        tval = f()
-        return tval
-
-    def test0_err_invalid(self):
-        #it is impossible to retrieve a view of a 0-d tensor
-        n = self.shared(numpy.ones((), dtype=self.dtype))
-        try:
-            t = n[0]
-        except ValueError, e:
-            self.assertTrue(hasattr(e, 'subtensor_invalid'))
-            return
-        self.fail()
-
-    def test1_err_bounds(self):
-        n = self.shared(numpy.ones(3, dtype=self.dtype))
-        ctv_backup = config.compute_test_value
-        config.compute_test_value = 'off'
-        try:
-            t = n[7]
-        finally:
-            config.compute_test_value = ctv_backup
-        self.assertTrue(isinstance(t.owner.op, Subtensor))
-        # Silence expected error messages
-        _logger = logging.getLogger('theano.gof.opt')
-        oldlevel = _logger.level
-        _logger.setLevel(logging.CRITICAL)
-        try:
-            try:
-                self.eval_output_and_check(t)
-                assert 0
-            except Exception, e:
-                if 'out of bounds' not in exc_message(e):
-                    raise
-        finally:
-            _logger.setLevel(oldlevel)
-
-    def test1_err_subslice(self):
-        n = self.shared(numpy.ones(3, dtype=self.dtype))
-        try:
-            t = n[slice(0, slice(1, 2, None), None)]
-        except Exception, e:
-            ### Relax constraint on the type of Exception,
-            ### since this might be handled by AvancedSubtensor
-            #if e[0] != Subtensor.e_indextype:
-            #    raise
-            return
-        self.fail()
-
-    def test1_ok_range_finite(self):
-        n = self.shared(numpy.arange(3, dtype=self.dtype))
-        t = n[0:2]
-        self.assertTrue(isinstance(t.owner.op, Subtensor))
-        tval = self.eval_output_and_check(t)
-        self.assertTrue(tval.shape == (2,))
-        self.assertTrue((tval == [0, 1]).all())
-
-    def test2_ok_range_finite(self):
-        n = self.shared(numpy.arange(12, dtype=self.dtype).reshape((3, 4)))
-        # Also check negative index
-        for idx in [(slice(0, 2), 3), ((slice(0, 2), -1)), (slice(0, 2), -4)]:
-            t = n[idx]  # l]#0:2,3]
-            self.assertTrue(isinstance(t.owner.op, Subtensor))
-            tval = self.eval_output_and_check(t)
-            self.assertTrue(tval.shape == (2,))
-            self.assertTrue(numpy.allclose(tval, n.get_value()[idx]))
-
-    def test1_0_dims(self):
-        n = self.shared(numpy.ones((), dtype=self.dtype))
-        t = theano.tensor.Subtensor([])(n)
-        self.assertTrue(isinstance(t.owner.op, Subtensor))
-        mode = self.mode
-        self.mode = mode.excluding("local_useless_subtensor")
-        try:
-            self.eval_output_and_check(t)
-        finally:
-            self.mode = mode
-
-    def test1_err_invalid(self):
-        n = self.shared(numpy.ones(1, dtype=self.dtype))
-        try:
-            t = n[0, 0]
-        except ValueError, e:
-            self.assertTrue(hasattr(e, 'subtensor_invalid'))
-            return
-        self.fail()
-
-    def test1_ok_elem(self):
-        n = self.shared(numpy.ones(1, dtype=self.dtype) * 5)
-        t = n[0]
-        self.assertTrue(isinstance(t.owner.op, Subtensor))
-        tval = self.eval_output_and_check(t)
-        self.assertTrue(tval.shape == ())
-        self.assertTrue(tval == 5.0)
-
-    def test1_ok_range_infinite(self):
-        #Subtensor.debug = True
-        n = self.shared(numpy.arange(3, dtype=self.dtype))
-        t = n[1:]
-        self.assertTrue(isinstance(t.owner.op, Subtensor))
-        tval = self.eval_output_and_check(t)
-        self.assertTrue(tval.shape == (2,))
-        self.assertTrue((tval == [1.0, 2.0]).all())
-
-    def test1_ok_strided(self):
-        n = self.shared(numpy.arange(5, dtype=self.dtype))
-        t = n[1::2]
-        self.assertTrue(isinstance(t.owner.op, Subtensor))
-        tval = self.eval_output_and_check(t)
-        self.assertTrue(tval.shape == (2,))
-        self.assertTrue((tval == [1.0, 3.0]).all())
-
-        t = n[0:-1:2]  # 0 to 1 from the end stepping by 2
-        tval = self.eval_output_and_check(t)
-        self.assertTrue(tval.shape == (2,))
-        self.assertTrue((tval == [0.0, 2.0]).all())
-
-    def test2_err_bounds0(self):
-        n = self.shared(numpy.ones((2, 3), dtype=self.dtype) * 5)
-        ctv_backup = config.compute_test_value
-        config.compute_test_value = 'off'
-        try:
-            for idx in [(0, 4), (0, -4)]:
-                t = n[idx]
-                self.assertTrue(isinstance(t.owner.op, Subtensor))
-                # Silence expected warnings
-                _logger = logging.getLogger('theano.gof.opt')
-                oldlevel = _logger.level
-                _logger.setLevel(logging.CRITICAL)
-                try:
-                    self.assertRaises(IndexError,
-                                      self.eval_output_and_check, [t])
-                finally:
-                    _logger.setLevel(oldlevel)
-        finally:
-            config.compute_test_value = ctv_backup
-
-    def test2_err_bounds1(self):
-        n = self.shared((numpy.ones((2, 3), dtype=self.dtype) * 5))
-        t = n[4:5, 3]
-        self.assertTrue(isinstance(t.owner.op, Subtensor))
-        old_stderr = sys.stderr
-        sys.stderr = StringIO()
-        try:
-            self.assertRaises(IndexError,
-                              self.eval_output_and_check, [t])
-        finally:
-            sys.stderr = old_stderr
-
-    def test2_ok_elem(self):
-        n = self.shared(numpy.arange(6, dtype=self.dtype).reshape((2, 3)))
-        t = n[0, 2]
-        self.assertTrue(isinstance(t.owner.op, Subtensor))
-        tval = self.eval_output_and_check(t)
-        self.assertTrue(tval.shape == ())
-        self.assertTrue(numpy.all(tval == 2))
-
-    def test2_ok_row(self):
-        n = self.shared(numpy.arange(6, dtype=self.dtype).reshape((2, 3)))
-        t = n[1]
-        self.assertFalse(any(n.type.broadcastable))
-        self.assertTrue(isinstance(t.owner.op, Subtensor))
-        tval = self.eval_output_and_check(t)
-        self.assertTrue(tval.shape == (3,))
-        self.assertTrue(numpy.all(tval == [3, 4, 5]))
-
-    def test2_ok_col(self):
-        n = self.shared(numpy.arange(6, dtype=self.dtype).reshape((2, 3)))
-        t = n[:, 0]
-        self.assertTrue(isinstance(t.owner.op, Subtensor))
-        self.assertFalse(any(n.type.broadcastable))
-        tval = self.eval_output_and_check(t)
-        self.assertTrue(tval.shape == (2,))
-        self.assertTrue(numpy.all(tval == [0, 3]))
-
-    def test2_ok_rows_finite(self):
-        n = self.shared(numpy.arange(12, dtype=self.dtype).reshape((4, 3)))
-        t = n[1:3, 0]
-        self.assertTrue(isinstance(t.owner.op, Subtensor))
-        tval = self.eval_output_and_check(t)
-        self.assertTrue(tval.shape == (2,))
-        self.assertTrue(numpy.all(tval == [3, 6]))
-
-    def test2_ok_cols_infinite(self):
-        n = self.shared(numpy.arange(12, dtype=self.dtype).reshape((4, 3)))
-        t = n[1, 2:]
-        self.assertTrue(isinstance(t.owner.op, Subtensor))
-        tval = self.eval_output_and_check(t)
-        self.assertTrue(tval.shape == (1,))
-        self.assertTrue(numpy.all(tval == 5))
-
-    def test2_ok_strided(self):
-        n = self.shared(numpy.arange(20, dtype=self.dtype).reshape((4, 5)))
-        t = n[1:4:2, 1:5:2]
-        self.assertTrue(isinstance(t.owner.op, Subtensor))
-        tval = self.eval_output_and_check(t)
-        self.assertTrue(tval.shape == (2, 2))
-        self.assertTrue(numpy.all(tval == [[6, 8], [16, 18]]))
-
-    def test3_ok_mat(self):
-        n = self.shared(numpy.arange(24, dtype=self.dtype).reshape((2, 3, 4)))
-        t = n[0, 0, 0]
-        self.assertTrue(isinstance(t.owner.op, Subtensor))
-        tval = self.eval_output_and_check(t)
-        self.assertTrue(tval.shape == ())
-        self.assertTrue(numpy.all(tval == 0))
-
-    def test_long(self):
-        n = self.shared(numpy.arange(12, dtype=self.dtype).reshape((4, 3)))
-        t = n[1L:4L:2L, 1L]
-        self.assertTrue(isinstance(t.owner.op, Subtensor))
-        tval = self.eval_output_and_check(t)
-        self.assertTrue(tval.shape == (2,))
-        self.assertTrue(numpy.all(tval == [4, 10]))
-
-    def test_long_too_big(self):
-        # Currently, we cast Python longs to int64 when used for indexing.
-        # This test checks that using a long that does not fit raises an error.
-        n = self.shared(numpy.arange(12, dtype=self.dtype).reshape((4, 3)))
-        self.assertRaises(Exception, lambda: n[:(2L ** 63)])
-
-    def test_newaxis(self):
-        """
-        newaxis support comes from logic in the __getitem__ of TensorType
-        Variables, which currently inserts dimshuffle to get the right number
-        of dimensions, and adjusts the slice tuple accordingly.
-
-        So testing is done via square-bracket notation rather than direct
-        interaction with the Subtensor Op (which has no support of its own for
-        newaxis).
-        """
-        newaxis = numpy.newaxis
-
-        n = self.shared(numpy.arange(24, dtype=self.dtype).reshape((2, 3, 4)))
-        assert n.ndim == 3
-
-        n4 = n[newaxis, :, :, :]
-        assert n4.broadcastable == (True, False, False, False), n4
-
-        n4 = n[:, newaxis, :, :]
-        assert n4.broadcastable == (False, True, False, False), n4
-
-        n4 = n[:, :, newaxis, :]
-        assert n4.broadcastable == (False, False, True, False), n4
-
-        n4 = n[:, :, :, newaxis]
-        assert n4.broadcastable == (False, False, False, True), n4
-
-        n3 = n.flatten()[newaxis, :, newaxis]
-        assert n3.broadcastable == (True, False, True), n3
-
-        s = cscalar()
-        s1 = s[newaxis]
-        assert s1.broadcastable == (True,), s1
-
-        vs1, vn3, vn4 = theano.function([s], [s1, n3, n4])(-2.0)
-
-        assert numpy.all(vs1 == [-2.0])
-        assert numpy.all(vn3
-                == numpy.arange(24)[newaxis, :, newaxis])
-        assert numpy.all(vn4
-                == numpy.arange(24).reshape((2, 3, 4))[:, :, :, newaxis])
-
-    def test_grad_1d(self):
-        subi = 0
-        data = numpy.asarray(rand(2, 3), dtype=self.dtype)
-        n = self.shared(data)
-        z = scal.constant(subi)
-        t = n[z:, z]
-        gn = grad(sum(exp(t)), n)
-
-        f = inplace_func([], gn, mode=self.mode)
-        topo = f.maker.fgraph.toposort()
-        topo_ = [node for node in topo if not isinstance(node.op,
-             self.ignore_topo)]
-        if not self.fast_compile:
-            assert len(topo_) == 6
-        assert numpy.sum([isinstance(node.op, self.inc_sub)
-             for node in topo_]) == 1
-        assert numpy.sum([isinstance(node.op, self.sub)
-             for node in topo_]) == 1
-        gval = f()
-
-        good = numpy.zeros_like(data)
-        good[subi:, subi] = numpy.exp(data[subi:, subi])
-        self.assertTrue(numpy.allclose(gval, good), (gval, good))
-
-    def test_grad_0d(self):
-        data = numpy.asarray(rand(2, 3), dtype=self.dtype)
-        n = self.shared(data)
-        t = n[1, 0]
-        gn = grad(sum(exp(t)), n)
-        f = self.function([], gn)
-        topo = f.maker.fgraph.toposort()
-        topo_ = [node for node in topo if not isinstance(node.op,
-             self.ignore_topo)]
-        if not self.fast_compile:
-            assert len(topo_) == 6
-        assert numpy.sum([isinstance(node.op, self.inc_sub)
-             for node in topo_]) == 1
-        assert numpy.sum([isinstance(node.op, self.sub)
-             for node in topo_]) == 1
-
-        gval = f()
-        good = numpy.zeros_like(data)
-        good[1, 0] = numpy.exp(data[1, 0])
-        self.assertTrue(numpy.allclose(gval, good), (gval, good))
-
-    def test_ok_list(self):
-        for data, idx in [(rand(4), [1, 0]),
-                          (rand(4, 5), [2, 3]),
-                          (rand(4, 2, 3), [0, 3]),
-                          (rand(4, 2, 3), [3, 3, 1, 1, 2, 2, 0, 0]),
-                          (rand(4, 2, 3), [3, 3,
-                               1, 1, 2, 2, 0, 0, -1, -2, -3, -4]),
-                          # Test 4 dims as gpu code use another algo in that case
-                          # This new algo is not as much optimized for that case.
-                          (rand(4, 4, 2, 3), [3,
-                               3, 1, 1, 2, 2, 0, 0, -1, -2, -3, -4]),
-                          # Test with TensorConstant index.
-                          (rand(4, 2, 3), constant([3, 3, 1, 1, 2, 2, 0, 0])),
-                          ]:
-            data = numpy.asarray(data, dtype=self.dtype)
-            n = self.shared(data)
-            t = n[idx]
-
-            # We test again AdvancedSubtensor1 as we transfer data to the cpu.
-            self.assertTrue(isinstance(t.owner.op, tensor.AdvancedSubtensor1))
-
-            val = self.eval_output_and_check(t, list=True)
-            if isinstance(idx, list):
-                good = data[idx]
-            else:
-                good = data[idx.data]
-            self.assertTrue(val.ndim == data.ndim)
-            self.assertTrue(numpy.allclose(val, good), (val, good))
-
-            # Test reuse of output memory
-            if isinstance(self.adv_sub1, tensor.AdvancedSubtensor1):
-                op = self.adv_sub1()
-                # When idx is a TensorConstant.
-                if hasattr(idx, "data"):
-                    idx = idx.data
-                test_out = [[None]]
-                op.perform(None, [data, idx], test_out)
-                out1 = test_out[0][0]
-                op.perform(None, [data, idx], test_out)
-                out2 = test_out[0][0]
-                assert out1 is out2
-
-    def test_err_invalid_list(self):
-        n = self.shared(numpy.asarray(5, dtype=self.dtype))
-        self.assertRaises(TypeError, n.__getitem__, [0, 0])
-
-
-    def test_err_invalid_2list_dtype(self):
-        n = self.shared(numpy.ones((3, 3), dtype=self.dtype) * 5)
-        self.assertRaises(TypeError, n.__getitem__, ([0., 0], [1, 1]))
-
-    def test_err_bound_list(self):
-        n = self.shared(numpy.ones((2, 3), dtype=self.dtype) * 5)
-        l = lvector()
-        t = n[l]
-        # We test again AdvancedSubtensor1 as we transfer data to the cpu.
-        self.assertTrue(isinstance(t.owner.op, tensor.AdvancedSubtensor1))
-
-        f = self.function([l], t, op=self.adv_sub1)
-        topo = f.maker.fgraph.toposort()
-        topo_ = [node for node in topo if not isinstance(node.op,
-             self.ignore_topo)]
-        assert len(topo_) == 1
-        self.assertTrue(isinstance(topo_[0].op, self.adv_sub1))
-        for shp in [[0, 4], [0, -3], [-10]]:
-            self.assertRaises(IndexError, f, shp)
-
-    def test_adv_sub1_broadcast(self):
-        ones = numpy.ones((1, 3), dtype=self.dtype)
-        n = self.shared(ones * 5, broadcastable=(True, False))
-        idx = tensor.lvector()
-        t = n[idx]
-        self.assertTrue(isinstance(t.owner.op, tensor.AdvancedSubtensor1))
-
-        f = self.function([idx], t, op=self.adv_sub1)
-        topo = f.maker.fgraph.toposort()
-        topo_ = [node for node in topo if not isinstance(node.op,
-             self.ignore_topo)]
-        assert len(topo_) == 1
-        self.assertTrue(isinstance(topo_[0].op, self.adv_sub1))
-        self.assertTrue(numpy.allclose(f([0]), ones[0] * 5))
-        self.assertRaises(IndexError, f, [0, 1])
-
-    def test_adv_sub1_idx_broadcast(self):
-        # The idx can be a broadcastable vector.
-        ones = numpy.ones((4, 3), dtype=self.dtype)
-        n = self.shared(ones * 5)
-        idx = tensor.TensorType(dtype='int64', broadcastable=(True,))()
-        assert idx.type.broadcastable == (True,)
-        t = n[idx]
-        self.assertTrue(isinstance(t.owner.op, tensor.AdvancedSubtensor1))
-
-        f = self.function([idx], t, op=self.adv_sub1)
-        topo = f.maker.fgraph.toposort()
-        topo_ = [node for node in topo if not isinstance(node.op,
-             self.ignore_topo)]
-        assert len(topo_) == 1
-        self.assertTrue(isinstance(topo_[0].op, self.adv_sub1))
-        self.assertTrue(numpy.allclose(f([0]), ones[0] * 5))
-
-    def test_shape_i_const(self):
-        # Each axis is treated independently by shape_i/shape operators
-
-        mode_opt = self.mode.including("fast_run")
-        data = self.shared(numpy.array(numpy.arange(5), dtype=self.dtype))
-        for start in [None] + [-8, -5, -1, 0, 1, 5, 8]:
-            outs = []
-            shapes = []
-            for stop in [None] + [-8, -5, -1, 0, 1, 5, 8]:
-                for step in [None] + [-3, -1, 2]:
-                    outs += [data[start:stop:step].shape]
-                    shapes += [data.get_value(
-                        borrow=True)[start:stop:step].shape]
-            f = self.function([], outs, mode=mode_opt,
-                              op=self.ops, N=0)
-            t_shapes = f()
-            for t_shape, shape in zip(t_shapes, shapes):
-                assert numpy.all(t_shape == shape)
-            assert tensor.Subtensor not in [x.op for x in
-                                           f.maker.fgraph.toposort()]
-
-    def test_shape_i_scalar(self):
-        # Each axis is treated independently by shape_i/shape operators
-
-        mode_opt = self.mode.including("fast_run")
-
-        v_data = numpy.array(numpy.arange(5), dtype=self.dtype)
-        t_data = self.shared(v_data)
-        start = tensor.iscalar('b')
-        stop = tensor.iscalar('e')
-        step = tensor.iscalar('s')
-        f = self.function([start, stop, step],
-                          t_data[start:stop:step].shape,
-                          mode=mode_opt,
-                          op=self.ops,
-                          N=0)
-        assert tensor.Subtensor not in [x.op for x in f.maker.
-            fgraph.toposort()]
-        for start in [-8, -5, -4, -1, 0, 1, 4, 5, 8]:
-            for stop in [-8, -5, -4, -1, 0, 1, 4, 5, 8]:
-                for step in [-3, -1, 2, 5]:
-                    assert numpy.all(f(start, stop, step) ==
-                                     v_data[start:stop:step].shape)
-
-    def test_slice_canonical_form_0(self):
-        start = tensor.iscalar('b')
-        stop = tensor.iscalar('e')
-        step = tensor.iscalar('s')
-        length = tensor.iscalar('l')
-        cnf = tensor.get_canonical_form_slice(slice(start, stop, step), length)
-        f = self.function([start, stop, step, length], [
-            tensor.as_tensor_variable(cnf[0].start),
-            tensor.as_tensor_variable(cnf[0].stop),
-            tensor.as_tensor_variable(cnf[0].step),
-            tensor.as_tensor_variable(cnf[1])], N=0, op=self.ops)
-
-        length = 5
-        a = numpy.arange(length)
-        for start in [-8, -5, -4, -1, 0, 1, 4, 5, 8]:
-            for stop in  [-8, -5, -4, -1, 0, 1, 4, 5, 8]:
-                for step in [-6, -3, -1, 2, 5]:
-                    out = f(start, stop, step, length)
-                    t_out = a[out[0]:out[1]:out[2]][::out[3]]
-                    v_out = a[start:stop:step]
-                    assert numpy.all(t_out == v_out)
-                    assert numpy.all(t_out.shape == v_out.shape)
-
-    def test_slice_canonical_form_1(self):
-        stop = tensor.iscalar('e')
-        step = tensor.iscalar('s')
-        length = tensor.iscalar('l')
-        cnf = tensor.get_canonical_form_slice(slice(None, stop, step), length)
-        f = self.function([stop, step, length], [
-            tensor.as_tensor_variable(cnf[0].start),
-            tensor.as_tensor_variable(cnf[0].stop),
-            tensor.as_tensor_variable(cnf[0].step),
-            tensor.as_tensor_variable(cnf[1])], N=0, op=self.ops)
-
-        length = 5
-        a = numpy.arange(length)
-        for stop in  [-8, -5, -4, -1, 0, 1, 4, 5, 8]:
-            for step in [-6, -3, -1, 2, 5]:
-                out = f(stop, step, length)
-                t_out = a[out[0]:out[1]:out[2]][::out[3]]
-                v_out = a[:stop:step]
-                assert numpy.all(t_out == v_out)
-                assert numpy.all(t_out.shape == v_out.shape)
-
-    def test_slice_canonical_form_2(self):
-        start = tensor.iscalar('b')
-        step = tensor.iscalar('s')
-        length = tensor.iscalar('l')
-        cnf = tensor.get_canonical_form_slice(slice(start, None, step), length)
-        f = self.function([start, step, length], [
-            tensor.as_tensor_variable(cnf[0].start),
-            tensor.as_tensor_variable(cnf[0].stop),
-            tensor.as_tensor_variable(cnf[0].step),
-            tensor.as_tensor_variable(cnf[1])], N=0, op=self.ops)
-
-        length = 5
-        a = numpy.arange(length)
-        for start in [-8, -5, -4, -1, 0, 1, 4, 5, 8]:
-            for step in [-6, -3, -1, 2, 5]:
-                out = f(start, step, length)
-                t_out = a[out[0]:out[1]:out[2]][::out[3]]
-                v_out = a[start:None:step]
-                assert numpy.all(t_out == v_out)
-                assert numpy.all(t_out.shape == v_out.shape)
-
-    def test_slice_canonical_form_3(self):
-        start = tensor.iscalar('b')
-        stop = tensor.iscalar('e')
-        length = tensor.iscalar('l')
-        cnf = tensor.get_canonical_form_slice(slice(start, stop, None), length)
-        f = self.function([start, stop, length], [
-            tensor.as_tensor_variable(cnf[0].start),
-            tensor.as_tensor_variable(cnf[0].stop),
-            tensor.as_tensor_variable(cnf[0].step),
-            tensor.as_tensor_variable(cnf[1])], N=0, op=self.ops)
-
-        length = 5
-        a = numpy.arange(length)
-        for start in [-8, -5, -4, -1, 0, 1, 4, 5, 8]:
-            for stop in  [-8, -5, -4, -1, 0, 1, 4, 5, 8]:
-                out = f(start, stop, length)
-                t_out = a[out[0]:out[1]:out[2]][::out[3]]
-                v_out = a[start:stop:None]
-                assert numpy.all(t_out == v_out)
-                assert numpy.all(t_out.shape == v_out.shape)
-
-    def test_slice_canonical_form_4(self):
-        step = tensor.iscalar('s')
-        length = tensor.iscalar('l')
-        cnf = tensor.get_canonical_form_slice(slice(None, None, step), length)
-        f = self.function([step, length], [
-            tensor.as_tensor_variable(cnf[0].start),
-            tensor.as_tensor_variable(cnf[0].stop),
-            tensor.as_tensor_variable(cnf[0].step),
-            tensor.as_tensor_variable(cnf[1])], N=0, op=self.ops)
-
-        length = 5
-        a = numpy.arange(length)
-        for step in [-6, -3, -1, 2, 5]:
-            out = f(step, length)
-            t_out = a[out[0]:out[1]:out[2]][::out[3]]
-            v_out = a[None:None:step]
-            assert numpy.all(t_out == v_out)
-            assert numpy.all(t_out.shape == v_out.shape)
-
-    def test_slice_canonical_form_5(self):
-        start = tensor.iscalar('b')
-        length = tensor.iscalar('l')
-        cnf = tensor.get_canonical_form_slice(slice(start, None, None), length)
-        f = self.function([start, length], [
-            tensor.as_tensor_variable(cnf[0].start),
-            tensor.as_tensor_variable(cnf[0].stop),
-            tensor.as_tensor_variable(cnf[0].step),
-            tensor.as_tensor_variable(cnf[1])], N=0, op=self.ops)
-
-        length = 5
-        a = numpy.arange(length)
-        for start in [-8, -5, -4, -1, 0, 1, 4, 5, 8]:
-            out = f(start, length)
-            t_out = a[out[0]:out[1]:out[2]][::out[3]]
-            v_out = a[start:None:None]
-            assert numpy.all(t_out == v_out)
-            assert numpy.all(t_out.shape == v_out.shape)
-
-    def test_slice_canonical_form_6(self):
-        stop = tensor.iscalar('e')
-        length = tensor.iscalar('l')
-        cnf = tensor.get_canonical_form_slice(slice(None, stop, None), length)
-        f = self.function([stop, length], [
-            tensor.as_tensor_variable(cnf[0].start),
-            tensor.as_tensor_variable(cnf[0].stop),
-            tensor.as_tensor_variable(cnf[0].step),
-            tensor.as_tensor_variable(cnf[1])], N=0, op=self.ops)
-
-        length = 5
-        a = numpy.arange(length)
-        for stop in  [-8, -5, -4, -1, 0, 1, 4, 5, 8]:
-            out = f(stop, length)
-            t_out = a[out[0]:out[1]:out[2]][::out[3]]
-            v_out = a[None:stop:None]
-            assert numpy.all(t_out == v_out)
-            assert numpy.all(t_out.shape == v_out.shape)
-
-    def grad_list_(self, idxs, data):
-        n = self.shared(data)
-
-        for idx in idxs:
-            # Should stay on the cpu.
-            idx_ = _shared(numpy.asarray(idx))
-            t = n[idx_]
-            gn = grad(sum(exp(t)), n)
-            f = self.function([], [gn, gn.shape], op=self.adv_incsub1)
-            topo = f.maker.fgraph.toposort()
-            if not self.fast_compile:
-                assert any([isinstance(node.op, self.
-                    adv_incsub1) and node.op.inplace for node in topo])
-            else:
-                assert any([isinstance(node.op, self.
-                    adv_incsub1) for node in topo])
-            assert any([isinstance(node.op, self.adv_sub1) for node in topo])
-            gval, gshape = f()
-            good = numpy.zeros_like(data)
-            # good[idx] += numpy.exp(data[idx]) don't work when the same index is used many time
-            for i in idx:
-                good[i] += numpy.exp(data[i])
-            self.assertTrue(gval.ndim == data.ndim)
-            self.assertTrue(numpy.allclose(gval, good), (gval, good))
-            self.assertTrue(numpy.allclose(gshape, data.shape))
-
-            def fct(t):
-                return sum(t[idx_])
-            utt.verify_grad(fct, [data])
-
-            # Test the grad of the grad (e.i. AdvancedIncSubtensor1.grad)
-            def fct2(t):
-                return grad(sum(t[idx_]), t)
-            utt.verify_grad(fct2, [data])
-
-            # Test shape of AdvancedIncSubtensor1 and AdvancedSubtensor1
-            if not self.fast_compile:
-                ops = (self.adv_incsub1, self.adv_sub1)
-            else:
-                ops = self.ops
-            if idx is idxs[0]:
-                f = self.function([], [gn.shape, n[idx_].shape],
-                                  op=ops,
-                                  N=0, N_fast=2)
-                f()
-
-    def test_wrong_exception_regression(self):
-        a = fscalar()
-        b = fscalar()
-        c = vector()
-        try:
-            c[a:b]
-        except NotImplementedError:
-            self.fail()
-        except TypeError:
-            pass
-        try:
-            c[a:]
-        except NotImplementedError:
-            self.fail()
-        except TypeError:
-            pass
-        try:
-            c[:b]
-        except NotImplementedError:
-            self.fail()
-        except TypeError:
-            pass
-
-    def test_grad_list(self):
-        data = rand(4)
-        data = numpy.asarray(data, dtype=self.dtype)
-        idxs = [[i] for i in range(data.shape[0])]
-        for i in range(data.shape[0]):
-            for j in range(0, data.shape[0], 2):
-                idxs.append([i, j, (i + 1) % data.shape[0]])
-        self.grad_list_(idxs, data)
-
-        data = rand(4, 3)
-        data = numpy.asarray(data, dtype=self.dtype)
-        self.grad_list_(idxs, data)
-
-        data = rand(4, 3, 2)
-        data = numpy.asarray(data, dtype=self.dtype)
-        self.grad_list_(idxs, data)
-
-    def test_shape_list(self):
-        #TODO for all type of subtensor shape
-        for data, idx in [(rand(4), [1, 0]),
-                          (rand(4, 2), [2, 3]),
-                          (rand(4, 2, 3), [0, 3]),
-                          (rand(4, 2, 3), [3, 3, 1, 2, 2, ]),
-                          ]:
-            data = numpy.asarray(data, dtype=self.dtype)
-            n = self.shared(data)
-            t = n[idx]
-            f = self.function([], t.shape, op=self.ops, N=0, N_fast=1)
-            val = f()
-            self.assertTrue(numpy.allclose(val, data[idx].shape))
-
-    def test_grad_advanced_inc_subtensor(self):
-        def inc_slice(*s):
-            def just_numeric_args(a, b):
-                cost = (a[s] + b).sum()
-                cost_wrt_a = grad(cost, a)
-                cost_wrt_b = grad(cost, b)
-                grads = cost_wrt_a.sum() + cost_wrt_b.sum()
-                return grads
-            return just_numeric_args
-
-        # vector
-        utt.verify_grad(
-            inc_slice(slice(2, 4, None)),
-            (numpy.asarray([0, 1, 2, 3, 4, 5.]), numpy.asarray([9, 9.]),))
-
-        # matrix
-        utt.verify_grad(
-            inc_slice(slice(1, 2, None), slice(None, None, None)),
-            (numpy.asarray([[0, 1], [2, 3], [4, 5.]]),
-             numpy.asarray([[9, 9.]]),))
-
-        #single element
-        utt.verify_grad(
-            inc_slice(2, 1),
-            (numpy.asarray([[0, 1], [2, 3], [4, 5.]]), numpy.asarray(9.),))
-
-    def test_advanced_inc_and_set(self):
-        """
-        Test advanced increment and set.
-        """
-        rng = numpy.random.RandomState(seed=utt.fetch_seed())
-        all_inputs_var = []
-        all_inputs_num = []
-        all_outputs_var = []
-        all_outputs_num = []
-        for set_instead_of_inc in (False, True):
-            for inplace in (False, True):
-                for data_shape in ((10,), (4, 5), (1, 2, 3), (4, 5, 6, 7)):
-                    data_n_dims = len(data_shape)
-                    data_size = numpy.product(data_shape)
-                    # Corresponding numeric variable.
-                    data_num_init = numpy.arange(data_size, dtype=self.dtype)
-                    data_num_init = data_num_init.reshape(data_shape)
-                    inc_shapes = [data_shape[i:]
-                                  for i in xrange(0, len(data_shape) + 1)]
-                    for inc_shape in inc_shapes:
-                        inc_n_dims = len(inc_shape)
-                        # We copy the numeric value to be 100% sure there is no
-                        # risk of accidentally sharing it.
-                        data_num = data_num_init.copy()
-                        # Symbolic variable to be incremented.
-                        # We create a new one every time in order not to
-                        # have duplicated variables in the function's inputs
-                        data_var = tensor.tensor(
-                                broadcastable=[False] * data_n_dims,
-                                dtype=self.dtype)
-                        # Symbolic variable with rows to be incremented.
-                        idx_var = theano.tensor.vector(dtype='int64')
-                        n_to_inc = rng.randint(data_shape[0])
-                        # Corresponding numeric variable.
-                        idx_num = rng.randint(0, data_shape[0], n_to_inc)
-                        idx_num = idx_num.astype('int64')
-                        # Symbolic variable with increment value.
-                        inc_var = tensor.tensor(
-                                broadcastable=[False] * inc_n_dims,
-                                dtype=self.dtype)
-                        # Trick for the case where `inc_shape` is the same as
-                        # `data_shape`: what we actually want is the first
-                        # shape element to be equal to the number of rows to
-                        # increment.
-                        if len(inc_shape) == len(data_shape):
-                            inc_shape = (n_to_inc,) + inc_shape[1:]
-                        inc_size = numpy.product(inc_shape)
-                        # Corresponding numeric variable.
-                        inc_num = rng.uniform(size=inc_size).astype(self.dtype)
-                        inc_num = inc_num.reshape(inc_shape)
-                        # Result of the incrementation.
-                        # (i) Theano
-                        if set_instead_of_inc:
-                            op = set_subtensor
-                        else:
-                            op = inc_subtensor
-                        output = op(data_var[idx_var], inc_var,
-                                    inplace=inplace)
-                        # (ii) Numpy (note that Numpy increments only once
-                        # duplicated indices, so we cannot directly use +=).
-                        data_copy = data_num.copy()
-                        for j, idx in enumerate(idx_num):
-                            if len(inc_shape) == len(data_shape):
-                                # Special case where there is no broadcasting.
-                                if set_instead_of_inc:
-                                    data_copy[idx] = inc_num[j]
-                                else:
-                                    data_copy[idx] += inc_num[j]
-                            else:
-                                if set_instead_of_inc:
-                                    data_copy[idx] = inc_num
-                                else:
-                                    data_copy[idx] += inc_num
-                        data_var = theano.In(data_var, mutable=True)
-
-                        # Remember data for the Theano function (see below).
-                        all_inputs_var += [data_var, idx_var, inc_var]
-                        all_inputs_num += [data_num, idx_num, inc_num]
-                        all_outputs_var.append(output)
-                        all_outputs_num.append(data_copy)
-                        if False:  # Enable for debugging purpose.
-                            f = self.function([data_var, idx_var, inc_var],
-                                              output, accept_inplace=inplace,
-                                              op=self.adv_incsub1)
-                            if inplace:
-                                # Ensure calling `f` will not alter `data_num`.
-                                data_num = data_num.copy()
-                            f_out = f(data_num.copy(), idx_num, inc_num)
-                            assert numpy.allclose(f_out, data_copy)
-                            if not inplace:
-                                # Sanity check: `data_num` should be intact.
-                                assert (data_num == data_num_init).all()
-
-        # Actual test (we compile a single Theano function to make it faster).
-        orig_warn = theano.config.warn.gpu_set_subtensor1
-        try:
-            theano.config.warn.gpu_set_subtensor1 = False
-            f = self.function(all_inputs_var, all_outputs_var,
-                              accept_inplace=True,
-                              op=self.adv_incsub1,
-                              N=len(all_outputs_var))
-        finally:
-            theano.config.warn.gpu_set_subtensor1 = orig_warn
-
-        f_outs = f(*all_inputs_num)
-        assert len(f_outs) == len(all_outputs_num)
-        for f_out, output_num in izip(f_outs, all_outputs_num):
-            # NB: if this assert fails, it will probably be easier to debug if
-            # you enable the debug code above.
-            assert numpy.allclose(f_out, output_num)
-
-    def test_adv_constant_arg(self):
-        # Test case provided (and bug detected, gh-607) by John Salvatier
-        m = matrix('m')
-        gv = numpy.array([0, 1, 3])
-        g = constant(gv)
-        i = lvector('i')
-
-        # s1 used to fail
-        s1 = m[gv, i]
-        s2 = m[g, i]
-
-        assert gof.graph.is_same_graph(s1, s2)
-
-    def test_adv1_inc_sub_notlastdim(self):
-        # Test that taking 1-dimensional advanced indexing
-        # over a dimension that's not the first (outer-most) works.
-        m = matrix('m')
-        i = lvector('i')
-
-        m1 = set_subtensor(m[:, i], 0)
-        m2 = inc_subtensor(m[:, i], 1)
-        f = theano.function([m, i], [m1, m2])
-
-        m_val = rand(3, 5)
-        i_val = randint_ranged(min=0, max=4, shape=(4,))
-        m1_ref = m_val.copy()
-        m2_ref = m_val.copy()
-
-        m1_val, m2_val = f(m_val, i_val)
-        for idx in i_val:
-            m1_ref[:, idx] = 0
-            m2_ref[:, idx] += 1
-
-        assert numpy.allclose(m1_val, m1_ref), (m1_val, m1_ref)
-        assert numpy.allclose(m2_val, m2_ref), (m2_val, m2_ref)
-
-    def test_adv1_inc_sub_notlastdim_2didx(self):
-        # Test that taking 1-dimensional advanced indexing
-        # over a dimension that's not the first (outer-most) works,
-        # if the index is a matrix.
-        m = matrix('m')
-        i = lmatrix('i')
-
-        m1 = set_subtensor(m[:, i], 0)
-        m2 = inc_subtensor(m[:, i], 1)
-        f = theano.function([m, i], [m1, m2])
-
-        m_val = rand(5, 7)
-        i_val = randint_ranged(min=0, max=6, shape=(4, 2))
-        m1_ref = m_val.copy()
-        m2_ref = m_val.copy()
-
-        m1_val, m2_val = f(m_val, i_val)
-        for idx in i_val.ravel():
-            m1_ref[:, idx] = 0
-            m2_ref[:, idx] += 1
-
-        assert numpy.allclose(m1_val, m1_ref), (m1_val, m1_ref)
-        assert numpy.allclose(m2_val, m2_ref), (m2_val, m2_ref)
-
-
-class TestIncSubtensor1(unittest.TestCase):
-    # test inc_subtensor
-    # also tests set_subtensor
-
-    def setUp(self):
-        self.s = iscalar()
-        self.v = fvector()
-        self.m = dmatrix()
-        self.t = ctensor3()
-
-        self.adv1q = lvector()  # advanced 1d query
-
-    def test_cant_adv_idx_into_scalar(self):
-        self.assertRaises(TypeError, lambda: self.s[self.adv1q])
-
-    def test_index_into_vec_w_vec(self):
-        a = self.v[self.adv1q]
-        assert a.type == self.v.type
-
-    def test_1d_set_adv_selection(self):
-        a = set_subtensor(self.v[self.adv1q], self.v[self.adv1q])
-
-        assert a.type == self.v.type
-
-        #TODO: compile a function and verify that the subtensor is removed
-        #      completely, because the whole expression is redundant.
-
-        f = theano.function([self.v, self.adv1q], a, allow_input_downcast=True)
-        aval = f([.4, .9, .1], [1, 2])
-        assert numpy.allclose(aval, [.4, 0.9, 0.1])
-
-    def test_1d_inc_adv_selection(self):
-        a = inc_subtensor(self.v[self.adv1q], self.v[self.adv1q])
-
-        assert a.type == self.v.type
-        f = theano.function([self.v, self.adv1q], a, allow_input_downcast=True)
-        aval = f([.4, .9, .1], [1, 2])
-        assert numpy.allclose(aval, [.4, 1.8, 0.2])
-
-    def test_1d_inc_adv_selection_w_broadcasting(self):
-        a = inc_subtensor(self.v[self.adv1q], 3.0)
-
-        assert a.type == self.v.type
-        f = theano.function([self.v, self.adv1q], a, allow_input_downcast=True)
-        aval = f([.4, .9, .1], [1, 2])
-        assert numpy.allclose(aval, [.4, 3.9, 3.1])
-
-    def test_assigning_matrix_to_vector_selection(self):
-        self.assertRaises(TypeError,
-                lambda: inc_subtensor(self.v[self.adv1q], fmatrix()))
-
-inplace_increment_missing = SkipTest("inc_subtensor with advanced indexing not enabled. "
-                       "Installing NumPy 1.8 or the latest development version "
-                       "should make that feature available.")
-
-class TestAdvancedSubtensor(unittest.TestCase):
-    # test inc_subtensor
-    # also tests set_subtensor
-
-    def setUp(self):
-        self.s = iscalar()
-        self.v = fvector()
-        self.m = dmatrix()
-        self.t = ctensor3()
-
-        self.ix1 = lvector()  # advanced 1d query
-        self.ix12 = lvector()
-        self.ix2 = lmatrix()
-
-    def test_cant_adv_idx_into_scalar(self):
-        self.assertRaises(TypeError, lambda: self.s[self.ix1])
-
-    def test_index_into_vec_w_vec(self):
-        a = self.v[self.ix1]
-        assert a.type == self.v.type, (a.type, self.v.type)
-
-    def test_index_into_vec_w_matrix(self):
-        a = self.v[self.ix2]
-        assert a.dtype == self.v.dtype, (a.dtype, self.v.dtype)
-        assert a.broadcastable == self.ix2.broadcastable, (
-                a.broadcastable, self.ix2.broadcastable)
-
-    def test_inc_adv_subtensor_w_matrix(self):
-        if inplace_increment is None:
-            raise inplace_increment_missing
-
-        subt = self.v[self.ix2]
-        a = inc_subtensor(subt,subt)
-
-        assert a.type == self.v.type, (a.type, self.v.type)
-        f = theano.function([self.v, self.ix2], a, allow_input_downcast=True)
-        aval = f([.4, .9, .1], [[1, 2],
-                                [1, 2]])
-        assert numpy.allclose(aval, [.4, .9 * 3, .1 * 3])
-
-    def test_inc_adv_subtensor_w_2vec(self):
-        if inplace_increment is None:
-            raise inplace_increment_missing
-
-        subt = self.m[self.ix1, self.ix12]
-        a = inc_subtensor(subt, subt)
-
-        typ = TensorType(self.m.type.dtype, self.ix2.type.broadcastable)
-        assert a.type == typ, (a.type, typ)
-        f = theano.function([self.m, self.ix1, self.ix12], a,
-                            allow_input_downcast=True)
-        aval = f([[.4, .9, .1],
-                  [5,   6,  7],
-                  [.5, .3, .15]],
-                 [1, 2, 1],
-                 [0, 1, 0])
-        assert numpy.allclose(aval,
-                [[.4, .9, .1],
-                  [5 * 3,   6,  7],
-                  [.5, .3 * 2, .15]]), aval
-
-    def test_inc_adv_subtensor_with_broadcasting(self):
-        if inplace_increment is None:
-            raise inplace_increment_missing
-
-        a = inc_subtensor(self.m[self.ix1, self.ix12], 2.1)
-
-        assert a.type == self.m.type, (a.type, self.m.type)
-        f = theano.function([self.m, self.ix1, self.ix12], a,
-                            allow_input_downcast=True)
-        aval = f([[.4, .9, .1],
-                  [5,   6,  7],
-                  [.5, .3, .15]],
-                 [1, 2, 1],
-                 [0, 1, 0])
-        assert numpy.allclose(aval,
-                [[.4, .9, .1],
-                  [5 + 2.1 * 2,   6,  7],
-                  [.5, .3 + 2.1, .15]]), aval
-
-    def test_inc_adv_subtensor_with_index_broadcasting(self):
-        if inplace_increment is None:
-            raise inplace_increment_missing
-
-        a = inc_subtensor(self.m[self.ix1, self.ix2], 2.1)
-
-        assert a.type == self.m.type, (a.type, self.m.type)
-        f = theano.function([self.m, self.ix1, self.ix2], a,
-                            allow_input_downcast=True)
-        aval = f([[.4, .9, .1],
-                  [5,   6,  7],
-                  [.5, .3, .15]],
-                 [0, 2, 0],
-                 [[0, 1, 0],
-                  [2, 2, 2]])
-        assert numpy.allclose(aval,
-                [[.4 + 2*2.1, .9, .1 + 2*2.1],
-                  [5 ,   6,  7 ],
-                  [.5, .3 + 2.1, .15 + 2.1]]), aval
-
 class T_Join_and_Split(unittest.TestCase):
    """
    Split is tested by each verify_grad method.
@@ -6857,7 +5750,7 @@ class test_complex_mod(unittest.TestCase):
        try:
            x % 5
            assert False
-        except ComplexError:
+        except theano.scalar.ComplexError:
            pass


@@ -7346,166 +6239,6 @@ class TestInferShape(utt.InferShapeTester):
                                [Mean(aiscal_val)(adtens3)],
                                [adtens3_val], Mean)

-        # IncSubtensor
-        admat = dmatrix()
-        bdmat = dmatrix()
-        advec = dvector()
-        adscal = dscalar()
-        admat_val = rand(5, 4)
-        self._compile_and_check([admat, bdmat],
-                            [inc_subtensor(admat[2:4], bdmat)],
-                            [admat_val, [[1, 2, 3, 4]]], IncSubtensor)
-
-        self._compile_and_check([admat, advec],
-                            [inc_subtensor(admat[2], advec)],
-                            [admat_val, [1, 2, 3, 4]], IncSubtensor)
-
-        self._compile_and_check([admat, adscal],
-                            [inc_subtensor(admat[2, 3], adscal)],
-                            [admat_val, 1], IncSubtensor)
-
-        self._compile_and_check([admat, adscal],
-                            [inc_subtensor(admat[1:3, 2], adscal)],
-                            [admat_val, 1], IncSubtensor)
-
-        self._compile_and_check([admat, bdmat],
-                            [set_subtensor(admat[2:4], bdmat)],
-                            [admat_val, [[1, 2, 3, 4]]], IncSubtensor)
-
-        self._compile_and_check([admat, advec],
-                            [set_subtensor(admat[2], advec)],
-                            [admat_val, [1, 2, 3, 4]], IncSubtensor)
-
-        self._compile_and_check([admat, adscal],
-                            [set_subtensor(admat[2, 3], adscal)],
-                            [admat_val, 1], IncSubtensor)
-
-        self._compile_and_check([admat, adscal],
-                            [set_subtensor(admat[1:3, 2], adscal)],
-                            [admat_val, 1], IncSubtensor)
-
-        bdtens4 = dtensor4()
-        adtens4_val = rand(3, 4, 2, 5)
-        self._compile_and_check([adtens4, bdtens4],
-                            [inc_subtensor(adtens4[::, 2:4, ::, ::], bdtens4)],
-                            [adtens4_val, [[[[1, 2, 3, 4, 5]]]]], IncSubtensor,
-                            warn=False)
-        self._compile_and_check([adtens4, bdmat],
-                            [inc_subtensor(adtens4[2, 2:4, 1, ::], bdmat)],
-                            [adtens4_val, [[1, 2, 3, 4, 5]]], IncSubtensor)
-
-        self._compile_and_check([adtens4, advec],
-                            [inc_subtensor(adtens4[0, 1, ::, 4], advec)],
-                            [adtens4_val, [1, 2]], IncSubtensor)
-
-        self._compile_and_check([adtens4, adscal],
-                            [inc_subtensor(adtens4[1:3, 1, ::, 2:4], adscal)],
-                            [adtens4_val, 1], IncSubtensor)
-
-        self._compile_and_check([adtens4, bdtens4],
-                            [set_subtensor(adtens4[::, 2:4, ::, ::], bdtens4)],
-                            [adtens4_val, [[[[1, 2, 3, 4, 5]]]]], IncSubtensor,
-                            warn=False)
-
-        self._compile_and_check([adtens4, bdmat],
-                            [set_subtensor(adtens4[2, 2:4, 1, ::], bdmat)],
-                            [adtens4_val, [[1, 2, 3, 4, 5]]], IncSubtensor)
-
-        self._compile_and_check([adtens4, advec],
-                            [set_subtensor(adtens4[0, 1, ::, 4], advec)],
-                            [adtens4_val, [1, 2]], IncSubtensor)
-
-        self._compile_and_check([adtens4, adscal],
-                            [set_subtensor(adtens4[1:3, 1, ::, 2:4], adscal)],
-                            [adtens4_val, 1], IncSubtensor)
-
-        # AdvancedIncSubtensor1
-        admat = dmatrix()
-        bdmat = dmatrix()
-        advec = dvector()
-        adscal = dscalar()
-        admat_val = rand(5, 4)
-        aivec_val = [2, 3]
-        self._compile_and_check([admat, bdmat],
-                            [set_subtensor(admat[aivec_val], bdmat)],
-                            [admat_val, [[1, 2, 3, 4]]], AdvancedIncSubtensor1)
-
-        aivec_val = [1, 3, 2]
-        self._compile_and_check([admat, advec],
-                            [set_subtensor(admat[aivec_val], advec)],
-                            [admat_val, [1, 2, 3, 4]], AdvancedIncSubtensor1)
-
-        aivec_val = [0, 3, 0]
-        self._compile_and_check([admat, adscal],
-                            [set_subtensor(admat[aivec_val], adscal)],
-                            [admat_val, 1], AdvancedIncSubtensor1)
-
-        bdtens4 = dtensor4()
-        adtens4_val = rand(4, 3, 2, 5)
-        aivec_val = [2, 3]
-        self._compile_and_check([adtens4, bdtens4],
-                            [set_subtensor(adtens4[aivec_val], bdtens4)],
-                            [adtens4_val, [[[[1, 2, 3, 4, 5]]]]],
-                            AdvancedIncSubtensor1,
-                            warn=False)
-
-        aivec_val = [1, 3, 2]
-        self._compile_and_check([adtens4, advec],
-                            [set_subtensor(adtens4[aivec_val], advec)],
-                            [adtens4_val, [1, 2, 3, 4, 5]],
-                            AdvancedIncSubtensor1)
-
-        aivec_val = [0, 3, 0]
-        self._compile_and_check([adtens4, adscal],
-                            [set_subtensor(adtens4[aivec_val], adscal)],
-                            [adtens4_val, 1],
-                            AdvancedIncSubtensor1)
-
-        aivec_val = [2, 3]
-        self._compile_and_check([admat, bdmat],
-                                [inc_subtensor(admat[aivec_val], bdmat)],
-                                [admat_val, [[1, 2, 3, 4], [5, 6, 7, 8]]],
-                                AdvancedIncSubtensor1)
-
-        aivec_val = [1, 3, 2]
-        self._compile_and_check([admat, advec],
-                            [inc_subtensor(admat[aivec_val], advec)],
-                            [admat_val, [1, 2, 3, 4]], AdvancedIncSubtensor1)
-
-        aivec_val = [0, 3, 0]
-        self._compile_and_check([admat, adscal],
-                            [inc_subtensor(admat[aivec_val], adscal)],
-                            [admat_val, 1], AdvancedIncSubtensor1)
-
-        bdtens4 = dtensor4()
-        adtens4_val = rand(4, 3, 2, 5)
-        aivec_val = [2, 3]
-        self._compile_and_check([adtens4, bdtens4],
-                            [inc_subtensor(adtens4[aivec_val], bdtens4)],
-                            [adtens4_val, [[[[1, 2, 3, 4, 5]]],
-                                           [[[6, 7, 8, 9, 10]]]]],
-                            AdvancedIncSubtensor1,
-                            warn=False)
-
-        aivec_val = [1, 2, 1]
-        self._compile_and_check([adtens4, advec],
-                            [inc_subtensor(adtens4[aivec_val], advec)],
-                            [adtens4_val, [1, 2, 3, 4, 5]],
-                            AdvancedIncSubtensor1)
-
-        aivec_val = [0, 3, 0]
-        self._compile_and_check([adtens4, adscal],
-                            [inc_subtensor(adtens4[aivec_val], adscal)],
-                            [adtens4_val, 2],
-                            AdvancedIncSubtensor1)
-
-        # AdvancedIncSubtensor
-        aivec_val = [1, 3, 2]
-        bivec_val = [0, 3, 3]
-        advec_val = [23, 24, 25]
-        self._compile_and_check([admat, advec],
-                    [set_subtensor(admat[aivec_val, bivec_val], advec)],
-                    [admat_val, advec_val], AdvancedIncSubtensor)

        # Reshape
        # TODO: generalize infer_shape to account for tensor variable

--- a/theano/tensor/tests/test_opt.py
+++ b/theano/tensor/tests/test_opt.py
@@ -2034,7 +2034,7 @@ class test_local_subtensor_merge(unittest.TestCase):
        val = fun(data)
        assert numpy.all(val == data[3:6, 2:6, 1:7][1])
        assert len([n for n in fun.maker.fgraph.toposort()
-                    if isinstance(n.op, tensor.basic.Subtensor)]) == nops
+                    if isinstance(n.op, Subtensor)]) == nops

        # test 2)
        y = x[2, 3][1]
@@ -2042,7 +2042,7 @@ class test_local_subtensor_merge(unittest.TestCase):
        val = fun(data)
        assert numpy.all(val == data[2, 3][1])
        assert len([n for n in fun.maker.fgraph.toposort()
-                    if isinstance(n.op, tensor.basic.Subtensor)]) == nops
+                    if isinstance(n.op, Subtensor)]) == nops

        # test 3)
        y = x[3:6, 2, 1:7][1]
@@ -2050,7 +2050,7 @@ class test_local_subtensor_merge(unittest.TestCase):
        val = fun(data)
        assert numpy.all(val == data[3:6, 2, 1:7][1])
        assert len([n for n in fun.maker.fgraph.toposort()
-                    if isinstance(n.op, tensor.basic.Subtensor)]) == nops
+                    if isinstance(n.op, Subtensor)]) == nops

    def test_scalar6(self):
        # General case with one slice and one index

--- a/theano/tensor/tests/test_subtensor.py
+++ b/theano/tensor/tests/test_subtensor.py
+from itertools import izip
+import logging
+import sys
+import unittest
+
+from nose.plugins.skip import SkipTest
+import numpy
+
+import theano
+from theano.compat import exc_message
+from theano.compat.six import StringIO
+from theano.compile import DeepCopyOp
+from theano import config
+from theano import gof
+import theano.scalar as scal
+import theano.tensor as tensor
+from theano.tests import unittest_tools as utt
+from theano.tensor.subtensor import (inc_subtensor, set_subtensor,
+                                     Subtensor, IncSubtensor,
+                                     AdvancedSubtensor1, AdvancedSubtensor,
+                                     advanced_subtensor1, inplace_increment,
+                                     AdvancedIncSubtensor1,
+                                     AdvancedIncSubtensor,
+                                     get_canonical_form_slice)
+from theano.tensor import (as_tensor_variable, _shared,
+                           NotScalarConstantError,
+                           fscalar, iscalar, dscalar, cscalar,
+                           vector, dvector, fvector, lvector,
+                           fmatrix, dmatrix, lmatrix, matrix,
+                           ctensor3, dtensor4)
+from theano.tensor.tests.test_basic import rand, randint_ranged, inplace_func
+
+
+class T_subtensor(unittest.TestCase, utt.TestOptimizationMixin):
+    """
+    This is build in a way that allow to reuse it to test the
+    equivalent gpu op.
+    """
+    def __init__(self, name, shared=tensor._shared,
+                 sub=tensor.Subtensor,
+                 inc_sub=tensor.IncSubtensor,
+                 adv_sub1=tensor.AdvancedSubtensor1,
+                 adv_incsub1=tensor.AdvancedIncSubtensor1,
+                 mode=None,
+                 dtype=theano.config.floatX,
+                 ignore_topo=DeepCopyOp):
+        """Record the constructor, op classes and settings used by the tests.
+
+        :param name: forwarded unchanged to the parent class constructor.
+        :param shared: callable used by the tests to build the shared
+                       variables that get indexed.
+        :param sub: op class stored as self.sub.
+        :param inc_sub: op class stored as self.inc_sub.
+        :param adv_sub1: op class stored as self.adv_sub1.
+        :param adv_incsub1: op class stored as self.adv_incsub1.
+        :param mode: compilation mode; when None, the result of
+                     theano.compile.mode.get_default_mode() is used.
+        :param dtype: dtype given to the numeric test data.
+        :param ignore_topo: op class(es) whose nodes are filtered out of
+                            the toposort in eval_output_and_check.
+        """
+        self.shared = shared
+        self.sub = sub
+        self.inc_sub = inc_sub
+        self.adv_sub1 = adv_sub1
+        self.adv_incsub1 = adv_incsub1
+        if mode is None:
+            mode = theano.compile.mode.get_default_mode()
+        self.mode = mode
+        self.dtype = dtype
+        self.ignore_topo = ignore_topo
+        # True only when the globally configured mode is FAST_COMPILE;
+        # self.function() uses it to pick N_fast over N.
+        self.fast_compile = theano.config.mode == 'FAST_COMPILE'
+        # All four op classes grouped together, for tests that pass
+        # op=self.ops to self.function().
+        self.ops = (sub, inc_sub, adv_sub1, adv_incsub1)
+        return super(T_subtensor, self).__init__(name)
+
+    def function(self, inputs, outputs, accept_inplace=False,
+                 op=None, mode=None, N=1, N_fast=None):
+        """Wrapper around theano.function that also checks the compiled graph.
+
+        :param inputs: passed to theano.function.
+        :param outputs: passed to theano.function.
+        :param accept_inplace: passed to theano.function.
+        :param op: op class (or classes) handed to
+                   assertFunctionContainsClassN; defaults to self.sub.
+        :param mode: compilation mode; defaults to self.mode.
+        :param N: the number of `op` nodes expected in the compiled
+                  function.
+        :param N_fast: when not None and self.fast_compile is true, this
+                       value is used instead of N.
+        :return: the compiled function.
+        """
+        if self.fast_compile and N_fast is not None:
+            N = N_fast
+        if mode is None:
+            mode = self.mode
+        if op is None:
+            op = self.sub
+
+        f = theano.function(inputs, outputs, mode=mode,
+                            accept_inplace=accept_inplace)
+        self.assertFunctionContainsClassN(f, op, N)
+        return f
+
+    def setUp(self):
+        # Test fixture: switch off the Subtensor.debug flag and seed the
+        # random number generator through utt.seed_rng().
+        Subtensor.debug = False
+        utt.seed_rng()
+
+    def eval_output_and_check(self, t, list=False):
+        """Compile `t` with no inputs, check the graph and return its value.
+
+        After dropping the nodes whose op is an instance of
+        self.ignore_topo, the compiled graph must contain exactly one
+        node.
+
+        :param t: the output(s) to compile with inplace_func.
+        :param list: when false, the remaining node's op must be an
+                     instance of self.sub; otherwise of self.adv_sub1.
+        :return: the value obtained by calling the compiled function.
+        """
+        f = inplace_func([], t, mode=self.mode)
+        topo = f.maker.fgraph.toposort()
+        topo_ = [node for node in topo if not isinstance(node.op,
+             self.ignore_topo)]
+        assert len(topo_) == 1
+        if not list:
+            assert isinstance(topo_[0].op, self.sub)
+        else:
+            assert isinstance(topo_[0].op, self.adv_sub1)
+        tval = f()
+        return tval
+
+    def test0_err_invalid(self):
+        # It is impossible to retrieve a view of a 0-d tensor: indexing
+        # one must raise a ValueError carrying a `subtensor_invalid`
+        # attribute.
+        n = self.shared(numpy.ones((), dtype=self.dtype))
+        try:
+            t = n[0]
+        except ValueError, e:
+            self.assertTrue(hasattr(e, 'subtensor_invalid'))
+            return
+        # No exception was raised while building the graph.
+        self.fail()
+
+    def test1_err_bounds(self):
+        # Index 7 is out of range for a vector of length 3.  Building the
+        # graph must succeed; the error is expected when evaluating it.
+        n = self.shared(numpy.ones(3, dtype=self.dtype))
+        # The graph is built with compute_test_value switched off
+        # (presumably so the bad index is not evaluated eagerly -- to be
+        # confirmed), then the previous setting is restored.
+        ctv_backup = config.compute_test_value
+        config.compute_test_value = 'off'
+        try:
+            t = n[7]
+        finally:
+            config.compute_test_value = ctv_backup
+        self.assertTrue(isinstance(t.owner.op, Subtensor))
+        # Silence expected error messages
+        _logger = logging.getLogger('theano.gof.opt')
+        oldlevel = _logger.level
+        _logger.setLevel(logging.CRITICAL)
+        try:
+            try:
+                self.eval_output_and_check(t)
+                # Reaching this line means no error was raised.  The
+                # resulting AssertionError is caught just below and
+                # re-raised unless its message contains 'out of bounds',
+                # which makes the test fail.
+                assert 0
+            except Exception, e:
+                # Only errors mentioning 'out of bounds' are expected.
+                if 'out of bounds' not in exc_message(e):
+                    raise
+        finally:
+            _logger.setLevel(oldlevel)
+
+    def test1_err_subslice(self):
+        # A slice whose `stop` is itself a slice is not a valid index:
+        # building the graph must raise some exception.
+        n = self.shared(numpy.ones(3, dtype=self.dtype))
+        try:
+            t = n[slice(0, slice(1, 2, None), None)]
+        except Exception, e:
+            ### Relax constraint on the type of Exception,
+            ### since this might be handled by AdvancedSubtensor
+            #if e[0] != Subtensor.e_indextype:
+            #    raise
+            return
+        # No exception was raised while building the graph.
+        self.fail()
+
+    def test1_ok_range_finite(self):
+        # Slice with both bounds given, on the vector [0, 1, 2].
+        n = self.shared(numpy.arange(3, dtype=self.dtype))
+        t = n[0:2]
+        self.assertTrue(isinstance(t.owner.op, Subtensor))
+        tval = self.eval_output_and_check(t)
+        self.assertTrue(tval.shape == (2,))
+        self.assertTrue((tval == [0, 1]).all())
+
+    def test2_ok_range_finite(self):
+        # Slice on the first dimension combined with an integer index on
+        # the second one, on a 3x4 matrix.
+        n = self.shared(numpy.arange(12, dtype=self.dtype).reshape((3, 4)))
+        # Also check negative index
+        for idx in [(slice(0, 2), 3), ((slice(0, 2), -1)), (slice(0, 2), -4)]:
+            t = n[idx]  # e.g. n[0:2, 3]
+            self.assertTrue(isinstance(t.owner.op, Subtensor))
+            tval = self.eval_output_and_check(t)
+            self.assertTrue(tval.shape == (2,))
+            # The reference is the same index applied to the numeric value.
+            self.assertTrue(numpy.allclose(tval, n.get_value()[idx]))
+
+    def test1_0_dims(self):
+        # Apply a Subtensor built with an empty index list to a 0-d
+        # shared variable.
+        n = self.shared(numpy.ones((), dtype=self.dtype))
+        t = theano.tensor.Subtensor([])(n)
+        self.assertTrue(isinstance(t.owner.op, Subtensor))
+        # Temporarily exclude "local_useless_subtensor" from the mode
+        # (presumably so the Subtensor node is still in the compiled graph
+        # for eval_output_and_check to find -- to be confirmed); the
+        # original mode is restored afterwards.
+        mode = self.mode
+        self.mode = mode.excluding("local_useless_subtensor")
+        try:
+            self.eval_output_and_check(t)
+        finally:
+            self.mode = mode
+
+    def test1_err_invalid(self):
+        # Two indices on a 1-d variable: building the graph must raise a
+        # ValueError carrying a `subtensor_invalid` attribute.
+        n = self.shared(numpy.ones(1, dtype=self.dtype))
+        try:
+            t = n[0, 0]
+        except ValueError, e:
+            self.assertTrue(hasattr(e, 'subtensor_invalid'))
+            return
+        # No exception was raised while building the graph.
+        self.fail()
+
+    def test1_ok_elem(self):
+        # Integer index on a one-element vector holding 5: the result is
+        # a 0-d value.
+        n = self.shared(numpy.ones(1, dtype=self.dtype) * 5)
+        t = n[0]
+        self.assertTrue(isinstance(t.owner.op, Subtensor))
+        tval = self.eval_output_and_check(t)
+        self.assertTrue(tval.shape == ())
+        self.assertTrue(tval == 5.0)
+
+    def test1_ok_range_infinite(self):
+        # Slice with no upper bound, on the vector [0, 1, 2].
+        #Subtensor.debug = True
+        n = self.shared(numpy.arange(3, dtype=self.dtype))
+        t = n[1:]
+        self.assertTrue(isinstance(t.owner.op, Subtensor))
+        tval = self.eval_output_and_check(t)
+        self.assertTrue(tval.shape == (2,))
+        self.assertTrue((tval == [1.0, 2.0]).all())
+
+    def test1_ok_strided(self):
+        n = self.shared(numpy.arange(5, dtype=self.dtype))
+        t = n[1::2]
+        self.assertTrue(isinstance(t.owner.op, Subtensor))
+        tval = self.eval_output_and_check(t)
+        self.assertTrue(tval.shape == (2,))
+        self.assertTrue((tval == [1.0, 3.0]).all())
+
+        t = n[0:-1:2]  # 0 to 1 from the end stepping by 2
+        tval = self.eval_output_and_check(t)
+        self.assertTrue(tval.shape == (2,))
+        self.assertTrue((tval == [0.0, 2.0]).all())
+
+    def test2_err_bounds0(self):
+        n = self.shared(numpy.ones((2, 3), dtype=self.dtype) * 5)
+        ctv_backup = config.compute_test_value
+        config.compute_test_value = 'off'
+        try:
+            for idx in [(0, 4), (0, -4)]:
+                t = n[idx]
+                self.assertTrue(isinstance(t.owner.op, Subtensor))
+                # Silence expected warnings
+                _logger = logging.getLogger('theano.gof.opt')
+                oldlevel = _logger.level
+                _logger.setLevel(logging.CRITICAL)
+                try:
+                    self.assertRaises(IndexError,
+                                      self.eval_output_and_check, [t])
+                finally:
+                    _logger.setLevel(oldlevel)
+        finally:
+            config.compute_test_value = ctv_backup
+
+    def test2_err_bounds1(self):
+        n = self.shared((numpy.ones((2, 3), dtype=self.dtype) * 5))
+        t = n[4:5, 3]
+        self.assertTrue(isinstance(t.owner.op, Subtensor))
+        old_stderr = sys.stderr
+        sys.stderr = StringIO()
+        try:
+            self.assertRaises(IndexError,
+                              self.eval_output_and_check, [t])
+        finally:
+            sys.stderr = old_stderr
+
+    def test2_ok_elem(self):
+        n = self.shared(numpy.arange(6, dtype=self.dtype).reshape((2, 3)))
+        t = n[0, 2]
+        self.assertTrue(isinstance(t.owner.op, Subtensor))
+        tval = self.eval_output_and_check(t)
+        self.assertTrue(tval.shape == ())
+        self.assertTrue(numpy.all(tval == 2))
+
+    def test2_ok_row(self):
+        n = self.shared(numpy.arange(6, dtype=self.dtype).reshape((2, 3)))
+        t = n[1]
+        self.assertFalse(any(n.type.broadcastable))
+        self.assertTrue(isinstance(t.owner.op, Subtensor))
+        tval = self.eval_output_and_check(t)
+        self.assertTrue(tval.shape == (3,))
+        self.assertTrue(numpy.all(tval == [3, 4, 5]))
+
+    def test2_ok_col(self):
+        n = self.shared(numpy.arange(6, dtype=self.dtype).reshape((2, 3)))
+        t = n[:, 0]
+        self.assertTrue(isinstance(t.owner.op, Subtensor))
+        self.assertFalse(any(n.type.broadcastable))
+        tval = self.eval_output_and_check(t)
+        self.assertTrue(tval.shape == (2,))
+        self.assertTrue(numpy.all(tval == [0, 3]))
+
+    def test2_ok_rows_finite(self):
+        n = self.shared(numpy.arange(12, dtype=self.dtype).reshape((4, 3)))
+        t = n[1:3, 0]
+        self.assertTrue(isinstance(t.owner.op, Subtensor))
+        tval = self.eval_output_and_check(t)
+        self.assertTrue(tval.shape == (2,))
+        self.assertTrue(numpy.all(tval == [3, 6]))
+
+    def test2_ok_cols_infinite(self):
+        n = self.shared(numpy.arange(12, dtype=self.dtype).reshape((4, 3)))
+        t = n[1, 2:]
+        self.assertTrue(isinstance(t.owner.op, Subtensor))
+        tval = self.eval_output_and_check(t)
+        self.assertTrue(tval.shape == (1,))
+        self.assertTrue(numpy.all(tval == 5))
+
+    def test2_ok_strided(self):
+        n = self.shared(numpy.arange(20, dtype=self.dtype).reshape((4, 5)))
+        t = n[1:4:2, 1:5:2]
+        self.assertTrue(isinstance(t.owner.op, Subtensor))
+        tval = self.eval_output_and_check(t)
+        self.assertTrue(tval.shape == (2, 2))
+        self.assertTrue(numpy.all(tval == [[6, 8], [16, 18]]))
+
+    def test3_ok_mat(self):
+        n = self.shared(numpy.arange(24, dtype=self.dtype).reshape((2, 3, 4)))
+        t = n[0, 0, 0]
+        self.assertTrue(isinstance(t.owner.op, Subtensor))
+        tval = self.eval_output_and_check(t)
+        self.assertTrue(tval.shape == ())
+        self.assertTrue(numpy.all(tval == 0))
+
+    def test_long(self):
+        n = self.shared(numpy.arange(12, dtype=self.dtype).reshape((4, 3)))
+        t = n[1L:4L:2L, 1L]
+        self.assertTrue(isinstance(t.owner.op, Subtensor))
+        tval = self.eval_output_and_check(t)
+        self.assertTrue(tval.shape == (2,))
+        self.assertTrue(numpy.all(tval == [4, 10]))
+
+    def test_long_too_big(self):
+        # Currently, we cast Python longs to int64 when used for indexing.
+        # This test checks that using a long that does not fit raises an error.
+        n = self.shared(numpy.arange(12, dtype=self.dtype).reshape((4, 3)))
+        self.assertRaises(Exception, lambda: n[:(2L ** 63)])
+
+    def test_newaxis(self):
+        """
+        newaxis support comes from logic in the __getitem__ of TensorType
+        Variables, which currently inserts dimshuffle to get the right number
+        of dimensions, and adjusts the slice tuple accordingly.
+
+        So testing is done via square-bracket notation rather than direct
+        interaction with the Subtensor Op (which has no support of its own for
+        newaxis).
+        """
+        newaxis = numpy.newaxis
+
+        n = self.shared(numpy.arange(24, dtype=self.dtype).reshape((2, 3, 4)))
+        assert n.ndim == 3
+
+        n4 = n[newaxis, :, :, :]
+        assert n4.broadcastable == (True, False, False, False), n4
+
+        n4 = n[:, newaxis, :, :]
+        assert n4.broadcastable == (False, True, False, False), n4
+
+        n4 = n[:, :, newaxis, :]
+        assert n4.broadcastable == (False, False, True, False), n4
+
+        n4 = n[:, :, :, newaxis]
+        assert n4.broadcastable == (False, False, False, True), n4
+
+        n3 = n.flatten()[newaxis, :, newaxis]
+        assert n3.broadcastable == (True, False, True), n3
+
+        s = cscalar()
+        s1 = s[newaxis]
+        assert s1.broadcastable == (True,), s1
+
+        vs1, vn3, vn4 = theano.function([s], [s1, n3, n4])(-2.0)
+
+        assert numpy.all(vs1 == [-2.0])
+        assert numpy.all(vn3
+                == numpy.arange(24)[newaxis, :, newaxis])
+        assert numpy.all(vn4
+                == numpy.arange(24).reshape((2, 3, 4))[:, :, :, newaxis])
+
+    def test_grad_1d(self):
+        subi = 0
+        data = numpy.asarray(rand(2, 3), dtype=self.dtype)
+        n = self.shared(data)
+        z = scal.constant(subi)
+        t = n[z:, z]
+        gn = theano.tensor.grad(theano.tensor.sum(theano.tensor.exp(t)), n)
+
+        f = inplace_func([], gn, mode=self.mode)
+        topo = f.maker.fgraph.toposort()
+        topo_ = [node for node in topo if not isinstance(node.op,
+             self.ignore_topo)]
+        if not self.fast_compile:
+            assert len(topo_) == 6
+        assert numpy.sum([isinstance(node.op, self.inc_sub)
+             for node in topo_]) == 1
+        assert numpy.sum([isinstance(node.op, self.sub)
+             for node in topo_]) == 1
+        gval = f()
+
+        good = numpy.zeros_like(data)
+        good[subi:, subi] = numpy.exp(data[subi:, subi])
+        self.assertTrue(numpy.allclose(gval, good), (gval, good))
+
+    def test_grad_0d(self):
+        data = numpy.asarray(rand(2, 3), dtype=self.dtype)
+        n = self.shared(data)
+        t = n[1, 0]
+        gn = theano.tensor.grad(theano.tensor.sum(theano.tensor.exp(t)), n)
+        f = self.function([], gn)
+        topo = f.maker.fgraph.toposort()
+        topo_ = [node for node in topo if not isinstance(node.op,
+             self.ignore_topo)]
+        if not self.fast_compile:
+            assert len(topo_) == 6
+        assert numpy.sum([isinstance(node.op, self.inc_sub)
+             for node in topo_]) == 1
+        assert numpy.sum([isinstance(node.op, self.sub)
+             for node in topo_]) == 1
+
+        gval = f()
+        good = numpy.zeros_like(data)
+        good[1, 0] = numpy.exp(data[1, 0])
+        self.assertTrue(numpy.allclose(gval, good), (gval, good))
+
+    def test_ok_list(self):
+        for data, idx in [(rand(4), [1, 0]),
+                          (rand(4, 5), [2, 3]),
+                          (rand(4, 2, 3), [0, 3]),
+                          (rand(4, 2, 3), [3, 3, 1, 1, 2, 2, 0, 0]),
+                          (rand(4, 2, 3), [3, 3, 1, 1, 2, 2, 0, 0,
+                                           -1, -2, -3, -4]),
+                          # Test 4 dims as gpu code use another algo
+                          # in that case This new algo is not as much
+                          # optimized for that case.
+                          (rand(4, 4, 2, 3), [3,
+                               3, 1, 1, 2, 2, 0, 0, -1, -2, -3, -4]),
+                          # Test with TensorConstant index.
+                          (rand(4, 2, 3),
+                           theano.tensor.constant([3, 3, 1, 1, 2, 2, 0, 0])),
+                          ]:
+            data = numpy.asarray(data, dtype=self.dtype)
+            n = self.shared(data)
+            t = n[idx]
+
+            # We test again AdvancedSubtensor1 as we transfer data to the cpu.
+            self.assertTrue(isinstance(t.owner.op, tensor.AdvancedSubtensor1))
+
+            val = self.eval_output_and_check(t, list=True)
+            if isinstance(idx, list):
+                good = data[idx]
+            else:
+                good = data[idx.data]
+            self.assertTrue(val.ndim == data.ndim)
+            self.assertTrue(numpy.allclose(val, good), (val, good))
+
+            # Test reuse of output memory
+            if isinstance(self.adv_sub1, tensor.AdvancedSubtensor1):
+                op = self.adv_sub1()
+                # When idx is a TensorConstant.
+                if hasattr(idx, "data"):
+                    idx = idx.data
+                test_out = [[None]]
+                op.perform(None, [data, idx], test_out)
+                out1 = test_out[0][0]
+                op.perform(None, [data, idx], test_out)
+                out2 = test_out[0][0]
+                assert out1 is out2
+
+    def test_err_invalid_list(self):
+        n = self.shared(numpy.asarray(5, dtype=self.dtype))
+        self.assertRaises(TypeError, n.__getitem__, [0, 0])
+
+    def test_err_invalid_2list_dtype(self):
+        n = self.shared(numpy.ones((3, 3), dtype=self.dtype) * 5)
+        self.assertRaises(TypeError, n.__getitem__, ([0., 0], [1, 1]))
+
+    def test_err_bound_list(self):
+        n = self.shared(numpy.ones((2, 3), dtype=self.dtype) * 5)
+        l = lvector()
+        t = n[l]
+        # We test again AdvancedSubtensor1 as we transfer data to the cpu.
+        self.assertTrue(isinstance(t.owner.op, tensor.AdvancedSubtensor1))
+
+        f = self.function([l], t, op=self.adv_sub1)
+        topo = f.maker.fgraph.toposort()
+        topo_ = [node for node in topo if not isinstance(node.op,
+             self.ignore_topo)]
+        assert len(topo_) == 1
+        self.assertTrue(isinstance(topo_[0].op, self.adv_sub1))
+        for shp in [[0, 4], [0, -3], [-10]]:
+            self.assertRaises(IndexError, f, shp)
+
+    def test_adv_sub1_broadcast(self):
+        ones = numpy.ones((1, 3), dtype=self.dtype)
+        n = self.shared(ones * 5, broadcastable=(True, False))
+        idx = tensor.lvector()
+        t = n[idx]
+        self.assertTrue(isinstance(t.owner.op, tensor.AdvancedSubtensor1))
+
+        f = self.function([idx], t, op=self.adv_sub1)
+        topo = f.maker.fgraph.toposort()
+        topo_ = [node for node in topo if not isinstance(node.op,
+             self.ignore_topo)]
+        assert len(topo_) == 1
+        self.assertTrue(isinstance(topo_[0].op, self.adv_sub1))
+        self.assertTrue(numpy.allclose(f([0]), ones[0] * 5))
+        self.assertRaises(IndexError, f, [0, 1])
+
+    def test_adv_sub1_idx_broadcast(self):
+        # The idx can be a broadcastable vector.
+        ones = numpy.ones((4, 3), dtype=self.dtype)
+        n = self.shared(ones * 5)
+        idx = tensor.TensorType(dtype='int64', broadcastable=(True,))()
+        assert idx.type.broadcastable == (True,)
+        t = n[idx]
+        self.assertTrue(isinstance(t.owner.op, tensor.AdvancedSubtensor1))
+
+        f = self.function([idx], t, op=self.adv_sub1)
+        topo = f.maker.fgraph.toposort()
+        topo_ = [node for node in topo if not isinstance(node.op,
+             self.ignore_topo)]
+        assert len(topo_) == 1
+        self.assertTrue(isinstance(topo_[0].op, self.adv_sub1))
+        self.assertTrue(numpy.allclose(f([0]), ones[0] * 5))
+
+    def test_shape_i_const(self):
+        # Each axis is treated independently by shape_i/shape operators
+
+        mode_opt = self.mode.including("fast_run")
+        data = self.shared(numpy.array(numpy.arange(5), dtype=self.dtype))
+        for start in [None] + [-8, -5, -1, 0, 1, 5, 8]:
+            outs = []
+            shapes = []
+            for stop in [None] + [-8, -5, -1, 0, 1, 5, 8]:
+                for step in [None] + [-3, -1, 2]:
+                    outs += [data[start:stop:step].shape]
+                    shapes += [data.get_value(
+                        borrow=True)[start:stop:step].shape]
+            f = self.function([], outs, mode=mode_opt,
+                              op=self.ops, N=0)
+            t_shapes = f()
+            for t_shape, shape in zip(t_shapes, shapes):
+                assert numpy.all(t_shape == shape)
+            assert tensor.Subtensor not in [x.op for x in
+                                           f.maker.fgraph.toposort()]
+
+    def test_shape_i_scalar(self):
+        # Each axis is treated independently by shape_i/shape operators
+
+        mode_opt = self.mode.including("fast_run")
+
+        v_data = numpy.array(numpy.arange(5), dtype=self.dtype)
+        t_data = self.shared(v_data)
+        start = tensor.iscalar('b')
+        stop = tensor.iscalar('e')
+        step = tensor.iscalar('s')
+        f = self.function([start, stop, step],
+                          t_data[start:stop:step].shape,
+                          mode=mode_opt,
+                          op=self.ops,
+                          N=0)
+        assert tensor.Subtensor not in [x.op for x in f.maker.
+            fgraph.toposort()]
+        for start in [-8, -5, -4, -1, 0, 1, 4, 5, 8]:
+            for stop in [-8, -5, -4, -1, 0, 1, 4, 5, 8]:
+                for step in [-3, -1, 2, 5]:
+                    assert numpy.all(f(start, stop, step) ==
+                                     v_data[start:stop:step].shape)
+
+    def test_slice_canonical_form_0(self):
+        start = tensor.iscalar('b')
+        stop = tensor.iscalar('e')
+        step = tensor.iscalar('s')
+        length = tensor.iscalar('l')
+        cnf = get_canonical_form_slice(slice(start, stop, step), length)
+        f = self.function([start, stop, step, length], [
+            tensor.as_tensor_variable(cnf[0].start),
+            tensor.as_tensor_variable(cnf[0].stop),
+            tensor.as_tensor_variable(cnf[0].step),
+            tensor.as_tensor_variable(cnf[1])], N=0, op=self.ops)
+
+        length = 5
+        a = numpy.arange(length)
+        for start in [-8, -5, -4, -1, 0, 1, 4, 5, 8]:
+            for stop in  [-8, -5, -4, -1, 0, 1, 4, 5, 8]:
+                for step in [-6, -3, -1, 2, 5]:
+                    out = f(start, stop, step, length)
+                    t_out = a[out[0]:out[1]:out[2]][::out[3]]
+                    v_out = a[start:stop:step]
+                    assert numpy.all(t_out == v_out)
+                    assert numpy.all(t_out.shape == v_out.shape)
+
+    def test_slice_canonical_form_1(self):
+        stop = tensor.iscalar('e')
+        step = tensor.iscalar('s')
+        length = tensor.iscalar('l')
+        cnf = get_canonical_form_slice(slice(None, stop, step), length)
+        f = self.function([stop, step, length], [
+            tensor.as_tensor_variable(cnf[0].start),
+            tensor.as_tensor_variable(cnf[0].stop),
+            tensor.as_tensor_variable(cnf[0].step),
+            tensor.as_tensor_variable(cnf[1])], N=0, op=self.ops)
+
+        length = 5
+        a = numpy.arange(length)
+        for stop in  [-8, -5, -4, -1, 0, 1, 4, 5, 8]:
+            for step in [-6, -3, -1, 2, 5]:
+                out = f(stop, step, length)
+                t_out = a[out[0]:out[1]:out[2]][::out[3]]
+                v_out = a[:stop:step]
+                assert numpy.all(t_out == v_out)
+                assert numpy.all(t_out.shape == v_out.shape)
+
+    def test_slice_canonical_form_2(self):
+        start = tensor.iscalar('b')
+        step = tensor.iscalar('s')
+        length = tensor.iscalar('l')
+        cnf = get_canonical_form_slice(slice(start, None, step), length)
+        f = self.function([start, step, length], [
+            tensor.as_tensor_variable(cnf[0].start),
+            tensor.as_tensor_variable(cnf[0].stop),
+            tensor.as_tensor_variable(cnf[0].step),
+            tensor.as_tensor_variable(cnf[1])], N=0, op=self.ops)
+
+        length = 5
+        a = numpy.arange(length)
+        for start in [-8, -5, -4, -1, 0, 1, 4, 5, 8]:
+            for step in [-6, -3, -1, 2, 5]:
+                out = f(start, step, length)
+                t_out = a[out[0]:out[1]:out[2]][::out[3]]
+                v_out = a[start:None:step]
+                assert numpy.all(t_out == v_out)
+                assert numpy.all(t_out.shape == v_out.shape)
+
+    def test_slice_canonical_form_3(self):
+        start = tensor.iscalar('b')
+        stop = tensor.iscalar('e')
+        length = tensor.iscalar('l')
+        cnf = get_canonical_form_slice(slice(start, stop, None), length)
+        f = self.function([start, stop, length], [
+            tensor.as_tensor_variable(cnf[0].start),
+            tensor.as_tensor_variable(cnf[0].stop),
+            tensor.as_tensor_variable(cnf[0].step),
+            tensor.as_tensor_variable(cnf[1])], N=0, op=self.ops)
+
+        length = 5
+        a = numpy.arange(length)
+        for start in [-8, -5, -4, -1, 0, 1, 4, 5, 8]:
+            for stop in  [-8, -5, -4, -1, 0, 1, 4, 5, 8]:
+                out = f(start, stop, length)
+                t_out = a[out[0]:out[1]:out[2]][::out[3]]
+                v_out = a[start:stop:None]
+                assert numpy.all(t_out == v_out)
+                assert numpy.all(t_out.shape == v_out.shape)
+
+    def test_slice_canonical_form_4(self):
+        step = tensor.iscalar('s')
+        length = tensor.iscalar('l')
+        cnf = get_canonical_form_slice(slice(None, None, step), length)
+        f = self.function([step, length], [
+            tensor.as_tensor_variable(cnf[0].start),
+            tensor.as_tensor_variable(cnf[0].stop),
+            tensor.as_tensor_variable(cnf[0].step),
+            tensor.as_tensor_variable(cnf[1])], N=0, op=self.ops)
+
+        length = 5
+        a = numpy.arange(length)
+        for step in [-6, -3, -1, 2, 5]:
+            out = f(step, length)
+            t_out = a[out[0]:out[1]:out[2]][::out[3]]
+            v_out = a[None:None:step]
+            assert numpy.all(t_out == v_out)
+            assert numpy.all(t_out.shape == v_out.shape)
+
+    def test_slice_canonical_form_5(self):
+        start = tensor.iscalar('b')
+        length = tensor.iscalar('l')
+        cnf = get_canonical_form_slice(slice(start, None, None), length)
+        f = self.function([start, length], [
+            tensor.as_tensor_variable(cnf[0].start),
+            tensor.as_tensor_variable(cnf[0].stop),
+            tensor.as_tensor_variable(cnf[0].step),
+            tensor.as_tensor_variable(cnf[1])], N=0, op=self.ops)
+
+        length = 5
+        a = numpy.arange(length)
+        for start in [-8, -5, -4, -1, 0, 1, 4, 5, 8]:
+            out = f(start, length)
+            t_out = a[out[0]:out[1]:out[2]][::out[3]]
+            v_out = a[start:None:None]
+            assert numpy.all(t_out == v_out)
+            assert numpy.all(t_out.shape == v_out.shape)
+
+    def test_slice_canonical_form_6(self):
+        stop = tensor.iscalar('e')
+        length = tensor.iscalar('l')
+        cnf = get_canonical_form_slice(slice(None, stop, None), length)
+        f = self.function([stop, length], [
+            tensor.as_tensor_variable(cnf[0].start),
+            tensor.as_tensor_variable(cnf[0].stop),
+            tensor.as_tensor_variable(cnf[0].step),
+            tensor.as_tensor_variable(cnf[1])], N=0, op=self.ops)
+
+        length = 5
+        a = numpy.arange(length)
+        for stop in  [-8, -5, -4, -1, 0, 1, 4, 5, 8]:
+            out = f(stop, length)
+            t_out = a[out[0]:out[1]:out[2]][::out[3]]
+            v_out = a[None:stop:None]
+            assert numpy.all(t_out == v_out)
+            assert numpy.all(t_out.shape == v_out.shape)
+
+    def grad_list_(self, idxs, data):
+        # Helper called by test_grad_list.  For every index list in `idxs`,
+        # check the gradient of sum(exp(n[idx])) with respect to the shared
+        # copy of `data` against a numpy reference, then run
+        # utt.verify_grad on the first- and second-order gradients.
+        n = self.shared(data)
+
+        for idx in idxs:
+            # Should stay on the cpu.
+            idx_ = _shared(numpy.asarray(idx))
+            t = n[idx_]
+            gn = theano.tensor.grad(theano.tensor.sum(theano.tensor.exp(t)), n)
+            f = self.function([], [gn, gn.shape], op=self.adv_incsub1)
+            topo = f.maker.fgraph.toposort()
+            # Outside of fast_compile, at least one adv_incsub1 node must
+            # also be flagged inplace.
+            if not self.fast_compile:
+                assert any([isinstance(node.op, self.
+                    adv_incsub1) and node.op.inplace for node in topo])
+            else:
+                assert any([isinstance(node.op, self.
+                    adv_incsub1) for node in topo])
+            assert any([isinstance(node.op, self.adv_sub1) for node in topo])
+            gval, gshape = f()
+            good = numpy.zeros_like(data)
+            # The vectorized form below does not work when the same index
+            # appears several times, hence the explicit loop:
+            # good[idx] += numpy.exp(data[idx])
+            for i in idx:
+                good[i] += numpy.exp(data[i])
+            self.assertTrue(gval.ndim == data.ndim)
+            self.assertTrue(numpy.allclose(gval, good), (gval, good))
+            self.assertTrue(numpy.allclose(gshape, data.shape))
+
+            # `fct` closes over the current idx_ and is used right away.
+            def fct(t):
+                return theano.tensor.sum(t[idx_])
+            utt.verify_grad(fct, [data])
+
+            # Test the grad of the grad (i.e. AdvancedIncSubtensor1.grad)
+            def fct2(t):
+                return theano.tensor.grad(theano.tensor.sum(t[idx_]), t)
+            utt.verify_grad(fct2, [data])
+
+            # Test shape of AdvancedIncSubtensor1 and AdvancedSubtensor1
+            if not self.fast_compile:
+                ops = (self.adv_incsub1, self.adv_sub1)
+            else:
+                ops = self.ops
+            # The shape-only function is compiled for the first index list
+            # only.
+            if idx is idxs[0]:
+                f = self.function([], [gn.shape, n[idx_].shape],
+                                  op=ops,
+                                  N=0, N_fast=2)
+                f()
+
+    def test_wrong_exception_regression(self):
+        a = fscalar()
+        b = fscalar()
+        c = vector()
+        try:
+            c[a:b]
+        except NotImplementedError:
+            self.fail()
+        except TypeError:
+            pass
+        try:
+            c[a:]
+        except NotImplementedError:
+            self.fail()
+        except TypeError:
+            pass
+        try:
+            c[:b]
+        except NotImplementedError:
+            self.fail()
+        except TypeError:
+            pass
+
+    def test_grad_list(self):
+        data = rand(4)
+        data = numpy.asarray(data, dtype=self.dtype)
+        idxs = [[i] for i in range(data.shape[0])]
+        for i in range(data.shape[0]):
+            for j in range(0, data.shape[0], 2):
+                idxs.append([i, j, (i + 1) % data.shape[0]])
+        self.grad_list_(idxs, data)
+
+        data = rand(4, 3)
+        data = numpy.asarray(data, dtype=self.dtype)
+        self.grad_list_(idxs, data)
+
+        data = rand(4, 3, 2)
+        data = numpy.asarray(data, dtype=self.dtype)
+        self.grad_list_(idxs, data)
+
+    def test_shape_list(self):
+        #TODO for all type of subtensor shape
+        for data, idx in [(rand(4), [1, 0]),
+                          (rand(4, 2), [2, 3]),
+                          (rand(4, 2, 3), [0, 3]),
+                          (rand(4, 2, 3), [3, 3, 1, 2, 2, ]),
+                          ]:
+            data = numpy.asarray(data, dtype=self.dtype)
+            n = self.shared(data)
+            t = n[idx]
+            f = self.function([], t.shape, op=self.ops, N=0, N_fast=1)
+            val = f()
+            self.assertTrue(numpy.allclose(val, data[idx].shape))
+
+    def test_grad_advanced_inc_subtensor(self):
+        def inc_slice(*s):
+            def just_numeric_args(a, b):
+                cost = (a[s] + b).sum()
+                cost_wrt_a = theano.tensor.grad(cost, a)
+                cost_wrt_b = theano.tensor.grad(cost, b)
+                grads = cost_wrt_a.sum() + cost_wrt_b.sum()
+                return grads
+            return just_numeric_args
+
+        # vector
+        utt.verify_grad(
+            inc_slice(slice(2, 4, None)),
+            (numpy.asarray([0, 1, 2, 3, 4, 5.]), numpy.asarray([9, 9.]),))
+
+        # matrix
+        utt.verify_grad(
+            inc_slice(slice(1, 2, None), slice(None, None, None)),
+            (numpy.asarray([[0, 1], [2, 3], [4, 5.]]),
+             numpy.asarray([[9, 9.]]),))
+
+        #single element
+        utt.verify_grad(
+            inc_slice(2, 1),
+            (numpy.asarray([[0, 1], [2, 3], [4, 5.]]), numpy.asarray(9.),))
+
+    def test_advanced_inc_and_set(self):
+        """
+        Test advanced increment and set.
+
+        One (data, index vector, increment) case is built for every
+        combination of set/inc, inplace flag, data shape and increment
+        shape.  All cases are compiled into a single Theano function and
+        each output is compared with a reference computed row by row with
+        numpy.
+        """
+        rng = numpy.random.RandomState(seed=utt.fetch_seed())
+        # Symbolic inputs/outputs and their numeric counterparts,
+        # accumulated over all the cases.
+        all_inputs_var = []
+        all_inputs_num = []
+        all_outputs_var = []
+        all_outputs_num = []
+        for set_instead_of_inc in (False, True):
+            for inplace in (False, True):
+                for data_shape in ((10,), (4, 5), (1, 2, 3), (4, 5, 6, 7)):
+                    data_n_dims = len(data_shape)
+                    data_size = numpy.product(data_shape)
+                    # Corresponding numeric variable.
+                    data_num_init = numpy.arange(data_size, dtype=self.dtype)
+                    data_num_init = data_num_init.reshape(data_shape)
+                    # Every suffix of data_shape, from the full shape down
+                    # to the empty tuple.
+                    inc_shapes = [data_shape[i:]
+                                  for i in xrange(0, len(data_shape) + 1)]
+                    for inc_shape in inc_shapes:
+                        inc_n_dims = len(inc_shape)
+                        # We copy the numeric value to be 100% sure there is no
+                        # risk of accidentally sharing it.
+                        data_num = data_num_init.copy()
+                        # Symbolic variable to be incremented.
+                        # We create a new one every time in order not to
+                        # have duplicated variables in the function's inputs
+                        data_var = tensor.tensor(
+                                broadcastable=[False] * data_n_dims,
+                                dtype=self.dtype)
+                        # Symbolic variable with rows to be incremented.
+                        idx_var = theano.tensor.vector(dtype='int64')
+                        n_to_inc = rng.randint(data_shape[0])
+                        # Corresponding numeric variable.
+                        idx_num = rng.randint(0, data_shape[0], n_to_inc)
+                        idx_num = idx_num.astype('int64')
+                        # Symbolic variable with increment value.
+                        inc_var = tensor.tensor(
+                                broadcastable=[False] * inc_n_dims,
+                                dtype=self.dtype)
+                        # Trick for the case where `inc_shape` is the same as
+                        # `data_shape`: what we actually want is the first
+                        # shape element to be equal to the number of rows to
+                        # increment.
+                        if len(inc_shape) == len(data_shape):
+                            inc_shape = (n_to_inc,) + inc_shape[1:]
+                        inc_size = numpy.product(inc_shape)
+                        # Corresponding numeric variable.
+                        inc_num = rng.uniform(size=inc_size).astype(self.dtype)
+                        inc_num = inc_num.reshape(inc_shape)
+                        # Result of the incrementation.
+                        # (i) Theano
+                        if set_instead_of_inc:
+                            op = set_subtensor
+                        else:
+                            op = inc_subtensor
+                        output = op(data_var[idx_var], inc_var,
+                                    inplace=inplace)
+                        # (ii) Numpy (note that Numpy increments only once
+                        # duplicated indices, so we cannot directly use +=).
+                        data_copy = data_num.copy()
+                        for j, idx in enumerate(idx_num):
+                            if len(inc_shape) == len(data_shape):
+                                # Special case where there is no broadcasting.
+                                if set_instead_of_inc:
+                                    data_copy[idx] = inc_num[j]
+                                else:
+                                    data_copy[idx] += inc_num[j]
+                            else:
+                                if set_instead_of_inc:
+                                    data_copy[idx] = inc_num
+                                else:
+                                    data_copy[idx] += inc_num
+                        # Wrap the data input so that it is declared mutable.
+                        data_var = theano.In(data_var, mutable=True)
+
+                        # Remember data for the Theano function (see below).
+                        all_inputs_var += [data_var, idx_var, inc_var]
+                        all_inputs_num += [data_num, idx_num, inc_num]
+                        all_outputs_var.append(output)
+                        all_outputs_num.append(data_copy)
+                        if False:  # Enable for debugging purpose.
+                            f = self.function([data_var, idx_var, inc_var],
+                                              output, accept_inplace=inplace,
+                                              op=self.adv_incsub1)
+                            if inplace:
+                                # Ensure calling `f` will not alter `data_num`.
+                                data_num = data_num.copy()
+                            f_out = f(data_num.copy(), idx_num, inc_num)
+                            assert numpy.allclose(f_out, data_copy)
+                            if not inplace:
+                                # Sanity check: `data_num` should be intact.
+                                assert (data_num == data_num_init).all()
+
+        # Actual test (we compile a single Theano function to make it faster).
+        # The gpu_set_subtensor1 warning flag is switched off only while
+        # the function is compiled, then restored.
+        orig_warn = theano.config.warn.gpu_set_subtensor1
+        try:
+            theano.config.warn.gpu_set_subtensor1 = False
+            f = self.function(all_inputs_var, all_outputs_var,
+                              accept_inplace=True,
+                              op=self.adv_incsub1,
+                              N=len(all_outputs_var))
+        finally:
+            theano.config.warn.gpu_set_subtensor1 = orig_warn
+
+        f_outs = f(*all_inputs_num)
+        assert len(f_outs) == len(all_outputs_num)
+        for f_out, output_num in izip(f_outs, all_outputs_num):
+            # NB: if this assert fails, it will probably be easier to debug if
+            # you enable the debug code above.
+            assert numpy.allclose(f_out, output_num)
+
+    def test_adv_constant_arg(self):
+        # Test case provided (and bug detected, gh-607) by John Salvatier.
+        # Indexing with a raw ndarray and with the equivalent Theano
+        # constant must build the same graph.
+        mat = matrix('m')
+        idx_num = numpy.array([0, 1, 3])
+        idx_const = theano.tensor.constant(idx_num)
+        idx_var = theano.tensor.lvector('i')
+
+        # Indexing with the raw ndarray used to fail.
+        from_ndarray = mat[idx_num, idx_var]
+        from_constant = mat[idx_const, idx_var]
+
+        assert gof.graph.is_same_graph(from_ndarray, from_constant)
+
+    def test_adv1_inc_sub_notlastdim(self):
+        # Check 1-dimensional advanced indexing applied to a dimension
+        # other than the first (outer-most) one.
+        mat = matrix('m')
+        idx = lvector('i')
+
+        set_out = set_subtensor(mat[:, idx], 0)
+        inc_out = inc_subtensor(mat[:, idx], 1)
+        fn = theano.function([mat, idx], [set_out, inc_out])
+
+        mat_val = rand(3, 5)
+        idx_val = randint_ranged(min=0, max=4, shape=(4,))
+        set_ref = mat_val.copy()
+        inc_ref = mat_val.copy()
+
+        set_val, inc_val = fn(mat_val, idx_val)
+        # Build the references column by column so that repeated indices
+        # accumulate in the increment case.
+        for col in idx_val:
+            set_ref[:, col] = 0
+            inc_ref[:, col] += 1
+
+        assert numpy.allclose(set_val, set_ref), (set_val, set_ref)
+        assert numpy.allclose(inc_val, inc_ref), (inc_val, inc_ref)
+
+    def test_adv1_inc_sub_notlastdim_2didx(self):
+        # Same check as for a vector index, applied to a dimension other
+        # than the first (outer-most) one, but with a matrix of indices.
+        mat = matrix('m')
+        idx = lmatrix('i')
+
+        set_out = set_subtensor(mat[:, idx], 0)
+        inc_out = inc_subtensor(mat[:, idx], 1)
+        fn = theano.function([mat, idx], [set_out, inc_out])
+
+        mat_val = rand(5, 7)
+        idx_val = randint_ranged(min=0, max=6, shape=(4, 2))
+        set_ref = mat_val.copy()
+        inc_ref = mat_val.copy()
+
+        set_val, inc_val = fn(mat_val, idx_val)
+        # Flatten the index matrix and update the references one column
+        # at a time so that repeated indices accumulate.
+        for col in idx_val.ravel():
+            set_ref[:, col] = 0
+            inc_ref[:, col] += 1
+
+        assert numpy.allclose(set_val, set_ref), (set_val, set_ref)
+        assert numpy.allclose(inc_val, inc_ref), (inc_val, inc_ref)
+
+
+class TestIncSubtensor1(unittest.TestCase):
+    # Tests inc_subtensor and set_subtensor when the index is a single
+    # integer vector (1-d advanced indexing).
+
+    def setUp(self):
+        self.s = tensor.iscalar()
+        self.v = tensor.fvector()
+        self.m = tensor.dmatrix()
+        self.t = tensor.ctensor3()
+
+        self.adv1q = tensor.lvector()  # advanced 1d query
+
+    def _eval_on_sample(self, out):
+        # Compile `out` as a function of (self.v, self.adv1q) and run it
+        # on the sample vector [.4, .9, .1] with indices [1, 2].
+        fn = theano.function([self.v, self.adv1q], out,
+                             allow_input_downcast=True)
+        return fn([.4, .9, .1], [1, 2])
+
+    def test_cant_adv_idx_into_scalar(self):
+        self.assertRaises(TypeError, lambda: self.s[self.adv1q])
+
+    def test_index_into_vec_w_vec(self):
+        selection = self.v[self.adv1q]
+        assert selection.type == self.v.type
+
+    def test_1d_set_adv_selection(self):
+        selection = self.v[self.adv1q]
+        out = set_subtensor(selection, self.v[self.adv1q])
+
+        assert out.type == self.v.type
+
+        #TODO: compile a function and verify that the subtensor is removed
+        #      completely, because the whole expression is redundant.
+
+        out_val = self._eval_on_sample(out)
+        # Writing the selected values back in place leaves the vector as is.
+        assert numpy.allclose(out_val, [.4, 0.9, 0.1])
+
+    def test_1d_inc_adv_selection(self):
+        selection = self.v[self.adv1q]
+        out = inc_subtensor(selection, self.v[self.adv1q])
+
+        assert out.type == self.v.type
+        out_val = self._eval_on_sample(out)
+        # The selected entries are incremented by themselves, i.e. doubled.
+        assert numpy.allclose(out_val, [.4, 1.8, 0.2])
+
+    def test_1d_inc_adv_selection_w_broadcasting(self):
+        out = inc_subtensor(self.v[self.adv1q], 3.0)
+
+        assert out.type == self.v.type
+        out_val = self._eval_on_sample(out)
+        # The scalar increment is applied to every selected entry.
+        assert numpy.allclose(out_val, [.4, 3.9, 3.1])
+
+    def test_assigning_matrix_to_vector_selection(self):
+        def increment_by_matrix():
+            return inc_subtensor(self.v[self.adv1q], fmatrix())
+
+        self.assertRaises(TypeError, increment_by_matrix)
+
+
+# Exception instance raised by the TestAdvancedSubtensor tests below when
+# `inplace_increment` is None.
+inplace_increment_missing = SkipTest(
+    "inc_subtensor with advanced indexing not enabled. "
+    "Installing NumPy 1.8 or the latest development version "
+    "should make that feature available.")
+
+
+class TestAdvancedSubtensor(unittest.TestCase):
+    # Tests advanced (integer array) indexing, and inc_subtensor applied
+    # to the result of such indexing.
+
+    def setUp(self):
+        self.s = iscalar()
+        self.v = fvector()
+        self.m = dmatrix()
+        self.t = ctensor3()
+
+        self.ix1 = lvector()  # advanced 1d query
+        self.ix12 = lvector()
+        self.ix2 = lmatrix()
+
+    def test_cant_adv_idx_into_scalar(self):
+        self.assertRaises(TypeError, lambda: self.s[self.ix1])
+
+    def test_index_into_vec_w_vec(self):
+        a = self.v[self.ix1]
+        assert a.type == self.v.type, (a.type, self.v.type)
+
+    def test_index_into_vec_w_matrix(self):
+        # The result keeps the dtype of the indexed vector and takes the
+        # broadcastable pattern of the index matrix.
+        a = self.v[self.ix2]
+        assert a.dtype == self.v.dtype, (a.dtype, self.v.dtype)
+        assert a.broadcastable == self.ix2.broadcastable, (
+                a.broadcastable, self.ix2.broadcastable)
+
+    def test_inc_adv_subtensor_w_matrix(self):
+        if inplace_increment is None:
+            raise inplace_increment_missing
+
+        subt = self.v[self.ix2]
+        a = inc_subtensor(subt, subt)
+
+        assert a.type == self.v.type, (a.type, self.v.type)
+        f = theano.function([self.v, self.ix2], a, allow_input_downcast=True)
+        aval = f([.4, .9, .1], [[1, 2],
+                                [1, 2]])
+        # Positions 1 and 2 are each selected twice, so each receives its
+        # own value twice on top of the original one.
+        assert numpy.allclose(aval, [.4, .9 * 3, .1 * 3])
+
+    def test_inc_adv_subtensor_w_2vec(self):
+        if inplace_increment is None:
+            raise inplace_increment_missing
+
+        subt = self.m[self.ix1, self.ix12]
+        a = inc_subtensor(subt, subt)
+
+        typ = TensorType(self.m.type.dtype, self.ix2.type.broadcastable)
+        assert a.type == typ, (a.type, typ)
+        f = theano.function([self.m, self.ix1, self.ix12], a,
+                            allow_input_downcast=True)
+        aval = f([[.4, .9, .1],
+                  [5, 6, 7],
+                  [.5, .3, .15]],
+                 [1, 2, 1],
+                 [0, 1, 0])
+        # Element (1, 0) is selected twice and element (2, 1) once.
+        assert numpy.allclose(aval,
+                [[.4, .9, .1],
+                  [5 * 3, 6, 7],
+                  [.5, .3 * 2, .15]]), aval
+
+    def test_inc_adv_subtensor_with_broadcasting(self):
+        if inplace_increment is None:
+            raise inplace_increment_missing
+
+        a = inc_subtensor(self.m[self.ix1, self.ix12], 2.1)
+
+        assert a.type == self.m.type, (a.type, self.m.type)
+        f = theano.function([self.m, self.ix1, self.ix12], a,
+                            allow_input_downcast=True)
+        aval = f([[.4, .9, .1],
+                  [5, 6, 7],
+                  [.5, .3, .15]],
+                 [1, 2, 1],
+                 [0, 1, 0])
+        # The scalar 2.1 is added once per selection: twice at (1, 0) and
+        # once at (2, 1).
+        assert numpy.allclose(aval,
+                [[.4, .9, .1],
+                  [5 + 2.1 * 2, 6, 7],
+                  [.5, .3 + 2.1, .15]]), aval
+
+    def test_inc_adv_subtensor_with_index_broadcasting(self):
+        if inplace_increment is None:
+            raise inplace_increment_missing
+
+        a = inc_subtensor(self.m[self.ix1, self.ix2], 2.1)
+
+        assert a.type == self.m.type, (a.type, self.m.type)
+        f = theano.function([self.m, self.ix1, self.ix2], a,
+                            allow_input_downcast=True)
+        aval = f([[.4, .9, .1],
+                  [5, 6, 7],
+                  [.5, .3, .15]],
+                 [0, 2, 0],
+                 [[0, 1, 0],
+                  [2, 2, 2]])
+        # The row index vector is paired with each row of the column index
+        # matrix: (0, 0) and (0, 2) are selected twice, (2, 1) and (2, 2)
+        # once.
+        assert numpy.allclose(aval,
+                [[.4 + 2 * 2.1, .9, .1 + 2 * 2.1],
+                  [5, 6, 7],
+                  [.5, .3 + 2.1, .15 + 2.1]]), aval
+
+
+class TestInferShape(utt.InferShapeTester):
+    # Exercises `_compile_and_check` (inherited from utt.InferShapeTester)
+    # on graphs built with inc_subtensor / set_subtensor.  Each call passes
+    # the symbolic inputs, the outputs, numeric input values, and an Op
+    # class -- presumably the Op whose infer_shape is being checked; see
+    # utt.InferShapeTester to confirm.
+    def test_infer_shape(self):
+        # IncSubtensor
+        admat = dmatrix()
+        bdmat = dmatrix()
+        advec = dvector()
+        adscal = dscalar()
+        admat_val = rand(5, 4)
+        self._compile_and_check([admat, bdmat],
+                            [inc_subtensor(admat[2:4], bdmat)],
+                            [admat_val, [[1, 2, 3, 4]]], IncSubtensor)
+
+        self._compile_and_check([admat, advec],
+                            [inc_subtensor(admat[2], advec)],
+                            [admat_val, [1, 2, 3, 4]], IncSubtensor)
+
+        self._compile_and_check([admat, adscal],
+                            [inc_subtensor(admat[2, 3], adscal)],
+                            [admat_val, 1], IncSubtensor)
+
+        self._compile_and_check([admat, adscal],
+                            [inc_subtensor(admat[1:3, 2], adscal)],
+                            [admat_val, 1], IncSubtensor)
+
+        self._compile_and_check([admat, bdmat],
+                            [set_subtensor(admat[2:4], bdmat)],
+                            [admat_val, [[1, 2, 3, 4]]], IncSubtensor)
+
+        self._compile_and_check([admat, advec],
+                            [set_subtensor(admat[2], advec)],
+                            [admat_val, [1, 2, 3, 4]], IncSubtensor)
+
+        self._compile_and_check([admat, adscal],
+                            [set_subtensor(admat[2, 3], adscal)],
+                            [admat_val, 1], IncSubtensor)
+
+        self._compile_and_check([admat, adscal],
+                            [set_subtensor(admat[1:3, 2], adscal)],
+                            [admat_val, 1], IncSubtensor)
+
+        # Same basic-indexing cases on a 4-d tensor.
+        adtens4 = dtensor4()
+        bdtens4 = dtensor4()
+        adtens4_val = rand(3, 4, 2, 5)
+        self._compile_and_check([adtens4, bdtens4],
+                            [inc_subtensor(adtens4[::, 2:4, ::, ::], bdtens4)],
+                            [adtens4_val, [[[[1, 2, 3, 4, 5]]]]], IncSubtensor,
+                            warn=False)
+        self._compile_and_check([adtens4, bdmat],
+                            [inc_subtensor(adtens4[2, 2:4, 1, ::], bdmat)],
+                            [adtens4_val, [[1, 2, 3, 4, 5]]], IncSubtensor)
+
+        self._compile_and_check([adtens4, advec],
+                            [inc_subtensor(adtens4[0, 1, ::, 4], advec)],
+                            [adtens4_val, [1, 2]], IncSubtensor)
+
+        self._compile_and_check([adtens4, adscal],
+                            [inc_subtensor(adtens4[1:3, 1, ::, 2:4], adscal)],
+                            [adtens4_val, 1], IncSubtensor)
+
+        self._compile_and_check([adtens4, bdtens4],
+                            [set_subtensor(adtens4[::, 2:4, ::, ::], bdtens4)],
+                            [adtens4_val, [[[[1, 2, 3, 4, 5]]]]], IncSubtensor,
+                            warn=False)
+
+        self._compile_and_check([adtens4, bdmat],
+                            [set_subtensor(adtens4[2, 2:4, 1, ::], bdmat)],
+                            [adtens4_val, [[1, 2, 3, 4, 5]]], IncSubtensor)
+
+        self._compile_and_check([adtens4, advec],
+                            [set_subtensor(adtens4[0, 1, ::, 4], advec)],
+                            [adtens4_val, [1, 2]], IncSubtensor)
+
+        self._compile_and_check([adtens4, adscal],
+                            [set_subtensor(adtens4[1:3, 1, ::, 2:4], adscal)],
+                            [adtens4_val, 1], IncSubtensor)
+
+        # AdvancedIncSubtensor1
+        # The index is a Python list of integers (`aivec_val`), rebound
+        # before each case.
+        admat = dmatrix()
+        bdmat = dmatrix()
+        advec = dvector()
+        adscal = dscalar()
+        admat_val = rand(5, 4)
+        aivec_val = [2, 3]
+        self._compile_and_check([admat, bdmat],
+                            [set_subtensor(admat[aivec_val], bdmat)],
+                            [admat_val, [[1, 2, 3, 4]]], AdvancedIncSubtensor1)
+
+        aivec_val = [1, 3, 2]
+        self._compile_and_check([admat, advec],
+                            [set_subtensor(admat[aivec_val], advec)],
+                            [admat_val, [1, 2, 3, 4]], AdvancedIncSubtensor1)
+
+        aivec_val = [0, 3, 0]
+        self._compile_and_check([admat, adscal],
+                            [set_subtensor(admat[aivec_val], adscal)],
+                            [admat_val, 1], AdvancedIncSubtensor1)
+
+        # The 4-d value is regenerated with a first dimension of 4 so that
+        # index 3 is valid.
+        bdtens4 = dtensor4()
+        adtens4_val = rand(4, 3, 2, 5)
+        aivec_val = [2, 3]
+        self._compile_and_check([adtens4, bdtens4],
+                            [set_subtensor(adtens4[aivec_val], bdtens4)],
+                            [adtens4_val, [[[[1, 2, 3, 4, 5]]]]],
+                            AdvancedIncSubtensor1,
+                            warn=False)
+
+        aivec_val = [1, 3, 2]
+        self._compile_and_check([adtens4, advec],
+                            [set_subtensor(adtens4[aivec_val], advec)],
+                            [adtens4_val, [1, 2, 3, 4, 5]],
+                            AdvancedIncSubtensor1)
+
+        aivec_val = [0, 3, 0]
+        self._compile_and_check([adtens4, adscal],
+                            [set_subtensor(adtens4[aivec_val], adscal)],
+                            [adtens4_val, 1],
+                            AdvancedIncSubtensor1)
+
+        aivec_val = [2, 3]
+        self._compile_and_check([admat, bdmat],
+                                [inc_subtensor(admat[aivec_val], bdmat)],
+                                [admat_val, [[1, 2, 3, 4], [5, 6, 7, 8]]],
+                                AdvancedIncSubtensor1)
+
+        aivec_val = [1, 3, 2]
+        self._compile_and_check([admat, advec],
+                            [inc_subtensor(admat[aivec_val], advec)],
+                            [admat_val, [1, 2, 3, 4]], AdvancedIncSubtensor1)
+
+        aivec_val = [0, 3, 0]
+        self._compile_and_check([admat, adscal],
+                            [inc_subtensor(admat[aivec_val], adscal)],
+                            [admat_val, 1], AdvancedIncSubtensor1)
+
+        bdtens4 = dtensor4()
+        adtens4_val = rand(4, 3, 2, 5)
+        aivec_val = [2, 3]
+        self._compile_and_check([adtens4, bdtens4],
+                            [inc_subtensor(adtens4[aivec_val], bdtens4)],
+                            [adtens4_val, [[[[1, 2, 3, 4, 5]]],
+                                           [[[6, 7, 8, 9, 10]]]]],
+                            AdvancedIncSubtensor1,
+                            warn=False)
+
+        aivec_val = [1, 2, 1]
+        self._compile_and_check([adtens4, advec],
+                            [inc_subtensor(adtens4[aivec_val], advec)],
+                            [adtens4_val, [1, 2, 3, 4, 5]],
+                            AdvancedIncSubtensor1)
+
+        aivec_val = [0, 3, 0]
+        self._compile_and_check([adtens4, adscal],
+                            [inc_subtensor(adtens4[aivec_val], adscal)],
+                            [adtens4_val, 2],
+                            AdvancedIncSubtensor1)
+
+        # AdvancedIncSubtensor
+        # Two integer index lists, one per dimension of the matrix.
+        aivec_val = [1, 3, 2]
+        bivec_val = [0, 3, 3]
+        advec_val = [23, 24, 25]
+        self._compile_and_check([admat, advec],
+                    [set_subtensor(admat[aivec_val, bivec_val], advec)],
+                    [admat_val, advec_val], AdvancedIncSubtensor)
--- a/theano/tensor/type.py
+++ b/theano/tensor/type.py
+import logging
+_logger = logging.getLogger("theano.tensor.type")
+
+import numpy
+
+import theano
+from theano import config
+from theano.gof import Constant, hashtype, Type, Variable
+from theano.gof.python25 import any
+from theano import scalar as scal
+
+
+class TensorType(Type):
+    """Symbolic `Type` representing a numpy.ndarray value."""
+
+    filter_checks_isfinite = False
+    """
+    When this is True, strict filtering rejects data containing NaN or
+    Inf entries. (Used in `DebugMode`)
+    """
+
+    def __init__(self, dtype, broadcastable, name=None, sparse_grad=False):
+        """Initialize self.dtype and self.broadcastable.
+
+        :Parameters:
+         - `dtype`: str corresponding to numpy dtype (e.g., 'int64')
+           The value (ndarray) associated to a `Variable` of this `Type` will
+           have this dtype.  The special value 'floatX' is replaced by
+           `config.floatX`.
+         - `broadcastable`: tuple, list, or array of boolean values
+           This argument serves two purposes.  First, the True elements of this
+           list indicate the dimensions where the shape of an associated value
+           must be 1.  Secondly, the length of this list is the number of
+           dimensions that an associated value must have.  See
+           :doc:`broadcasting` for an explanation of how this list is used.
+         - `name`: str
+           Optional name for this type.
+         - `sparse_grad`: bool
+           Deprecated.  The value is stored as `self.sparse_grad`; a true
+           value triggers a deprecation warning.
+
+        Raises TypeError (through `dtype_specs`) when `dtype` is not
+        supported.
+        """
+        self.dtype = str(dtype)
+        if self.dtype == 'floatX':
+            self.dtype = config.floatX
+        ###    broadcastable is immutable, and all elements are either
+        ###    True or False
+        self.broadcastable = tuple(bool(b) for b in broadcastable)
+        self.dtype_specs()  # error checking is done there
+        self.name = name
+        self.numpy_dtype = numpy.dtype(self.dtype)
+        self.sparse_grad = sparse_grad
+        if sparse_grad:
+            # Imported here so that emitting the warning does not depend on
+            # a module-level `import warnings` being present.
+            import warnings
+            warnings.warn(
+                "DEPRECATION WARNING: You use an old interface to"
+                " AdvancedSubtensor1 sparse_grad. Now use"
+                " theano.sparse_grad(a_tensor[an_int_vector]).")
+
+    def filter(self, data, strict=False, allow_downcast=None):
+        """Convert `data` to something which can be associated to a
+        `TensorVariable`.
+
+        This function is not meant to be called in user code.  It is for
+        `Linker` instances to use when running a compiled graph.
+
+        :param data: the value to check and, if needed, convert.
+        :param strict: if True, no conversion is attempted: `data` must
+            already be a numpy.ndarray (or numpy.memmap) whose dtype equals
+            `self.numpy_dtype`.
+        :param allow_downcast: if true, `data` is converted to `self.dtype`
+            whatever its original type.  Otherwise the conversion must be
+            lossless, except that when this is None a Python float is
+            accepted if `self.dtype` is `theano.config.floatX`.
+
+        :returns: the (possibly converted) array.
+
+        :raises TypeError: if `data` is a `Variable`, cannot be accepted or
+            converted under the rules above, has the wrong number of
+            dimensions, is not aligned, or has a shape different from 1 on
+            a broadcastable dimension.
+        :raises ValueError: if `self.filter_checks_isfinite` is set and
+            `data` contains non-finite elements.
+        """
+        # Explicit error message when one accidentally uses a Variable as
+        # input (typical mistake, especially with shared variables).
+        if isinstance(data, Variable):
+            raise TypeError(
+                    'Expected an array-like object, but found a Variable: '
+                    'maybe you are trying to call a function on a (possibly '
+                    'shared) variable instead of a numeric array?')
+
+        if ((type(data) is numpy.ndarray)
+                and (data.dtype == self.numpy_dtype)):
+            # The dtypes compare equal but may still have different type
+            # numbers; in that case convert to exactly self.dtype.
+            if data.dtype.num != self.numpy_dtype.num:
+                data = theano._asarray(data, dtype=self.dtype)
+            # -- now fall through to ndim check
+        elif((type(data) is numpy.memmap)
+                and (data.dtype == self.numpy_dtype)):
+            # numpy.memmap is a "safe" subclass of ndarray,
+            # so we can use it wherever we expect a base ndarray.
+            # however, casting it would defeat the purpose of not
+            # loading the whole data into memory
+            pass
+        elif strict:
+            # If any of the two conditions above was not met,
+            # we raise a meaningful TypeError.
+            if not (type(data) is numpy.ndarray):
+                raise TypeError("%s expected a ndarray object." % self,
+                        data, type(data))
+            if data.dtype != self.numpy_dtype:
+                raise TypeError(("%s expected a ndarray object with "
+                        "dtype = %s (got %s).") % (
+                            self, self.numpy_dtype, data.dtype))
+            assert False, "This point should never be reached."
+        else:
+            if allow_downcast:
+                # Convert to self.dtype, regardless of the type of data
+                data = theano._asarray(data, dtype=self.dtype)
+                # TODO: consider to pad shape with ones to make it consistent
+                # with self.broadcastable... like vector->row type thing
+            else:
+                if isinstance(data, numpy.ndarray):
+                    # Check if self.dtype can accurately represent data
+                    # (do not try to convert the data)
+                    up_dtype = scal.upcast(self.dtype, data.dtype)
+                    if up_dtype == self.dtype:
+                        # Bug in the following line when data is a
+                        # scalar array, see
+                        # http://projects.scipy.org/numpy/ticket/1611
+                        # data = data.astype(self.dtype)
+                        data = theano._asarray(data, dtype=self.dtype)
+                    if up_dtype != self.dtype:
+                        err_msg = (
+                            '%s cannot store a value of dtype %s without '
+                            'risking loss of precision. If you do not mind '
+                            'this loss, you can: '
+                            '1) explicitly cast your data to %s, or '
+                            '2) set "allow_input_downcast=True" when calling '
+                            '"function".'
+                            % (self, data.dtype, self.dtype))
+                        raise TypeError(err_msg, data)
+                elif (allow_downcast is None and
+                        type(data) is float and
+                        self.dtype == theano.config.floatX):
+                    # Special case where we allow downcasting of Python float
+                    # literals to floatX, even when floatX=='float32'
+                    data = theano._asarray(data, self.dtype)
+                else:
+                    # data has to be converted.
+                    # Check that this conversion is lossless
+                    converted_data = theano._asarray(data, self.dtype)
+                    # We use the `values_eq` static function from TensorType
+                    # to handle NaN values.
+                    if TensorType.values_eq(numpy.asarray(data),
+                                            converted_data,
+                                            force_same_dtype=False):
+                        data = converted_data
+                    else:
+                        # Do not print a too long description of data
+                        # (ndarray truncates it, but it's not sure for data)
+                        str_data = str(data)
+                        if len(str_data) > 80:
+                            str_data = str_data[:75] + '(...)'
+
+                        err_msg = (
+                            '%s cannot store accurately value %s, '
+                            'it would be represented as %s. '
+                            'If you do not mind this precision loss, you can: '
+                            '1) explicitly convert your data to a numpy array '
+                            'of dtype %s, or '
+                            '2) set "allow_input_downcast=True" when calling '
+                            '"function".'
+                            % (self, data, converted_data, self.dtype))
+                        raise TypeError(err_msg, data)
+
+        # From here on `data` is an ndarray (or memmap) of the right dtype;
+        # check its rank, alignment and broadcastable dimensions.
+        if self.ndim != data.ndim:
+            raise TypeError("Wrong number of dimensions: expected %s,"
+                            " got %s with shape %s." % (self.ndim, data.ndim,
+                                                        data.shape))
+        if not data.flags.aligned:
+            try:
+                msg = "object buffer" + str(data.data)
+            except AttributeError:
+                msg = ""
+            raise TypeError("The numpy.ndarray object is not aligned."
+                            " Theano C code does not support that.",
+                            msg,
+                            "object shape", data.shape,
+                            "object strides", data.strides)
+
+        # Every dimension flagged as broadcastable must have length 1.
+        i = 0
+        for b in self.broadcastable:
+            if b and data.shape[i] != 1:
+                raise TypeError("Non-unit value on shape on a broadcastable"
+                                " dimension.", data.shape, self.broadcastable)
+            i += 1
+        if (self.filter_checks_isfinite and
+            not numpy.all(numpy.isfinite(data))):
+            raise ValueError("non-finite elements not allowed")
+        return data
+
+    def filter_variable(self, other):
+        """Return `other` as a Variable whose type equals this type.
+
+        An object exposing `_as_TensorVariable` is first converted through
+        that method.  Anything that is still not a `Variable` is wrapped
+        in a `self.Constant` of this type.  The result is returned only
+        if its type compares equal to `self`; otherwise a TypeError is
+        raised.
+        """
+        if hasattr(other, '_as_TensorVariable'):
+            other = other._as_TensorVariable()
+
+        is_variable = isinstance(other, Variable)
+        if not is_variable:
+            # A raw value: turn it into a Constant of the appropriate Type.
+            other = self.Constant(type=self, data=other)
+
+        if not (other.type == self):
+            details = dict(othertype=other.type, other=other, self=self)
+            raise TypeError(
+                    'Cannot convert Type %(othertype)s '
+                    '(of Variable %(other)s) into Type %(self)s. '
+                    'You can try to manually convert %(other)s into a %(self)s.'
+                    % details)
+
+        return other
+
+    def value_validity_msg(self, a):
+        """Return a message telling whether `a` passes strict filtering.
+
+        The result is "value is valid" when `self.filter(a, strict=True)`
+        succeeds, and the text of the raised exception otherwise.
+        """
+        try:
+            self.filter(a, strict=True)
+        except Exception, e:
+            return str(e)
+        return "value is valid"
+
+    def dtype_specs(self):
+        """Return a tuple (python type, c type, numpy typenum) that corresponds
+        to self.dtype.
+
+        This function is used internally as part of C code generation.
+        A TypeError is raised when self.dtype has no entry in the table.
+        """
+        # TODO: add more type correspondences if other dtypes must be
+        # supported.
+        specs_by_dtype = {
+            'float32': (float, 'npy_float32', 'NPY_FLOAT32'),
+            'float64': (float, 'npy_float64', 'NPY_FLOAT64'),
+            'uint8': (int, 'npy_uint8', 'NPY_UINT8'),
+            'int8': (int, 'npy_int8', 'NPY_INT8'),
+            'uint16': (int, 'npy_uint16', 'NPY_UINT16'),
+            'int16': (int, 'npy_int16', 'NPY_INT16'),
+            'uint32': (int, 'npy_uint32', 'NPY_UINT32'),
+            'int32': (int, 'npy_int32', 'NPY_INT32'),
+            'uint64': (int, 'npy_uint64', 'NPY_UINT64'),
+            'int64': (int, 'npy_int64', 'NPY_INT64'),
+            'complex128': (complex, 'theano_complex128', 'NPY_COMPLEX128'),
+            'complex64': (complex, 'theano_complex64', 'NPY_COMPLEX64'),
+        }
+        try:
+            return specs_by_dtype[self.dtype]
+        except KeyError:
+            raise TypeError("Unsupported dtype for %s: %s"
+                    % (self.__class__.__name__, self.dtype))
+
+    def to_scalar_type(self):
+        """Return a `scal.Scalar` type with the same dtype as this type."""
+        return scal.Scalar(dtype=self.dtype)
+
+    def __eq__(self, other):
+        """Compare True iff other is the same kind of TensorType"""
+        # Objects of another class are never equal; this also avoids
+        # reading `dtype` / `broadcastable` on arbitrary objects.
+        if not (type(self) == type(other)):
+            return False
+        return (other.dtype == self.dtype and
+                other.broadcastable == self.broadcastable)
+
+    @staticmethod
+    def may_share_memory(a, b):
+        """Return whether ndarrays `a` and `b` may share memory.
+
+        The answer is False as soon as one of them is not a
+        numpy.ndarray.
+        """
+        # This is a method of TensorType, so both a and b should be ndarrays
+        both_arrays = (isinstance(a, numpy.ndarray) and
+                       isinstance(b, numpy.ndarray))
+        if not both_arrays:
+            return False
+        return numpy.may_share_memory(a, b)
+
+    @staticmethod
+    def values_eq(a, b, force_same_dtype=True):
+        """Return whether arrays `a` and `b` hold exactly the same values.
+
+        NaN entries are considered equal to each other when they appear
+        at the same positions in `a` and `b`.
+
+        :param a: first array.
+        :param b: second array.
+        :param force_same_dtype: if True, arrays of different dtypes are
+            never equal.
+
+        :returns: a true value iff the shapes match, the dtypes match
+            (when `force_same_dtype`), and every position either holds
+            equal values or holds NaN in both arrays.
+        """
+        # TODO: check to see if the shapes must match
+        #      for now, we err on safe side...
+        if a.shape != b.shape:
+            return False
+        if force_same_dtype and a.dtype != b.dtype:
+            return False
+        a_eq_b = (a == b)
+        r = numpy.all(a_eq_b)
+        if r:
+            return True
+        # maybe the trouble is that there are NaNs
+        a_missing = numpy.isnan(a)
+        if a_missing.any():
+            b_missing = numpy.isnan(b)
+            # A position matches if the values are equal or if both are
+            # NaN.  Comparing the NaN masks with `==` would also accept
+            # every position where neither value is NaN, hiding real
+            # differences between the arrays.
+            return numpy.all(a_eq_b | (a_missing & b_missing))
+        else:
+            return False
+
+    @staticmethod
+    def values_eq_approx(a, b, allow_remove_inf=False, allow_remove_nan=False,
+                         rtol=None, atol=None):
+        """Return whether ndarrays `a` and `b` are approximately equal.
+
+        Values that are not both numpy.ndarray, or arrays whose shape or
+        dtype differ, never compare equal.  Arrays whose dtype name
+        contains 'int' must match exactly.  Other arrays are compared with
+        `_allclose`; when that fails and `a` contains NaN (or inf, if
+        `allow_remove_inf` is set), an element-wise comparison that
+        accounts for NaN and inf entries is used instead.
+
+        :param allow_remove_inf: If True, when there is an inf in a,
+                                 we allow any value in b in that position.
+                                 Even -inf
+        :param allow_remove_nan: If True, when there is a nan in a,
+                                 we allow any value in b in that position.
+                                 Even +-inf
+        :param rtol: relative tolerance, passed to _allclose; when not
+                     None it is also used by the NaN/inf-aware fallback
+        :param atol: absolute tolerance, passed to _allclose; when not
+                     None it is also used by the NaN/inf-aware fallback
+        """
+        if isinstance(a, numpy.ndarray) and isinstance(b, numpy.ndarray):
+            if a.shape != b.shape:
+                return False
+            if a.dtype != b.dtype:
+                return False
+            if 'int' in str(a.dtype):
+                return numpy.all(a == b)
+            else:
+                # work around a numpy.allclose bug:
+                # http://projects.scipy.org/numpy/ticket/1672
+                if a.ndim == 0 and numpy.isinf(a):
+                    a = a.reshape(1)
+                    b = b.reshape(1)
+
+                cmp = theano.tensor.basic._allclose(a, b, rtol=rtol, atol=atol)
+                if cmp:
+                    # Numpy claims they are close, this is good enough for us.
+                    return True
+                # Numpy is unhappy, but it does not necessarily mean that a and
+                # b are different. Indeed, Numpy does not like missing values
+                # and will return False whenever some are found in a or b.
+                # The proper way would be to use the MaskArray stuff available
+                # in Numpy. However, it looks like it has been added to Numpy's
+                # core recently, so it may not be available to everyone. Thus,
+                # for now we use a home-made recipe, that should probably be
+                # revisited in the future.
+                a_missing = numpy.isnan(a)
+                a_inf = numpy.isinf(a)
+
+                if not (a_missing.any() or (allow_remove_inf and a_inf.any())):
+                    # There are no missing values in a, thus this is not the
+                    # reason why numpy.allclose(a, b) returned False.
+                    _logger.info(
+                        'numpy allclose failed for abs_err %f and rel_err %f',
+                        numpy.max(abs(a - b)),
+                        numpy.max(abs(a - b) / (abs(a) + abs(b))))
+                    return False
+                # The following line is what numpy.allclose bases its decision
+                # upon, according to its documentation.  The documented
+                # defaults are used only when the caller did not ask for
+                # specific tolerances, so that explicit `rtol` / `atol`
+                # values are honoured here as well.
+                if rtol is None:
+                    rtol = 1.0000000000000001e-05
+                if atol is None:
+                    atol = 1e-8
+                cmp_elemwise = (numpy.absolute(a - b) <=
+                        (atol + rtol * numpy.absolute(b)))
+                # Find places where both a and b have missing values.
+                both_missing = a_missing * numpy.isnan(b)
+
+                # Find places where both a and b have inf of the same sign.
+                both_inf = a_inf * numpy.isinf(b)
+
+                # cmp_elemwise is weird when we have inf and -inf.
+                # set it to False
+                cmp_elemwise = numpy.where(
+                        both_inf & cmp_elemwise,
+                        a == b,
+                        cmp_elemwise)
+
+                # check the sign of the inf
+                both_inf = numpy.where(both_inf, (a == b), both_inf)
+
+                if allow_remove_inf:
+                    both_inf += a_inf
+                if allow_remove_nan:
+                    both_missing += a_missing
+
+                # Combine all information.
+                return (cmp_elemwise + both_missing + both_inf).all()
+
+        return False
+
+    @staticmethod
+    def values_eq_approx_remove_inf(a, b):
+        """Call `values_eq_approx` with `allow_remove_inf` enabled."""
+        return TensorType.values_eq_approx(a, b, allow_remove_inf=True)
+
+    @staticmethod
+    def values_eq_approx_remove_nan(a, b):
+        """Call `values_eq_approx` with `allow_remove_nan` enabled."""
+        return TensorType.values_eq_approx(a, b,
+                                           allow_remove_inf=False,
+                                           allow_remove_nan=True)
+
+    @staticmethod
+    def values_eq_approx_remove_inf_nan(a, b):
+        """Call `values_eq_approx` with both `allow_remove_inf` and
+        `allow_remove_nan` enabled."""
+        return TensorType.values_eq_approx(a, b,
+                                           allow_remove_inf=True,
+                                           allow_remove_nan=True)
+
+    def __hash__(self):
+        """Hash equal for same kinds of TensorType"""
+        # Combines the class (through `hashtype`), the dtype and the
+        # broadcastable pattern, i.e. what `__eq__` compares.
+        return hashtype(self) ^ hash(self.dtype) ^ hash(self.broadcastable)
+
+    # Computed from `broadcastable`, which holds one entry per dimension.
+    ndim = property(lambda self: len(self.broadcastable),
+            doc="number of dimensions")
+    """Number of dimensions
+
+    This read-only property is the preferred way to get the number of
+    dimensions of a `TensorType`.
+
+    """
+
+    def make_variable(self, name=None):
+        """Return a `TensorVariable` of this type
+
+        :Parameters:
+         - `name`: str
+           A pretty name to identify this `Variable` when printing and
+           debugging
+        """
+        return self.Variable(self, name=name)
+
+    def __str__(self):
+        if self.name:
+            return self.name
+        else:
+            b = self.broadcastable
+            named_broadcastable = {(): 'scalar',
+                     (False,): 'vector',
+                     (False, True): 'col',
+                     (True, False): 'row',
+                     (False, False): 'matrix'}
+            if b in named_broadcastable:
+                bcast = named_broadcastable[b]
+            else:
+                if any(b):
+                    bcast = str(b)
+                else:
+                    bcast = '%iD' % len(b)
+            return "TensorType(%s, %s)" % (str(self.dtype), bcast)
+
+    def __repr__(self):
+        """Return the same text as ``str(self)``."""
+        return str(self)
+        #"TensorType{%s, %s}" % (str(self.dtype), str(self.broadcastable))
+
+    def c_declare(self, name, sub):
+        """Override `CLinkerOp.c_declare` """
+        return """
+        PyArrayObject* %(name)s;
+        int type_num_%(name)s;
+        typedef %(dtype)s dtype_%(name)s;
+        """ % dict(sub, name=name, dtype=self.dtype_specs()[1])
+
+    def c_init(self, name, sub):
+        """Override `CLinkerOp.c_init` """
+        return """
+        %(name)s = NULL;
+        type_num_%(name)s = %(type_num)s;
+        """ % dict(sub, name=name, type_num=self.dtype_specs()[2])
+
+    def c_extract(self, name, sub):
+        """Override `CLinkerOp.c_extract`.
+
+        Return C code that validates ``py_<name>`` and stores it in
+        ``<name>``.  The generated code fails (through ``sub['fail']``) when
+        the Python object is None, is not an ndarray, is not aligned, or does
+        not have the type number given by ``self.dtype_specs()[2]``.  On
+        success ``<name>`` points to the array and a new reference is taken.
+        """
+        return """
+        %(name)s = NULL;
+        if (py_%(name)s == Py_None) {
+            // We can either fail here or set %(name)s to NULL and rely on Ops
+            // using tensors to handle the NULL case, but if they fail to do so
+            // they'll end up with nasty segfaults, so this is public service.
+            PyErr_SetString(PyExc_ValueError, "expected an ndarray, not None");
+            %(fail)s
+        }
+        if (!PyArray_Check(py_%(name)s)) {
+            PyErr_SetString(PyExc_ValueError, "expected an ndarray");
+            %(fail)s
+        }
+        // We expect %(type_num)s
+        type_num_%(name)s = ((PyArrayObject*)py_%(name)s)->descr->type_num;
+        if (!PyArray_ISALIGNED(py_%(name)s)) {
+            PyErr_Format(PyExc_NotImplementedError,
+                         "expected an aligned array of type %%ld "
+                         "(%(type_num)s), got non-aligned array of type %%ld"
+                         " with %%ld dimensions, with 3 last dims "
+                         "%%ld, %%ld, %%ld"
+                         " and 3 last strides %%ld %%ld, %%ld.",
+                         (long int) %(type_num)s,
+                         (long int) type_num_%(name)s,
+                         (long int) PyArray_NDIM(py_%(name)s),
+                         (long int) PyArray_NDIM(py_%(name)s) >= 3 ?
+        PyArray_DIMS(py_%(name)s)[PyArray_NDIM(py_%(name)s)-3] : -1,
+                         (long int) PyArray_NDIM(py_%(name)s) >= 2 ?
+        PyArray_DIMS(py_%(name)s)[PyArray_NDIM(py_%(name)s)-2] : -1,
+                         (long int) PyArray_NDIM(py_%(name)s) >= 1 ?
+        PyArray_DIMS(py_%(name)s)[PyArray_NDIM(py_%(name)s)-1] : -1,
+                         (long int) PyArray_NDIM(py_%(name)s) >= 3 ?
+        PyArray_STRIDES(py_%(name)s)[PyArray_NDIM(py_%(name)s)-3] : -1,
+                         (long int) PyArray_NDIM(py_%(name)s) >= 2 ?
+        PyArray_STRIDES(py_%(name)s)[PyArray_NDIM(py_%(name)s)-2] : -1,
+                         (long int) PyArray_NDIM(py_%(name)s) >= 1 ?
+        PyArray_STRIDES(py_%(name)s)[PyArray_NDIM(py_%(name)s)-1] : -1
+        );
+            %(fail)s
+        }
+        // This is a TypeError to be consistent with DEBUG_MODE
+        // Note: DEBUG_MODE also tells the name of the container
+        if (type_num_%(name)s != %(type_num)s) {
+            PyErr_Format(PyExc_TypeError,
+                         "expected type_num %%d (%(type_num)s) got %%d",
+                         %(type_num)s, type_num_%(name)s);
+            %(fail)s
+        }
+        %(name)s = (PyArrayObject*)(py_%(name)s);
+        Py_XINCREF(%(name)s);
+        """ % dict(sub, name=name, type_num=self.dtype_specs()[2])
+
+    def c_cleanup(self, name, sub):
+        """Override `CLinkerOp.c_cleanup` """
+        return """
+        if (%(name)s) {
+            Py_XDECREF(%(name)s);
+        }
+        """ % locals()
+
+    def c_sync(self, name, sub):
+        """Override `CLinkerOp.c_sync`.
+
+        Return C code that makes ``py_<name>`` refer to the C-side array
+        ``<name>`` (or to ``Py_None`` when ``<name>`` is NULL), and that
+        fails through ``sub['fail']`` with a NotImplementedError when the
+        resulting object is not aligned.
+        """
+        # ``fail`` and ``type_num`` are consumed by the template through
+        # ``locals()`` below.
+        fail = sub['fail']
+        type_num = self.dtype_specs()[2]
+        return """
+        {Py_XDECREF(py_%(name)s);}
+        if (!%(name)s) {
+            Py_INCREF(Py_None);
+            py_%(name)s = Py_None;
+        }
+        else if ((void*)py_%(name)s != (void*)%(name)s) {
+            py_%(name)s = (PyObject*)%(name)s;
+        }
+
+        {Py_XINCREF(py_%(name)s);}
+
+        if (!PyArray_ISALIGNED(py_%(name)s)) {
+            PyErr_Format(PyExc_NotImplementedError,
+                         "c_sync: expected an aligned array of type %%ld "
+                         "(%(type_num)s), got non-aligned array of type %%ld"
+                         " with %%ld dimensions, with 3 last dims "
+                         "%%ld, %%ld, %%ld"
+                         " and 3 last strides %%ld %%ld, %%ld.",
+                         (long int) %(type_num)s,
+                         (long int) type_num_%(name)s,
+                         (long int) PyArray_NDIM(py_%(name)s),
+                         (long int) PyArray_NDIM(py_%(name)s) >= 3 ?
+        PyArray_DIMS(py_%(name)s)[PyArray_NDIM(py_%(name)s)-3] : -1,
+                         (long int) PyArray_NDIM(py_%(name)s) >= 2 ?
+        PyArray_DIMS(py_%(name)s)[PyArray_NDIM(py_%(name)s)-2] : -1,
+                         (long int) PyArray_NDIM(py_%(name)s) >= 1 ?
+        PyArray_DIMS(py_%(name)s)[PyArray_NDIM(py_%(name)s)-1] : -1,
+                         (long int) PyArray_NDIM(py_%(name)s) >= 3 ?
+        PyArray_STRIDES(py_%(name)s)[PyArray_NDIM(py_%(name)s)-3] : -1,
+                         (long int) PyArray_NDIM(py_%(name)s) >= 2 ?
+        PyArray_STRIDES(py_%(name)s)[PyArray_NDIM(py_%(name)s)-2] : -1,
+                         (long int) PyArray_NDIM(py_%(name)s) >= 1 ?
+        PyArray_STRIDES(py_%(name)s)[PyArray_NDIM(py_%(name)s)-1] : -1
+        );
+            %(fail)s
+        }
+        """ % locals()
+
+    def c_headers(self):
+        """Override `CLinkerOp.c_headers` """
+        return scal.Scalar(self.dtype).c_headers()
+
+    def c_libraries(self):
+        return scal.Scalar(self.dtype).c_libraries()
+
+    def c_compile_args(self):
+        return scal.Scalar(self.dtype).c_compile_args()
+
+    def c_support_code(self):
+        """Override `CLinkerOp.c_support_code` """
+        return scal.Scalar(self.dtype).c_support_code()
+
+    def c_code_cache_version(self):
+        scalar_version = scal.Scalar(self.dtype).c_code_cache_version()
+        if scalar_version:
+            return (9,) + scalar_version
+        else:
+            return ()
+
+    def value_zeros(self, shape):
+        """
+        Create a numpy ndarray of the given shape, filled with 0 values and
+        using this type's dtype.
+        """
+        return numpy.zeros(shape, dtype=self.dtype)
+
+    def get_shape_info(self, obj):
+        """
+        Return the information needed to compute the memory size of ``obj``.
+
+        The memory size is only the data, so this excludes the container.
+        For an ndarray, this is the data, but not the ndarray object and
+        other data structures such as shape and strides.
+
+        ``get_shape_info()`` and ``get_size()`` work in tandem for the memory
+        profiler.
+
+        ``get_shape_info()`` is called during the execution of the function,
+        so it should not be too slow.
+
+        ``get_size()`` will be called on the output of this function
+        when printing the memory profile.
+
+        :param obj: The object that this Type represents during execution
+        :return: Python object that ``self.get_size()`` understands; here,
+            ``obj.shape``
+        """
+        return obj.shape
+
+    def get_size(self, shape_info):
+        """ Number of bytes taken by the object represented by shape_info.
+
+        :param shape_info: the output of the call to get_shape_info()
+        :return: the number of bytes taken by the object described by
+            ``shape_info``.
+        """
+        if shape_info:
+            return numpy.prod(shape_info) * numpy.dtype(self.dtype).itemsize
+        else:  # a scalar
+            return numpy.dtype(self.dtype).itemsize
+# Append TensorType to the tuple ``theano.compile.ops.expandable_types``.
+theano.compile.ops.expandable_types += (TensorType,)
+
+# Register TensorType C code for ViewOp.
+# The C code drops the reference held by the output, makes the output point
+# at the input array, and takes a new reference on it.
+theano.compile.register_view_op_c_code(
+        TensorType,
+        """
+        Py_XDECREF(%(oname)s);
+        %(oname)s = %(iname)s;
+        Py_XINCREF(%(oname)s);
+        """,
+        version=1)
+
+# Register TensorType C code for DeepCopyOp.
+# The C code allocates a fresh copy of the input when there is no output
+# array yet or when one of the compared dimensions differs; otherwise it
+# copies the input into the already allocated output.
+theano.compile.register_deep_copy_op_c_code(
+        TensorType,
+        """
+        int alloc = %(oname)s == NULL;
+        for(int i=0; !alloc && i<PyArray_NDIM(%(oname)s); i++) {
+           if(PyArray_DIMS(%(iname)s)[i] != PyArray_DIMS(%(oname)s)[i]) {
+               alloc = true;
+               break;
+           }
+        }
+        if(alloc) {
+            Py_XDECREF(%(oname)s);
+            %(oname)s = (PyArrayObject*)PyArray_NewCopy(%(iname)s,
+                                                        NPY_ANYORDER);
+            if (!%(oname)s)
+            {
+                PyErr_SetString(PyExc_ValueError,
+                                "DeepCopyOp: the copy failed!");
+                %(fail)s;
+            }
+        } else {
+            if(PyArray_CopyInto(%(oname)s, %(iname)s)){
+                PyErr_SetString(PyExc_ValueError,
+            "DeepCopyOp: the copy failed into already allocated space!");
+                %(fail)s;
+            }
+        }
+        """,
+        version=2)
--- a/theano/tensor/type_other.py
+++ b/theano/tensor/type_other.py
+#
+# Slice type and Op. None Type and NoneConst.
+#
+from theano.gof import Apply, Constant, Op, Type
+from theano.gradient import DisconnectedType
+
+
+def as_int_none_variable(x):
+    if x is None:
+        return NoneConst
+    x = theano.tensor.as_tensor_variable(x, ndim=0)
+    if x.type.dtype[:3] not in ('int', 'uin'):
+        raise TypeError('index must be integers')
+    return x
+
+
+class MakeSlice(Op):
+    def make_node(self, slc):
+        return Apply(self,
+                     map(as_int_none_variable,
+                         [slc.start, slc.stop, slc.step]),
+                     [slicetype()])
+
+    def perform(self, node, inp, out_):
+        out, = out_
+        out[0] = slice(*inp)
+
+    def __str__(self):
+        return self.__class__.__name__
+
+    def __eq__(self, other):
+        return type(self) == type(other)
+
+    def __hash__(self):
+        return hash(type(self))
+
+    def grad(self, inputs, grads):
+        return [DisconnectedType()() for i in inputs]
+
+make_slice = MakeSlice()
+
+
+class SliceType(Type):
+
+    def filter(self, x, strict=False, allow_downcast=None):
+        if isinstance(x, slice):
+            return x
+        else:
+            raise TypeError('Expected a slice!')
+
+    def __str__(self):
+        return "slice"
+
+slicetype = SliceType()
+
+
+class NoneTypeT(Type):
+
+    def filter(self, x, strict=False, allow_downcast=None):
+        if x is None:
+            return x
+        else:
+            raise TypeError('Expected None!')
+
+    def __str__(self):
+        return "None"
+
+NoneConst = Constant(NoneTypeT(), None, name='None')
--- a/theano/tensor/var.py
+++ b/theano/tensor/var.py
+import numpy
+
+import theano
+from theano.compat import PY3
+from theano.scalar import ComplexError, IntegerDivisionError
+from theano.gof import Constant, Variable
+from theano.gof.utils import hashtype
+from theano.tensor.utils import hash_from_ndarray
+from theano.tensor.type import TensorType
+
+
+class AsTensorError(TypeError):
+    """Raised when as_tensor_variable isn't able to create a
+    TensorVariable.
+    """
+    pass
+
+
+class _tensor_py_operators:
+    # UNARY
+    def __abs__(self):
+        return theano.tensor.basic.abs_(self)
+
+    def __neg__(self):
+        return theano.tensor.basic.neg(self)
+
+    # CASTS
+    #### REMOVED THESE BECAUSE PYTHON appears to require __int__ to return
+    #### an int. -JB 20081112
+    #def __int__(self): return convert_to_int32(self)
+    #def __float__(self): return convert_to_float64(self)
+    #def __complex__(self): return convert_to_complex128(self)
+
+    # COMPARISONS
+    _is_nonzero = True
+
+    def __lt__(self, other):
+        rval = theano.tensor.basic.lt(self, other)
+        rval._is_nonzero = False
+        return rval
+
+    def __le__(self, other):
+        rval = theano.tensor.basic.le(self, other)
+        rval._is_nonzero = False
+        return rval
+
+    def __gt__(self, other):
+        rval = theano.tensor.basic.gt(self, other)
+        rval._is_nonzero = False
+        return rval
+
+    def __ge__(self, other):
+        rval = theano.tensor.basic.ge(self, other)
+        rval._is_nonzero = False
+        return rval
+
+    def __nonzero__(self):
+        # This is meant to prohibit stuff like a < b < c, which is internally
+        # implemented as (a < b) and (b < c). The trouble with this is the
+        # side-effect that checking for a non-NULL a by typing "if a: ..."
+        # uses the same __nonzero__ method.  We want these both to work, but
+        # it seems impossible.  Currently, all vars evaluate to nonzero except
+        # the return values of comparison operators, which raise this
+        # exception.  If you can think of a better solution, go for it!
+        if self._is_nonzero:
+            return True
+        else:
+            raise TypeError(
+                "Variables do not support boolean operations. This "
+                "can happen if you do a logical operation (<, <=, >, <=, "
+                "==, !=) between a numpy.ndarray and a Theano tensor"
+                "variable. Due to NumPy implementation before NumPy 1.8, "
+                "we cannot make the Python syntax work when the ndarray "
+                "is on the left, and this results in this error. To work "
+                "around that, either call "
+                "theano.tensor.{lt,le,eq,ne,gt,ge}(ndarray, tensor), or "
+                "use the Python syntax with the Theano tensor on the "
+                "left. Or update to NumPy 1.8 or above."
+            )
+
+    # BITWISE
+    def __invert__(self):
+        return theano.tensor.basic.invert(self)
+
+    def __and__(self, other):
+        return theano.tensor.basic.and_(self, other)
+
+    def __or__(self, other):
+        return theano.tensor.basic.or_(self, other)
+
+    def __xor__(self, other):
+        return theano.tensor.basic.xor(self, other)
+
+    def __rand__(self, other):
+        return theano.tensor.basic.and_(other, self)
+
+    def __ror__(self, other):
+        return theano.tensor.basic.or_(other, self)
+
+    def __rxor__(self, other):
+        return theano.tensor.basic.xor(other, self)
+
+    # def __iand__(self, other):
+    #    return _and_inplace(self, other)
+    #
+    # def __ior__(self, other):
+    #    return _or_inplace(self, other)
+    #
+    #def __ixor__(self, other):
+    #    return _xor_inplace(self, other)
+
+    # ARITHMETIC - NORMAL
+    def __add__(self, other):
+        try:
+            return theano.tensor.basic.add(self, other)
+        # We should catch the minimum number of exception here.
+        # Otherwise this will convert error when Theano flags
+        # compute_test_value is used
+        # Evidently, we need to catch NotImplementedError
+        # TypeError from as_tensor_variable are caught in Elemwise.make_node
+        # Oterwise TensorVariable * SparseVariable won't work!
+        except (NotImplementedError, AsTensorError):
+            # We must return NotImplemented and not an
+            # NotImplementedError or raise an NotImplementedError.
+            # That way python will give a good error message like this
+            # `TypeError: unsupported operand type(s) for +:
+            # 'TensorVariable' and 'TensorVariable'`
+            return NotImplemented
+
+    def __sub__(self, other):
+        # See explanation in __add__ for the error catched
+        # and the return value in that case
+        try:
+            return theano.tensor.basic.sub(self, other)
+        except (NotImplementedError, AsTensorError):
+            return NotImplemented
+
+    def __mul__(self, other):
+        # See explanation in __add__ for the error catched
+        # and the return value in that case
+        try:
+            return theano.tensor.mul(self, other)
+        except (NotImplementedError, AsTensorError):
+            return NotImplemented
+
+    def __div__(self, other):
+        # See explanation in __add__ for the error catched
+        # and the return value in that case
+        try:
+            return theano.tensor.basic.div_proxy(self, other)
+        except IntegerDivisionError:
+            # This is to raise the exception that occurs when trying to divide
+            # two integer arrays (currently forbidden).
+            raise
+        except (NotImplementedError, AsTensorError):
+            return NotImplemented
+    if PY3:
+        __truediv__ = __div__
+
+    def __pow__(self, other):
+        # See explanation in __add__ for the error catched
+        # adn the return value in that case
+        try:
+            return theano.tensor.basic.pow(self, other)
+        except (NotImplementedError, AsTensorError):
+            return NotImplemented
+
+    def __mod__(self, other):
+        # See explanation in __add__ for the error catched
+        # adn the return value in that case
+        try:
+            return theano.tensor.basic.mod_check(self, other)
+        except ComplexError:
+            # This is to raise the exception that occurs when trying to compute
+            # x % y with either x or y a complex number.
+            raise
+        except (NotImplementedError, AsTensorError):
+            return NotImplemented
+
+    def __truediv__(self, other):
+        return theano.tensor.basic.true_div(self, other)
+
+    def __floordiv__(self, other):
+        return theano.tensor.basic.floor_div(self, other)
+
+    def __rtruediv__(self, other):
+        return theano.tensor.basic.true_div(other, self)
+
+    def __rfloordiv__(self, other):
+        return theano.tensor.basic.floor_div(other, self)
+
+    ##### DO NOT USE THESE BECAUSE INPLACE OPS SHOULD BE INSERTED
+    ##### BY OPTIMIZATIONS ONLY
+    ## ARITHMETIC - INPLACE
+    #def __iadd__(self, other):
+    #    return _add_inplace(self, other)
+    #def __isub__(self, other):
+    #    return _sub_inplace(self, other)
+    #
+    #def __imul__(self, other):
+    #    return _mul_inplace(self, other)
+    #
+    #def __idiv__(self, other):
+    #    return _div_inplace(self, other)
+    #
+    #def __ipow__(self, other):
+    #    return _pow_inplace(self, other)
+
+    # ARITHMETIC - RIGHT-OPERAND
+    def __radd__(self, other):
+        return theano.tensor.basic.add(other, self)
+
+    def __rsub__(self, other):
+        return theano.tensor.basic.sub(other, self)
+
+    def __rmul__(self, other):
+        return theano.tensor.basic.mul(other, self)
+
+    def __rdiv__(self, other):
+        return theano.tensor.basic.div_proxy(other, self)
+
+    def __rmod__(self, other):
+        return theano.tensor.basic.mod(other, self)
+
+    def __rpow__(self, other):
+        return theano.tensor.basic.pow(other, self)
+
+    # TRANSPOSE
+    T = property(lambda self: theano.tensor.basic.transpose(self))
+
+    def transpose(self, *axes):
+        """
+        Return `tensor.transpose(self, axes)`
+        or `tensor.transpose(self, axes[0])`
+
+        If only one `axes` argument is provided and it is iterable, then it is
+        assumed to be the entire axes tuple, and passed intact to
+        tensor.transpose.
+
+        """
+        if len(axes) == 0:
+            return theano.tensor.basic.transpose(self)
+        try:
+            iter(axes[0])
+            iterable = True
+        except TypeError:
+            iterable = False
+        if len(axes) == 1 and iterable:
+            return theano.tensor.basic.transpose(self, axes[0])
+        else:
+            return theano.tensor.basic.transpose(self, axes)
+
+    shape = property(lambda self: theano.tensor.basic.shape(self))
+
+    size = property(lambda self: theano.tensor.basic.prod(self.shape))
+
+    # We can't implement __len__ to provide a better error message.
+    def any(self, axis=None, keepdims=False):
+        return theano.tensor.basic.any(self, axis=axis, keepdims=keepdims)
+
+    def all(self, axis=None, keepdims=False):
+        return theano.tensor.basic.all(self, axis=axis, keepdims=keepdims)
+
+    # Otherwise TensorVariable[:-1] does not work as Python 2.5.1 calls
+    # __len__ before calling __getitem__. It also does not catch the raised
+    # Exception!
+    # def __len__(self):
+    #     # We can't implement __len__ as Python requests that this
+    #     # function returns an integer >=0
+    #     raise Exception("Theano Variables can't work with len(Theano "
+    #                     "Variable) due to Python restriction. You can use "
+    #                     "TheanoVariable.shape[0] instead.")
+
+    def reshape(self, shape, ndim=None):
+        """Return a reshaped view/copy of this variable.
+
+        :param shape: something that can be converted to a symbolic vector of
+            integers
+
+        :param ndim: the length of the shape.  Passing None here means for
+            theano to try and guess the length of `shape`.
+
+        * warning-- this has a different signature than numpy's
+                    ndarray.reshape!
+                    in numpy you do not need to wrap the shape arguments
+                    in a tuple, in theano you do need to
+
+        """
+
+        if ndim is not None:
+            if not isinstance(ndim, int):
+                raise ValueError("Expected ndim to be an integer, is " +
+                                 str(type(ndim)))
+
+        return theano.tensor.basic.reshape(self, shape, ndim=ndim)
+
+    def dimshuffle(self, *pattern):
+        """
+        Reorder the dimensions of this variable, optionally inserting
+        broadcasted dimensions.
+
+        :param pattern: list/tuple of int mixed with 'x' for broadcastable
+            dimensions
+
+        For example, to create a 3D view of a [2D] matrix, call
+        ``dimshuffle([0,'x',1])``.  This will create a 3D view such that the
+        middle dimension is an implicit broadcasted dimension.  To do the same
+        thing on the transpose of that matrix, call
+        ``dimshuffle([1, 'x', 0])``.
+
+        This function supports the pattern passed as a tuple, or as a
+        variable-length argument (e.g. ``a.dimshuffle(pattern)`` is equivalent
+        to ``a.dimshuffle(*pattern)`` where ``pattern`` is a list/tuple of ints
+        mixed with 'x' characters).
+
+        For more information, see `DimShuffle`.
+        """
+        if (len(pattern) == 1) and (isinstance(pattern[0], (list, tuple))):
+            pattern = pattern[0]
+        op = theano.tensor.basic.DimShuffle(list(self.type.broadcastable),
+                                            pattern)
+        return op(self)
+
+    def flatten(self, ndim=1):
+        return theano.tensor.basic.flatten(self, ndim)
+
+    def ravel(self):
+        return theano.tensor.basic.flatten(self)
+
+    def diagonal(self, offset=0, axis1=0, axis2=1):
+        return theano.tensor.basic.diagonal(self, offset, axis1, axis2)
+
+    # CASTING
+    def astype(self, dtype):
+        return theano.tensor.cast(self, dtype)
+
+    # SLICING
+    # Do not define __getslice__ here:
+    # When calling t[1:], for instance, the arguments passed to __getslice__
+    # are (1, sys.maxsize), which is a pain to deal with, and can even not be
+    # an int (but a long).
+    # If __getslice__ does not exist, __getitem__ is called instead, with
+    # argument slice(1, None, None), which is much more desirable.
+    # __getslice__ is deprecated in python 2.6 anyway.
+
+    def __getitem__(self, args):
+        if not isinstance(args, tuple):
+            args = args,
+        # Determine if advanced indexing is needed or not
+        # The logic is already in Subtensor.convert: if it succeeds,
+        # standard indexing is used; if it fails with
+        # AdvancedIndexingError, advanced indexing
+        advanced = False
+        axis = None
+        for i, arg in enumerate(args):
+            try:
+                if arg != numpy.newaxis:
+                    theano.tensor.subtensor.Subtensor.convert(arg)
+            except theano.tensor.subtensor.AdvancedIndexingError:
+                if advanced:
+                    axis = None
+                    break
+                else:
+                    advanced = True
+                    axis = i
+
+        if advanced:
+            if (axis is not None
+                and numpy.all(a == slice(None) for a in args[:axis])
+                and numpy.all(a == slice(None) for a in args[axis + 1:])
+                and isinstance(args[axis], (
+                        numpy.ndarray,
+                        list,
+                        TensorVariable,
+                        TensorConstant,
+                        theano.tensor.sharedvar.TensorSharedVariable))):
+                return self.take(arg, axis)
+            else:
+                return theano.tensor.subtensor.AdvancedSubtensor()(self, *args)
+        else:
+            if numpy.newaxis in args:
+                # None (aka np.newaxis) in numpy indexing means to add a
+                # broadcastable dimension, which theano traditionally did with
+                # the dimshuffle op.  The following code converts numpy-style
+                # indexing on self to traditional [read: implemented] theano
+                # indexing on a dimshuffled view of self.
+
+                counter = 0
+                pattern = []
+                new_args = []
+                for arg in args:
+                    if arg == numpy.newaxis:
+                        pattern.append('x')
+                        new_args.append(slice(None, None, None))
+                    else:
+                        pattern.append(counter)
+                        counter += 1
+                        new_args.append(arg)
+                view = self.dimshuffle(pattern)
+                rval = view.__getitem__(tuple(new_args))
+                return rval
+            else:
+                return theano.tensor.subtensor.Subtensor(args)(
+                    self, *theano.tensor.subtensor.Subtensor.collapse(args,
+                    lambda entry: isinstance(entry, Variable)))
+
+    def take(self, indices, axis=None, mode='raise'):
+        return theano.tensor.subtensor.take(self, indices, axis, mode)
+
+    # COPYING
+    def copy(self):
+        return theano.tensor.basic.tensor_copy(self)
+
+    def __iter__(self):
+        try:
+            for i in xrange(theano.tensor.basic.get_vector_length(self)):
+                yield self[i]
+        except TypeError:
+            # This prevents accidental iteration via builtin.sum(self)
+            raise TypeError(('TensorType does not support iteration. '
+                             'Maybe you are using builtin.sum instead of '
+                             'theano.tensor.sum? (Maybe .max?)'))
+
+    # CONVENIENT ACCESS TO TYPE PROPERTIES
+    ndim = property(lambda self: self.type.ndim)
+    """The rank of this tensor."""
+
+    broadcastable = property(lambda self: self.type.broadcastable)
+    """The broadcastable signature of this tensor.
+
+    See :doc:`broadcasting` for details.
+    """
+
+    dtype = property(lambda self: self.type.dtype)
+    """ The dtype of this tensor.  """
+
+    # extra pseudo-operator symbols
+    def __dot__(left, right):
+        return theano.tensor.basic.dot(left, right)
+
+    def __rdot__(right, left):
+        return theano.tensor.basic.dot(left, right)
+
+    dot = __dot__
+
+    def sum(self, axis=None, dtype=None, keepdims=False, acc_dtype=None):
+        """See `theano.tensor.sum`"""
+        return theano.tensor.basic.sum(self, axis=axis,
+                                       dtype=dtype, keepdims=keepdims,
+                                       acc_dtype=acc_dtype)
+
+    def prod(self, axis=None, dtype=None, keepdims=False, acc_dtype=None):
+        """See `theano.tensor.prod`"""
+        return theano.tensor.basic.prod(self, axis=axis,
+                                        dtype=dtype, keepdims=keepdims,
+                                        acc_dtype=acc_dtype)
+
+    def norm(self, L, axis=None):
+        if L == 0:
+            raise NotImplementedError()
+        if numpy.isinf(L):
+            raise NotImplementedError()
+        # optimizations will/should catch cases like L=1, L=2
+        return theano.tensor.basic.pow(
+            theano.tensor.basic.pow(abs_(self), L).sum(axis=axis), 1.0 / L)
+
+    def mean(self, axis=None, dtype=None, keepdims=False, acc_dtype=None):
+        """See `theano.tensor.mean`"""
+        return theano.tensor.basic.mean(self, axis=axis,
+                                        dtype=dtype, keepdims=keepdims,
+                                        acc_dtype=acc_dtype)
+
+    def var(self, axis=None, keepdims=False):
+        """See `theano.tensor.var`"""
+        return theano.tensor.basic.var(self, axis, keepdims=keepdims)
+
+    def std(self, axis=None, keepdims=False):
+        """See `theano.tensor.std`"""
+        return theano.tensor.basic.std(self, axis, keepdims=keepdims)
+
+    def min(self, axis=None, keepdims=False):
+        """See `theano.tensor.min`"""
+        return theano.tensor.basic.min(self, axis, keepdims=keepdims)
+
+    def max(self, axis=None, keepdims=False):
+        """See `theano.tensor.max`"""
+        return theano.tensor.basic.max(self, axis, keepdims=keepdims)
+
+    def argmin(self, axis=None, keepdims=False):
+        """See `theano.tensor.argmin`"""
+        return theano.tensor.basic.argmin(self, axis, keepdims=keepdims)
+
+    def argmax(self, axis=None, keepdims=False):
+        """See `theano.tensor.argmax`"""
+        return theano.tensor.basic.argmax(self, axis, keepdims=keepdims)
+
+    def nonzero(self, return_matrix=False):
+        """See `theano.tensor.nonzero`"""
+        return theano.tensor.basic.nonzero(self, return_matrix=return_matrix)
+
+    def nonzero_values(self):
+        """See `theano.tensor.nonzero_values`"""
+        return theano.tensor.basic.nonzero_values(self)
+
+    def sort(self, axis=-1, kind='quicksort', order=None):
+        """See `theano.tensor.sort`"""
+        from theano.tensor.sort import sort
+        return sort(self, axis, kind, order)
+
+    def argsort(self, axis=-1, kind='quicksort', order=None):
+        """See `theano.tensor.argsort`"""
+        from theano.tensor.sort import argsort
+        return argsort(self, axis, kind, order)
+
+    def clip(self, a_min, a_max):
+        "Clip (limit) the values in an array."
+        return theano.tensor.basic.clip(self, a_min, a_max)
+
+    def conj(self):
+        """See `theano.tensor.conj`"""
+        return theano.tensor.basic.conj(self)
+
+    # `conjugate` is a second name bound to the same function as `conj`.
+    conjugate = conj
+
+    def repeat(self, repeats, axis=None):
+        """Method form of `theano.tensor.repeat`.
+
+        Forwards `self`, `repeats` and `axis` positionally to
+        `theano.tensor.extra_ops.repeat`, which is imported when the method
+        is called.
+        """
+        from theano.tensor.extra_ops import repeat as repeat_impl
+        repeated = repeat_impl(self, repeats, axis)
+        return repeated
+
+    def round(self, mode="half_away_from_zero"):
+        """Method form of `theano.tensor.round`.
+
+        Forwards `self` and `mode` (default "half_away_from_zero")
+        positionally to `theano.tensor.basic.round`.
+        """
+        rounded = theano.tensor.basic.round(self, mode)
+        return rounded
+
+    def trace(self):
+        """Method form of `theano.sandbox.linalg.trace`.
+
+        The function is imported when the method is called and is applied
+        to `self`.
+        """
+        from theano.sandbox.linalg import trace as trace_impl
+        result = trace_impl(self)
+        return result
+
+    # TO TRUMP NUMPY OPERATORS
+    # NOTE(review): presumably this large `__array_priority__` makes NumPy
+    # defer to this class's operator overloads in mixed ndarray/tensor
+    # expressions -- confirm against the NumPy documentation.
+    __array_priority__ = 1000
+
+    def get_scalar_constant_value(self):
+        """Method form of `get_scalar_constant_value`.
+
+        Passes `self` to `theano.tensor.basic.get_scalar_constant_value`
+        and returns its result.
+        """
+        value = theano.tensor.basic.get_scalar_constant_value(self)
+        return value
+
+    def zeros_like(model, dtype=None):
+        """Method form of `theano.tensor.basic.zeros_like`.
+
+        The instance is received as `model` (rather than `self`) and is
+        forwarded together with `dtype`.
+        """
+        zeros = theano.tensor.basic.zeros_like(model, dtype=dtype)
+        return zeros
+
+
+class TensorVariable(_tensor_py_operators, Variable):
+    """Subclass to add the tensor operators to the basic `Variable` class.
+
+    The class body is empty: everything comes from its two base classes,
+    `_tensor_py_operators` and `Variable`.
+    """
+
+# Register TensorVariable as the `Variable` attribute of TensorType.
+TensorType.Variable = TensorVariable
+
+
+class TensorConstantSignature(tuple):
+    """A Signature object for comparing TensorConstant instances
+
+    An instance is a pair: (Type instance, ndarray).
+
+    Equality never broadcasts (arrays of different shapes are unequal) and
+    treats NaN entries located at the same positions as equal. The helper
+    values `sum`, `no_nan` and `has_nan` are computed lazily and cached as
+    instance attributes.
+    """
+    def __eq__(self, other):
+        """Return True if `other` is a signature with equal type and data."""
+        if type(self) != type(other):
+            return False
+        try:
+            (t0, d0), (t1, d1) = self, other
+        except Exception:
+            return False
+
+        # N.B. compare shape to ensure no broadcasting in ==
+        if t0 != t1 or d0.shape != d1.shape:
+            return False
+
+        self.no_nan  # Ensure has_nan is computed.
+        # Note that in the comparisons below, the elementwise comparisons
+        # come last because they are the most expensive checks.
+        if self.has_nan:
+            other.no_nan  # Ensure has_nan is computed.
+            return (other.has_nan and
+                    self.sum == other.sum and
+                    (self.no_nan.mask == other.no_nan.mask).all() and
+                    # Note that the second test below (==) may crash e.g. for
+                    # a single scalar NaN value, so we do not run it when all
+                    # values are missing.
+                    (self.no_nan.mask.all() or
+                     (self.no_nan == other.no_nan).all()))
+        else:
+            # Simple case where we do not need to worry about NaN values.
+            # (note that if there are NaN values in d1, this will return
+            # False, which is why we do not bother with testing `other.has_nan`
+            # here).
+            return (self.sum == other.sum) and numpy.all(d0 == d1)
+
+    def __ne__(self, other):
+        """Return the negation of `__eq__`.
+
+        Without this override, `!=` falls back to `tuple.__ne__`, which
+        compares the ndarray elements of the pairs directly. That disagrees
+        with `__eq__` and fails on the ambiguous truth value of a
+        multi-element array.
+        """
+        return not self == other
+
+    def __hash__(self):
+        """Hash on the class, the type, the data shape and the cached sum."""
+        t, d = self
+        return hashtype(self) ^ hash(t) ^ hash(d.shape) ^ hash(self.sum)
+
+    def theano_hash(self):
+        """Return `hash_from_ndarray` applied to the data array."""
+        _, d = self
+        return hash_from_ndarray(d)
+
+    def _get_sum(self):
+        """Compute sum of non NaN / Inf values in the array.
+
+        The result is cached in `self._sum` on first access.
+        """
+        try:
+            return self._sum
+        except AttributeError:
+            self._sum = self.no_nan.sum()
+            if self.has_nan and self.no_nan.mask.all():
+                # In this case the sum is not properly computed by numpy.
+                self._sum = 0
+            if numpy.isinf(self._sum) or numpy.isnan(self._sum):
+                # NaN may happen when there are both -inf and +inf values.
+                if self.has_nan:
+                    # Filter both NaN and Inf values.
+                    mask = self.no_nan.mask + numpy.isinf(self[1])
+                else:
+                    # Filter only Inf values.
+                    mask = numpy.isinf(self[1])
+                if mask.all():
+                    self._sum = 0
+                else:
+                    self._sum = numpy.ma.masked_array(self[1], mask).sum()
+                # At this point there should be no more NaN.
+                assert not numpy.isnan(self._sum)
+        return self._sum
+    sum = property(_get_sum)
+
+    def _get_no_nan(self):
+        """Return the data with NaN entries masked out, and set `has_nan`.
+
+        When the data contains NaN, the result is a `numpy.ma.masked_array`
+        masking them and `has_nan` is True; otherwise the result is the
+        data array itself and `has_nan` is False. The result is cached in
+        `self._no_nan` on first access.
+        """
+        try:
+            return self._no_nan
+        except AttributeError:
+            nan_mask = numpy.isnan(self[1])
+            if nan_mask.any():
+                self._no_nan = numpy.ma.masked_array(self[1], nan_mask)
+                self.has_nan = True
+            else:
+                self._no_nan = self[1]
+                self.has_nan = False
+        return self._no_nan
+    no_nan = property(_get_no_nan)
+
+
+class TensorConstant(_tensor_py_operators, Constant):
+    """Subclass to add the tensor operators to the basic `Constant` class.
+
+    To create a TensorConstant, use the `theano.tensor.basic.constant`
+    function.
+    """
+    def __init__(self, type, data, name=None):
+        """Initialize the constant and record `tag.unique_value`.
+
+        `tag.unique_value` is the single distinct value of `data` when
+        `data` is an ndarray with at least one dimension for which
+        `numpy.unique` returns exactly one element; otherwise it is None.
+        """
+        Constant.__init__(self, type, data, name)
+        unique_value = None
+        if isinstance(data, numpy.ndarray) and data.ndim > 0:
+            # numpy.unique processes the whole array, so call it only once.
+            distinct = numpy.unique(data)
+            if len(distinct) == 1:
+                unique_value = distinct[0]
+        self.tag.unique_value = unique_value
+
+    def __str__(self):
+        """Return a short "TensorConstant{...}" description.
+
+        The description shows the shape and the unique value when there is
+        one, else the data itself. Descriptions longer than 20 characters
+        are abbreviated to their first and last 10 characters.
+        """
+        if self.tag.unique_value is not None:
+            name = "%s of %s" % (str(self.data.shape),
+                                 str(self.tag.unique_value))
+        else:
+            name = "%s" % self.data
+        if len(name) > 20:
+            name = name[:10] + ".." + name[-10:]
+
+        return "TensorConstant{%s}" % name
+
+    def signature(self):
+        """Return a TensorConstantSignature built from (type, data)."""
+        return TensorConstantSignature((self.type, self.data))
+
+    def equals(self, other):
+        """Return True if `other` is a TensorConstant with equal signature.
+
+        A `numpy.ndarray` is first wrapped with
+        `theano.tensor.basic.constant` so that it can be compared too.
+        """
+        # Override Constant.equals to allow to compare with numpy.ndarray
+        if isinstance(other, numpy.ndarray):
+            # Make a TensorConstant to be able to compare
+            other = theano.tensor.basic.constant(other)
+        return (isinstance(other, TensorConstant) and
+                self.signature() == other.signature())
+
+# Register TensorConstant as the `Constant` attribute of TensorType.
+TensorType.Constant = TensorConstant