Commit 3167f410 authored by Frederic

Add missing file from the refactoring.

Parent: a37ccad2
......@@ -53,10 +53,6 @@ int_dtypes = map(str, scal.int_types)
uint_dtypes = map(str, scal.uint_types)
# Do a lazy import of the sparse module
sparse_module_ref = None
class ShapeError(Exception):
    """Raised when the shape cannot be computed."""
......
from copy import copy
from itertools import izip
import sys
from textwrap import dedent
import warnings
import logging
_logger = logging.getLogger("theano.tensor.subtensor")
import numpy
import theano
from theano.compat.six import StringIO
from theano.gradient import DisconnectedType
from theano import gof
from theano.gof import Apply, Constant, hashtype, Op, Type
from theano.gof.python25 import maxsize
from theano.printing import pprint
from theano import scalar as scal
from theano.tensor.basic import clip, sum, exp, ARange, TensorType
from theano.tensor.elemwise import DimShuffle
from theano.tensor.type_other import NoneConst, SliceType
from theano import config
# Optional C helper for fast in-place increment (used by advanced indexing).
# It stays None when no C++ compiler is configured or when the compiled
# cutils_ext extension is unavailable; callers must check for None.
inplace_increment = None
if config.cxx:
    import theano.gof.cutils  # needed to import cutils_ext
    try:
        from cutils_ext.cutils_ext import inplace_increment
    except ImportError:
        # Best-effort: fall back to the pure-Python/numpy code paths.
        pass

# Do a lazy import of the sparse module
sparse_module_ref = None
class AdvancedIndexingError(TypeError):
    """
    Raised when Subtensor is asked to perform advanced indexing.
    """
    # NOTE: the previous explicit __init__ only forwarded *args to
    # TypeError.__init__, which is exactly the inherited behavior, so it
    # was removed as redundant.  Construction semantics are unchanged.
##########
# Helpful functions to deal with Subtensor and IncSubtensor
##########
def get_idx_list(inputs, idx_list):
    """
    Given a list of inputs to the subtensor and its idx_list, reorder
    the inputs according to the idx_list to get the right values.
    """
    # Only the indexed tensor itself was given: idx_list is already
    # fully constant and needs no substitution.
    if len(inputs) == 1:
        return tuple(idx_list)

    # Scalar index inputs are consumed in order; keep them reversed so
    # that pop() yields them front-to-back.
    pending = list(reversed(list(inputs[1:])))

    def substitute(entry):
        # A Type placeholder stands for the next runtime scalar input.
        if isinstance(entry, gof.Type):
            return pending.pop()
        # Slices may hold placeholders in any of their three fields.
        if isinstance(entry, slice):
            return slice(substitute(entry.start),
                         substitute(entry.stop),
                         substitute(entry.step))
        # Plain constants (ints, None) pass through unchanged.
        return entry

    return tuple(substitute(entry) for entry in idx_list)
def get_canonical_form_slice(theslice, length):
    '''
    Given a slice [start:stop:step] transform it into a canonical form
    that respects the conventions imposed by python and numpy.

    In a canonical form a slice is represented by a canonical form slice,
    in which 0 <= start <= stop <= length and step > 0, and a flag which says
    if the resulting set of numbers needs to be reversed or not.

    Returns a pair (canonical_slice_or_index, reverse_flag).  The flag is 1
    when no reversal is needed, otherwise it is the (possibly symbolic)
    sign of the step.
    '''
    from theano.tensor import switch, lt, ge, sgn
    if isinstance(theslice, slice):

        def analyze(x):
            # Return (value, is_constant): the Python constant when `x` can
            # be resolved at compile time, otherwise a symbolic expression
            # with is_constant False.
            try:
                x_constant = theano.tensor.get_scalar_constant_value(x)
                is_constant = True
            except theano.tensor.NotScalarConstantError:
                x_constant = theano.tensor.extract_constant(x)
                is_constant = False
            return x_constant, is_constant

        start, is_start_constant = analyze(theslice.start)
        stop, is_stop_constant = analyze(theslice.stop)
        step, is_step_constant = analyze(theslice.step)
        length, is_length_constant = analyze(length)

        if step is None:
            step = 1

        # First handle the easier and common case where `step` is 1 and
        # either `start` or `stop` is a range boundary. More specializations
        # could be added later. This makes the resulting graph smaller than
        # in the generic case below.
        if step == 1:
            is_start_0 = (
                start in [None, 0] or
                (is_start_constant and is_length_constant and
                 start < 0 and start + length <= 0))
            is_stop_length = (
                stop in [None, length, maxsize] or
                (is_stop_constant and is_length_constant and
                 stop >= length))
            if is_start_0:
                # 0:stop:1
                if is_stop_length:
                    # Full slice.
                    return slice(0, length, 1), 1
                if is_stop_constant and stop >= 0:
                    return (slice(0, switch(lt(stop, length), stop, length),
                                  1), 1)
                stop_plus_len = stop + length
                stop = switch(
                    lt(stop, 0),
                    # stop < 0
                    switch(
                        lt(stop_plus_len, 0),
                        # stop + len < 0
                        0,
                        # stop + len >= 0
                        stop_plus_len),
                    # stop >= 0: use min(stop, length)
                    switch(lt(stop, length), stop, length))
                return slice(0, stop, 1), 1
            elif is_stop_length:
                # start:length:1
                if is_start_constant and start >= 0:
                    return slice(switch(lt(start, length), start, length),
                                 length, 1), 1
                start_plus_len = start + length
                start = switch(
                    lt(start, 0),
                    # start < 0
                    switch(
                        lt(start_plus_len, 0),
                        # start + len < 0
                        0,
                        # start + len >= 0
                        start_plus_len),
                    # start >= 0: use min(start, length)
                    switch(lt(start, length), start, length))
                return slice(start, length, 1), 1

        # This is the generic case.

        if is_step_constant:
            # When we know the sign of `step`, the graph can be made simpler.
            assert step != 0
            if step > 0:
                # Positive step: switch_neg_step always picks branch `b`.
                def switch_neg_step(a, b):
                    return b
                abs_step = step
                sgn_step = 1
            else:
                # Negative step: switch_neg_step always picks branch `a`.
                def switch_neg_step(a, b):
                    return a
                abs_step = -step
                sgn_step = -1
        else:
            # Step sign unknown at compile time: build a symbolic switch.
            is_step_neg = lt(step, 0)

            def switch_neg_step(a, b):
                return switch(is_step_neg, a, b)
            abs_step = abs(step)
            sgn_step = sgn(step)

        # Default bounds follow Python slicing semantics, depending on
        # the sign of the step.
        defstart = switch_neg_step(length - 1, 0)
        defstop = switch_neg_step(-1, length)
        if start is None:
            start = defstart
        else:
            # Normalize negative start, then clamp it into a valid range.
            start = switch(lt(start, 0), start + length, start)
            start = switch(lt(start, 0), switch_neg_step(-1, 0), start)
            start = switch(ge(start, length),
                           switch_neg_step(length - 1, length),
                           start)
        if stop in [None, maxsize]:
            # The special "maxsize" case is probably not needed here,
            # as slices containing maxsize are not generated by
            # __getslice__ anymore.
            stop = defstop
        else:
            # Normalize negative stop, then clamp it into a valid range.
            stop = switch(lt(stop, 0), stop + length, stop)
            stop = switch(lt(stop, 0), -1, stop)
            stop = switch(ge(stop, length), length, stop)

        # Rewrite the slice so the step becomes positive; the caller uses
        # the returned flag to reverse the result when the step was negative.
        nw_stop = switch_neg_step(start + 1, stop)
        slice_len = (start - stop - 1) // abs_step + 1
        slice_len = switch(lt(slice_len, 0), 0, slice_len)
        neg_start = nw_stop - (slice_len - 1) * abs_step - 1
        neg_start = switch(lt(neg_start, 0), (nw_stop - 1), neg_start)
        nw_start = switch_neg_step(neg_start, start)
        nw_start = switch(lt(nw_start, 0), 0, nw_start)
        nw_stop = switch(lt(nw_stop, 0), 0, nw_stop)
        # Ensure start <= stop.
        nw_start = switch(lt(nw_start, nw_stop), nw_start, nw_stop)

        nw_step = abs_step
        if step != 1:
            reverse = sgn_step
            return slice(nw_start, nw_stop, nw_step), reverse
        else:
            return slice(nw_start, nw_stop, nw_step), 1
    else:
        # Not a slice: a single (possibly symbolic) index.  Normalize a
        # negative index by adding the dimension length.
        value = theano.tensor.extract_constant(theslice)
        value = switch(lt(value, 0), (value + length), value)
        return value, 1
class Subtensor(Op):
    """Return a subtensor view

    The inputs array is the tensor x, followed by scalar integer types.
    TODO: WRITEME: how are the scalar integer variables formatted?

    This class uses a relatively complex internal representation of the inputs
    to remember how the input tensor x should be sliced.

    idx_list: instance variable TODO: WRITEME: is this a list or a tuple?
                                        (old docstring gives two conflicting
                                        descriptions)
              elements are either integers, theano scalar types, or slices.
              one element per "explicitly named dimension"
                TODO: WRITEME: what is an "explicitly named dimension" ?

              if integer:
                  indexes into the inputs array
              if slice:
                  start/stop/step members of each slice are integer indices
                  into the inputs array or None

                  integer indices be actual integers or theano scalar types

    Note that the idx_list defines the Op, so two Subtensor instances are
    considered to be different Ops if they have different idx_list fields.
    This means that the entries in it are theano Types, not theano Variables.

    @todo: add support for advanced tensor indexing (in Subtensor_dx too).
    """
    # Error-message templates shared by make_node and friends.
    e_invalid = ('The index list is longer (size %d) than the number of '
                 'dimensions of the tensor(namely %d). You are asking for '
                 'a dimension of the tensor that does not exist! You might '
                 'need to use dimshuffle to add extra dimension to your '
                 'tensor.')
    e_subslice = 'nested slicing is not supported'
    e_indextype = "Invalid index type or slice for Subtensor"
    debug = 0

    # The output is a (numpy) view of input 0, not a copy.
    view_map = {0: [0]}

    @staticmethod
    def collapse(idxs, cond):
        """
        idxs: a list of indices or slices.
        cond: a callable that returns a bool

        returns: idxs, with the slices flattened out into a list.
                 if cond is true for an entry, does not flatten it.
        """
        ret = []

        def helper(entry):
            if cond(entry):
                ret.append(entry)
            elif isinstance(entry, slice):
                # Recurse into the three slice members; entries for which
                # cond is False and that are not slices are dropped.
                helper(entry.start)
                helper(entry.stop)
                helper(entry.step)

        for idx in idxs:
            helper(idx)

        return ret

    @staticmethod
    def convert(entry, slice_ok=True):
        """
        The "idx_list" field is unique to each Subtensor instance.
        It is not unique to each Apply node, so it should not refer to
        specific Variables. This method changes references to Variables
        into references to Types.

        TODO: WRITEME: This method also accepts "entry" already being a Type;
            when would that happen?
        """
        invalid_scal_types = [scal.float64, scal.float32]
        scal_types = [scal.int64, scal.int32, scal.int16, scal.int8]
        tensor_types = [theano.tensor.lscalar, theano.tensor.iscalar,
                        theano.tensor.wscalar, theano.tensor.bscalar]
        invalid_tensor_types = [theano.tensor.fscalar, theano.tensor.dscalar,
                                theano.tensor.cscalar, theano.tensor.zscalar]
        # Float indices are rejected outright.
        if (isinstance(entry, gof.Variable)
                and (entry.type in invalid_scal_types
                     or entry.type in invalid_tensor_types)):
            raise TypeError("Expected an integer")

        # Integer scalar Variables are replaced by their Type.
        if isinstance(entry, gof.Variable) and entry.type in scal_types:
            return entry.type
        elif isinstance(entry, gof.Type) and entry in scal_types:
            return entry

        # 0-d integer tensors are converted to the equivalent scalar Type.
        if (isinstance(entry, gof.Variable)
                and entry.type in tensor_types
                and numpy.all(entry.type.broadcastable)):
            return scal.Scalar(entry.type.dtype)
        elif (isinstance(entry, gof.Type)
                and entry in tensor_types
                and numpy.all(entry.broadcastable)):
            return scal.Scalar(entry.dtype)
        elif slice_ok and isinstance(entry, slice):
            # Convert each member of the slice; nested slices are not
            # allowed (slice_ok=False in the recursive calls).
            a = entry.start
            b = entry.stop
            c = entry.step

            if a is not None:
                slice_a = Subtensor.convert(a, False)
            else:
                slice_a = None

            if b is not None and b != maxsize:
                # The special "maxsize" case is probably not needed here,
                # as slices containing maxsize are not generated by
                # __getslice__ anymore.
                slice_b = Subtensor.convert(b, False)
            else:
                slice_b = None

            if c is not None:
                slice_c = Subtensor.convert(c, False)
            else:
                slice_c = None

            return slice(slice_a, slice_b, slice_c)
        # There is a bug in numpy that results in isinstance(x, int) returning
        # False for numpy integers.
        # See <http://projects.scipy.org/numpy/ticket/2235>.
        elif isinstance(entry, numpy.integer):
            return entry
        # On Windows 64-bit, shapes are returned as Python long, as they can
        # be bigger than what a Python int can hold.
        # Shapes should always fit in a numpy.int64, and we support them better
        # 2) In Python3, long replaced int. So we must assert it fit in int64.
        elif isinstance(entry, (int, long)):
            entry64 = numpy.int64(entry)
            return entry64
        else:
            raise AdvancedIndexingError(Subtensor.e_indextype, entry)

    def __init__(self, idx_list):
        # Normalize the index description into Types / ints / slices only.
        self.idx_list = tuple(map(self.convert, idx_list))
        # perform() caches the concrete index tuple here when the idx_list
        # is fully constant (i.e. the node has no scalar index inputs).
        self.perform_cache_cdata = None

    @staticmethod
    def my_as_scalar(a):
        # Since scal.as_scalar does not know about tensor types (it would
        # create a circular import) , this method converts either a
        # TensorVariable or a ScalarVariable to a scalar.
        if isinstance(a, gof.Variable) and isinstance(a.type, TensorType):
            return theano.tensor.scalar_from_tensor(a)
        else:
            return scal.as_scalar(a)

    def make_node(self, x, *inputs):
        """
        x: the tensor to take a subtensor of
        inputs: a list of theano Scalars
        """
        x = theano.tensor.as_tensor_variable(x)
        inputs = tuple(self.my_as_scalar(a) for a in inputs)

        idx_list = list(self.idx_list)
        if len(idx_list) > x.type.ndim:
            exception = ValueError(Subtensor.e_invalid % (
                len(idx_list), x.type.ndim))
            exception.subtensor_invalid = True
            raise exception

        # infer the broadcasting pattern
        # Dimensions indexed by an integer are dropped; dimensions indexed
        # by a slice keep the broadcastable flag of the input.
        padded = (idx_list
                  + [slice(None, None, None)] * (x.type.ndim - len(idx_list)))
        broadcastable = [bc for p, bc in izip(padded, x.type.broadcastable)
                         if isinstance(p, slice)]

        # Check that the scalar inputs match the Type placeholders in
        # idx_list, in number and in type.
        input_types = Subtensor.collapse(idx_list,
                lambda entry: isinstance(entry, gof.Type))
        if len(inputs) != len(input_types):
            raise IndexError(
                "Not enough inputs to fill in the Subtensor template.",
                inputs, idx_list)
        for input, expected_type in izip(inputs, input_types):
            if input.type != expected_type:
                raise TypeError(
                    "Wrong type for Subtensor template. Expected %s, got %s."
                    % (input.type, expected_type))

        return gof.Apply(self,
                         (x, ) + inputs,
                         [theano.tensor.tensor(dtype=x.type.dtype,
                                               broadcastable=broadcastable)])

    def perform(self, node, inputs, out_):
        out, = out_
        x = inputs[0]

        # The subtensor (or idx_list) does not depend on the inputs.
        # (and cdata was cached on initial call)
        if self.perform_cache_cdata is not None:
            out[0] = numpy.asarray(x.__getitem__(self.perform_cache_cdata))
            return

        cdata = get_idx_list(inputs, self.idx_list)
        if len(cdata) == 1:
            cdata = cdata[0]
        # (first call caches cdata here)
        if len(inputs) == 1:
            self.perform_cache_cdata = cdata

        # numpy.asarray: ensure a 0-d result is still an ndarray.
        out[0] = numpy.asarray(x.__getitem__(cdata))

    def infer_shape(self, node, shapes):
        # Compute the output shape symbolically from the input shape and
        # the (possibly symbolic) indices.
        xshp = shapes[0]
        assert len(xshp) == node.inputs[0].ndim
        outshp = []
        actual_idx_list = list(get_idx_list(node.inputs, self.idx_list))
        padded = (actual_idx_list +
                  [slice(None, None, None)] * (len(xshp) - len(self.idx_list)))
        i = 0
        for idx, xl in izip(padded, xshp):
            if isinstance(idx, slice):
                # If it is the default (None, None, None) slice, or a variant,
                # the shape will be xl
                if ((idx.start in [None, 0])
                        and (idx.stop in [None, maxsize])
                        and (idx.step is None or idx.step == 1)):
                    outshp.append(xl)
                else:
                    cnf = get_canonical_form_slice(idx, xl)[0]
                    if cnf.step == 1:
                        length = cnf.stop - cnf.start
                    else:
                        length = (cnf.stop - cnf.start - 1) // cnf.step + 1
                    outshp.append(length)
                i += 1
            else:
                # That dimension is dropped
                pass
        assert i == node.outputs[0].ndim
        assert len(outshp) == node.outputs[0].ndim
        return [outshp]

    def grad(self, inputs, grads):
        gz, = grads
        x = inputs[0]
        rest = inputs[1:]
        output = self(*inputs)
        if output.dtype.find('int') != -1:
            # Integer outputs have zero gradient everywhere.
            first = x.zeros_like().astype(theano.config.floatX)
        else:
            # The gradient w.r.t. x scatters gz back into a zero tensor of
            # x's shape at the indexed positions.
            first = IncSubtensor(self.idx_list)(x.zeros_like(), gz, *rest)
        return ([first]
                + [DisconnectedType()()] * len(rest))

    def connection_pattern(self, node):
        # Only the indexed tensor (input 0) is connected to the output;
        # the scalar indices are not differentiable.
        rval = [[True]]

        for ipt in node.inputs[1:]:
            rval.append([False])

        return rval

    def __eq__(self, other):
        return type(self) == type(other) and self.idx_list == other.idx_list

    def __hash__(self):
        # TODO: optimize by cache this hash value
        # slice objects are not hashable in Python 2, so replace them by
        # (start, stop, step) tuples before hashing.
        msg = []
        for entry in self.idx_list:
            if isinstance(entry, slice):
                msg += [(entry.start, entry.stop, entry.step)]
            else:
                msg += [entry]

        idx_list = tuple(msg)
        # backport
        # idx_list = tuple((entry.start, entry.stop, entry.step)
        #                 if isinstance(entry, slice)
        #                 else entry
        #                 for entry in self.idx_list)
        return hash(idx_list)

    @staticmethod
    def str_from_slice(entry):
        # Render a slice as "start:stop:step" with empty fields for None.
        msg = []
        for x in [entry.start, entry.stop, entry.step]:
            if x is None:
                msg.append("")
            else:
                msg.append(str(x))
        return ":".join(msg)

    def __str__(self):
        indices = []
        for entry in self.idx_list:
            if isinstance(entry, slice):
                indices.append(self.str_from_slice(entry))
            else:
                indices.append(str(entry))
        return "%s{%s}" % (self.__class__.__name__, ", ".join(indices))

    @staticmethod
    def default_helper_c_code_args():
        """
        Returns a dictionary of default arguments to
        helper_c_code
        """

        return {
            "c_prefix": "PyArray",
            "update_flags": ("PyArray_UpdateFlags(%(view_name)s,"
                            " NPY_ARRAY_C_CONTIGUOUS|"
                            "NPY_ARRAY_F_CONTIGUOUS);"),
            "set_data": "PyArray_set_data",
            "set_dim": "PyArray_set_dim",
            "set_stride": "PyArray_set_stride",
            "strides_mul": 1,
            "view_name": "xview"}

    @staticmethod
    def helper_c_code(node, name, inputs, outputs, sub, idx_list,
                      c_prefix=None,
                      update_flags=None,
                      set_data=None,
                      set_dim=None,
                      set_stride=None,
                      strides_mul=None,
                      view_name=None
                      ):
        """
        The parameters c_prefix, update_flags, set_data, set_dim,
        set_stride and strides_mul are there to allow reusing this
        function on PyArray and CudaNdarray object.
        """

        default_args = Subtensor.default_helper_c_code_args()

        if update_flags is None:
            update_flags = default_args['update_flags']

        if set_data is None:
            set_data = default_args['set_data']

        if set_dim is None:
            set_dim = default_args['set_dim']

        if set_stride is None:
            set_stride = default_args['set_stride']

        if strides_mul is None:
            strides_mul = default_args['strides_mul']

        if c_prefix is None:
            c_prefix = default_args['c_prefix']

        if view_name is None:
            view_name = default_args['view_name']

        # update_flags may depend on view_name
        update_flags = update_flags % locals()

        #
        # two arrays are created in C code:
        # is_slice: len == ndim, 0 means int, 1 means slice
        # subtensor_spec: len = n_ints + 3 * n_slices
        #
        fail = sub['fail']
        init_cmds = []  # initialization for subtensor_spec
        is_slice = []
        # TODO: change that, it might lead to unexpected results,
        # see assembla-#767
        NONE_CODE = maxsize - 1

        pos = [0, 1]  # annoying version of global variable for init_entry

        def inc_spec_pos(amt):
            pos[0] += amt

        def inc_input_pos(amt):
            pos[1] += amt

        def spec_pos():
            return pos[0]

        def input_pos():
            return pos[1]

        def init_entry(entry, depth=0):
            # Emit the C initializer for one index entry; slices recurse
            # (depth 1) over their start/stop/step members.
            if isinstance(entry, (numpy.integer, int)):
                init_cmds.append(
                    "subtensor_spec[%i] = %i;" % (spec_pos(),
                                                  entry))
                inc_spec_pos(1)
                if depth == 0:
                    is_slice.append(0)
            elif isinstance(entry, Type):
                # A Type placeholder is filled from the next scalar input.
                init_cmds.append(
                    "subtensor_spec[%i] = %s;" % (spec_pos(),
                                                  inputs[input_pos()]))
                inc_spec_pos(1)
                inc_input_pos(1)
                if depth == 0:
                    is_slice.append(0)
            elif entry is None:
                init_cmds.append(
                    "subtensor_spec[%i] = %i;" % (spec_pos(),
                                                  NONE_CODE))
                inc_spec_pos(1)
                if depth == 0:
                    is_slice.append(0)
            elif depth == 0 and isinstance(entry, slice):
                init_entry(entry.start, depth + 1)
                init_entry(entry.stop, depth + 1)
                init_entry(entry.step, depth + 1)
                is_slice.append(1)
            else:
                assert 0, entry

        for entry in idx_list:
            init_entry(entry)
        # make sure we used all inputs
        assert input_pos() == len(inputs), input_pos()
        assert len(is_slice) <= node.inputs[0].ndim, node.inputs[0].ndim

        len_is_slice = len(is_slice)

        len_subtensor_spec = spec_pos()
        is_slice_init = ",".join([str(s) for s in is_slice])
        subtensor_init = "\n".join(init_cmds)
        x, = inputs[:1]
        z, = outputs
        xview = view_name
        rval = """
        #define PyArray_set_dim(obj, idx, d) PyArray_DIMS(obj)[idx]=d
        #define PyArray_set_stride(obj, idx, d) PyArray_STRIDES(obj)[idx]=d
        #define PyArray_set_data(obj, ptr, base) PyArray_BYTES(obj)=ptr

        // The subtensor is created by iterating over the dimensions
        // and updating stride, shape, and data pointers

        int is_slice[] = {%(is_slice_init)s};
        npy_intp subtensor_spec[%(len_subtensor_spec)s];
        %(subtensor_init)s;
        int spec_pos = 0; //position in subtensor_spec
        int inner_ii = 0; // the current dimension of zview
        int outer_ii = 0; // current dimension of z

        char* ptr = (char*) %(c_prefix)s_BYTES(%(xview)s);

        if ((%(c_prefix)s_DIMS(%(xview)s) == %(c_prefix)s_DIMS(%(x)s))
            && (%(c_prefix)s_DIMS(%(x)s) != NULL))
        {
            PyErr_Format(PyExc_ValueError, "x and %(xview)s"
                         "(with %%d dims) have the same dimensions"
                         " pointers: %%p and %%p",
                         %(c_prefix)s_NDIM(%(x)s),
                         %(c_prefix)s_DIMS(%(xview)s),
                         %(c_prefix)s_DIMS(%(x)s));
            Py_XDECREF(%(xview)s);
            %(fail)s;
        }
        if (%(c_prefix)s_STRIDES(%(xview)s) == %(c_prefix)s_STRIDES(%(x)s)
            && (%(c_prefix)s_DIMS(%(x)s) != NULL))
        {
            PyErr_Format(PyExc_ValueError, "x and %(xview)s"
                         "(with %%d dims) have the same strides"
                         " pointers: %%p and %%p",
                         %(c_prefix)s_NDIM(%(x)s),
                         %(c_prefix)s_STRIDES(%(xview)s),
                         %(c_prefix)s_STRIDES(%(x)s));
            Py_XDECREF(%(xview)s);
            %(fail)s;
        }

        for (; outer_ii < %(len_is_slice)s; ++outer_ii)
        {
            if (is_slice[outer_ii])
            {
                npy_intp length = %(c_prefix)s_DIMS(%(x)s)[outer_ii];
                npy_intp slicelength;
                npy_intp start = subtensor_spec[spec_pos+0];
                npy_intp stop = subtensor_spec[spec_pos+1];
                npy_intp step = subtensor_spec[spec_pos+2];
                if (step == %(NONE_CODE)s) step = 1;

                npy_intp defstart = step < 0 ? length-1 : 0;
                npy_intp defstop = step < 0 ? -1 : length;

                // logic adapted from
                // PySlice_GetIndicesEx in python source
                if (!step)
                {
                    Py_DECREF(%(xview)s);
                    PyErr_Format(PyExc_ValueError,
                                 "slice step cannot be zero");
                    Py_XDECREF(%(xview)s);
                    %(fail)s;
                }

                if (start == %(NONE_CODE)s)
                {
                    start = defstart;
                }
                else
                {
                    if (start < 0) start += length;
                    if (start < 0) start = (step < 0) ? -1 : 0;
                    if (start >= length)
                        start = (step < 0) ? length - 1 : length;
                }

                if (stop == %(NONE_CODE)s)
                {
                    stop = defstop;
                }
                else
                {
                    if (stop < 0) stop += length;
                    if (stop < 0) stop = (step < 0) ? -1 : 0;
                    if (stop >= length)
                        stop = (step < 0) ? length - 1 : length;
                }

                if ((step < 0 && stop >= start)
                    || (step > 0 && start >= stop)) {
                    slicelength = 0;
                }
                else if (step < 0) {
                    slicelength = (stop-start+1)/step+1;
                }
                else {
                    slicelength = (stop-start-1)/step+1;
                }

                if (0){
                    fprintf(stdout, "start %%zi\\n", start);
                    fprintf(stdout, "stop %%zi\\n", stop);
                    fprintf(stdout, "step %%zi\\n", step);
                    fprintf(stdout, "length %%zi\\n", length);
                    fprintf(stdout, "slicelength %%zi\\n", slicelength);
                }

                assert (slicelength <= length);

                ptr += %(c_prefix)s_STRIDES(%(x)s)[outer_ii] * start *
                       %(strides_mul)s;
                %(set_dim)s(%(xview)s, inner_ii, slicelength);
                %(set_stride)s(%(xview)s, inner_ii,
                               %(c_prefix)s_STRIDES(%(x)s)[outer_ii] * step);

                inner_ii += 1;
                spec_pos += 3;
            }
            else // tuple coord `outer_ii` is an int
            {
                int idx = subtensor_spec[spec_pos];
                if (idx < 0) idx += %(c_prefix)s_DIMS(%(x)s)[outer_ii];
                if (idx >= 0)
                {
                    if (idx < %(c_prefix)s_DIMS(%(x)s)[outer_ii])
                    {
                        ptr += %(c_prefix)s_STRIDES(%(x)s)[outer_ii] * idx *
                               %(strides_mul)s;
                    }
                    else
                    {
                        PyErr_Format(PyExc_IndexError,"index out of bounds");
                        Py_XDECREF(%(xview)s);
                        %(fail)s;
                    }
                }
                else
                {
                    PyErr_Format(PyExc_IndexError,"index out of bounds");
                    Py_XDECREF(%(xview)s);
                    %(fail)s;
                }

                spec_pos += 1;
            }
        }
        %(set_data)s(%(xview)s, ptr, (PyObject*)NULL);
        assert (inner_ii <= %(c_prefix)s_NDIM(%(xview)s));
        while (inner_ii < %(c_prefix)s_NDIM(%(xview)s))
        {
            assert (outer_ii < %(c_prefix)s_NDIM(%(x)s));
            %(set_dim)s(%(xview)s, inner_ii,
                        %(c_prefix)s_DIMS(%(x)s)[outer_ii]);
            %(set_stride)s(%(xview)s, inner_ii,
                           %(c_prefix)s_STRIDES(%(x)s)[outer_ii]);

            inner_ii += 1;
            outer_ii += 1;
        }
        %(update_flags)s
        """ % locals()
        # print rval
        return rval

    @staticmethod
    def helper_c_code_cache_version():
        return (5,)

    def c_code(self, node, name, inputs, outputs, sub):  # DEBUG
        if not isinstance(node.inputs[0].type, theano.tensor.TensorType):
            raise NotImplementedError()

        x = inputs[0]
        z, = outputs
        view_ndim = node.outputs[0].ndim
        fail = sub['fail']

        build_view = """
        //TODO: give this Op a second output so that this view can be cached
        //TODO: alternatively, fix the memory leak on failure
        Py_INCREF(PyArray_DESCR(%(x)s));
        PyArrayObject * xview = (PyArrayObject*)PyArray_NewFromDescr(
                &PyArray_Type,
                PyArray_DESCR(%(x)s),
                %(view_ndim)s,
                PyArray_DIMS(%(x)s),
                PyArray_STRIDES(%(x)s),
                PyArray_DATA(%(x)s),
                %(x)s->flags,
                NULL);
        if (!xview)
        {
            %(fail)s;
        }
        """ % locals()

        get_xview = self.helper_c_code(node, name, inputs, outputs, sub,
                                       self.idx_list)
        finish_view = """
        if (%(z)s) Py_DECREF(%(z)s);
        Py_INCREF(py_%(x)s);
        PyArray_BASE(xview) = py_%(x)s;
        assert(py_%(x)s == (PyObject*)%(x)s);
        %(z)s = xview;
        """ % locals()

        return build_view + "{" + get_xview + "}" + finish_view

    def c_code_cache_version(self):
        hv = self.helper_c_code_cache_version()
        # If `helper_c_code_cache_version` is not versioned we do not want to
        # have a versioned version of this op's C code.
        if len(hv) == 0:
            return ()
        return (2, hv)

    def R_op(self, inputs, eval_points):
        # Subtensor is not differentiable wrt to its indices, therefore we
        # do not even need to consider the eval_points provided for those
        # (they should be defaulted to zeros_like by the global R_op)
        if eval_points[0] is None:
            return [None]
        return self.make_node(eval_points[0], *inputs[1:]).outputs
class SubtensorPrinter:
    # Pretty-printer hook: renders a Subtensor application as
    # "x[i, a:b:c, ...]" instead of the default Op string.

    def process(self, r, pstate):
        if r.owner is None:
            raise TypeError("Can only print Subtensor.")
        elif isinstance(r.owner.op, Subtensor):
            idxs = r.owner.op.idx_list
            inputs = list(r.owner.inputs)
            input = inputs.pop()
            sidxs = []
            inbrack_pstate = pstate.clone(precedence=-1000)
            for entry in idxs:
                if isinstance(entry, int):
                    sidxs.append(str(entry))
                elif isinstance(entry, scal.Scalar):
                    # Type placeholder: print the corresponding input.
                    sidxs.append(inbrack_pstate.pprinter.process(inputs.pop()))
                elif isinstance(entry, slice):
                    # Omit default slice members so the output mimics
                    # the Python syntax the user wrote.
                    if entry.start is None or entry.start == 0:
                        msg1 = ""
                    else:
                        msg1 = entry.start

                    if entry.stop is None or entry.stop == maxsize:
                        msg2 = ""
                    else:
                        msg2 = entry.stop

                    if entry.step is None:
                        msg3 = ""
                    else:
                        msg3 = ":%s" % entry.step
                    sidxs.append("%s:%s%s" % (msg1, msg2, msg3))
            return "%s[%s]" % (pstate.pprinter.process(
                input,
                pstate.clone(precedence=1000)),
                ", ".join(sidxs))
        else:
            raise TypeError("Can only print Subtensor.")
# Register the pretty-printer for any variable produced by a Subtensor op.
pprint.assign(lambda pstate, r: r.owner and isinstance(r.owner.op, Subtensor),
              SubtensorPrinter())
def set_subtensor(x, y, inplace=False,
                  tolerate_inplace_aliasing=False):
    """Return x with the given subtensor overwritten by y.

    Example: To replicate the numpy expression "r[10:] = 5", type

    >>> new_r = set_subtensor(r[10:], 5)

    :param x: symbolic variable for the lvalue of = operation
    :param y: symbolic variable for the rvalue of = operation
    :param tolerate_inplace_aliasing: see inc_subtensor for documentation.
    """
    # "Set" is expressed as an increment with set_instead_of_inc enabled.
    return inc_subtensor(
        x, y, inplace,
        set_instead_of_inc=True,
        tolerate_inplace_aliasing=tolerate_inplace_aliasing)
def inc_subtensor(x, y, inplace=False, set_instead_of_inc=False,
                  tolerate_inplace_aliasing=False):
    """Return x with the given subtensor incremented by y.

    :param x: the symbolic result of a Subtensor operation.
    :param y: the amount by which to increment ths subtensor in question
    :param tolerate_inplace_aliasing: allow x and y to be views of a single
        underlying array even while working inplace.  For correct results,
        x and y must not be overlapping views; if they overlap, the result
        of this Op will generally be incorrect. This value has no effect if
        inplace=False.

    Example: To replicate the numpy expression "r[10:] += 5", type

    >>> new_r = inc_subtensor(r[10:], 5)
    """
    # First of all, y cannot have a higher dimension than x,
    # nor have non-broadcastable dimensions where x is broadcastable.

    x = theano.tensor.as_tensor_variable(x)
    y = theano.tensor.as_tensor_variable(y)

    if y.ndim > x.ndim:
        raise TypeError(("Trying to increment a %d-dimensional "
            "subtensor with a %d-dimensional value.") % (x.ndim, y.ndim))

    for dim in range(y.ndim):
        dim_offset = x.ndim - y.ndim
        if (x.broadcastable[dim + dim_offset]
                and not y.broadcastable[dim]):
            # It is acceptable to try to increment a subtensor with a
            # broadcastable dim with a tensor that is not broadcastable
            # on that dimension. However, its length must then be 1.
            # We insert a Rebroadcast Op to make sure it is the case.
            y = addbroadcast(y, dim)

    if not x.owner:
        raise TypeError('x must be the result of a subtensor operation')

    # retrieve idx_list from x.owner
    # Dispatch on the Op that produced x: plain Subtensor, the two advanced
    # indexing Ops, or a DimShuffle / Reshape wrapper around one of those.
    if isinstance(x.owner.op, Subtensor):
        if tolerate_inplace_aliasing:
            destroyhandler_tolerate_aliased = [[0, 1]]
        else:
            destroyhandler_tolerate_aliased = []
        the_op = IncSubtensor(x.owner.op.idx_list, inplace, set_instead_of_inc,
                destroyhandler_tolerate_aliased=destroyhandler_tolerate_aliased
                )
        real_x = x.owner.inputs[0]
        real_idxargs = x.owner.inputs[1:]
        return the_op(real_x, y, *real_idxargs)
    elif isinstance(x.owner.op, AdvancedSubtensor1):
        real_x = x.owner.inputs[0]
        ilist = x.owner.inputs[1]
        the_op = AdvancedIncSubtensor1(inplace,
                                       set_instead_of_inc=set_instead_of_inc)
        return the_op(real_x, y, ilist)
    elif isinstance(x.owner.op, AdvancedSubtensor):
        real_x = x.owner.inputs[0]
        ilist = x.owner.inputs[1:]

        the_op = AdvancedIncSubtensor(inplace,
                                      set_instead_of_inc=set_instead_of_inc)
        return the_op(real_x, y, *ilist)
    elif isinstance(x.owner.op, DimShuffle):
        inner_x = x.owner.inputs[0]
        # In the dimshuffle case, there are in fact two dimshuffles:
        # one to make the indexed dimension the last one,
        # and one to put it back where it was. So, in the case where we have
        # inc_subtensor(x[:,i], y), the graph is actually
        # inc_subtensor((x.T)[i].T, y).
        # We could get all the way to x, and then get rid of the dimshuffles
        # completely, but the problem is that advanced_inc_subtensor1 can only
        # work on the first (outer-most, left-most) dimension of x,
        # just like advanced_subtensor1.
        # So we call advanced_inc_subtensor1(x.T, i, y), but then we need to
        # return something that has the same shape as x, not as x.T (inner_x).
        # So re-apply the outer dimshuffle on the new inc_subtensor,
        # and return advanced_inc_subtensor1(x.T, i, y).T.
        inner_incsubtensor = inc_subtensor(inner_x, y,
                inplace=inplace,
                set_instead_of_inc=set_instead_of_inc,
                tolerate_inplace_aliasing=tolerate_inplace_aliasing)
        return x.owner.op(inner_incsubtensor, *x.owner.inputs[1:])
    elif isinstance(x.owner.op, theano.tensor.Reshape):
        inner_x = x.owner.inputs[0]
        # Try to apply inc_subtensor on inner_x.
        # If it works, there is no need to reshape, as the inc_subtensor
        # will have the same shape as inner_x, which is what we want.
        inner_incsubtensor = inc_subtensor(inner_x, y.flatten(),
                inplace=inplace,
                set_instead_of_inc=set_instead_of_inc,
                tolerate_inplace_aliasing=tolerate_inplace_aliasing)
        return inner_incsubtensor
    else:
        raise TypeError('x must be the result of a subtensor operation')
class IncSubtensor(Op):
"""Increment a subtensor.
This is like numpy's
x[i,j,k] += y
It is used internally to implement the gradient on SubTensor.
:param set_instead_of_inc: if True set the subtensor to the value instead
of incrementing it by that value.
"""
    def __init__(self, idx_list, inplace=False, set_instead_of_inc=False,
                 destroyhandler_tolerate_aliased=None):
        # destroyhandler_tolerate_aliased: pairs of input indices that the
        # destroy handler may treat as aliased (see inc_subtensor).
        if destroyhandler_tolerate_aliased is None:
            destroyhandler_tolerate_aliased = []
        # Same normalized index representation as Subtensor.
        self.idx_list = map(Subtensor.convert, idx_list)
        self.inplace = inplace
        if inplace:
            # When inplace, output 0 overwrites input 0.
            self.destroy_map = {0: [0]}
        self.destroyhandler_tolerate_aliased = list(
            destroyhandler_tolerate_aliased)
        self.set_instead_of_inc = set_instead_of_inc
def __eq__(self, other):
return type(self) == type(other) \
and self.idx_list == other.idx_list \
and self.inplace == other.inplace \
and self.set_instead_of_inc == other.set_instead_of_inc
def __hash__(self):
msg = []
for entry in self.idx_list:
if isinstance(entry, slice):
msg += [(entry.start, entry.stop, entry.step)]
else:
msg += [entry]
idx_list = tuple(msg)
# backport
#idx_list = tuple((entry.start, entry.stop, entry.step)
# if isinstance(entry, slice)
# else entry
# for entry in self.idx_list)
return hashtype(self) ^ hash(idx_list) ^ hash(self.inplace) \
^ hash(self.set_instead_of_inc)
def __str__(self):
indices = []
for entry in self.idx_list:
if isinstance(entry, slice):
indices.append(Subtensor.str_from_slice(entry))
else:
indices.append(str(entry))
if self.inplace:
msg = 'Inplace'
else:
msg = ''
if not self.set_instead_of_inc:
msg += 'Inc'
else:
msg += 'Set'
return "%s{%s;%s}" % (
self.__class__.__name__,
msg,
", ".join(indices))
    def make_node(self, x, y, *inputs):
        """
        x: the tensor to increment
        y: the value to increment by
        inputs: TODO WRITEME
        """
        x, y = map(theano.tensor.as_tensor_variable, [x, y])
        if y.ndim > x.ndim:
            raise ValueError(("Trying to increment a %d-dimensional "
                "subtensor with a %d-dimensional value.") % (x.ndim,
                                                             y.ndim))
        inputs = tuple(map(Subtensor.my_as_scalar, inputs))

        idx_list = list(self.idx_list)
        if len(idx_list) > x.type.ndim:
            exception = ValueError(
                Subtensor.e_invalid % (
                    len(idx_list),
                    x.type.ndim))
            exception.subtensor_invalid = True
            raise exception

        # Check that the scalar inputs match the Type placeholders in
        # idx_list, in number and in type (mirrors Subtensor.make_node).
        input_types = Subtensor.collapse(idx_list,
                lambda entry: isinstance(entry, gof.Type))
        if len(inputs) != len(input_types):
            raise IndexError(
                "Not enough inputs to fill in the Subtensor template.",
                inputs, idx_list)
        for input, expected_type in izip(inputs, input_types):
            if input.type != expected_type:
                raise TypeError(
                    "Wrong type for Subtensor template. Expected %s, got %s."
                    % (input.type, expected_type))
        # The output has the same type as x (increment preserves shape).
        return gof.Apply(self,
                         (x, y) + inputs,
                         [x.type()])
    def perform(self, node, inputs, out_):
        out, = out_
        x, y = inputs[:2]
        # Scalar index inputs, reversed so pop() yields them in order.
        indices = list(reversed(inputs[2:]))

        def convert(entry):
            # Substitute runtime scalar values for Type placeholders,
            # recursing into slices; constants pass through unchanged.
            if isinstance(entry, gof.Type):
                rval = indices.pop()
                if sys.version_info < (2, 5):
                    # Before Python 2.5, PySlice_GetIndicesEx requires
                    # Python int to be passed.
                    rval_ = int(rval)
                    if rval_ != rval:
                        raise IndexError((
                            "Invalid value for indexing: %s. "
                            "That value may be too big.") % rval)
                    return rval_
                return rval
            elif isinstance(entry, slice):
                return slice(convert(entry.start),
                             convert(entry.stop),
                             convert(entry.step))
            else:
                return entry

        cdata = tuple(map(convert, self.idx_list))
        if len(cdata) == 1:
            cdata = cdata[0]
        if not self.inplace:
            # Work on a copy so the caller's x is untouched.
            x = x.copy()
        sub_x = x.__getitem__(cdata)
        if sub_x.shape:
            # we've sliced out an N-D tensor with N > 0
            if not self.set_instead_of_inc:
                # In-place add on the view updates x directly.
                sub_x += y
            else:
                #sub_x += -sub_x + y
                x.__setitem__(cdata, y)
        else:
            # scalar case
            if not self.set_instead_of_inc:
                x.__setitem__(cdata, sub_x + y)
            else:
                x.__setitem__(cdata, y)
        out[0] = x
    def c_code(self, node, name, inputs, outputs, sub):
        """
        Generate C code computing z = x with z[idx] set to / incremented
        by y, by building a view `zview` of the output and writing into it.
        """
        # This method delegates much of the work to helper
        # methods. This method implements the main logic
        # but subclasses may override the helper methods
        # to change the particulars, e.g. GpuIncSubtensor
        # turns the view/copy operations on numpy arrays
        # into the same operations on cuda arrays.

        self.do_type_checking(node)

        if self.inplace:  # convert bool to int
            inplace = 1
        else:
            inplace = 0
        x = inputs[0]
        y = inputs[1]
        z, = outputs
        if self.set_instead_of_inc:  # convert bool to int
            op_is_set = 1
        else:
            op_is_set = 0
        fail = sub['fail']
        # ndim of the view = ndim of x minus the number of integer
        # (non-slice) entries in idx_list.
        view_ndim = (node.inputs[0].ndim -
                     numpy.sum([not isinstance(idx, slice)
                                for idx in self.idx_list]))

        copy_of_x = self.copy_of_x(x)

        copy_input_if_necessary = """
        if (%(inplace)s)
        {
            if (%(x)s != %(z)s)
            {
                Py_XDECREF(%(z)s);
                Py_INCREF(%(x)s);
                %(z)s = %(x)s;
            }
        }
        else
        {
            if (%(z)s) Py_DECREF(%(z)s);
            %(z)s = %(copy_of_x)s;
        }
        """ % locals()

        alloc_zview = self.make_view_array(z, view_ndim)

        # On GPU, it takes two steps to make a view
        link_zview = self.link_view_array(z, fail)

        # Make a first view on the output, as we will write into it.
        build_view = """
        //TODO: give this Op a second output so that this view can be cached
        //TODO: alternatively, fix the memory leak on failure
        %(alloc_zview)s;
        if (!zview)
        {
            %(fail)s;
        }
        %(link_zview)s;
        """ % locals()

        # make zview actually a view of %(z)s
        helper_args = self.get_helper_c_code_args()
        helper_args['view_name'] = 'zview'

        get_zview = self.define_set_data() + \
                Subtensor.helper_c_code(
                node=node,
                name=name,
                inputs=outputs[:1] + inputs[2:],
                outputs=outputs,
                sub=sub,
                idx_list=self.idx_list,
                ** helper_args
                )

        copy_into = self.copy_into("zview", y)

        add_to_zview = self.add_to_zview(y, fail)

        make_modification = """
        if (%(op_is_set)s)
        {
            if (%(copy_into)s) // does broadcasting
            {
                Py_DECREF(zview);
                %(fail)s;
            }
        }
        else
        {
            %(add_to_zview)s
        }
        """ % locals()
        return (copy_input_if_necessary
                + build_view
                + "{" + get_zview + "}"
                + make_modification
                + "Py_DECREF(zview);"
                )
def do_type_checking(self, node):
""" Should raise NotImplementedError if c_code does not support
the types involved in this node.
"""
if not isinstance(node.inputs[0].type, theano.tensor.TensorType):
raise NotImplementedError()
def c_code_cache_version(self):
hv = Subtensor.helper_c_code_cache_version()
if hv:
return (1, hv)
else:
return ()
def copy_of_x(self, x):
"""
:param x: a string giving the name of a C variable
pointing to an array
:return: C code expression to make a copy of x
Base class uses PyArrayObject *, subclasses may override for
different types of arrays.
"""
# Parameters of PyArrary_FromAny are:
# array
# dtype: we pass NULL to say any dtype is acceptable, so the existing
# dtype will be copied
# min_depth: we pass 0 to have this parameter ignored
# max_depth: we pass 0 to have this parameter ignored
# requirements: here we pass NPY_ARRAY_ENSURECOPY to force a copy
# context: this is almost always NULL, I'm not sure what it's used for
return """(PyArrayObject*)PyArray_FromAny(py_%(x)s, NULL, 0, 0,
NPY_ARRAY_ENSURECOPY, NULL)""" % locals()
def make_view_array(self, x, view_ndim):
"""
:param x: a string identifying an array to be viewed
:param view_ndim: a string specifying the number of dimensions
to have in the view
This doesn't need to actually set up the view with the
right indexing; we'll do that manually later.
"""
return """Py_INCREF(PyArray_DESCR(%(x)s));
PyArrayObject * zview =
(PyArrayObject*)PyArray_NewFromDescr(
&PyArray_Type,
PyArray_DESCR(%(x)s),
%(view_ndim)s,
PyArray_DIMS(%(x)s),
PyArray_STRIDES(%(x)s),
PyArray_DATA(%(x)s),
%(x)s->flags,
NULL)""" % locals()
def get_helper_c_code_args(self):
""" Return a dictionary of arguments to pass to helper_c_code."""
return Subtensor.default_helper_c_code_args()
def copy_into(self, view, source):
"""
view: string, C code expression for an array
source: string, C code expression for an array
returns a C code expression to copy source into view, and
return 0 on success
"""
return """PyArray_CopyInto(%(view)s, %(source)s)""" % locals()
def define_set_data(self):
""" Returns C code used to define any macros used in the
set data argument to the helper C code. """
return ""
def link_view_array(self, x, fail):
""" Returns code to complete making zview a view of x"""
# On CPU there is nothing to do, make_view_array already did this
return ""
def set_view_base(self, x, fail):
""" Returns code to make zview be a correct view of x,
after helper_c_code is done messing with x"""
# On CPU there is nothing to do
return ""
def add_to_zview(self, x, fail):
""" Return C code to add x to zview. Should DECREF zview if the
add fails."""
return """
PyArrayObject * add_rval = (PyArrayObject*)PyNumber_InPlaceAdd(
(PyObject*)zview, py_%(x)s);
if (add_rval)
{
assert (PyArray_Check((PyObject*)add_rval));
assert (PyArray_DATA(add_rval) == PyArray_DATA(zview));
Py_DECREF(add_rval);
}
else
{
Py_DECREF(zview);
%(fail)s;
}""" % locals()
def infer_shape(self, node, shapes):
return [shapes[0]]
def R_op(self, inputs, eval_points):
if eval_points[0] is None or eval_points[1] is None:
return [None]
# Again we ignore eval points for indices because incsubtensor is
# not differentiable wrt to those
return self.make_node(eval_points[0], eval_points[1],
*inputs[2:]).outputs
def connection_pattern(self, node):
rval = [[True], [True]]
for ipt in node.inputs[2:]:
rval.append([False])
return rval
    def grad(self, inputs, grads):
        """
        Gradient wrt (x, y); the index inputs are disconnected.

        For 'set', the gradient wrt x is g_output with the written region
        zeroed out; for 'inc' it is g_output unchanged.  In both cases the
        gradient wrt y is the written region of g_output.
        """
        g_output, = grads
        x, y = inputs[:2]
        idx_list = inputs[2:]

        if self.set_instead_of_inc:
            gx = set_subtensor(
                Subtensor(idx_list=self.idx_list)(g_output, *idx_list),
                theano.tensor.zeros_like(y))
        else:
            gx = g_output
        gy = Subtensor(idx_list=self.idx_list)(g_output, *idx_list)

        return [gx, gy] + [DisconnectedType()()] * len(idx_list)
#########################
# Advanced indexing
#########################
#
# Should reproduce numpy's behaviour, see url:
# docs.scipy.org/doc/numpy/reference/arrays.indexing.html#advanced-indexing
class AdvancedSubtensor1(Op):
    """Implement x[ilist] where ilist is a vector of integers."""

    def __init__(self, sparse_grad=False):
        # When True (and x is 2-d), grad() returns a sparse gradient.
        self.sparse_grad = sparse_grad

    def __hash__(self):
        return hash(type(self))

    def __eq__(self, other):
        # sparse_grad is deliberately not compared: it does not change
        # the output of this op, so the merge optimizer is allowed to
        # merge two instances that differ only in that attribute.
        return type(self) == type(other)

    def __str__(self):
        return self.__class__.__name__

    def make_node(self, x, ilist):
        # x must be at least 1-d and ilist a 1-d integer vector.
        x_ = theano.tensor.as_tensor_variable(x)
        ilist_ = theano.tensor.as_tensor_variable(ilist)
        if ilist_.type.dtype[:3] not in ('int', 'uin'):
            raise TypeError('index must be integers')
        if ilist_.type.ndim != 1:
            raise TypeError('index must be vector')
        if x_.type.ndim == 0:
            raise TypeError('cannot index into a scalar')
        return Apply(self, [x_, ilist_], [x_.type()])

    def perform(self, node, inp, out_):
        x, i = inp
        out, = out_
        # Copy always implied by numpy advanced indexing semantic.
        # Reuse the previous output buffer when its shape still matches.
        if out[0] is not None and out[0].shape == (len(i),) + x.shape[1:]:
            o = out[0]
        else:
            o = None

        # If i.dtype is more precise than numpy.intp (int32 on 32-bit machines,
        # int64 on 64-bit machines), numpy may raise the following error:
        # TypeError: array cannot be safely cast to required type.
        # We need to check if values in i can fit in numpy.intp, because
        # if they don't, that should be an error (no array can have that
        # many elements on a 32-bit arch).
        if i.dtype != numpy.intp:
            i_ = theano._asarray(i, dtype=numpy.intp)
            if not numpy.can_cast(i.dtype, numpy.intp):
                # Check if there was actually an incorrect conversion
                if numpy.any(i != i_):
                    raise IndexError('index contains values that are bigger '
                                     'than the maximum array size on this system.', i)
            i = i_

        out[0] = x.take(i, axis=0, out=o)

    def connection_pattern(self, node):
        # Only x is differentiable; the index input is not.
        rval = [[True]]

        for ipt in node.inputs[1:]:
            rval.append([False])

        return rval

    def grad(self, inputs, grads):
        global sparse_module_ref
        x, ilist = inputs
        gz, = grads
        assert len(inputs) == 2

        # Legacy per-variable flag, superseded by the sparse_grad
        # constructor argument.
        sparse = False
        if getattr(x.type, 'sparse_grad', False):
            sparse = True
            warnings.warn(
                "DEPRECATION WARNING: AdvancedSubtensor1, you are using"
                " an old interface to the sparse grad. You should use"
                " theano.sparse_grad(a_tensor[an_int_vector]). ")

        if sparse or self.sparse_grad:
            if x.type.ndim != 2:
                raise TypeError(
                    "AdvancedSubtensor1: you can't take the sparse grad"
                    " from a tensor with ndim != 2. ndim is " +
                    str(x.type.ndim))

            # The sparse module is imported lazily, on first use.
            if sparse_module_ref is None:
                import theano.sparse as sparse_module_ref

            rval1 = [sparse_module_ref.construct_sparse_from_list(x, gz,
                                                                  ilist)]
        else:
            rval1 = [advanced_inc_subtensor1(x.zeros_like(), gz, ilist)]
        return rval1 + [DisconnectedType()()] * (len(inputs) - 1)

    def R_op(self, inputs, eval_points):
        if eval_points[0] is None:
            return [None]
        return self.make_node(eval_points[0], *inputs[1:]).outputs

    def infer_shape(self, node, ishapes):
        x, ilist = ishapes
        # One row of x per index; trailing dimensions are unchanged.
        return [ilist + x[1:]]

    def c_support_code(self):
        # In some versions of numpy, NPY_MIN_INTP is defined as MIN_LONG,
        # which is not defined. It should be NPY_MIN_LONG instead in that case.
        return dedent("""\
                #ifndef MIN_LONG
                #define MIN_LONG NPY_MIN_LONG
                #endif""")

    def c_code(self, node, name, input_names, output_names, sub):
        if self.__class__ is not AdvancedSubtensor1:
            raise MethodNotDefined(
                "c_code defined for AdvancedSubtensor1,"
                " not for child class", type(self))
        a_name, i_name = input_names[0], input_names[1]
        output_name = output_names[0]
        fail = sub['fail']
        # The generated code casts the indices to NPY_INTP when needed
        # (failing if values don't fit), tries to reuse the previous
        # output buffer when its shape matches, then delegates to
        # PyArray_TakeFrom.
        return """
        PyObject *indices;
        int i_type = PyArray_TYPE(%(i_name)s);
        if (i_type != NPY_INTP) {
            // Cast %(i_name)s to NPY_INTP (expected by PyArray_TakeFrom),
            // if all values fit.
            if (!PyArray_CanCastSafely(i_type, NPY_INTP)) {
                npy_int64 min_val, max_val;
                PyObject* py_min_val = PyArray_Min(%(i_name)s, NPY_MAXDIMS,
                                                   NULL);
                if (py_min_val == NULL) {
                    %(fail)s;
                }
                min_val = PyLong_AsLongLong(py_min_val);
                Py_DECREF(py_min_val);
                if (min_val == -1 && PyErr_Occurred()) {
                    %(fail)s;
                }
                PyObject* py_max_val = PyArray_Max(%(i_name)s, NPY_MAXDIMS,
                                                   NULL);
                if (py_max_val == NULL) {
                    %(fail)s;
                }
                max_val = PyLong_AsLongLong(py_max_val);
                Py_DECREF(py_max_val);
                if (max_val == -1 && PyErr_Occurred()) {
                    %(fail)s;
                }
                if (min_val < NPY_MIN_INTP || max_val > NPY_MAX_INTP) {
                    PyErr_SetString(PyExc_IndexError,
                                 "Index contains values "
                                 "that are bigger than the maximum array "
                                 "size on this system.");
                    %(fail)s;
                }
            }
            indices = PyArray_Cast(%(i_name)s, NPY_INTP);
            if (indices == NULL) {
                %(fail)s;
            }
        }
        else {
             indices = (PyObject *)%(i_name)s;
             Py_INCREF(indices);
        }
        if (%(output_name)s != NULL) {
            npy_intp nd, i, *shape;
            nd = PyArray_NDIM(%(a_name)s) + PyArray_NDIM(indices) - 1;
            if (PyArray_NDIM(%(output_name)s) != nd) {
                Py_CLEAR(%(output_name)s);
            }
            else {
                shape = PyArray_DIMS(%(output_name)s);
                for (i = 0; i < PyArray_NDIM(indices); i++) {
                    if (shape[i] != PyArray_DIMS(indices)[i]) {
                        Py_CLEAR(%(output_name)s);
                        break;
                    }
                }
                if (%(output_name)s != NULL) {
                    for (; i < nd; i++) {
                        if (shape[i] != PyArray_DIMS(%(a_name)s)[
                                            i-PyArray_NDIM(indices)+1]) {
                            Py_CLEAR(%(output_name)s);
                            break;
                        }
                    }
                }
            }
        }
        %(output_name)s = (PyArrayObject*)PyArray_TakeFrom(
                    %(a_name)s, indices, 0, %(output_name)s, NPY_RAISE);
        Py_DECREF(indices);
        if (%(output_name)s == NULL) %(fail)s;
        """ % locals()

    def c_code_cache_version(self):
        return (0, 1, 1)
# Ready-made default instance; reused by take() and by the gradients above.
advanced_subtensor1 = AdvancedSubtensor1()
class AdvancedIncSubtensor1(Op):
    """Increments a subtensor using advanced slicing (list of index)"""

    def __init__(self, inplace=False, set_instead_of_inc=False):
        # inplace: overwrite the first input instead of copying it.
        # set_instead_of_inc: perform x[idx] = y instead of x[idx] += y.
        self.inplace = inplace
        self.set_instead_of_inc = set_instead_of_inc
        if inplace:
            self.destroy_map = {0: [0]}

    def __hash__(self):
        return hash((type(self), self.inplace, self.set_instead_of_inc))

    def __eq__(self, other):
        return (type(self) == type(other)
                and self.inplace == other.inplace
                and self.set_instead_of_inc == other.set_instead_of_inc)

    def __str__(self):
        if self.inplace:
            msg = "inplace"
        else:
            msg = "no_inplace"
        if self.set_instead_of_inc:
            msg += ",set"
        else:
            msg += ",inc"

        return self.__class__.__name__ + "{%s}" % msg

    def make_node(self, x, y, ilist):
        """Validate arguments and build the Apply node for x[ilist] (+)= y.

        x: tensor to update; y: value; ilist: 1-d integer index vector.
        """
        x_ = theano.tensor.as_tensor_variable(x)
        y_ = theano.tensor.as_tensor_variable(y)
        ilist_ = theano.tensor.as_tensor_variable(ilist)

        if ilist_.type.dtype[:3] not in ('int', 'uin'):
            raise TypeError('index must be integers')
        if ilist_.type.ndim != 1:
            raise TypeError('index must be vector')
        if x_.type.ndim == 0:
            raise TypeError('cannot index into a scalar')
        if y_.type.ndim > x_.type.ndim:
            if self.set_instead_of_inc:
                opname = 'set'
            else:
                opname = 'increment'
            # BUGFIX: the previous format string had four %s placeholders
            # for only three values, so raising it crashed with
            # "not enough arguments for format string" instead of
            # reporting the dimension mismatch.
            raise TypeError('cannot %s x subtensor with ndim=%s'
                            ' by y with ndim=%s' % (
                                opname, x_.type.ndim, y_.type.ndim))

        return Apply(self, [x_, y_, ilist_], [x_.type()])

    def perform(self, node, inp, out_):
        # TODO opt to make this inplace
        x, y, idx = inp
        out, = out_
        if not self.inplace:
            x = x.copy()
        if self.set_instead_of_inc:
            x[idx] = y
        else:
            # In Numpy, x[idx] += y doesn't work if the same index is present
            # many times: it does it only once. For this reason we fall back
            # to our own 'inc' iteration when inplace_increment is missing.
            increment = inplace_increment
            if increment is None:
                increment = self.inplace_increment1d_slow
            increment(x, idx, y)
        out[0] = x

    def inplace_increment1d_slow(self, x, idx, y):
        """Duplicate-safe pure-Python fallback for x[idx] += y."""
        # If `y` has as many dimensions as `x`, then we want to iterate
        # jointly on `x` and `y`. Otherwise, it means `y` should be
        # broadcasted to fill all relevant rows of `x`.
        assert y.ndim <= x.ndim   # Should be guaranteed by `make_node`
        if y.ndim == x.ndim:
            assert len(y) == len(idx)
            for (j, i) in enumerate(idx):
                x[i] += y[j]
        else:
            for i in idx:
                x[i] += y

    def infer_shape(self, node, ishapes):
        # The output always has the shape of the updated tensor x.
        x, y, ilist = ishapes
        return [x]

    def R_op(self, inputs, eval_points):
        if None in eval_points[:2]:
            return [None]
        return self.make_node(eval_points[0], eval_points[1],
                              *inputs[2:]).outputs

    def connection_pattern(self, node):
        # x and y are connected; the index vector is not differentiable.
        rval = [[True], [True], [False]]

        return rval

    def grad(self, inputs, grads):
        g_output, = grads
        x, y = inputs[:2]
        idx_list = inputs[2:]

        # d/dx passes the output gradient through; d/dy gathers the rows
        # of the output gradient that were written.
        gx = g_output
        gy = advanced_subtensor1(g_output, *idx_list)

        return [gx, gy] + [DisconnectedType()()] * len(idx_list)
# Ready-made default instance (out-of-place increment).
advanced_inc_subtensor1 = AdvancedIncSubtensor1()
def as_index_variable(idx):
    """Convert `idx` (None, slice, or integer-like) into a symbolic
    index variable; non-integer tensor indices are rejected."""
    if idx is None:
        return NoneConst
    if isinstance(idx, slice):
        return make_slice(idx)
    var = theano.tensor.as_tensor_variable(idx)
    if var.type.dtype[:3] in ('int', 'uin'):
        return var
    raise TypeError('index must be integers')
def as_int_none_variable(x):
    """Wrap `x` as a 0-d integer variable; None maps to NoneConst."""
    if x is None:
        return NoneConst
    var = theano.tensor.as_tensor_variable(x, ndim=0)
    if var.type.dtype[:3] in ('int', 'uin'):
        return var
    raise TypeError('index must be integers')
def adv_index_broadcastable_pattern(a, idx):
    """
    This function is only used to determine the broadcast pattern for
    AdvancedSubtensor output variable.

    For this, we build a fake ndarray and a fake index, perform the
    indexing with numpy, and read the broadcast pattern off the shape
    of the result.
    """

    def replace_slice(v):
        # Map each symbolic index entry to a concrete stand-in that numpy
        # can index with: None stays None, slices become full slices, and
        # integer-like entries become small fake index arrays.
        if isinstance(v, gof.Apply):
            if len(v.outputs) != 1:
                raise ValueError(
                    "It is ambiguous which output of a multi-output Op has"
                    " to be fetched.", v)
            else:
                v = v.outputs[0]

        if NoneConst.equals(v):
            return None
        if isinstance(v.type, SliceType):
            return slice(None, None)

        return numpy.zeros((2,) * v.ndim, int)

    newidx = tuple(map(replace_slice, idx))

    # Broadcastable dims get fake length 1, the others length 2
    # (2 - True == 1; 2 - False == 2).
    fakeshape = [2 - bc for bc in a.broadcastable]
    retshape = numpy.empty(fakeshape)[newidx].shape
    # A resulting dim of length 1 can only come from a broadcastable input
    # dim, so it is marked broadcastable in the output.
    return tuple([dim == 1 for dim in retshape])
class AdvancedSubtensor(Op):
    """Return a subtensor copy, using advanced indexing.
    """
    # Should be used by __getitem__ and __getslice__, as follows:
    # AdvancedSubtensor()(self, *args),
    # if args contains an advanced indexing pattern.

    def __eq__(self, other):
        return self.__class__ == other.__class__

    def __hash__(self):
        return hash(self.__class__)

    def __str__(self):
        return self.__class__.__name__

    def make_node(self, x, *index):
        """Build the Apply node; the output broadcast pattern is inferred
        by faking the same indexing operation on a tiny numpy array."""
        x = theano.tensor.as_tensor_variable(x)

        index = tuple(map(as_index_variable, index))
        bcast = adv_index_broadcastable_pattern(x, index)
        return gof.Apply(self,
                         (x,) + index,
                         [theano.tensor.tensor(dtype=x.type.dtype,
                                               broadcastable=bcast)])

    def R_op(self, inputs, eval_points):
        # Indices are not differentiable, so only x's eval point matters.
        if eval_points[0] is None:
            return [None]
        return self.make_node(eval_points[0], *inputs[1:]).outputs

    def infer_shape(self, node, ishapes):
        # Really special case: x[ind1, ind2] on a matrix with two integer
        # vectors yields a vector whose length is len(ind1) == len(ind2).
        if len(ishapes) == 3:
            xshp, ind1shp, ind2shp = ishapes
            if len(xshp) == 2 and len(ind1shp) == 1 and len(ind2shp) == 1:
                # if the graph is correct, we can assume ind1shp[0] and
                # ind2shp[0] will have the same value.
                # Try to return the one closest to the graph input.
                if node.inputs[2].owner is None:
                    return [ind2shp]
                else:
                    return [ind1shp]
        # Default case, we don't know
        return node.fgraph.shape_feature.default_infer_shape(node, ishapes)

    def perform(self, node, inputs, out_):
        out, = out_
        # TODO: in general, we need to re-pack the inputs into a valid
        # index, just like subtensor
        out[0] = inputs[0].__getitem__(inputs[1:])
        # Warn about a known numpy bug, but only on affected versions.
        # BUGFIX: the previous check compared raw version strings, and
        # since '1.10.0' <= '1.6.1' lexicographically, it also fired on
        # recent numpy releases.  Compare numeric (major, minor) instead.
        try:
            np_version = tuple(int(p) for p
                               in numpy.__version__.split('.')[:2])
        except ValueError:
            # Unparseable version string: assume a recent, fixed numpy.
            np_version = (1, 7)
        if (np_version <= (1, 6) and
                out[0].size != numpy.uint32(out[0].size)):
            warnings.warn(
                    'Numpy versions 1.6.1 and below have a bug preventing '
                    'advanced indexing from correctly filling arrays that '
                    'are too big (>= 2^32 elements). It is possible that '
                    'out[0] (%s), with shape %s, is not correctly filled.'
                    % (out[0], out[0].shape))

    def connection_pattern(self, node):
        # Only the indexed tensor is differentiable; indices are not.
        rval = [[True]]

        for ipt in node.inputs[1:]:
            rval.append([False])

        return rval

    def grad(self, inputs, grads):
        # Scatter the output gradient back into a zero tensor shaped
        # like x; indices are disconnected from the gradient.
        gz, = grads
        x = inputs[0]
        rest = inputs[1:]
        return [advanced_inc_subtensor(theano.tensor.zeros_like(x), gz,
                                       *rest)] + \
            [DisconnectedType()()] * len(rest)
class AdvancedIncSubtensor(Op):
    """Increments a subtensor using advanced indexing.

    :note: We need the numpy.inplace_increment() function currently
        numpy's PR 326 to be able to make an inplace version of this
        op.
    """

    def __init__(self, inplace=False, set_instead_of_inc=False):
        self.inplace = inplace
        self.set_instead_of_inc = set_instead_of_inc
        # The assert is needed as in the past the first argument was
        # something else that was not used.
        assert isinstance(inplace, bool)
        if self.inplace:
            raise NotImplementedError('In place computation is not'
                                      ' implemented')
        # Flipped (on a copy of self) in make_node when the increment must
        # fall back to numpy's `+=`, which is only safe without duplicates.
        self.allow_legacy_perform = False

    def __hash__(self):
        return hash((type(self), self.inplace, self.set_instead_of_inc))

    def __eq__(self, other):
        return (type(self) == type(other)
                and self.inplace == other.inplace
                and self.set_instead_of_inc == other.set_instead_of_inc)

    def __str__(self):
        return "%s{%s, %s}" % (self.__class__.__name__,
                               "inplace=" + str(self.inplace),
                               " set_instead_of_inc=" + str(self. set_instead_of_inc))

    def make_node(self, x, y, *inputs):
        """
        Build the Apply node.  When incrementing without the compiled
        inplace_increment function, a copy of self with
        allow_legacy_perform=True is used instead, but only for index
        patterns proven free of duplicate indices.
        """
        x = theano.tensor.as_tensor_variable(x)
        y = theano.tensor.as_tensor_variable(y)

        op = self
        # If we are incrementing, but the increment compiled function is not
        # available, we need to support legacy cases.
        if not self.set_instead_of_inc and inplace_increment is None:
            legacy_conditions = False
            if x.ndim == 2 and y.ndim == 1 and len(inputs) == 2:
                ind1 = theano.tensor.as_tensor_variable(inputs[0])
                ind2 = theano.tensor.as_tensor_variable(inputs[1])
                if ind1.ndim == 1 and ind2.ndim == 1:
                    if ind1.owner and isinstance(ind1.owner.op, ARange):
                        legacy_conditions = True
                    elif isinstance(ind1, Constant):
                        # Make sure no index is duplicated
                        val = ind1.value
                        if numpy.unique(val).size == val.size:
                            legacy_conditions = True
                    elif ind2.owner and isinstance(ind2.owner.op, ARange):
                        legacy_conditions = True
                    elif isinstance(ind2, Constant):
                        # Make sure no index is duplicated
                        val = ind2.value
                        if numpy.unique(val).size == val.size:
                            legacy_conditions = True

            if legacy_conditions:
                op = copy(self)
                op.allow_legacy_perform = True
            else:
                raise NotImplementedError(
                        'Could not import inplace_increment, so some advanced '
                        'indexing features are disabled. They will be '
                        'available if you update NumPy to version 1.8 or '
                        'later, or to the latest development version.')

        return gof.Apply(op,
                         (x, y) + inputs,
                         [theano.tensor.tensor(dtype=x.type.dtype,
                                               broadcastable=x.type.broadcastable)])

    def perform(self, node, inputs, out_):
        # TODO: 1. opt to make this in place 2. generalize as described in
        #       AdvancedSubtensor's perform TODO
        out, = out_
        if not self.inplace:
            out[0] = inputs[0].copy()
        else:
            out[0] = inputs[0]

        if self.set_instead_of_inc:
            out[0][inputs[2:]] = inputs[1]
        elif inplace_increment is not None:
            inplace_increment(out[0], tuple(inputs[2:]), inputs[1])
        elif self.allow_legacy_perform:
            # Safe because make_node proved the indices duplicate-free.
            out[0][inputs[2:]] += inputs[1]
        else:
            raise NotImplementedError(
                    'Could not import inplace_increment, so some advanced '
                    'indexing features are disabled. They will be '
                    'available if you update NumPy to version 1.8 or '
                    'later, or to the latest development version.')

        # NOTE(review): this is a lexicographic string comparison, so it is
        # also True for e.g. numpy '1.10.0'; a numeric version comparison
        # would be more accurate here.
        if (numpy.__version__ <= '1.6.1' and
                out[0].size != numpy.uint32(out[0].size)):
            warnings.warn(
                    'Numpy versions 1.6.1 and below have a bug preventing '
                    'advanced indexing from correctly filling arrays that '
                    'are too big (>= 2^32 elements). It is possible that '
                    'out[0] (%s), with shape %s, is not correctly filled.'
                    % (out[0], out[0].shape))

    def infer_shape(self, node, ishapes):
        # Output has the shape of the updated tensor x.
        return [ishapes[0]]

    def connection_pattern(self, node):
        # x and y are connected; index inputs are not differentiable.
        rval = [[True], [True]]

        for ipt in node.inputs[2:]:
            rval.append([False])

        return rval

    def grad(self, inpt, output_gradients):
        x, y = inpt[:2]
        idxs = inpt[2:]
        outgrad, = output_gradients
        # d/dx passes the output gradient through; d/dy gathers the
        # written region of the output gradient.
        d_x_wrt_C = outgrad
        d_y_wrt_C = AdvancedSubtensor()(outgrad, *idxs)
        return [d_x_wrt_C, d_y_wrt_C] + \
            [DisconnectedType()() for _ in idxs]

    def R_op(self, inputs, eval_points):
        if None in eval_points[:2]:
            return [None]
        return self.make_node(eval_points[0], eval_points[1],
                              *inputs[2:]).outputs
# Ready-made default instance (out-of-place, increment semantics).
advanced_inc_subtensor = AdvancedIncSubtensor()
def take(a, indices, axis=None, mode='raise'):
    """
    Take elements from `a` along `axis`, mirroring numpy.take.

    :param a: tensor to gather from
    :param indices: integer indices (any shape)
    :param axis: axis along which to gather; None gathers from the
        flattened tensor
    :param mode: 'raise', 'clip' (clamp to valid range) or 'wrap'
        (wrap modulo the axis length)
    """
    a = theano.tensor.as_tensor_variable(a)
    indices = theano.tensor.as_tensor_variable(indices)
    # Reuse advanced_subtensor1 if indices is a vector
    if indices.ndim == 1:
        if mode == 'clip':
            # NOTE(review): with axis=None this indexes a.shape with None;
            # presumably clip/wrap are only used with an explicit axis --
            # TODO confirm.
            indices = clip(indices, 0, a.shape[axis] - 1)
        elif mode == 'wrap':
            indices = indices % a.shape[axis]
        if axis is None:
            return advanced_subtensor1(a.flatten(), indices)
        elif axis == 0:
            return advanced_subtensor1(a, indices)
        else:
            if axis < 0:
                axis += a.ndim
            assert axis >= 0
            # Swap axis 0 with `axis`, gather along the new axis 0, then
            # swap back.  BUGFIX: list() is required -- on Python 3,
            # range() is not a mutable sequence, so the item assignments
            # below used to raise TypeError.
            shuffle = list(range(a.ndim))
            shuffle[0] = axis
            shuffle[axis] = 0
            return advanced_subtensor1(
                a.dimshuffle(shuffle), indices).dimshuffle(shuffle)
    # General case: gather with flattened indices, then reshape to the
    # numpy.take output shape.
    if axis is None:
        shape = indices.shape
        ndim = indices.ndim
    else:
        shape = theano.tensor.concatenate(
            [a.shape[:axis], indices.shape, a.shape[axis + 1:]])
        ndim = a.ndim + indices.ndim - 1
    return take(a, indices.flatten(), axis, mode).reshape(shape, ndim)
from itertools import izip
import logging
import sys
import unittest
from nose.plugins.skip import SkipTest
import numpy
import theano
from theano.compat import exc_message
from theano.compat.six import StringIO
from theano.compile import DeepCopyOp
from theano import config
from theano import gof
import theano.scalar as scal
import theano.tensor as tensor
from theano.tests import unittest_tools as utt
from theano.tensor.subtensor import (inc_subtensor, set_subtensor,
Subtensor, IncSubtensor,
AdvancedSubtensor1, AdvancedSubtensor,
advanced_subtensor1, inplace_increment,
AdvancedIncSubtensor1,
AdvancedIncSubtensor,
get_canonical_form_slice)
from theano.tensor import (as_tensor_variable, _shared,
NotScalarConstantError,
fscalar, iscalar, dscalar, cscalar,
vector, dvector, fvector, lvector,
fmatrix, dmatrix, lmatrix, matrix,
ctensor3, dtensor4)
from theano.tensor.tests.test_basic import rand, randint_ranged, inplace_func
class T_subtensor(unittest.TestCase, utt.TestOptimizationMixin):
"""
This is build in a way that allow to reuse it to test the
equivalent gpu op.
"""
    def __init__(self, name, shared=tensor._shared,
                 sub=tensor.Subtensor,
                 inc_sub=tensor.IncSubtensor,
                 adv_sub1=tensor.AdvancedSubtensor1,
                 adv_incsub1=tensor.AdvancedIncSubtensor1,
                 mode=None,
                 dtype=theano.config.floatX,
                 ignore_topo=DeepCopyOp):
        """
        Parameterized constructor so the same tests can be rerun against
        the equivalent GPU ops: the shared constructor, the op classes,
        `mode`, `dtype` and the ops ignored in toposort checks can all be
        overridden.
        """
        self.shared = shared
        self.sub = sub
        self.inc_sub = inc_sub
        self.adv_sub1 = adv_sub1
        self.adv_incsub1 = adv_incsub1
        if mode is None:
            mode = theano.compile.mode.get_default_mode()
        self.mode = mode
        self.dtype = dtype
        self.ignore_topo = ignore_topo
        self.fast_compile = theano.config.mode == 'FAST_COMPILE'
        self.ops = (sub, inc_sub, adv_sub1, adv_incsub1)
        return super(T_subtensor, self).__init__(name)
    def function(self, inputs, outputs, accept_inplace=False,
                 op=None, mode=None, N=1, N_fast=None):
        """Wrapper around theano.function that also checks the compiled
        graph contains the expected op.

        :param op: op class expected in the toposort (defaults to self.sub)
        :param N: the number of `op` instances expected in the toposort
            (replaced by `N_fast` when running under FAST_COMPILE)
        """
        if self.fast_compile and N_fast is not None:
            N = N_fast
        if mode is None:
            mode = self.mode
        if op is None:
            op = self.sub
        f = theano.function(inputs, outputs, mode=mode,
                            accept_inplace=accept_inplace)
        self.assertFunctionContainsClassN(f, op, N)
        return f
    def setUp(self):
        # Make every test quiet and deterministic.
        Subtensor.debug = False
        utt.seed_rng()
def eval_output_and_check(self, t, list=False):
f = inplace_func([], t, mode=self.mode)
topo = f.maker.fgraph.toposort()
topo_ = [node for node in topo if not isinstance(node.op,
self.ignore_topo)]
assert len(topo_) == 1
if not list:
assert isinstance(topo_[0].op, self.sub)
else:
assert isinstance(topo_[0].op, self.adv_sub1)
tval = f()
return tval
    def test0_err_invalid(self):
        """Indexing a 0-d tensor must raise a ValueError tagged with
        `subtensor_invalid`."""
        # It is impossible to retrieve a view of a 0-d tensor.
        n = self.shared(numpy.ones((), dtype=self.dtype))
        try:
            t = n[0]
        except ValueError, e:
            self.assertTrue(hasattr(e, 'subtensor_invalid'))
            return
        self.fail()
    def test1_err_bounds(self):
        """Reading past the end of a vector raises an out-of-bounds error
        at evaluation time (graph construction itself must succeed)."""
        n = self.shared(numpy.ones(3, dtype=self.dtype))
        # Disable compute_test_value so building n[7] does not raise early.
        ctv_backup = config.compute_test_value
        config.compute_test_value = 'off'
        try:
            t = n[7]
        finally:
            config.compute_test_value = ctv_backup
        self.assertTrue(isinstance(t.owner.op, Subtensor))
        # Silence expected error messages
        _logger = logging.getLogger('theano.gof.opt')
        oldlevel = _logger.level
        _logger.setLevel(logging.CRITICAL)
        try:
            try:
                self.eval_output_and_check(t)
                assert 0
            except Exception, e:
                if 'out of bounds' not in exc_message(e):
                    raise
        finally:
            _logger.setLevel(oldlevel)
    def test1_err_subslice(self):
        """A slice whose stop is itself a slice must be rejected."""
        n = self.shared(numpy.ones(3, dtype=self.dtype))
        try:
            t = n[slice(0, slice(1, 2, None), None)]
        except Exception, e:
            ### Relax constraint on the type of Exception,
            ### since this might be handled by AdvancedSubtensor
            #if e[0] != Subtensor.e_indextype:
            #    raise
            return
        self.fail()
def test1_ok_range_finite(self):
n = self.shared(numpy.arange(3, dtype=self.dtype))
t = n[0:2]
self.assertTrue(isinstance(t.owner.op, Subtensor))
tval = self.eval_output_and_check(t)
self.assertTrue(tval.shape == (2,))
self.assertTrue((tval == [0, 1]).all())
def test2_ok_range_finite(self):
n = self.shared(numpy.arange(12, dtype=self.dtype).reshape((3, 4)))
# Also check negative index
for idx in [(slice(0, 2), 3), ((slice(0, 2), -1)), (slice(0, 2), -4)]:
t = n[idx] # l]#0:2,3]
self.assertTrue(isinstance(t.owner.op, Subtensor))
tval = self.eval_output_and_check(t)
self.assertTrue(tval.shape == (2,))
self.assertTrue(numpy.allclose(tval, n.get_value()[idx]))
def test1_0_dims(self):
n = self.shared(numpy.ones((), dtype=self.dtype))
t = theano.tensor.Subtensor([])(n)
self.assertTrue(isinstance(t.owner.op, Subtensor))
mode = self.mode
self.mode = mode.excluding("local_useless_subtensor")
try:
self.eval_output_and_check(t)
finally:
self.mode = mode
    def test1_err_invalid(self):
        """Using two indices on a 1-d tensor raises a ValueError tagged
        with `subtensor_invalid`."""
        n = self.shared(numpy.ones(1, dtype=self.dtype))
        try:
            t = n[0, 0]
        except ValueError, e:
            self.assertTrue(hasattr(e, 'subtensor_invalid'))
            return
        self.fail()
def test1_ok_elem(self):
n = self.shared(numpy.ones(1, dtype=self.dtype) * 5)
t = n[0]
self.assertTrue(isinstance(t.owner.op, Subtensor))
tval = self.eval_output_and_check(t)
self.assertTrue(tval.shape == ())
self.assertTrue(tval == 5.0)
def test1_ok_range_infinite(self):
#Subtensor.debug = True
n = self.shared(numpy.arange(3, dtype=self.dtype))
t = n[1:]
self.assertTrue(isinstance(t.owner.op, Subtensor))
tval = self.eval_output_and_check(t)
self.assertTrue(tval.shape == (2,))
self.assertTrue((tval == [1.0, 2.0]).all())
def test1_ok_strided(self):
n = self.shared(numpy.arange(5, dtype=self.dtype))
t = n[1::2]
self.assertTrue(isinstance(t.owner.op, Subtensor))
tval = self.eval_output_and_check(t)
self.assertTrue(tval.shape == (2,))
self.assertTrue((tval == [1.0, 3.0]).all())
t = n[0:-1:2] # 0 to 1 from the end stepping by 2
tval = self.eval_output_and_check(t)
self.assertTrue(tval.shape == (2,))
self.assertTrue((tval == [0.0, 2.0]).all())
    def test2_err_bounds0(self):
        """Out-of-range column indices (positive and negative) on a matrix
        raise IndexError at evaluation time."""
        n = self.shared(numpy.ones((2, 3), dtype=self.dtype) * 5)
        # Disable compute_test_value so graph construction does not raise.
        ctv_backup = config.compute_test_value
        config.compute_test_value = 'off'
        try:
            for idx in [(0, 4), (0, -4)]:
                t = n[idx]
                self.assertTrue(isinstance(t.owner.op, Subtensor))
                # Silence expected warnings
                _logger = logging.getLogger('theano.gof.opt')
                oldlevel = _logger.level
                _logger.setLevel(logging.CRITICAL)
                try:
                    self.assertRaises(IndexError,
                                      self.eval_output_and_check, [t])
                finally:
                    _logger.setLevel(oldlevel)
        finally:
            config.compute_test_value = ctv_backup
def test2_err_bounds1(self):
n = self.shared((numpy.ones((2, 3), dtype=self.dtype) * 5))
t = n[4:5, 3]
self.assertTrue(isinstance(t.owner.op, Subtensor))
old_stderr = sys.stderr
sys.stderr = StringIO()
try:
self.assertRaises(IndexError,
self.eval_output_and_check, [t])
finally:
sys.stderr = old_stderr
def test2_ok_elem(self):
n = self.shared(numpy.arange(6, dtype=self.dtype).reshape((2, 3)))
t = n[0, 2]
self.assertTrue(isinstance(t.owner.op, Subtensor))
tval = self.eval_output_and_check(t)
self.assertTrue(tval.shape == ())
self.assertTrue(numpy.all(tval == 2))
def test2_ok_row(self):
n = self.shared(numpy.arange(6, dtype=self.dtype).reshape((2, 3)))
t = n[1]
self.assertFalse(any(n.type.broadcastable))
self.assertTrue(isinstance(t.owner.op, Subtensor))
tval = self.eval_output_and_check(t)
self.assertTrue(tval.shape == (3,))
self.assertTrue(numpy.all(tval == [3, 4, 5]))
def test2_ok_col(self):
n = self.shared(numpy.arange(6, dtype=self.dtype).reshape((2, 3)))
t = n[:, 0]
self.assertTrue(isinstance(t.owner.op, Subtensor))
self.assertFalse(any(n.type.broadcastable))
tval = self.eval_output_and_check(t)
self.assertTrue(tval.shape == (2,))
self.assertTrue(numpy.all(tval == [0, 3]))
def test2_ok_rows_finite(self):
n = self.shared(numpy.arange(12, dtype=self.dtype).reshape((4, 3)))
t = n[1:3, 0]
self.assertTrue(isinstance(t.owner.op, Subtensor))
tval = self.eval_output_and_check(t)
self.assertTrue(tval.shape == (2,))
self.assertTrue(numpy.all(tval == [3, 6]))
def test2_ok_cols_infinite(self):
n = self.shared(numpy.arange(12, dtype=self.dtype).reshape((4, 3)))
t = n[1, 2:]
self.assertTrue(isinstance(t.owner.op, Subtensor))
tval = self.eval_output_and_check(t)
self.assertTrue(tval.shape == (1,))
self.assertTrue(numpy.all(tval == 5))
def test2_ok_strided(self):
n = self.shared(numpy.arange(20, dtype=self.dtype).reshape((4, 5)))
t = n[1:4:2, 1:5:2]
self.assertTrue(isinstance(t.owner.op, Subtensor))
tval = self.eval_output_and_check(t)
self.assertTrue(tval.shape == (2, 2))
self.assertTrue(numpy.all(tval == [[6, 8], [16, 18]]))
def test3_ok_mat(self):
n = self.shared(numpy.arange(24, dtype=self.dtype).reshape((2, 3, 4)))
t = n[0, 0, 0]
self.assertTrue(isinstance(t.owner.op, Subtensor))
tval = self.eval_output_and_check(t)
self.assertTrue(tval.shape == ())
self.assertTrue(numpy.all(tval == 0))
    def test_long(self):
        """Python 2 `long` literals are valid indices (cast to int64)."""
        n = self.shared(numpy.arange(12, dtype=self.dtype).reshape((4, 3)))
        t = n[1L:4L:2L, 1L]
        self.assertTrue(isinstance(t.owner.op, Subtensor))
        tval = self.eval_output_and_check(t)
        self.assertTrue(tval.shape == (2,))
        self.assertTrue(numpy.all(tval == [4, 10]))
    def test_long_too_big(self):
        """A `long` index too large for int64 must raise."""
        # Currently, we cast Python longs to int64 when used for indexing.
        # This test checks that using a long that does not fit raises an error.
        n = self.shared(numpy.arange(12, dtype=self.dtype).reshape((4, 3)))
        self.assertRaises(Exception, lambda: n[:(2L ** 63)])
def test_newaxis(self):
    """
    newaxis support comes from logic in the __getitem__ of TensorType
    Variables, which currently inserts dimshuffle to get the right number
    of dimensions, and adjusts the slice tuple accordingly.

    So testing is done via square-bracket notation rather than direct
    interaction with the Subtensor Op (which has no support of its own for
    newaxis).
    """
    newaxis = numpy.newaxis

    n = self.shared(numpy.arange(24, dtype=self.dtype).reshape((2, 3, 4)))
    assert n.ndim == 3

    # Inserting newaxis at each position makes that axis broadcastable.
    n4 = n[newaxis, :, :, :]
    assert n4.broadcastable == (True, False, False, False), n4

    n4 = n[:, newaxis, :, :]
    assert n4.broadcastable == (False, True, False, False), n4

    n4 = n[:, :, newaxis, :]
    assert n4.broadcastable == (False, False, True, False), n4

    n4 = n[:, :, :, newaxis]
    assert n4.broadcastable == (False, False, False, True), n4

    # Multiple newaxis in a single subscript.
    n3 = n.flatten()[newaxis, :, newaxis]
    assert n3.broadcastable == (True, False, True), n3

    # newaxis on a scalar produces a broadcastable vector.
    s = cscalar()
    s1 = s[newaxis]
    assert s1.broadcastable == (True,), s1

    vs1, vn3, vn4 = theano.function([s], [s1, n3, n4])(-2.0)

    assert numpy.all(vs1 == [-2.0])
    assert numpy.all(vn3
            == numpy.arange(24)[newaxis, :, newaxis])
    assert numpy.all(vn4
            == numpy.arange(24).reshape((2, 3, 4))[:, :, :, newaxis])
def test_grad_1d(self):
    """Gradient through a mixed slice/scalar subtensor n[z:, z].

    Also checks the optimized graph contains exactly one IncSubtensor and
    one Subtensor node (when not in fast_compile mode).
    """
    subi = 0
    data = numpy.asarray(rand(2, 3), dtype=self.dtype)
    n = self.shared(data)
    z = scal.constant(subi)
    t = n[z:, z]
    gn = theano.tensor.grad(theano.tensor.sum(theano.tensor.exp(t)), n)

    f = inplace_func([], gn, mode=self.mode)
    topo = f.maker.fgraph.toposort()
    topo_ = [node for node in topo if not isinstance(node.op,
         self.ignore_topo)]
    if not self.fast_compile:
        # NOTE(review): the expected node count (6) pins the optimizer's
        # output for this graph — update if the optimizations change.
        assert len(topo_) == 6
    assert numpy.sum([isinstance(node.op, self.inc_sub)
         for node in topo_]) == 1
    assert numpy.sum([isinstance(node.op, self.sub)
         for node in topo_]) == 1

    gval = f()
    good = numpy.zeros_like(data)
    good[subi:, subi] = numpy.exp(data[subi:, subi])
    self.assertTrue(numpy.allclose(gval, good), (gval, good))
def test_grad_0d(self):
    """Gradient through a 0-d subtensor n[1, 0]."""
    data = numpy.asarray(rand(2, 3), dtype=self.dtype)
    n = self.shared(data)
    t = n[1, 0]
    gn = theano.tensor.grad(theano.tensor.sum(theano.tensor.exp(t)), n)
    f = self.function([], gn)
    topo = f.maker.fgraph.toposort()
    topo_ = [node for node in topo if not isinstance(node.op,
         self.ignore_topo)]
    if not self.fast_compile:
        # See note in test_grad_1d about this optimizer-dependent count.
        assert len(topo_) == 6
    assert numpy.sum([isinstance(node.op, self.inc_sub)
         for node in topo_]) == 1
    assert numpy.sum([isinstance(node.op, self.sub)
         for node in topo_]) == 1

    gval = f()
    good = numpy.zeros_like(data)
    good[1, 0] = numpy.exp(data[1, 0])
    self.assertTrue(numpy.allclose(gval, good), (gval, good))
def test_ok_list(self):
    """Advanced indexing with list and TensorConstant indices of various
    lengths and ranks, including repeated and negative indices."""
    for data, idx in [(rand(4), [1, 0]),
                      (rand(4, 5), [2, 3]),
                      (rand(4, 2, 3), [0, 3]),
                      (rand(4, 2, 3), [3, 3, 1, 1, 2, 2, 0, 0]),
                      (rand(4, 2, 3), [3, 3, 1, 1, 2, 2, 0, 0,
                                       -1, -2, -3, -4]),
                      # Test 4 dims as gpu code use another algo
                      # in that case This new algo is not as much
                      # optimized for that case.
                      (rand(4, 4, 2, 3), [3,
                          3, 1, 1, 2, 2, 0, 0, -1, -2, -3, -4]),
                      # Test with TensorConstant index.
                      (rand(4, 2, 3),
                       theano.tensor.constant([3, 3, 1, 1, 2, 2, 0, 0])),
                      ]:
        data = numpy.asarray(data, dtype=self.dtype)
        n = self.shared(data)
        t = n[idx]

        # We test against AdvancedSubtensor1 since data may have been
        # transferred to the cpu.
        self.assertTrue(isinstance(t.owner.op, tensor.AdvancedSubtensor1))

        val = self.eval_output_and_check(t, list=True)
        if isinstance(idx, list):
            good = data[idx]
        else:
            good = data[idx.data]
        self.assertTrue(val.ndim == data.ndim)
        self.assertTrue(numpy.allclose(val, good), (val, good))

        # Test reuse of output memory
        # NOTE(review): self.adv_sub1 appears to be an Op *class*, so
        # isinstance(class, class) looks always-False here — confirm
        # whether `self.adv_sub1 is tensor.AdvancedSubtensor1` was meant.
        if isinstance(self.adv_sub1, tensor.AdvancedSubtensor1):
            op = self.adv_sub1()
            # When idx is a TensorConstant.
            if hasattr(idx, "data"):
                idx = idx.data
            test_out = [[None]]
            op.perform(None, [data, idx], test_out)
            out1 = test_out[0][0]
            op.perform(None, [data, idx], test_out)
            out2 = test_out[0][0]
            # The second perform call should reuse the output buffer.
            assert out1 is out2
def test_err_invalid_list(self):
    """Indexing a 0-d shared variable with a list must raise TypeError."""
    scalar_var = self.shared(numpy.asarray(5, dtype=self.dtype))
    bad_index = [0, 0]
    self.assertRaises(TypeError, scalar_var.__getitem__, bad_index)
def test_err_invalid_2list_dtype(self):
    """A float-typed index list in advanced indexing must raise TypeError."""
    mat_var = self.shared(numpy.ones((3, 3), dtype=self.dtype) * 5)
    bad_index = ([0., 0], [1, 1])
    self.assertRaises(TypeError, mat_var.__getitem__, bad_index)
def test_err_bound_list(self):
    """Out-of-bounds list indices must raise IndexError at run time."""
    n = self.shared(numpy.ones((2, 3), dtype=self.dtype) * 5)
    l = lvector()
    t = n[l]
    # We test against AdvancedSubtensor1 since data may have been
    # transferred to the cpu.
    self.assertTrue(isinstance(t.owner.op, tensor.AdvancedSubtensor1))

    f = self.function([l], t, op=self.adv_sub1)
    topo = f.maker.fgraph.toposort()
    topo_ = [node for node in topo if not isinstance(node.op,
         self.ignore_topo)]
    assert len(topo_) == 1
    self.assertTrue(isinstance(topo_[0].op, self.adv_sub1))

    # Both positive and negative out-of-bounds indices must fail.
    for shp in [[0, 4], [0, -3], [-10]]:
        self.assertRaises(IndexError, f, shp)
def test_adv_sub1_broadcast(self):
    """AdvancedSubtensor1 on a (True, False)-broadcastable shared matrix:
    index 0 works, index 1 is out of bounds for the length-1 axis."""
    ones = numpy.ones((1, 3), dtype=self.dtype)
    n = self.shared(ones * 5, broadcastable=(True, False))
    idx = tensor.lvector()
    t = n[idx]
    self.assertTrue(isinstance(t.owner.op, tensor.AdvancedSubtensor1))

    f = self.function([idx], t, op=self.adv_sub1)
    topo = f.maker.fgraph.toposort()
    topo_ = [node for node in topo if not isinstance(node.op,
         self.ignore_topo)]
    assert len(topo_) == 1
    self.assertTrue(isinstance(topo_[0].op, self.adv_sub1))
    self.assertTrue(numpy.allclose(f([0]), ones[0] * 5))
    self.assertRaises(IndexError, f, [0, 1])
def test_adv_sub1_idx_broadcast(self):
    # The idx can be a broadcastable vector.
    ones = numpy.ones((4, 3), dtype=self.dtype)
    n = self.shared(ones * 5)
    idx = tensor.TensorType(dtype='int64', broadcastable=(True,))()
    assert idx.type.broadcastable == (True,)
    t = n[idx]
    self.assertTrue(isinstance(t.owner.op, tensor.AdvancedSubtensor1))

    f = self.function([idx], t, op=self.adv_sub1)
    topo = f.maker.fgraph.toposort()
    topo_ = [node for node in topo if not isinstance(node.op,
         self.ignore_topo)]
    assert len(topo_) == 1
    self.assertTrue(isinstance(topo_[0].op, self.adv_sub1))
    self.assertTrue(numpy.allclose(f([0]), ones[0] * 5))
def test_shape_i_const(self):
    """Shape of constant-bound slices must match NumPy, with the Subtensor
    op fully optimized away (shape computed symbolically)."""
    # Each axis is treated independently by shape_i/shape operators
    mode_opt = self.mode.including("fast_run")
    data = self.shared(numpy.array(numpy.arange(5), dtype=self.dtype))
    for start in [None] + [-8, -5, -1, 0, 1, 5, 8]:
        outs = []
        shapes = []
        for stop in [None] + [-8, -5, -1, 0, 1, 5, 8]:
            for step in [None] + [-3, -1, 2]:
                outs += [data[start:stop:step].shape]
                shapes += [data.get_value(
                    borrow=True)[start:stop:step].shape]
        f = self.function([], outs, mode=mode_opt,
                          op=self.ops, N=0)
        t_shapes = f()
        for t_shape, shape in zip(t_shapes, shapes):
            assert numpy.all(t_shape == shape)
        # The Subtensor op itself must have been optimized out.
        assert tensor.Subtensor not in [x.op for x in
                                        f.maker.fgraph.toposort()]
def test_shape_i_scalar(self):
    """Shape of symbolic-scalar-bound slices must match NumPy without any
    Subtensor op in the compiled graph."""
    # Each axis is treated independently by shape_i/shape operators
    mode_opt = self.mode.including("fast_run")

    v_data = numpy.array(numpy.arange(5), dtype=self.dtype)
    t_data = self.shared(v_data)
    start = tensor.iscalar('b')
    stop = tensor.iscalar('e')
    step = tensor.iscalar('s')
    f = self.function([start, stop, step],
                      t_data[start:stop:step].shape,
                      mode=mode_opt,
                      op=self.ops,
                      N=0)
    assert tensor.Subtensor not in [x.op for x in f.maker.
        fgraph.toposort()]
    for start in [-8, -5, -4, -1, 0, 1, 4, 5, 8]:
        for stop in [-8, -5, -4, -1, 0, 1, 4, 5, 8]:
            for step in [-3, -1, 2, 5]:
                assert numpy.all(f(start, stop, step) ==
                                 v_data[start:stop:step].shape)
def test_slice_canonical_form_0(self):
    """get_canonical_form_slice with symbolic start, stop and step:
    applying the canonical slice then the returned direction flip must
    reproduce NumPy's a[start:stop:step]."""
    start = tensor.iscalar('b')
    stop = tensor.iscalar('e')
    step = tensor.iscalar('s')
    length = tensor.iscalar('l')
    cnf = get_canonical_form_slice(slice(start, stop, step), length)
    f = self.function([start, stop, step, length], [
        tensor.as_tensor_variable(cnf[0].start),
        tensor.as_tensor_variable(cnf[0].stop),
        tensor.as_tensor_variable(cnf[0].step),
        tensor.as_tensor_variable(cnf[1])], N=0, op=self.ops)

    length = 5
    a = numpy.arange(length)
    for start in [-8, -5, -4, -1, 0, 1, 4, 5, 8]:
        for stop in [-8, -5, -4, -1, 0, 1, 4, 5, 8]:
            for step in [-6, -3, -1, 2, 5]:
                out = f(start, stop, step, length)
                # cnf[1] is the direction: re-slice with it afterwards.
                t_out = a[out[0]:out[1]:out[2]][::out[3]]
                v_out = a[start:stop:step]
                assert numpy.all(t_out == v_out)
                assert numpy.all(t_out.shape == v_out.shape)
def test_slice_canonical_form_1(self):
    """Canonical form with start=None (symbolic stop and step)."""
    stop = tensor.iscalar('e')
    step = tensor.iscalar('s')
    length = tensor.iscalar('l')
    cnf = get_canonical_form_slice(slice(None, stop, step), length)
    f = self.function([stop, step, length], [
        tensor.as_tensor_variable(cnf[0].start),
        tensor.as_tensor_variable(cnf[0].stop),
        tensor.as_tensor_variable(cnf[0].step),
        tensor.as_tensor_variable(cnf[1])], N=0, op=self.ops)

    length = 5
    a = numpy.arange(length)
    for stop in [-8, -5, -4, -1, 0, 1, 4, 5, 8]:
        for step in [-6, -3, -1, 2, 5]:
            out = f(stop, step, length)
            t_out = a[out[0]:out[1]:out[2]][::out[3]]
            v_out = a[:stop:step]
            assert numpy.all(t_out == v_out)
            assert numpy.all(t_out.shape == v_out.shape)
def test_slice_canonical_form_2(self):
    """Canonical form with stop=None (symbolic start and step)."""
    start = tensor.iscalar('b')
    step = tensor.iscalar('s')
    length = tensor.iscalar('l')
    cnf = get_canonical_form_slice(slice(start, None, step), length)
    f = self.function([start, step, length], [
        tensor.as_tensor_variable(cnf[0].start),
        tensor.as_tensor_variable(cnf[0].stop),
        tensor.as_tensor_variable(cnf[0].step),
        tensor.as_tensor_variable(cnf[1])], N=0, op=self.ops)

    length = 5
    a = numpy.arange(length)
    for start in [-8, -5, -4, -1, 0, 1, 4, 5, 8]:
        for step in [-6, -3, -1, 2, 5]:
            out = f(start, step, length)
            t_out = a[out[0]:out[1]:out[2]][::out[3]]
            v_out = a[start:None:step]
            assert numpy.all(t_out == v_out)
            assert numpy.all(t_out.shape == v_out.shape)
def test_slice_canonical_form_3(self):
    """Canonical form with step=None (symbolic start and stop)."""
    start = tensor.iscalar('b')
    stop = tensor.iscalar('e')
    length = tensor.iscalar('l')
    cnf = get_canonical_form_slice(slice(start, stop, None), length)
    f = self.function([start, stop, length], [
        tensor.as_tensor_variable(cnf[0].start),
        tensor.as_tensor_variable(cnf[0].stop),
        tensor.as_tensor_variable(cnf[0].step),
        tensor.as_tensor_variable(cnf[1])], N=0, op=self.ops)

    length = 5
    a = numpy.arange(length)
    for start in [-8, -5, -4, -1, 0, 1, 4, 5, 8]:
        for stop in [-8, -5, -4, -1, 0, 1, 4, 5, 8]:
            out = f(start, stop, length)
            t_out = a[out[0]:out[1]:out[2]][::out[3]]
            v_out = a[start:stop:None]
            assert numpy.all(t_out == v_out)
            assert numpy.all(t_out.shape == v_out.shape)
def test_slice_canonical_form_4(self):
    """Canonical form with only a symbolic step."""
    step = tensor.iscalar('s')
    length = tensor.iscalar('l')
    cnf = get_canonical_form_slice(slice(None, None, step), length)
    f = self.function([step, length], [
        tensor.as_tensor_variable(cnf[0].start),
        tensor.as_tensor_variable(cnf[0].stop),
        tensor.as_tensor_variable(cnf[0].step),
        tensor.as_tensor_variable(cnf[1])], N=0, op=self.ops)

    length = 5
    a = numpy.arange(length)
    for step in [-6, -3, -1, 2, 5]:
        out = f(step, length)
        t_out = a[out[0]:out[1]:out[2]][::out[3]]
        v_out = a[None:None:step]
        assert numpy.all(t_out == v_out)
        assert numpy.all(t_out.shape == v_out.shape)
def test_slice_canonical_form_5(self):
    """Canonical form with only a symbolic start."""
    start = tensor.iscalar('b')
    length = tensor.iscalar('l')
    cnf = get_canonical_form_slice(slice(start, None, None), length)
    f = self.function([start, length], [
        tensor.as_tensor_variable(cnf[0].start),
        tensor.as_tensor_variable(cnf[0].stop),
        tensor.as_tensor_variable(cnf[0].step),
        tensor.as_tensor_variable(cnf[1])], N=0, op=self.ops)

    length = 5
    a = numpy.arange(length)
    for start in [-8, -5, -4, -1, 0, 1, 4, 5, 8]:
        out = f(start, length)
        t_out = a[out[0]:out[1]:out[2]][::out[3]]
        v_out = a[start:None:None]
        assert numpy.all(t_out == v_out)
        assert numpy.all(t_out.shape == v_out.shape)
def test_slice_canonical_form_6(self):
    """Canonical form with only a symbolic stop."""
    stop = tensor.iscalar('e')
    length = tensor.iscalar('l')
    cnf = get_canonical_form_slice(slice(None, stop, None), length)
    f = self.function([stop, length], [
        tensor.as_tensor_variable(cnf[0].start),
        tensor.as_tensor_variable(cnf[0].stop),
        tensor.as_tensor_variable(cnf[0].step),
        tensor.as_tensor_variable(cnf[1])], N=0, op=self.ops)

    length = 5
    a = numpy.arange(length)
    for stop in [-8, -5, -4, -1, 0, 1, 4, 5, 8]:
        out = f(stop, length)
        t_out = a[out[0]:out[1]:out[2]][::out[3]]
        v_out = a[None:stop:None]
        assert numpy.all(t_out == v_out)
        assert numpy.all(t_out.shape == v_out.shape)
def grad_list_(self, idxs, data):
    """Helper: check gradients (and grad-of-grad) of t = n[idx] for every
    index list in `idxs`, including repeated indices."""
    n = self.shared(data)

    for idx in idxs:
        # Should stay on the cpu.
        idx_ = _shared(numpy.asarray(idx))

        t = n[idx_]
        gn = theano.tensor.grad(theano.tensor.sum(theano.tensor.exp(t)), n)
        f = self.function([], [gn, gn.shape], op=self.adv_incsub1)
        topo = f.maker.fgraph.toposort()
        if not self.fast_compile:
            # The inplace version of the op should have been selected.
            assert any([isinstance(node.op, self.
                adv_incsub1) and node.op.inplace for node in topo])
        else:
            assert any([isinstance(node.op, self.
                adv_incsub1) for node in topo])
        assert any([isinstance(node.op, self.adv_sub1) for node in topo])
        gval, gshape = f()
        good = numpy.zeros_like(data)
        # doesn't work when the same index is used many times
        # good[idx] += numpy.exp(data[idx])
        for i in idx:
            good[i] += numpy.exp(data[i])
        self.assertTrue(gval.ndim == data.ndim)
        self.assertTrue(numpy.allclose(gval, good), (gval, good))
        self.assertTrue(numpy.allclose(gshape, data.shape))

        def fct(t):
            return theano.tensor.sum(t[idx_])
        utt.verify_grad(fct, [data])

        # Test the grad of the grad (i.e. AdvancedIncSubtensor1.grad)
        def fct2(t):
            return theano.tensor.grad(theano.tensor.sum(t[idx_]), t)
        utt.verify_grad(fct2, [data])

        # Test shape of AdvancedIncSubtensor1 and AdvancedSubtensor1
        if not self.fast_compile:
            ops = (self.adv_incsub1, self.adv_sub1)
        else:
            ops = self.ops
        # Only done once (for the first index) to keep the test fast.
        if idx is idxs[0]:
            f = self.function([], [gn.shape, n[idx_].shape],
                              op=ops,
                              N=0, N_fast=2)
            f()
def test_wrong_exception_regression(self):
    """Regression check: __getitem__ with symbolic slice bounds must raise
    TypeError, never NotImplementedError."""
    a = fscalar()
    b = fscalar()
    c = vector()
    # Each lambda builds one of the offending slice expressions.
    for build_slice in (lambda: c[a:b], lambda: c[a:], lambda: c[:b]):
        try:
            build_slice()
        except NotImplementedError:
            self.fail()
        except TypeError:
            pass
def test_grad_list(self):
    """Exercise grad_list_ on 1-d, 2-d and 3-d data with many index lists
    (including lists with duplicate entries)."""
    data = rand(4)
    data = numpy.asarray(data, dtype=self.dtype)
    idxs = [[i] for i in range(data.shape[0])]
    for i in range(data.shape[0]):
        for j in range(0, data.shape[0], 2):
            idxs.append([i, j, (i + 1) % data.shape[0]])
    self.grad_list_(idxs, data)

    data = rand(4, 3)
    data = numpy.asarray(data, dtype=self.dtype)
    self.grad_list_(idxs, data)

    data = rand(4, 3, 2)
    data = numpy.asarray(data, dtype=self.dtype)
    self.grad_list_(idxs, data)
def test_shape_list(self):
    """The symbolic shape of n[idx] must match NumPy's for list indices."""
    #TODO for all type of subtensor shape
    for data, idx in [(rand(4), [1, 0]),
                      (rand(4, 2), [2, 3]),
                      (rand(4, 2, 3), [0, 3]),
                      (rand(4, 2, 3), [3, 3, 1, 2, 2, ]),
                      ]:
        data = numpy.asarray(data, dtype=self.dtype)
        n = self.shared(data)
        t = n[idx]
        f = self.function([], t.shape, op=self.ops, N=0, N_fast=1)
        val = f()
        self.assertTrue(numpy.allclose(val, data[idx].shape))
def test_grad_advanced_inc_subtensor(self):
    """verify_grad of (a[s] + b).sum() w.r.t. both operands, for vector,
    matrix and single-element index cases."""
    def inc_slice(*s):
        def just_numeric_args(a, b):
            cost = (a[s] + b).sum()
            cost_wrt_a = theano.tensor.grad(cost, a)
            cost_wrt_b = theano.tensor.grad(cost, b)
            grads = cost_wrt_a.sum() + cost_wrt_b.sum()
            return grads
        return just_numeric_args

    # vector
    utt.verify_grad(
        inc_slice(slice(2, 4, None)),
        (numpy.asarray([0, 1, 2, 3, 4, 5.]), numpy.asarray([9, 9.]),))

    # matrix
    utt.verify_grad(
        inc_slice(slice(1, 2, None), slice(None, None, None)),
        (numpy.asarray([[0, 1], [2, 3], [4, 5.]]),
         numpy.asarray([[9, 9.]]),))

    # single element
    utt.verify_grad(
        inc_slice(2, 1),
        (numpy.asarray([[0, 1], [2, 3], [4, 5.]]), numpy.asarray(9.),))
def test_advanced_inc_and_set(self):
    """
    Test advanced increment and set.

    Builds every combination of set/inc, inplace/not, data rank 1-4 and
    increment rank, then compiles a single function computing all outputs
    and compares them against a NumPy reference computed index-by-index.
    """
    rng = numpy.random.RandomState(seed=utt.fetch_seed())
    all_inputs_var = []
    all_inputs_num = []
    all_outputs_var = []
    all_outputs_num = []
    for set_instead_of_inc in (False, True):
        for inplace in (False, True):
            for data_shape in ((10,), (4, 5), (1, 2, 3), (4, 5, 6, 7)):
                data_n_dims = len(data_shape)
                data_size = numpy.product(data_shape)
                # Corresponding numeric variable.
                data_num_init = numpy.arange(data_size, dtype=self.dtype)
                data_num_init = data_num_init.reshape(data_shape)
                inc_shapes = [data_shape[i:]
                              for i in xrange(0, len(data_shape) + 1)]
                for inc_shape in inc_shapes:
                    inc_n_dims = len(inc_shape)
                    # We copy the numeric value to be 100% sure there is no
                    # risk of accidentally sharing it.
                    data_num = data_num_init.copy()
                    # Symbolic variable to be incremented.
                    # We create a new one every time in order not to
                    # have duplicated variables in the function's inputs
                    data_var = tensor.tensor(
                        broadcastable=[False] * data_n_dims,
                        dtype=self.dtype)
                    # Symbolic variable with rows to be incremented.
                    idx_var = theano.tensor.vector(dtype='int64')
                    n_to_inc = rng.randint(data_shape[0])
                    # Corresponding numeric variable.
                    idx_num = rng.randint(0, data_shape[0], n_to_inc)
                    idx_num = idx_num.astype('int64')
                    # Symbolic variable with increment value.
                    inc_var = tensor.tensor(
                        broadcastable=[False] * inc_n_dims,
                        dtype=self.dtype)
                    # Trick for the case where `inc_shape` is the same as
                    # `data_shape`: what we actually want is the first
                    # shape element to be equal to the number of rows to
                    # increment.
                    if len(inc_shape) == len(data_shape):
                        inc_shape = (n_to_inc,) + inc_shape[1:]
                    inc_size = numpy.product(inc_shape)
                    # Corresponding numeric variable.
                    inc_num = rng.uniform(size=inc_size).astype(self.dtype)
                    inc_num = inc_num.reshape(inc_shape)
                    # Result of the incrementation.
                    # (i) Theano
                    if set_instead_of_inc:
                        op = set_subtensor
                    else:
                        op = inc_subtensor
                    output = op(data_var[idx_var], inc_var,
                                inplace=inplace)
                    # (ii) Numpy (note that Numpy increments only once
                    # duplicated indices, so we cannot directly use +=).
                    data_copy = data_num.copy()
                    for j, idx in enumerate(idx_num):
                        if len(inc_shape) == len(data_shape):
                            # Special case where there is no broadcasting.
                            if set_instead_of_inc:
                                data_copy[idx] = inc_num[j]
                            else:
                                data_copy[idx] += inc_num[j]
                        else:
                            if set_instead_of_inc:
                                data_copy[idx] = inc_num
                            else:
                                data_copy[idx] += inc_num
                    data_var = theano.In(data_var, mutable=True)
                    # Remember data for the Theano function (see below).
                    all_inputs_var += [data_var, idx_var, inc_var]
                    all_inputs_num += [data_num, idx_num, inc_num]
                    all_outputs_var.append(output)
                    all_outputs_num.append(data_copy)
                    if False:  # Enable for debugging purpose.
                        f = self.function([data_var, idx_var, inc_var],
                                          output, accept_inplace=inplace,
                                          op=self.adv_incsub1)
                        if inplace:
                            # Ensure calling `f` will not alter `data_num`.
                            data_num = data_num.copy()
                        f_out = f(data_num.copy(), idx_num, inc_num)
                        assert numpy.allclose(f_out, data_copy)
                        if not inplace:
                            # Sanity check: `data_num` should be intact.
                            assert (data_num == data_num_init).all()

    # Actual test (we compile a single Theano function to make it faster).
    orig_warn = theano.config.warn.gpu_set_subtensor1
    try:
        theano.config.warn.gpu_set_subtensor1 = False
        f = self.function(all_inputs_var, all_outputs_var,
                          accept_inplace=True,
                          op=self.adv_incsub1,
                          N=len(all_outputs_var))
    finally:
        theano.config.warn.gpu_set_subtensor1 = orig_warn
    f_outs = f(*all_inputs_num)
    assert len(f_outs) == len(all_outputs_num)
    for f_out, output_num in izip(f_outs, all_outputs_num):
        # NB: if this assert fails, it will probably be easier to debug if
        # you enable the debug code above.
        assert numpy.allclose(f_out, output_num)
def test_adv_constant_arg(self):
    # Test case provided (and bug detected, gh-607) by John Salvatier
    m = matrix('m')
    gv = numpy.array([0, 1, 3])
    g = theano.tensor.constant(gv)
    i = theano.tensor.lvector('i')

    # s1 used to fail
    s1 = m[gv, i]
    s2 = m[g, i]
    # A raw ndarray index must build the same graph as the equivalent
    # TensorConstant index.
    assert gof.graph.is_same_graph(s1, s2)
def test_adv1_inc_sub_notlastdim(self):
    # Test that taking 1-dimensional advanced indexing
    # over a dimension that's not the first (outer-most) works.
    m = matrix('m')
    i = lvector('i')

    m1 = set_subtensor(m[:, i], 0)
    m2 = inc_subtensor(m[:, i], 1)
    f = theano.function([m, i], [m1, m2])

    m_val = rand(3, 5)
    i_val = randint_ranged(min=0, max=4, shape=(4,))
    m1_ref = m_val.copy()
    m2_ref = m_val.copy()

    m1_val, m2_val = f(m_val, i_val)
    # Reference computed index-by-index (handles duplicate indices).
    for idx in i_val:
        m1_ref[:, idx] = 0
        m2_ref[:, idx] += 1

    assert numpy.allclose(m1_val, m1_ref), (m1_val, m1_ref)
    assert numpy.allclose(m2_val, m2_ref), (m2_val, m2_ref)
def test_adv1_inc_sub_notlastdim_2didx(self):
    # Test that taking 1-dimensional advanced indexing
    # over a dimension that's not the first (outer-most) works,
    # if the index is a matrix.
    m = matrix('m')
    i = lmatrix('i')

    m1 = set_subtensor(m[:, i], 0)
    m2 = inc_subtensor(m[:, i], 1)

    f = theano.function([m, i], [m1, m2])

    m_val = rand(5, 7)
    i_val = randint_ranged(min=0, max=6, shape=(4, 2))
    m1_ref = m_val.copy()
    m2_ref = m_val.copy()

    m1_val, m2_val = f(m_val, i_val)
    # Flatten the index matrix for the element-wise reference update.
    for idx in i_val.ravel():
        m1_ref[:, idx] = 0
        m2_ref[:, idx] += 1

    assert numpy.allclose(m1_val, m1_ref), (m1_val, m1_ref)
    assert numpy.allclose(m2_val, m2_ref), (m2_val, m2_ref)
class TestIncSubtensor1(unittest.TestCase):
    """Tests for inc_subtensor / set_subtensor with 1-d advanced indices."""
    # test inc_subtensor
    # also tests set_subtensor

    def setUp(self):
        self.s = tensor.iscalar()
        self.v = tensor.fvector()
        self.m = tensor.dmatrix()
        self.t = tensor.ctensor3()

        self.adv1q = tensor.lvector()  # advanced 1d query

    def test_cant_adv_idx_into_scalar(self):
        """Advanced indexing into a 0-d variable must raise TypeError."""
        self.assertRaises(TypeError, lambda: self.s[self.adv1q])

    def test_index_into_vec_w_vec(self):
        """v[int-vector] keeps the vector's type."""
        a = self.v[self.adv1q]
        assert a.type == self.v.type

    def test_1d_set_adv_selection(self):
        a = set_subtensor(self.v[self.adv1q], self.v[self.adv1q])

        assert a.type == self.v.type

        #TODO: compile a function and verify that the subtensor is removed
        #      completely, because the whole expression is redundant.

        f = theano.function([self.v, self.adv1q], a, allow_input_downcast=True)
        aval = f([.4, .9, .1], [1, 2])
        assert numpy.allclose(aval, [.4, 0.9, 0.1])

    def test_1d_inc_adv_selection(self):
        a = inc_subtensor(self.v[self.adv1q], self.v[self.adv1q])

        assert a.type == self.v.type
        f = theano.function([self.v, self.adv1q], a, allow_input_downcast=True)
        aval = f([.4, .9, .1], [1, 2])
        # Selected entries are doubled, the rest untouched.
        assert numpy.allclose(aval, [.4, 1.8, 0.2])

    def test_1d_inc_adv_selection_w_broadcasting(self):
        a = inc_subtensor(self.v[self.adv1q], 3.0)

        assert a.type == self.v.type
        f = theano.function([self.v, self.adv1q], a, allow_input_downcast=True)
        aval = f([.4, .9, .1], [1, 2])
        assert numpy.allclose(aval, [.4, 3.9, 3.1])

    def test_assigning_matrix_to_vector_selection(self):
        """Incrementing a vector selection by a matrix must raise TypeError."""
        self.assertRaises(TypeError,
                          lambda: inc_subtensor(self.v[self.adv1q], fmatrix()))
# SkipTest raised by tests that require NumPy's inplace_increment
# (available starting with NumPy 1.8); see the lazy import at file top.
inplace_increment_missing = SkipTest(
    "inc_subtensor with advanced indexing not enabled. "
    "Installing NumPy 1.8 or the latest development version "
    "should make that feature available.")
class TestAdvancedSubtensor(unittest.TestCase):
    """Tests for full advanced indexing (multiple index tensors)."""
    # test inc_subtensor
    # also tests set_subtensor

    def setUp(self):
        self.s = iscalar()
        self.v = fvector()
        self.m = dmatrix()
        self.t = ctensor3()

        self.ix1 = lvector()  # advanced 1d query
        self.ix12 = lvector()
        self.ix2 = lmatrix()

    def test_cant_adv_idx_into_scalar(self):
        """Advanced indexing into a 0-d variable must raise TypeError."""
        self.assertRaises(TypeError, lambda: self.s[self.ix1])

    def test_index_into_vec_w_vec(self):
        a = self.v[self.ix1]
        assert a.type == self.v.type, (a.type, self.v.type)

    def test_index_into_vec_w_matrix(self):
        # Indexing a vector with a matrix takes the index's shape.
        a = self.v[self.ix2]
        assert a.dtype == self.v.dtype, (a.dtype, self.v.dtype)
        assert a.broadcastable == self.ix2.broadcastable, (
            a.broadcastable, self.ix2.broadcastable)

    def test_inc_adv_subtensor_w_matrix(self):
        if inplace_increment is None:
            raise inplace_increment_missing

        subt = self.v[self.ix2]
        a = inc_subtensor(subt, subt)

        assert a.type == self.v.type, (a.type, self.v.type)
        f = theano.function([self.v, self.ix2], a, allow_input_downcast=True)
        aval = f([.4, .9, .1], [[1, 2],
                                [1, 2]])
        # Each duplicated index increments again (unlike plain +=).
        assert numpy.allclose(aval, [.4, .9 * 3, .1 * 3])

    def test_inc_adv_subtensor_w_2vec(self):
        if inplace_increment is None:
            raise inplace_increment_missing

        subt = self.m[self.ix1, self.ix12]
        a = inc_subtensor(subt, subt)

        typ = TensorType(self.m.type.dtype, self.ix2.type.broadcastable)
        assert a.type == typ, (a.type, typ)
        f = theano.function([self.m, self.ix1, self.ix12], a,
                            allow_input_downcast=True)
        aval = f([[.4, .9, .1],
                  [5, 6, 7],
                  [.5, .3, .15]],
                 [1, 2, 1],
                 [0, 1, 0])
        assert numpy.allclose(aval,
                [[.4, .9, .1],
                 [5 * 3, 6, 7],
                 [.5, .3 * 2, .15]]), aval

    def test_inc_adv_subtensor_with_broadcasting(self):
        if inplace_increment is None:
            raise inplace_increment_missing

        # Scalar increment broadcast over the selected positions.
        a = inc_subtensor(self.m[self.ix1, self.ix12], 2.1)

        assert a.type == self.m.type, (a.type, self.m.type)
        f = theano.function([self.m, self.ix1, self.ix12], a,
                            allow_input_downcast=True)
        aval = f([[.4, .9, .1],
                  [5, 6, 7],
                  [.5, .3, .15]],
                 [1, 2, 1],
                 [0, 1, 0])
        assert numpy.allclose(aval,
                [[.4, .9, .1],
                 [5 + 2.1 * 2, 6, 7],
                 [.5, .3 + 2.1, .15]]), aval

    def test_inc_adv_subtensor_with_index_broadcasting(self):
        if inplace_increment is None:
            raise inplace_increment_missing

        # The vector index broadcasts against the matrix index.
        a = inc_subtensor(self.m[self.ix1, self.ix2], 2.1)

        assert a.type == self.m.type, (a.type, self.m.type)
        f = theano.function([self.m, self.ix1, self.ix2], a,
                            allow_input_downcast=True)
        aval = f([[.4, .9, .1],
                  [5, 6, 7],
                  [.5, .3, .15]],
                 [0, 2, 0],
                 [[0, 1, 0],
                  [2, 2, 2]])
        assert numpy.allclose(aval,
                [[.4 + 2 * 2.1, .9, .1 + 2 * 2.1],
                 [5, 6, 7],
                 [.5, .3 + 2.1, .15 + 2.1]]), aval
class TestInferShape(utt.InferShapeTester):
    """infer_shape tests for IncSubtensor, AdvancedIncSubtensor1 and
    AdvancedIncSubtensor, over scalar/vector/matrix/4-d operands."""

    def test_infer_shape(self):
        # IncSubtensor
        admat = dmatrix()
        bdmat = dmatrix()
        advec = dvector()
        adscal = dscalar()
        admat_val = rand(5, 4)
        self._compile_and_check([admat, bdmat],
                                [inc_subtensor(admat[2:4], bdmat)],
                                [admat_val, [[1, 2, 3, 4]]], IncSubtensor)

        self._compile_and_check([admat, advec],
                                [inc_subtensor(admat[2], advec)],
                                [admat_val, [1, 2, 3, 4]], IncSubtensor)

        self._compile_and_check([admat, adscal],
                                [inc_subtensor(admat[2, 3], adscal)],
                                [admat_val, 1], IncSubtensor)

        self._compile_and_check([admat, adscal],
                                [inc_subtensor(admat[1:3, 2], adscal)],
                                [admat_val, 1], IncSubtensor)

        self._compile_and_check([admat, bdmat],
                                [set_subtensor(admat[2:4], bdmat)],
                                [admat_val, [[1, 2, 3, 4]]], IncSubtensor)

        self._compile_and_check([admat, advec],
                                [set_subtensor(admat[2], advec)],
                                [admat_val, [1, 2, 3, 4]], IncSubtensor)

        self._compile_and_check([admat, adscal],
                                [set_subtensor(admat[2, 3], adscal)],
                                [admat_val, 1], IncSubtensor)

        self._compile_and_check([admat, adscal],
                                [set_subtensor(admat[1:3, 2], adscal)],
                                [admat_val, 1], IncSubtensor)

        # 4-d operands.
        adtens4 = dtensor4()
        bdtens4 = dtensor4()
        adtens4_val = rand(3, 4, 2, 5)
        self._compile_and_check([adtens4, bdtens4],
                                [inc_subtensor(adtens4[::, 2:4, ::, ::], bdtens4)],
                                [adtens4_val, [[[[1, 2, 3, 4, 5]]]]], IncSubtensor,
                                warn=False)

        self._compile_and_check([adtens4, bdmat],
                                [inc_subtensor(adtens4[2, 2:4, 1, ::], bdmat)],
                                [adtens4_val, [[1, 2, 3, 4, 5]]], IncSubtensor)

        self._compile_and_check([adtens4, advec],
                                [inc_subtensor(adtens4[0, 1, ::, 4], advec)],
                                [adtens4_val, [1, 2]], IncSubtensor)

        self._compile_and_check([adtens4, adscal],
                                [inc_subtensor(adtens4[1:3, 1, ::, 2:4], adscal)],
                                [adtens4_val, 1], IncSubtensor)

        self._compile_and_check([adtens4, bdtens4],
                                [set_subtensor(adtens4[::, 2:4, ::, ::], bdtens4)],
                                [adtens4_val, [[[[1, 2, 3, 4, 5]]]]], IncSubtensor,
                                warn=False)

        self._compile_and_check([adtens4, bdmat],
                                [set_subtensor(adtens4[2, 2:4, 1, ::], bdmat)],
                                [adtens4_val, [[1, 2, 3, 4, 5]]], IncSubtensor)

        self._compile_and_check([adtens4, advec],
                                [set_subtensor(adtens4[0, 1, ::, 4], advec)],
                                [adtens4_val, [1, 2]], IncSubtensor)

        self._compile_and_check([adtens4, adscal],
                                [set_subtensor(adtens4[1:3, 1, ::, 2:4], adscal)],
                                [adtens4_val, 1], IncSubtensor)

        # AdvancedIncSubtensor1
        admat = dmatrix()
        bdmat = dmatrix()
        advec = dvector()
        adscal = dscalar()
        admat_val = rand(5, 4)
        aivec_val = [2, 3]
        self._compile_and_check([admat, bdmat],
                                [set_subtensor(admat[aivec_val], bdmat)],
                                [admat_val, [[1, 2, 3, 4]]], AdvancedIncSubtensor1)

        aivec_val = [1, 3, 2]
        self._compile_and_check([admat, advec],
                                [set_subtensor(admat[aivec_val], advec)],
                                [admat_val, [1, 2, 3, 4]], AdvancedIncSubtensor1)

        aivec_val = [0, 3, 0]
        self._compile_and_check([admat, adscal],
                                [set_subtensor(admat[aivec_val], adscal)],
                                [admat_val, 1], AdvancedIncSubtensor1)

        bdtens4 = dtensor4()
        adtens4_val = rand(4, 3, 2, 5)
        aivec_val = [2, 3]
        self._compile_and_check([adtens4, bdtens4],
                                [set_subtensor(adtens4[aivec_val], bdtens4)],
                                [adtens4_val, [[[[1, 2, 3, 4, 5]]]]],
                                AdvancedIncSubtensor1,
                                warn=False)

        aivec_val = [1, 3, 2]
        self._compile_and_check([adtens4, advec],
                                [set_subtensor(adtens4[aivec_val], advec)],
                                [adtens4_val, [1, 2, 3, 4, 5]],
                                AdvancedIncSubtensor1)

        aivec_val = [0, 3, 0]
        self._compile_and_check([adtens4, adscal],
                                [set_subtensor(adtens4[aivec_val], adscal)],
                                [adtens4_val, 1],
                                AdvancedIncSubtensor1)

        aivec_val = [2, 3]
        self._compile_and_check([admat, bdmat],
                                [inc_subtensor(admat[aivec_val], bdmat)],
                                [admat_val, [[1, 2, 3, 4], [5, 6, 7, 8]]],
                                AdvancedIncSubtensor1)

        aivec_val = [1, 3, 2]
        self._compile_and_check([admat, advec],
                                [inc_subtensor(admat[aivec_val], advec)],
                                [admat_val, [1, 2, 3, 4]], AdvancedIncSubtensor1)

        aivec_val = [0, 3, 0]
        self._compile_and_check([admat, adscal],
                                [inc_subtensor(admat[aivec_val], adscal)],
                                [admat_val, 1], AdvancedIncSubtensor1)

        bdtens4 = dtensor4()
        adtens4_val = rand(4, 3, 2, 5)
        aivec_val = [2, 3]
        self._compile_and_check([adtens4, bdtens4],
                                [inc_subtensor(adtens4[aivec_val], bdtens4)],
                                [adtens4_val, [[[[1, 2, 3, 4, 5]]],
                                               [[[6, 7, 8, 9, 10]]]]],
                                AdvancedIncSubtensor1,
                                warn=False)

        aivec_val = [1, 2, 1]
        self._compile_and_check([adtens4, advec],
                                [inc_subtensor(adtens4[aivec_val], advec)],
                                [adtens4_val, [1, 2, 3, 4, 5]],
                                AdvancedIncSubtensor1)

        aivec_val = [0, 3, 0]
        self._compile_and_check([adtens4, adscal],
                                [inc_subtensor(adtens4[aivec_val], adscal)],
                                [adtens4_val, 2],
                                AdvancedIncSubtensor1)

        # AdvancedIncSubtensor
        aivec_val = [1, 3, 2]
        bivec_val = [0, 3, 3]
        advec_val = [23, 24, 25]
        self._compile_and_check([admat, advec],
                                [set_subtensor(admat[aivec_val, bivec_val], advec)],
                                [admat_val, advec_val], AdvancedIncSubtensor)
#
# Slice type and Op. None Type and NoneConst.
#
from theano.gof import Apply, Constant, Op, Type
from theano.gradient import DisconnectedType
class MakeSlice(Op):
    """Op that assembles its start/stop/step inputs into a Python slice."""

    def make_node(self, slc):
        # The members of the Python slice become the node's inputs,
        # converted to int-or-None variables.
        return Apply(self,
                     map(as_int_none_variable,
                         [slc.start, slc.stop, slc.step]),
                     [slicetype()])

    def perform(self, node, inp, out_):
        out, = out_
        out[0] = slice(*inp)

    def __str__(self):
        return self.__class__.__name__

    def __eq__(self, other):
        # All instances of this Op are interchangeable.
        return type(self) == type(other)

    def __hash__(self):
        return hash(type(self))

    def grad(self, inputs, grads):
        # A slice carries no differentiable information.
        return [DisconnectedType()() for i in inputs]

make_slice = MakeSlice()
class SliceType(Type):
    """Graph type whose run-time values are Python slice objects."""

    def filter(self, x, strict=False, allow_downcast=None):
        # No conversion is ever attempted: only genuine slices pass.
        if not isinstance(x, slice):
            raise TypeError('Expected a slice!')
        return x

    def __str__(self):
        return "slice"

slicetype = SliceType()
class NoneTypeT(Type):
    """Graph type whose only run-time value is None."""

    def filter(self, x, strict=False, allow_downcast=None):
        # Guard clause: anything other than None is rejected outright.
        if x is not None:
            raise TypeError('Expected None!')
        return x

    def __str__(self):
        return "None"

# Singleton constant holding the None value.
NoneConst = Constant(NoneTypeT(), None, name='None')
# (Removed: GitLab web-page footer text accidentally captured in this
# file during extraction; it was not part of the source code.)