Commit 5c92b461 authored by Frédéric Bastien, committed by GitHub

Merge pull request #6207 from gvtulder/f-bool-indexing

Add support for boolean indexing
...@@ -1109,42 +1109,9 @@ Like NumPy, Theano distinguishes between *basic* and *advanced* indexing. ...@@ -1109,42 +1109,9 @@ Like NumPy, Theano distinguishes between *basic* and *advanced* indexing.
Theano fully supports basic indexing Theano fully supports basic indexing
(see `NumPy's indexing <http://docs.scipy.org/doc/numpy/reference/arrays.indexing.html>`_) (see `NumPy's indexing <http://docs.scipy.org/doc/numpy/reference/arrays.indexing.html>`_)
and `integer advanced indexing and `integer advanced indexing
<http://docs.scipy.org/doc/numpy/reference/arrays.indexing.html#integer>`_. We do not <http://docs.scipy.org/doc/numpy/reference/arrays.indexing.html#integer>`_.
support boolean masks, as Theano does not have a boolean type (we use int8 for the output of Since version 0.10.0 Theano also supports boolean indexing with boolean
logic operators). NumPy arrays or Theano tensors.
.. testsetup:: indexing
import theano
import numpy as np
NumPy with a mask:
.. doctest:: indexing
>>> n = np.arange(9).reshape(3,3)
>>> n[n > 4]
array([5, 6, 7, 8])
Theano indexing with a "mask" (incorrect approach):
.. doctest:: indexing
>>> t = theano.tensor.arange(9).reshape((3,3))
>>> t[t > 4].eval() # an array with shape (3, 3, 3) # doctest: +ELLIPSIS
Traceback (most recent call last):
...
TypeError: TensorType does not support boolean mask for indexing such as tensor[x==0].
Instead you can use non_zeros() such as tensor[(x == 0).nonzeros()].
If you are indexing on purpose with an int8, please cast it to int16.
Getting a Theano result like NumPy:
.. doctest:: indexing
>>> t[(t > 4).nonzero()].eval()
array([5, 6, 7, 8])
Index-assignment is *not* supported. If you want to do something like ``a[5] Index-assignment is *not* supported. If you want to do something like ``a[5]
= b`` or ``a[5]+=b``, see :func:`theano.tensor.set_subtensor` and :func:`theano.tensor.inc_subtensor` below. = b`` or ``a[5]+=b``, see :func:`theano.tensor.set_subtensor` and :func:`theano.tensor.inc_subtensor` below.
......
...@@ -73,9 +73,11 @@ from .elemwise import (GpuElemwise, GpuDimShuffle, GpuCAReduceCuda, ...@@ -73,9 +73,11 @@ from .elemwise import (GpuElemwise, GpuDimShuffle, GpuCAReduceCuda,
from .subtensor import (GpuIncSubtensor, GpuSubtensor, from .subtensor import (GpuIncSubtensor, GpuSubtensor,
GpuAdvancedSubtensor, GpuAdvancedSubtensor,
GpuAdvancedSubtensor1, GpuAdvancedSubtensor1,
GpuAdvancedBooleanSubtensor,
GpuAdvancedIncSubtensor, GpuAdvancedIncSubtensor,
GpuAdvancedIncSubtensor1, GpuAdvancedIncSubtensor1,
GpuAdvancedIncSubtensor1_dev20, GpuAdvancedIncSubtensor1_dev20,
GpuAdvancedBooleanIncSubtensor,
GpuAllocDiag, GpuExtractDiag) GpuAllocDiag, GpuExtractDiag)
from .opt_util import alpha_merge, output_merge, pad_dims, unpad_dims from .opt_util import alpha_merge, output_merge, pad_dims, unpad_dims
from .reduction import GpuMaxAndArgmax from .reduction import GpuMaxAndArgmax
...@@ -1078,6 +1080,13 @@ def local_gpua_advanced_subtensor(op, context_name, inputs, outputs): ...@@ -1078,6 +1080,13 @@ def local_gpua_advanced_subtensor(op, context_name, inputs, outputs):
return GpuAdvancedSubtensor() return GpuAdvancedSubtensor()
@register_opt('fast_compile')
@op_lifter([tensor.AdvancedBooleanSubtensor])
@register_opt2([tensor.AdvancedBooleanSubtensor], 'fast_compile')
def local_gpua_advanced_boolean_subtensor(op, context_name, inputs, outputs):
    # Lift the host AdvancedBooleanSubtensor op to its GPU counterpart.
    return GpuAdvancedBooleanSubtensor()
@register_opt('fast_compile') @register_opt('fast_compile')
@op_lifter([tensor.AdvancedIncSubtensor1]) @op_lifter([tensor.AdvancedIncSubtensor1])
@register_opt2([tensor.AdvancedIncSubtensor1], 'fast_compile') @register_opt2([tensor.AdvancedIncSubtensor1], 'fast_compile')
...@@ -1121,6 +1130,20 @@ def local_gpua_advanced_incsubtensor(op, context_name, inputs, outputs): ...@@ -1121,6 +1130,20 @@ def local_gpua_advanced_incsubtensor(op, context_name, inputs, outputs):
return False return False
# Do not register this optimization for now, as it slows down the
# execution by a lot in important cases.
# @register_opt('fast_compile')
# @op_lifter([tensor.AdvancedBooleanIncSubtensor])
# @register_opt2([tensor.AdvancedBooleanIncSubtensor], 'fast_compile')
def local_gpua_advanced_boolean_incsubtensor(op, context_name, inputs, outputs):
    # GpuAdvancedIncSubtensor only works with a single boolean mask,
    # but not with fancy combinations, so bail out (return False) for
    # set_instead_of_inc or any input count other than (x, y, mask).
    if op.set_instead_of_inc or len(inputs) != 3:
        return False
    return GpuAdvancedBooleanIncSubtensor()
@register_inplace() @register_inplace()
@local_optimizer([GpuAdvancedIncSubtensor1, GpuAdvancedIncSubtensor1_dev20]) @local_optimizer([GpuAdvancedIncSubtensor1, GpuAdvancedIncSubtensor1_dev20])
def local_advincsub1_gpua_inplace(node): def local_advincsub1_gpua_inplace(node):
......
...@@ -2,7 +2,7 @@ from __future__ import absolute_import, print_function, division ...@@ -2,7 +2,7 @@ from __future__ import absolute_import, print_function, division
import numpy as np import numpy as np
from six import integer_types from six import integer_types
from six.moves import StringIO from six.moves import StringIO, xrange
from theano import tensor, gof, Op from theano import tensor, gof, Op
from theano.gof import ParamsType from theano.gof import ParamsType
...@@ -479,24 +479,46 @@ if (err != GA_NO_ERROR) { ...@@ -479,24 +479,46 @@ if (err != GA_NO_ERROR) {
return (0,) return (0,)
class GpuAdvancedSubtensor(HideC, tensor.AdvancedSubtensor): def check_and_convert_boolean_masks(input, idx_list):
""" """
AdvancedSubtensor On the GPU. This function checks if the boolean mask arrays in the index have
the right shape and converts them to index arrays by calling nonzero.
For each boolean mask, we check if the mask has the
same shape as the input. This is enforced in NumPy 0.13.0 and
newer, but not by earlier versions. If the size is not the same,
this method raises an IndexError.
""" """
def make_node(self, x, *inputs): dim_seen = 0
ctx_name = infer_context_name(x) out_idx_list = []
rval = tensor.AdvancedSubtensor.make_node(self, x, *inputs) for index in idx_list:
otype = GpuArrayType(dtype=rval.outputs[0].type.dtype, if index is np.newaxis:
broadcastable=rval.outputs[0].type.broadcastable, # skip, does not count as an input dimension
context_name=ctx_name) out_idx_list.append(index)
x = as_gpuarray_variable(x, ctx_name) elif isinstance(index, np.ndarray) and index.dtype == 'bool':
return gof.Apply(self, [x] + rval.inputs[1:], [otype()]) for i in xrange(index.ndim):
if index.shape[i] != input.shape[dim_seen + i]:
raise IndexError('boolean index did not match indexed array '
'along dimension %d; dimension is %d but '
'corresponding boolean dimension is %d' %
(dim_seen + i, input.shape[dim_seen + i],
index.shape[i]))
dim_seen += index.ndim
out_idx_list += index.nonzero()
else:
dim_seen += 1
out_idx_list.append(index)
return out_idx_list
class BaseGpuAdvancedSubtensor(object):
def perform(self, node, inputs, out_): def perform(self, node, inputs, out_):
out, = out_ out, = out_
x = inputs[0] x = inputs[0]
idx = inputs[1:] idx = inputs[1:]
# convert boolean masks to index arrays
idx = check_and_convert_boolean_masks(x, idx)
# detect and transpose array indices # detect and transpose array indices
nidx = [] nidx = []
nshp = list(x.shape) nshp = list(x.shape)
...@@ -598,21 +620,38 @@ class GpuAdvancedSubtensor(HideC, tensor.AdvancedSubtensor): ...@@ -598,21 +620,38 @@ class GpuAdvancedSubtensor(HideC, tensor.AdvancedSubtensor):
out[0] = o out[0] = o
class GpuAdvancedIncSubtensor(HideC, tensor.AdvancedIncSubtensor): class GpuAdvancedSubtensor(HideC, BaseGpuAdvancedSubtensor, tensor.AdvancedSubtensor):
""" """
Implement AdvancedIncSubtensor on the gpu. AdvancedSubtensor on the GPU.
"""
def make_node(self, x, *inputs):
ctx_name = infer_context_name(x)
# This method relies on AdvancedSubtensor.make_node to
# call tensor.subtensor.check_and_reject_bool(inputs),
# which raises an IndexError if there are any boolean indices.
rval = tensor.AdvancedSubtensor.make_node(self, x, *inputs)
otype = GpuArrayType(dtype=rval.outputs[0].type.dtype,
broadcastable=rval.outputs[0].type.broadcastable,
context_name=ctx_name)
x = as_gpuarray_variable(x, ctx_name)
return gof.Apply(self, [x] + rval.inputs[1:], [otype()])
class GpuAdvancedBooleanSubtensor(HideC, BaseGpuAdvancedSubtensor, tensor.AdvancedBooleanSubtensor):
""" """
def make_node(self, x, y, *inputs): AdvancedBooleanSubtensor on the GPU.
ctx_name = infer_context_name(x, y) """
rval = tensor.AdvancedIncSubtensor.make_node(self, x, y, *inputs) def make_node(self, x, *inputs):
ctx_name = infer_context_name(x)
rval = tensor.AdvancedBooleanSubtensor.make_node(self, x, *inputs)
otype = GpuArrayType(dtype=rval.outputs[0].type.dtype, otype = GpuArrayType(dtype=rval.outputs[0].type.dtype,
broadcastable=rval.outputs[0].type.broadcastable, broadcastable=rval.outputs[0].type.broadcastable,
context_name=ctx_name) context_name=ctx_name)
x = as_gpuarray_variable(x, ctx_name) x = as_gpuarray_variable(x, ctx_name)
y = as_gpuarray_variable(y, ctx_name) return gof.Apply(self, [x] + rval.inputs[1:], [otype()])
return gof.Apply(self, [x, y] + rval.inputs[2:], [otype()])
class BaseGpuAdvancedIncSubtensor(object):
def perform(self, node, inp, out_): def perform(self, node, inp, out_):
out, = out_ out, = out_
x = inp[0] x = inp[0]
...@@ -629,6 +668,9 @@ class GpuAdvancedIncSubtensor(HideC, tensor.AdvancedIncSubtensor): ...@@ -629,6 +668,9 @@ class GpuAdvancedIncSubtensor(HideC, tensor.AdvancedIncSubtensor):
if isinstance(idx[i], gpuarray.GpuArray): if isinstance(idx[i], gpuarray.GpuArray):
idx[i] = np.asarray(idx[i]) idx[i] = np.asarray(idx[i])
# convert boolean masks to index arrays
idx = check_and_convert_boolean_masks(x, idx)
# Insert axes for None indexing # Insert axes for None indexing
nidx = [] nidx = []
nshp = list(x.shape) nshp = list(x.shape)
...@@ -725,6 +767,38 @@ class GpuAdvancedIncSubtensor(HideC, tensor.AdvancedIncSubtensor): ...@@ -725,6 +767,38 @@ class GpuAdvancedIncSubtensor(HideC, tensor.AdvancedIncSubtensor):
out[0] = x_ out[0] = x_
class GpuAdvancedIncSubtensor(HideC, BaseGpuAdvancedIncSubtensor, tensor.AdvancedIncSubtensor):
    """
    Implement AdvancedIncSubtensor on the gpu.
    """
    def make_node(self, x, y, *inputs):
        context = infer_context_name(x, y)
        # Build the host-side Apply node first to get the output
        # dtype/broadcastable pattern, then mirror it on the GPU.
        host_node = tensor.AdvancedIncSubtensor.make_node(
            self, x, y, *inputs)
        out_type = GpuArrayType(
            dtype=host_node.outputs[0].type.dtype,
            broadcastable=host_node.outputs[0].type.broadcastable,
            context_name=context)
        x = as_gpuarray_variable(x, context)
        y = as_gpuarray_variable(y, context)
        return gof.Apply(self, [x, y] + host_node.inputs[2:], [out_type()])
class GpuAdvancedBooleanIncSubtensor(HideC, BaseGpuAdvancedIncSubtensor, tensor.AdvancedBooleanIncSubtensor):
    """
    Implement AdvancedBooleanIncSubtensor on the gpu.
    """
    def make_node(self, x, y, *inputs):
        context = infer_context_name(x, y)
        # Build the host-side Apply node first to get the output
        # dtype/broadcastable pattern, then mirror it on the GPU.
        host_node = tensor.AdvancedBooleanIncSubtensor.make_node(
            self, x, y, *inputs)
        out_type = GpuArrayType(
            dtype=host_node.outputs[0].type.dtype,
            broadcastable=host_node.outputs[0].type.broadcastable,
            context_name=context)
        x = as_gpuarray_variable(x, context)
        y = as_gpuarray_variable(y, context)
        return gof.Apply(self, [x, y] + host_node.inputs[2:], [out_type()])
class GpuAdvancedIncSubtensor1(Op): class GpuAdvancedIncSubtensor1(Op):
""" """
Implement AdvancedIncSubtensor1 on the gpu. Implement AdvancedIncSubtensor1 on the gpu.
......
...@@ -13,6 +13,7 @@ from ..elemwise import GpuDimShuffle ...@@ -13,6 +13,7 @@ from ..elemwise import GpuDimShuffle
from ..subtensor import (GpuIncSubtensor, GpuSubtensor, from ..subtensor import (GpuIncSubtensor, GpuSubtensor,
GpuAdvancedSubtensor1, GpuAdvancedSubtensor1,
GpuAdvancedSubtensor, GpuAdvancedSubtensor,
GpuAdvancedBooleanSubtensor,
GpuAdvancedIncSubtensor, GpuAdvancedIncSubtensor,
GpuAdvancedIncSubtensor1, GpuAdvancedIncSubtensor1,
GpuAdvancedIncSubtensor1_dev20, GpuAdvancedIncSubtensor1_dev20,
...@@ -39,6 +40,8 @@ class G_subtensor(test_subtensor.T_subtensor): ...@@ -39,6 +40,8 @@ class G_subtensor(test_subtensor.T_subtensor):
inc_sub=GpuIncSubtensor, inc_sub=GpuIncSubtensor,
adv_sub1=GpuAdvancedSubtensor1, adv_sub1=GpuAdvancedSubtensor1,
adv_incsub1=GpuAdvancedIncSubtensor1, adv_incsub1=GpuAdvancedIncSubtensor1,
adv_sub=GpuAdvancedSubtensor,
adv_bool_sub=GpuAdvancedBooleanSubtensor,
dimshuffle=GpuDimShuffle, dimshuffle=GpuDimShuffle,
mode=mode_with_gpu, mode=mode_with_gpu,
# avoid errors with limited devices # avoid errors with limited devices
...@@ -66,6 +69,8 @@ class G_subtensorF16(test_subtensor.T_subtensor): ...@@ -66,6 +69,8 @@ class G_subtensorF16(test_subtensor.T_subtensor):
inc_sub=GpuIncSubtensor, inc_sub=GpuIncSubtensor,
adv_sub1=GpuAdvancedSubtensor1, adv_sub1=GpuAdvancedSubtensor1,
adv_incsub1=GpuAdvancedIncSubtensor1, adv_incsub1=GpuAdvancedIncSubtensor1,
adv_sub=GpuAdvancedSubtensor,
adv_bool_sub=GpuAdvancedBooleanSubtensor,
dimshuffle=GpuDimShuffle, dimshuffle=GpuDimShuffle,
mode=mode_with_gpu, mode=mode_with_gpu,
# avoid errors with limited devices # avoid errors with limited devices
......
from __future__ import absolute_import, print_function, division from __future__ import absolute_import, print_function, division
import sys import sys
from textwrap import dedent from textwrap import dedent
import collections
import warnings import warnings
import logging import logging
...@@ -35,9 +36,15 @@ class AdvancedIndexingError(TypeError): ...@@ -35,9 +36,15 @@ class AdvancedIndexingError(TypeError):
Raised when Subtensor is asked to perform advanced indexing. Raised when Subtensor is asked to perform advanced indexing.
""" """
pass
def __init__(self, *args):
TypeError.__init__(self, *args) class AdvancedBooleanIndexingError(TypeError):
"""
Raised when Subtensor is asked to perform advanced indexing with boolean masks.
"""
pass
########## ##########
...@@ -280,11 +287,6 @@ class Subtensor(Op): ...@@ -280,11 +287,6 @@ class Subtensor(Op):
@todo: add support for advanced tensor indexing (in Subtensor_dx too). @todo: add support for advanced tensor indexing (in Subtensor_dx too).
""" """
e_invalid = ('The index list is longer (size %d) than the number of '
'dimensions of the tensor(namely %d). You are asking for '
'a dimension of the tensor that does not exist! You might '
'need to use dimshuffle to add extra dimension to your '
'tensor.')
e_subslice = 'nested slicing is not supported' e_subslice = 'nested slicing is not supported'
e_indextype = "Invalid index type or slice for Subtensor" e_indextype = "Invalid index type or slice for Subtensor"
debug = 0 debug = 0
...@@ -341,6 +343,11 @@ class Subtensor(Op): ...@@ -341,6 +343,11 @@ class Subtensor(Op):
theano.tensor.wscalar, theano.tensor.bscalar] theano.tensor.wscalar, theano.tensor.bscalar]
invalid_tensor_types = [theano.tensor.fscalar, theano.tensor.dscalar, invalid_tensor_types = [theano.tensor.fscalar, theano.tensor.dscalar,
theano.tensor.cscalar, theano.tensor.zscalar] theano.tensor.cscalar, theano.tensor.zscalar]
if (isinstance(entry, (np.ndarray, theano.tensor.Variable)) and
hasattr(entry, 'dtype') and entry.dtype == 'bool'):
raise AdvancedBooleanIndexingError(Subtensor.e_indextype, entry)
if (isinstance(entry, gof.Variable) and if (isinstance(entry, gof.Variable) and
(entry.type in invalid_scal_types or (entry.type in invalid_scal_types or
entry.type in invalid_tensor_types)): entry.type in invalid_tensor_types)):
...@@ -473,10 +480,7 @@ class Subtensor(Op): ...@@ -473,10 +480,7 @@ class Subtensor(Op):
idx_list = list(self.idx_list) idx_list = list(self.idx_list)
if len(idx_list) > x.type.ndim: if len(idx_list) > x.type.ndim:
exception = ValueError(Subtensor.e_invalid % ( raise IndexError('too many indices for array')
len(idx_list), x.type.ndim))
exception.subtensor_invalid = True
raise exception
input_types = Subtensor.collapse(idx_list, input_types = Subtensor.collapse(idx_list,
lambda entry: isinstance(entry, lambda entry: isinstance(entry,
...@@ -1108,6 +1112,13 @@ def inc_subtensor(x, y, inplace=False, set_instead_of_inc=False, ...@@ -1108,6 +1112,13 @@ def inc_subtensor(x, y, inplace=False, set_instead_of_inc=False,
the_op = AdvancedIncSubtensor(inplace, the_op = AdvancedIncSubtensor(inplace,
set_instead_of_inc=set_instead_of_inc) set_instead_of_inc=set_instead_of_inc)
return the_op(real_x, y, *ilist) return the_op(real_x, y, *ilist)
elif isinstance(x.owner.op, AdvancedBooleanSubtensor):
real_x = x.owner.inputs[0]
ilist = x.owner.inputs[1:]
the_op = AdvancedBooleanIncSubtensor(inplace,
set_instead_of_inc=set_instead_of_inc)
return the_op(real_x, y, *ilist)
elif isinstance(x.owner.op, DimShuffle): elif isinstance(x.owner.op, DimShuffle):
inner_x = x.owner.inputs[0] inner_x = x.owner.inputs[0]
# In the dimshuffle case, there are in fact two dimshuffles: # In the dimshuffle case, there are in fact two dimshuffles:
...@@ -1292,12 +1303,7 @@ class IncSubtensor(Op): ...@@ -1292,12 +1303,7 @@ class IncSubtensor(Op):
idx_list = list(self.idx_list) idx_list = list(self.idx_list)
if len(idx_list) > x.type.ndim: if len(idx_list) > x.type.ndim:
exception = ValueError( raise IndexError('too many indices for array')
Subtensor.e_invalid % (
len(idx_list),
x.type.ndim))
exception.subtensor_invalid = True
raise exception
input_types = Subtensor.collapse( input_types = Subtensor.collapse(
idx_list, idx_list,
...@@ -2062,8 +2068,8 @@ def as_index_variable(idx): ...@@ -2062,8 +2068,8 @@ def as_index_variable(idx):
if isinstance(idx, gof.Variable) and isinstance(idx.type, NoneTypeT): if isinstance(idx, gof.Variable) and isinstance(idx.type, NoneTypeT):
return idx return idx
idx = theano.tensor.as_tensor_variable(idx) idx = theano.tensor.as_tensor_variable(idx)
if idx.type.dtype not in theano.tensor.integer_dtypes: if idx.type.dtype not in theano.tensor.discrete_dtypes:
raise TypeError('index must be integers') raise TypeError('index must be integers or a boolean mask')
return idx return idx
...@@ -2091,7 +2097,10 @@ def adv_index_broadcastable_pattern(a, idx): ...@@ -2091,7 +2097,10 @@ def adv_index_broadcastable_pattern(a, idx):
if isinstance(v.type, SliceType): if isinstance(v.type, SliceType):
return slice(None, None) return slice(None, None)
return np.zeros((2,) * v.ndim, int) if v.dtype == 'bool':
return np.ones((2,) * v.ndim, v.dtype)
else:
return np.zeros((2,) * v.ndim, int)
newidx = tuple(map(replace_slice, idx)) newidx = tuple(map(replace_slice, idx))
...@@ -2101,14 +2110,58 @@ def adv_index_broadcastable_pattern(a, idx): ...@@ -2101,14 +2110,58 @@ def adv_index_broadcastable_pattern(a, idx):
return tuple([dim == 1 for dim in retshape]) return tuple([dim == 1 for dim in retshape])
class AdvancedSubtensor(Op): def check_advanced_indexing_dimensions(input, idx_list):
""" """
Return a subtensor copy, using advanced indexing. This function checks if the index list in idx_list is correct.
If there are any boolean masks, we check if the mask has the
same shape as the input. This is enforced in NumPy 0.13.0 and
newer, but not by earlier versions. If the size is not the same,
this method raises an IndexError.
"""
dim_seen = 0
for index in idx_list:
if index is np.newaxis:
# skip, does not count as an input dimension
pass
elif isinstance(index, np.ndarray) and index.dtype == 'bool':
for i in xrange(index.ndim):
if index.shape[i] != input.shape[dim_seen + i]:
raise IndexError('boolean index did not match indexed array '
'along dimension %d; dimension is %d but '
'corresponding boolean dimension is %d' %
(dim_seen + i, input.shape[dim_seen + i],
index.shape[i]))
dim_seen += index.ndim
else:
dim_seen += 1
def check_and_reject_bool(args_el):
    """
    Raise TypeError if args_el is, or recursively contains, a boolean
    index (Python bool, np.bool_, or anything whose dtype is 'bool').

    AdvancedSubtensor/AdvancedIncSubtensor call this so that boolean
    masks are routed to the AdvancedBoolean* ops instead.
    """
    try:
        is_boolean = (isinstance(args_el, (np.bool_, bool)) or
                      args_el.dtype == 'bool')
    except AttributeError:
        # No dtype attribute: scalar int, slice, etc. -- nothing to reject.
        is_boolean = False
    if is_boolean:
        raise TypeError('AdvancedSubtensor does not support boolean '
                        'masks for indexing. Use AdvancedBooleanSubtensor '
                        'instead. ')
    # Recurse into plain iterables (tuples/lists of indices), but not
    # into theano Variables, which are themselves iterable-like.
    if (not isinstance(args_el, theano.tensor.Variable) and
            isinstance(args_el, collections.Iterable)):
        for el in args_el:
            check_and_reject_bool(el)
class BaseAdvancedSubtensor(Op):
""" """
Abstract base class for AdvancedSubtensor and AdvancedBooleanSubtensor.
Implements advanced indexing with boolean masks.
# Should be used by __getitem__ and __getslice__, as follow: """
# AdvancedSubtensor()(self, *args),
# Should be used by __getitem__ and __getslice__, as follows:
# AdvancedSubtensor()(self, *args) or
# AdvancedBooleanSubtensor()(self, *args),
# if args contains and advanced indexing pattern # if args contains and advanced indexing pattern
__props__ = () __props__ = ()
...@@ -2128,24 +2181,12 @@ class AdvancedSubtensor(Op): ...@@ -2128,24 +2181,12 @@ class AdvancedSubtensor(Op):
return self.make_node(eval_points[0], *inputs[1:]).outputs return self.make_node(eval_points[0], *inputs[1:]).outputs
def infer_shape(self, node, ishapes): def infer_shape(self, node, ishapes):
# Really special case
if len(ishapes) == 3:
xshp, ind1shp, ind2shp = ishapes
if (len(xshp) == 2 and
ind1shp is not None and len(ind1shp) == 1 and
ind2shp is not None and len(ind2shp) == 1):
# if the graph is correct, we can assume ind1shp[0] and
# ind2shp[0] will have the same value.
# Try to return the one closest to the graph input.
if node.inputs[2].owner is None:
return [ind2shp]
else:
return [ind1shp]
# Default case, we don't know # Default case, we don't know
raise theano.tensor.basic.ShapeError("case not implemented") raise theano.tensor.basic.ShapeError("case not implemented")
def perform(self, node, inputs, out_): def perform(self, node, inputs, out_):
out, = out_ out, = out_
check_advanced_indexing_dimensions(inputs[0], inputs[1:])
rval = inputs[0].__getitem__(inputs[1:]) rval = inputs[0].__getitem__(inputs[1:])
# When there are no arrays, we are not actually doing advanced # When there are no arrays, we are not actually doing advanced
# indexing, so __getitem__ will not return a copy. # indexing, so __getitem__ will not return a copy.
...@@ -2163,6 +2204,37 @@ class AdvancedSubtensor(Op): ...@@ -2163,6 +2204,37 @@ class AdvancedSubtensor(Op):
return rval return rval
class AdvancedSubtensor(BaseAdvancedSubtensor):
    """
    Return a subtensor copy, using advanced integer indexing.

    Boolean masks are rejected at graph-construction time; use
    AdvancedBooleanSubtensor for boolean indexing.
    """
    # Should be used by __getitem__ and __getslice__, as follows:
    #    AdvancedSubtensor()(self, *args),
    # if args contains an advanced indexing pattern

    def make_node(self, x, *index):
        # Refuse boolean indices up-front: this op only handles
        # integer advanced indexing.
        check_and_reject_bool(index)
        return super(AdvancedSubtensor, self).make_node(x, *index)

    def infer_shape(self, node, ishapes):
        # Special case: a matrix indexed by two 1-d index vectors
        # yields a 1-d output with the same length as either vector.
        if len(ishapes) == 3:
            xshp, ind1shp, ind2shp = ishapes
            if (len(xshp) == 2 and
                    ind1shp is not None and len(ind1shp) == 1 and
                    ind2shp is not None and len(ind2shp) == 1):
                # In a well-formed graph ind1shp[0] == ind2shp[0];
                # prefer the shape coming straight from a graph input.
                if node.inputs[2].owner is None:
                    return [ind2shp]
                return [ind1shp]
        return super(AdvancedSubtensor, self).infer_shape(node, ishapes)
def grad(self, inputs, grads): def grad(self, inputs, grads):
gz, = grads gz, = grads
x = inputs[0] x = inputs[0]
...@@ -2173,8 +2245,29 @@ class AdvancedSubtensor(Op): ...@@ -2173,8 +2245,29 @@ class AdvancedSubtensor(Op):
advanced_subtensor = AdvancedSubtensor() advanced_subtensor = AdvancedSubtensor()
class AdvancedIncSubtensor(Op): class AdvancedBooleanSubtensor(BaseAdvancedSubtensor):
"""
Return a subtensor copy, using advanced indexing with boolean masks.
""" """
# Should be used by __getitem__ and __getslice__, as follows:
# AdvancedBooleanSubtensor()(self, *args),
# if args contains and advanced indexing pattern with boolean masks
def grad(self, inputs, grads):
gz, = grads
x = inputs[0]
rest = inputs[1:]
return [advanced_boolean_inc_subtensor(theano.tensor.zeros_like(x), gz,
*rest)] + \
[DisconnectedType()()] * len(rest)
advanced_boolean_subtensor = AdvancedBooleanSubtensor()
class BaseAdvancedIncSubtensor(Op):
"""
Base class for AdvancedIncSubtensor and AdvancedBooleanIncSubtensor.
Increments a subtensor using advanced indexing. Increments a subtensor using advanced indexing.
""" """
...@@ -2215,6 +2308,8 @@ class AdvancedIncSubtensor(Op): ...@@ -2215,6 +2308,8 @@ class AdvancedIncSubtensor(Op):
# TODO: 1. opt to make this in place 2. generalize as described in # TODO: 1. opt to make this in place 2. generalize as described in
# AdvancedSubtensor's perform TODO # AdvancedSubtensor's perform TODO
check_advanced_indexing_dimensions(inputs[0], inputs[2:])
out, = out_ out, = out_
if not self.inplace: if not self.inplace:
out[0] = inputs[0].copy() out[0] = inputs[0].copy()
...@@ -2238,6 +2333,22 @@ class AdvancedIncSubtensor(Op): ...@@ -2238,6 +2333,22 @@ class AdvancedIncSubtensor(Op):
return rval return rval
    def R_op(self, inputs, eval_points):
        # R-operator: re-apply the op on the evaluation points of x and y,
        # keeping the original index inputs unchanged.  If either of the
        # first two evaluation points is missing, no R-op is defined.
        if None in eval_points[:2]:
            return [None]
        return self.make_node(eval_points[0], eval_points[1],
                              *inputs[2:]).outputs
class AdvancedIncSubtensor(BaseAdvancedIncSubtensor):
    """
    Increments a subtensor using advanced indexing.

    Boolean masks are rejected at graph-construction time; use
    AdvancedBooleanIncSubtensor for boolean indexing.
    """

    def make_node(self, x, y, *inputs):
        # Refuse boolean indices up-front: this op only handles
        # integer advanced indexing.
        check_and_reject_bool(inputs)
        return super(AdvancedIncSubtensor, self).make_node(x, y, *inputs)
def grad(self, inpt, output_gradients): def grad(self, inpt, output_gradients):
x, y = inpt[:2] x, y = inpt[:2]
idxs = inpt[2:] idxs = inpt[2:]
...@@ -2265,16 +2376,46 @@ class AdvancedIncSubtensor(Op): ...@@ -2265,16 +2376,46 @@ class AdvancedIncSubtensor(Op):
gy = _sum_grad_over_bcasted_dims(y, gy) gy = _sum_grad_over_bcasted_dims(y, gy)
return [gx, gy] + \ return [gx, gy] + \
[DisconnectedType()() for _ in idxs] [DisconnectedType()() for _ in idxs]
def R_op(self, inputs, eval_points):
if None in eval_points[:2]:
return [None]
return self.make_node(eval_points[0], eval_points[1],
*inputs[2:]).outputs
advanced_inc_subtensor = AdvancedIncSubtensor() advanced_inc_subtensor = AdvancedIncSubtensor()
advanced_set_subtensor = AdvancedIncSubtensor(set_instead_of_inc=True) advanced_set_subtensor = AdvancedIncSubtensor(set_instead_of_inc=True)
class AdvancedBooleanIncSubtensor(BaseAdvancedIncSubtensor):
    """
    Increments a subtensor using advanced indexing with boolean masks.
    """

    def grad(self, inpt, output_gradients):
        """Gradient w.r.t. x and y; the index inputs are disconnected."""
        x, y = inpt[:2]
        idxs = inpt[2:]
        outgrad, = output_gradients
        if x.dtype in theano.tensor.discrete_dtypes:
            # The output dtype is the same as x: the gradient through a
            # discrete tensor is zero, expressed in floatX.
            gx = x.zeros_like(dtype=theano.config.floatX)
            if y.dtype in theano.tensor.discrete_dtypes:
                gy = y.zeros_like(dtype=theano.config.floatX)
            else:
                gy = y.zeros_like()
        elif x.dtype in theano.tensor.complex_dtypes:
            raise NotImplementedError("No support for complex grad yet")
        else:
            if self.set_instead_of_inc:
                # Fix: use the boolean variant here.  idxs may contain
                # boolean masks, and advanced_set_subtensor rejects them
                # (its make_node calls check_and_reject_bool, which
                # raises TypeError on any bool-dtype index), so the
                # non-boolean op can never build this gradient graph.
                gx = advanced_boolean_set_subtensor(
                    outgrad,
                    y.zeros_like(),
                    *idxs)
            else:
                gx = outgrad
            gy = advanced_boolean_subtensor(outgrad, *idxs)
            # Make sure to sum gy over the dimensions of y that have been
            # added or broadcasted
            gy = _sum_grad_over_bcasted_dims(y, gy)
        return [gx, gy] + \
            [DisconnectedType()() for _ in idxs]


advanced_boolean_inc_subtensor = AdvancedBooleanIncSubtensor()
advanced_boolean_set_subtensor = AdvancedBooleanIncSubtensor(set_instead_of_inc=True)
def take(a, indices, axis=None, mode='raise'): def take(a, indices, axis=None, mode='raise'):
a = theano.tensor.as_tensor_variable(a) a = theano.tensor.as_tensor_variable(a)
indices = theano.tensor.as_tensor_variable(indices) indices = theano.tensor.as_tensor_variable(indices)
......
...@@ -55,6 +55,8 @@ class T_subtensor(unittest.TestCase, utt.TestOptimizationMixin): ...@@ -55,6 +55,8 @@ class T_subtensor(unittest.TestCase, utt.TestOptimizationMixin):
adv_sub1=tensor.AdvancedSubtensor1, adv_sub1=tensor.AdvancedSubtensor1,
adv_incsub1=tensor.AdvancedIncSubtensor1, adv_incsub1=tensor.AdvancedIncSubtensor1,
adv_sub=tensor.AdvancedSubtensor, adv_sub=tensor.AdvancedSubtensor,
adv_bool_sub=tensor.AdvancedBooleanSubtensor,
adv_bool_inc_sub=tensor.AdvancedBooleanIncSubtensor,
mode=None, mode=None,
dtype=theano.config.floatX, dtype=theano.config.floatX,
type=tensor.TensorType, type=tensor.TensorType,
...@@ -66,6 +68,8 @@ class T_subtensor(unittest.TestCase, utt.TestOptimizationMixin): ...@@ -66,6 +68,8 @@ class T_subtensor(unittest.TestCase, utt.TestOptimizationMixin):
self.adv_sub1 = adv_sub1 self.adv_sub1 = adv_sub1
self.adv_incsub1 = adv_incsub1 self.adv_incsub1 = adv_incsub1
self.adv_sub = adv_sub self.adv_sub = adv_sub
self.adv_bool_sub = adv_bool_sub
self.adv_bool_inc_sub = adv_bool_inc_sub
self.dimshuffle = dimshuffle self.dimshuffle = dimshuffle
if mode is None: if mode is None:
mode = theano.compile.mode.get_default_mode() mode = theano.compile.mode.get_default_mode()
...@@ -75,7 +79,8 @@ class T_subtensor(unittest.TestCase, utt.TestOptimizationMixin): ...@@ -75,7 +79,8 @@ class T_subtensor(unittest.TestCase, utt.TestOptimizationMixin):
self.type = type self.type = type
self.ignore_topo = ignore_topo self.ignore_topo = ignore_topo
self.fast_compile = theano.config.mode == 'FAST_COMPILE' self.fast_compile = theano.config.mode == 'FAST_COMPILE'
self.ops = (sub, inc_sub, adv_sub1, adv_incsub1) self.ops = (sub, inc_sub, adv_sub1, adv_incsub1,
adv_bool_sub, adv_bool_inc_sub)
return super(T_subtensor, self).__init__(name) return super(T_subtensor, self).__init__(name)
def function(self, inputs, outputs, accept_inplace=False, def function(self, inputs, outputs, accept_inplace=False,
...@@ -120,12 +125,7 @@ class T_subtensor(unittest.TestCase, utt.TestOptimizationMixin): ...@@ -120,12 +125,7 @@ class T_subtensor(unittest.TestCase, utt.TestOptimizationMixin):
def test0_err_invalid(self): def test0_err_invalid(self):
# it is impossible to retrieve a view of a 0-d tensor # it is impossible to retrieve a view of a 0-d tensor
n = self.shared(np.ones((), dtype=self.dtype)) n = self.shared(np.ones((), dtype=self.dtype))
try: self.assertRaises(IndexError, n.__getitem__, 0)
n[0]
except ValueError as e:
self.assertTrue(hasattr(e, 'subtensor_invalid'))
return
self.fail()
@change_flags(compute_test_value='off') @change_flags(compute_test_value='off')
def test1_err_bounds(self): def test1_err_bounds(self):
...@@ -184,12 +184,7 @@ class T_subtensor(unittest.TestCase, utt.TestOptimizationMixin): ...@@ -184,12 +184,7 @@ class T_subtensor(unittest.TestCase, utt.TestOptimizationMixin):
def test1_err_invalid(self): def test1_err_invalid(self):
n = self.shared(np.ones(1, dtype=self.dtype)) n = self.shared(np.ones(1, dtype=self.dtype))
try: self.assertRaises(IndexError, n.__getitem__, (0, 0))
n[0, 0]
except ValueError as e:
self.assertTrue(hasattr(e, 'subtensor_invalid'))
return
self.fail()
def test1_ok_elem(self): def test1_ok_elem(self):
n = self.shared(np.ones(1, dtype=self.dtype) * 5) n = self.shared(np.ones(1, dtype=self.dtype) * 5)
...@@ -362,6 +357,112 @@ class T_subtensor(unittest.TestCase, utt.TestOptimizationMixin): ...@@ -362,6 +357,112 @@ class T_subtensor(unittest.TestCase, utt.TestOptimizationMixin):
assert_equal(tval.shape, numpy_tval.shape) assert_equal(tval.shape, numpy_tval.shape)
assert_array_equal(tval, numpy_tval) assert_array_equal(tval, numpy_tval)
def test_boolean(self):
    """Boolean advanced indexing: __getitem__ and inc_subtensor with masks.

    Each Theano result is compared against the equivalent NumPy boolean
    indexing operation on the same data, covering: partial-dimension masks,
    comparison expressions as masks, masks on a non-leading axis, full
    ndarray masks, interaction with Ellipsis/newaxis, gradients, and the
    error cases (wrongly-shaped masks, too many dimensions, Python bools).
    """
    def numpy_inc_subtensor(x, idx, a):
        # NumPy reference implementation of inc_subtensor: operate on a
        # copy so the caller's array is never mutated between sub-checks.
        x = x.copy()
        x[idx] += a
        return x

    numpy_n = np.arange(6, dtype=self.dtype).reshape((2, 3))
    n = self.shared(numpy_n)

    # indexing with a mask for some dimensions
    mask = np.array([True, False])
    # eval_output_and_check also asserts the graph uses the expected Op.
    val = self.eval_output_and_check(n[mask], op_type=self.adv_bool_sub)
    assert_array_equal(numpy_n[mask], val)
    val = self.eval_output_and_check(inc_subtensor(n[mask], 1),
                                     op_type=self.adv_bool_inc_sub)
    assert_array_equal(numpy_inc_subtensor(numpy_n, mask, 1), val)
    assert_array_equal(numpy_inc_subtensor(numpy_n, mask, numpy_n[mask]),
                       inc_subtensor(n[mask], n[mask]).eval())

    # test gradient
    utt.verify_grad(lambda m: m[mask], [numpy_n])
    utt.verify_grad(lambda m: inc_subtensor(m[mask], 1), [numpy_n])

    # indexing with a comparison (should translate to a boolean mask)
    assert_array_equal(numpy_n[numpy_n > 2], n[n > 2].eval())
    assert_array_equal(numpy_n[[0], numpy_n[0] > 2], n[[0], n[0] > 2].eval())
    assert_array_equal(numpy_n[[1], numpy_n[0] > 2], n[[1], n[0] > 2].eval())

    # indexing with a mask for the second dimension
    mask = np.array([True, False, True])
    assert_array_equal(numpy_n[0, mask], n[0, mask].eval())
    assert_array_equal(numpy_n[:, mask], n[:, mask].eval())
    # the mask may itself be a shared (symbolic) variable
    assert_array_equal(numpy_n[:, mask], n[:, self.shared(mask)].eval())
    assert_array_equal(numpy_n[1:, mask], n[1:, mask].eval())
    assert_array_equal(numpy_n[:1, mask], n[:1, mask].eval())
    assert_array_equal(numpy_n[1:, mask, np.newaxis], n[1:, mask, np.newaxis].eval())
    assert_array_equal(numpy_n[np.newaxis, 1:, mask], n[np.newaxis, 1:, mask].eval())
    # a boolean mask is equivalent to indexing with its nonzero() indices
    assert_array_equal(numpy_inc_subtensor(numpy_n, [0, mask], 1),
                       inc_subtensor(n[(0,) + mask.nonzero()], 1).eval())
    assert_array_equal(numpy_inc_subtensor(numpy_n, [0, mask], 1),
                       inc_subtensor(n[0, mask], 1).eval())
    assert_array_equal(numpy_inc_subtensor(numpy_n, [slice(None), mask], 1),
                       inc_subtensor(n[:, mask], 1).eval())

    # indexing with a boolean ndarray
    mask = np.array([[True, False, True], [False, False, True]])
    assert_array_equal(numpy_n[mask], n[mask].eval())
    assert_array_equal(numpy_n[mask], n[self.shared(mask)].eval())
    assert_array_equal(numpy_inc_subtensor(numpy_n, mask, 1),
                       inc_subtensor(n[mask], 1).eval())

    # indexing with ellipsis
    numpy_n4 = np.arange(48, dtype=self.dtype).reshape((2, 3, 4, 2))
    n4 = self.shared(numpy_n4)
    assert_array_equal(numpy_n4[numpy_n > 2, ...], n4[n > 2, ...].eval())
    assert_array_equal(numpy_n4[numpy_n > 2, ..., 1], n4[n > 2, ..., 1].eval())
    assert_array_equal(numpy_n4[numpy_n > 2, ..., 0, 1], n4[n > 2, ..., 0, 1].eval())
    assert_array_equal(numpy_inc_subtensor(numpy_n4, [numpy_n > 2, Ellipsis], 1),
                       inc_subtensor(n4[n > 2, ...], 1).eval())
    assert_array_equal(numpy_inc_subtensor(numpy_n4, [numpy_n > 2, Ellipsis, 1], 1),
                       inc_subtensor(n4[n > 2, ..., 1], 1).eval())
    assert_array_equal(numpy_inc_subtensor(numpy_n4, [numpy_n > 2, Ellipsis, 0, 1], 1),
                       inc_subtensor(n4[n > 2, ..., 0, 1], 1).eval())

    # the boolean mask should have the correct shape
    # - too large, padded with True
    mask = np.array([True, False, True])
    self.assertRaises(IndexError, n[mask].eval)
    self.assertRaises(IndexError, n[mask, ...].eval)
    self.assertRaises(IndexError, inc_subtensor(n[mask], 1).eval)
    self.assertRaises(IndexError, inc_subtensor(n[mask, ...], 1).eval)
    mask = np.array([[True, False, False, True], [False, True, False, True]])
    self.assertRaises(IndexError, n[mask].eval)
    self.assertRaises(IndexError, inc_subtensor(n[mask], 1).eval)
    # - too large, padded with False (this works in NumPy < 0.13.0)
    mask = np.array([True, False, False])
    self.assertRaises(IndexError, n[mask].eval)
    self.assertRaises(IndexError, n[mask, ...].eval)
    self.assertRaises(IndexError, inc_subtensor(n[mask], 1).eval)
    self.assertRaises(IndexError, inc_subtensor(n[mask, ...], 1).eval)
    mask = np.array([[True, False, False, False], [False, True, False, False]])
    self.assertRaises(IndexError, n[mask].eval)
    self.assertRaises(IndexError, inc_subtensor(n[mask], 1).eval)
    # - mask too small (this works in NumPy < 0.13.0)
    mask = np.array([True])
    self.assertRaises(IndexError, n[mask].eval)
    self.assertRaises(IndexError, n[mask, ...].eval)
    self.assertRaises(IndexError, inc_subtensor(n[mask], 1).eval)
    self.assertRaises(IndexError, inc_subtensor(n[mask, ...], 1).eval)
    mask = np.array([[True], [True]])
    self.assertRaises(IndexError, n[mask].eval)
    self.assertRaises(IndexError, inc_subtensor(n[mask], 1).eval)
    # - too many dimensions
    mask = np.array([[[True, False, False],
                      [False, True, False]]])
    # NOTE(review): the same assertion appears twice below; the second call
    # looks redundant — confirm whether an inc_subtensor variant was
    # intended here, as in the shape-mismatch cases above.
    self.assertRaises(IndexError, n.__getitem__, mask)
    self.assertRaises(IndexError, n.__getitem__, mask)

    # special cases: Python bools and bools nested in Python arrays are not supported
    self.assertRaises(TypeError, n.__getitem__, (True,))
    self.assertRaises(TypeError, n.__getitem__, (False,))
    self.assertRaises(TypeError, n.__getitem__, (True, False))
    self.assertRaises(TypeError, n.__getitem__, ([True, False]))
    self.assertRaises(TypeError, n.__getitem__, ([0, 1], [0, False]))
    self.assertRaises(TypeError, n.__getitem__, ([0, 1], [0, theano.shared(True)]))
def test_newaxis(self): def test_newaxis(self):
""" """
newaxis support comes from logic in the __getitem__ of TensorType newaxis support comes from logic in the __getitem__ of TensorType
...@@ -534,7 +635,7 @@ class T_subtensor(unittest.TestCase, utt.TestOptimizationMixin): ...@@ -534,7 +635,7 @@ class T_subtensor(unittest.TestCase, utt.TestOptimizationMixin):
def test_err_invalid_list(self): def test_err_invalid_list(self):
n = self.shared(np.asarray(5, dtype=self.dtype)) n = self.shared(np.asarray(5, dtype=self.dtype))
self.assertRaises(TypeError, n.__getitem__, [0, 0]) self.assertRaises(IndexError, n.__getitem__, [0, 0])
def test_err_invalid_2list_dtype(self): def test_err_invalid_2list_dtype(self):
n = self.shared(np.ones((3, 3), dtype=self.dtype) * 5) n = self.shared(np.ones((3, 3), dtype=self.dtype) * 5)
...@@ -1284,7 +1385,7 @@ class TestIncSubtensor1(unittest.TestCase): ...@@ -1284,7 +1385,7 @@ class TestIncSubtensor1(unittest.TestCase):
self.adv1q = tensor.lvector() # advanced 1d query self.adv1q = tensor.lvector() # advanced 1d query
def test_cant_adv_idx_into_scalar(self): def test_cant_adv_idx_into_scalar(self):
self.assertRaises(TypeError, lambda: self.s[self.adv1q]) self.assertRaises(IndexError, lambda: self.s[self.adv1q])
def test_index_into_vec_w_vec(self): def test_index_into_vec_w_vec(self):
a = self.v[self.adv1q] a = self.v[self.adv1q]
...@@ -1433,7 +1534,7 @@ class TestAdvancedSubtensor(unittest.TestCase): ...@@ -1433,7 +1534,7 @@ class TestAdvancedSubtensor(unittest.TestCase):
return tval return tval
def test_cant_adv_idx_into_scalar(self): def test_cant_adv_idx_into_scalar(self):
self.assertRaises(TypeError, lambda: self.s[self.ix1]) self.assertRaises(IndexError, lambda: self.s[self.ix1])
def test_index_into_vec_w_vec(self): def test_index_into_vec_w_vec(self):
a = self.v[self.ix1] a = self.v[self.ix1]
...@@ -1890,3 +1991,17 @@ class TestInferShape(utt.InferShapeTester): ...@@ -1890,3 +1991,17 @@ class TestInferShape(utt.InferShapeTester):
[admat[1:3, aivec]], [admat[1:3, aivec]],
[admat_val, aivec_val], AdvancedSubtensor, [admat_val, aivec_val], AdvancedSubtensor,
check_topo=False) check_topo=False)
def test_boolean(self):
    """Shape inference for boolean advanced indexing.

    ``infer_shape`` is not implemented for ``AdvancedBooleanSubtensor``
    (the output length depends on the mask's runtime values), so these
    checks only verify that shape inference does not crash; topology
    checking is disabled for the same reason.
    """
    n = dmatrix()
    n_val = np.arange(6).reshape((2, 3))
    # infer_shape is not implemented, but it should not crash
    self._compile_and_check([n],
                            [n[n[:, 0] > 2, n[0, :] > 2]],
                            [n_val], tensor.AdvancedBooleanSubtensor,
                            check_topo=False)
    self._compile_and_check([n],
                            [n[n[:, 0] > 2]],
                            [n_val], tensor.AdvancedBooleanSubtensor,
                            check_topo=False)
...@@ -460,23 +460,16 @@ class _tensor_py_operators(object): ...@@ -460,23 +460,16 @@ class _tensor_py_operators(object):
# SLICING/INDEXING # SLICING/INDEXING
def __getitem__(self, args): def __getitem__(self, args):
def check_bool(args_el): def includes_bool(args_el):
try: if (isinstance(args_el, (np.bool_, bool)) or
if (isinstance(args_el, (np.bool_, bool)) or (hasattr(args_el, 'dtype') and args_el.dtype == 'bool')):
args_el.dtype == 'bool'): return True
raise TypeError('TensorType does not support boolean '
'mask for indexing such as tensor[x==0]. '
'Instead you can use non_zeros() such as '
'tensor[(x == 0).nonzeros()]. ')
except AttributeError:
pass
if (not isinstance(args_el, theano.tensor.Variable) and if (not isinstance(args_el, theano.tensor.Variable) and
isinstance(args_el, collections.Iterable)): isinstance(args_el, collections.Iterable)):
for el in args_el: for el in args_el:
check_bool(el) if includes_bool(el):
return True
check_bool(args) return False
if (isinstance(args, list) and if (isinstance(args, list) and
any([isinstance(a, slice) for a in args])): any([isinstance(a, slice) for a in args])):
...@@ -484,22 +477,48 @@ class _tensor_py_operators(object): ...@@ -484,22 +477,48 @@ class _tensor_py_operators(object):
elif not isinstance(args, tuple): elif not isinstance(args, tuple):
args = args, args = args,
# Count the dimensions, check for bools and find ellipses.
ellipses = []
index_dim_count = 0
for i, arg in enumerate(args):
if arg is np.newaxis:
# no increase in index_dim_count
pass
elif arg is Ellipsis:
# no increase in index_dim_count
ellipses.append(i)
elif (isinstance(arg, (np.ndarray, theano.tensor.Variable)) and
hasattr(arg, 'dtype') and arg.dtype == 'bool'):
index_dim_count += arg.ndim
else:
# Python arrays can contain a mixture of bools and integers,
# which requires complex rules to handle all special cases.
# These rules differ slightly between NumPy versions.
# Since earlier versions of Theano did not support any boolean
# indexing, it is safe to throw an error if we encounter
# any of these difficult cases.
if includes_bool(arg):
raise TypeError('TensorType does not support Python bools '
'for indexing, such as tensor[[True, False]]. '
'To use a boolean mask, convert the mask to '
'a NumPy array first, e.g., '
'tensor[numpy.array([True, False])].')
index_dim_count += 1
# Check if the number of dimensions isn't too large.
if self.ndim < index_dim_count:
raise IndexError('too many indices for array')
# Convert an Ellipsis if provided into an appropriate number of # Convert an Ellipsis if provided into an appropriate number of
# slice(None). # slice(None).
ellipses = [i
for i, index in enumerate(args)
if index is Ellipsis]
if len(ellipses) > 1: if len(ellipses) > 1:
raise IndexError( raise IndexError(
"an index can only have a single Ellipsis (`...`)") "an index can only have a single Ellipsis (`...`)")
elif len(ellipses) == 1: elif len(ellipses) == 1:
new_axes = sum(1
for index in args
if index is np.newaxis) # numpy.newaxis is None
ellipsis_at = ellipses[0] ellipsis_at = ellipses[0]
args = list(args) args = list(args)
args[ellipsis_at: ellipsis_at + 1] = ( args[ellipsis_at: ellipsis_at + 1] = (
[slice(None)] * (self.ndim - (len(args) - 1 - new_axes))) [slice(None)] * (self.ndim - index_dim_count))
# Force input to be int64 datatype if input is an empty list or tuple # Force input to be int64 datatype if input is an empty list or tuple
# Else leave it as is if it is a real number # Else leave it as is if it is a real number
...@@ -510,8 +529,10 @@ class _tensor_py_operators(object): ...@@ -510,8 +529,10 @@ class _tensor_py_operators(object):
# Determine if advanced indexing is needed or not # Determine if advanced indexing is needed or not
# The logic is already in Subtensor.convert: if it succeeds, # The logic is already in Subtensor.convert: if it succeeds,
# standard indexing is used; if it fails with # standard indexing is used; if it fails with
# AdvancedIndexingError, advanced indexing # AdvancedIndexingError, advanced indexing, or
# AdvancedBooleanIndexingError, advanced indexing with boolean masks
advanced = False advanced = False
advanced_boolean = False
axis = None axis = None
for i, arg in enumerate(args): for i, arg in enumerate(args):
try: try:
...@@ -524,13 +545,20 @@ class _tensor_py_operators(object): ...@@ -524,13 +545,20 @@ class _tensor_py_operators(object):
else: else:
advanced = True advanced = True
axis = i axis = i
except theano.tensor.subtensor.AdvancedBooleanIndexingError:
if advanced: advanced = False
advanced_boolean = True
break
if advanced_boolean:
return theano.tensor.subtensor.advanced_boolean_subtensor(self, *args)
elif advanced:
if (axis is not None and if (axis is not None and
all(isinstance(a, slice) and all(isinstance(a, slice) and
equal_slices(a, slice(None)) for a in args[:axis]) and equal_slices(a, slice(None)) for a in args[:axis]) and
all(isinstance(a, slice) and all(isinstance(a, slice) and
equal_slices(a, slice(None)) for a in args[axis + 1:]) and equal_slices(a, slice(None)) for a in args[axis + 1:]) and
(not hasattr(args[axis], 'dtype') or args[axis].dtype != 'bool') and
isinstance(args[axis], isinstance(args[axis],
(np.ndarray, list, (np.ndarray, list,
TensorVariable, TensorConstant, TensorVariable, TensorConstant,
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论