提交 5c92b461 authored 作者: Frédéric Bastien's avatar Frédéric Bastien 提交者: GitHub

Merge pull request #6207 from gvtulder/f-bool-indexing

Add support for boolean indexing
...@@ -1109,42 +1109,9 @@ Like NumPy, Theano distinguishes between *basic* and *advanced* indexing. ...@@ -1109,42 +1109,9 @@ Like NumPy, Theano distinguishes between *basic* and *advanced* indexing.
Theano fully supports basic indexing Theano fully supports basic indexing
(see `NumPy's indexing <http://docs.scipy.org/doc/numpy/reference/arrays.indexing.html>`_) (see `NumPy's indexing <http://docs.scipy.org/doc/numpy/reference/arrays.indexing.html>`_)
and `integer advanced indexing and `integer advanced indexing
<http://docs.scipy.org/doc/numpy/reference/arrays.indexing.html#integer>`_. We do not <http://docs.scipy.org/doc/numpy/reference/arrays.indexing.html#integer>`_.
support boolean masks, as Theano does not have a boolean type (we use int8 for the output of Since version 0.10.0 Theano also supports boolean indexing with boolean
logic operators). NumPy arrays or Theano tensors.
.. testsetup:: indexing
import theano
import numpy as np
NumPy with a mask:
.. doctest:: indexing
>>> n = np.arange(9).reshape(3,3)
>>> n[n > 4]
array([5, 6, 7, 8])
Theano indexing with a "mask" (incorrect approach):
.. doctest:: indexing
>>> t = theano.tensor.arange(9).reshape((3,3))
>>> t[t > 4].eval() # an array with shape (3, 3, 3) # doctest: +ELLIPSIS
Traceback (most recent call last):
...
TypeError: TensorType does not support boolean mask for indexing such as tensor[x==0].
Instead you can use non_zeros() such as tensor[(x == 0).nonzeros()].
If you are indexing on purpose with an int8, please cast it to int16.
Getting a Theano result like NumPy:
.. doctest:: indexing
>>> t[(t > 4).nonzero()].eval()
array([5, 6, 7, 8])
Index-assignment is *not* supported. If you want to do something like ``a[5] Index-assignment is *not* supported. If you want to do something like ``a[5]
= b`` or ``a[5]+=b``, see :func:`theano.tensor.set_subtensor` and :func:`theano.tensor.inc_subtensor` below. = b`` or ``a[5]+=b``, see :func:`theano.tensor.set_subtensor` and :func:`theano.tensor.inc_subtensor` below.
......
...@@ -73,9 +73,11 @@ from .elemwise import (GpuElemwise, GpuDimShuffle, GpuCAReduceCuda, ...@@ -73,9 +73,11 @@ from .elemwise import (GpuElemwise, GpuDimShuffle, GpuCAReduceCuda,
from .subtensor import (GpuIncSubtensor, GpuSubtensor, from .subtensor import (GpuIncSubtensor, GpuSubtensor,
GpuAdvancedSubtensor, GpuAdvancedSubtensor,
GpuAdvancedSubtensor1, GpuAdvancedSubtensor1,
GpuAdvancedBooleanSubtensor,
GpuAdvancedIncSubtensor, GpuAdvancedIncSubtensor,
GpuAdvancedIncSubtensor1, GpuAdvancedIncSubtensor1,
GpuAdvancedIncSubtensor1_dev20, GpuAdvancedIncSubtensor1_dev20,
GpuAdvancedBooleanIncSubtensor,
GpuAllocDiag, GpuExtractDiag) GpuAllocDiag, GpuExtractDiag)
from .opt_util import alpha_merge, output_merge, pad_dims, unpad_dims from .opt_util import alpha_merge, output_merge, pad_dims, unpad_dims
from .reduction import GpuMaxAndArgmax from .reduction import GpuMaxAndArgmax
...@@ -1078,6 +1080,13 @@ def local_gpua_advanced_subtensor(op, context_name, inputs, outputs): ...@@ -1078,6 +1080,13 @@ def local_gpua_advanced_subtensor(op, context_name, inputs, outputs):
return GpuAdvancedSubtensor() return GpuAdvancedSubtensor()
@register_opt('fast_compile')
@op_lifter([tensor.AdvancedBooleanSubtensor])
@register_opt2([tensor.AdvancedBooleanSubtensor], 'fast_compile')
def local_gpua_advanced_boolean_subtensor(op, context_name, inputs, outputs):
    # Lift the CPU AdvancedBooleanSubtensor op to its GPU counterpart.
    return GpuAdvancedBooleanSubtensor()
@register_opt('fast_compile') @register_opt('fast_compile')
@op_lifter([tensor.AdvancedIncSubtensor1]) @op_lifter([tensor.AdvancedIncSubtensor1])
@register_opt2([tensor.AdvancedIncSubtensor1], 'fast_compile') @register_opt2([tensor.AdvancedIncSubtensor1], 'fast_compile')
...@@ -1121,6 +1130,20 @@ def local_gpua_advanced_incsubtensor(op, context_name, inputs, outputs): ...@@ -1121,6 +1130,20 @@ def local_gpua_advanced_incsubtensor(op, context_name, inputs, outputs):
return False return False
# Do not register this optimization for now, as it slows down the
# execution by a lot in important cases.
# @register_opt('fast_compile')
# @op_lifter([tensor.AdvancedBooleanIncSubtensor])
# @register_opt2([tensor.AdvancedBooleanIncSubtensor], 'fast_compile')
def local_gpua_advanced_boolean_incsubtensor(op, context_name, inputs, outputs):
    """Lift AdvancedBooleanIncSubtensor to the GPU when supported.

    Returns the GPU op for the increment (not set) case with exactly
    three inputs — presumably (x, y, mask); confirm against the op's
    make_node — and False otherwise, so the lifter leaves the node on
    the CPU.
    """
    # GpuAdvancedIncSubtensor only works with a single boolean mask,
    # but not with fancy combinations.
    if not op.set_instead_of_inc and len(inputs) == 3:
        return GpuAdvancedBooleanIncSubtensor()
    else:
        return False
@register_inplace() @register_inplace()
@local_optimizer([GpuAdvancedIncSubtensor1, GpuAdvancedIncSubtensor1_dev20]) @local_optimizer([GpuAdvancedIncSubtensor1, GpuAdvancedIncSubtensor1_dev20])
def local_advincsub1_gpua_inplace(node): def local_advincsub1_gpua_inplace(node):
......
...@@ -2,7 +2,7 @@ from __future__ import absolute_import, print_function, division ...@@ -2,7 +2,7 @@ from __future__ import absolute_import, print_function, division
import numpy as np import numpy as np
from six import integer_types from six import integer_types
from six.moves import StringIO from six.moves import StringIO, xrange
from theano import tensor, gof, Op from theano import tensor, gof, Op
from theano.gof import ParamsType from theano.gof import ParamsType
...@@ -479,24 +479,46 @@ if (err != GA_NO_ERROR) { ...@@ -479,24 +479,46 @@ if (err != GA_NO_ERROR) {
return (0,) return (0,)
def check_and_convert_boolean_masks(input, idx_list):
    """
    Check boolean mask arrays in an index list and convert them to
    integer index arrays.

    Each boolean mask must have the same shape as the dimensions of
    `input` it indexes.  This is enforced by NumPy 0.13.0 and newer,
    but not by earlier versions.  Valid masks are replaced by the index
    arrays returned by their ``nonzero()`` method; every other entry
    (``numpy.newaxis``, integers, slices, integer arrays, ...) is
    passed through unchanged.

    Parameters
    ----------
    input : object with a ``shape`` attribute
        The array being indexed.  (The parameter name shadows the
        builtin ``input``; kept for backward compatibility.)
    idx_list : iterable
        The indices, possibly containing boolean ndarray masks.

    Returns
    -------
    list
        The index list with every boolean mask expanded via
        ``nonzero()``.

    Raises
    ------
    IndexError
        If a mask indexes more dimensions than `input` has left, or if
        any mask dimension does not match the corresponding dimension
        of `input`.
    """
    dim_seen = 0
    out_idx_list = []
    for index in idx_list:
        if index is np.newaxis:
            # skip, does not count as an input dimension
            out_idx_list.append(index)
        elif isinstance(index, np.ndarray) and index.dtype == 'bool':
            # Reject over-long masks explicitly instead of letting the
            # shape lookup below fail with an unhelpful
            # "tuple index out of range" error (same exception type).
            if dim_seen + index.ndim > len(input.shape):
                raise IndexError('too many indices for array')
            for i in range(index.ndim):
                if index.shape[i] != input.shape[dim_seen + i]:
                    raise IndexError('boolean index did not match indexed array '
                                     'along dimension %d; dimension is %d but '
                                     'corresponding boolean dimension is %d' %
                                     (dim_seen + i, input.shape[dim_seen + i],
                                      index.shape[i]))
            dim_seen += index.ndim
            out_idx_list += index.nonzero()
        else:
            # Any other index (integer, slice, int array, ...) consumes
            # exactly one input dimension.
            dim_seen += 1
            out_idx_list.append(index)
    return out_idx_list
class BaseGpuAdvancedSubtensor(object):
def perform(self, node, inputs, out_): def perform(self, node, inputs, out_):
out, = out_ out, = out_
x = inputs[0] x = inputs[0]
idx = inputs[1:] idx = inputs[1:]
# convert boolean masks to index arrays
idx = check_and_convert_boolean_masks(x, idx)
# detect and transpose array indices # detect and transpose array indices
nidx = [] nidx = []
nshp = list(x.shape) nshp = list(x.shape)
...@@ -598,21 +620,38 @@ class GpuAdvancedSubtensor(HideC, tensor.AdvancedSubtensor): ...@@ -598,21 +620,38 @@ class GpuAdvancedSubtensor(HideC, tensor.AdvancedSubtensor):
out[0] = o out[0] = o
class GpuAdvancedSubtensor(HideC, BaseGpuAdvancedSubtensor, tensor.AdvancedSubtensor):
    """
    AdvancedSubtensor on the GPU.

    Integer-only advanced indexing; boolean indices are rejected in
    ``make_node`` (see the comment there).  The actual computation is
    the inherited ``BaseGpuAdvancedSubtensor.perform``.
    """
    def make_node(self, x, *inputs):
        # Build the node with the CPU op first to obtain the output
        # type, then move the indexed tensor to the GPU context.
        ctx_name = infer_context_name(x)
        # This method relies on AdvancedSubtensor.make_node to
        # call tensor.subtensor.check_and_reject_bool(inputs),
        # which raises an IndexError if there are any boolean indices.
        rval = tensor.AdvancedSubtensor.make_node(self, x, *inputs)
        otype = GpuArrayType(dtype=rval.outputs[0].type.dtype,
                             broadcastable=rval.outputs[0].type.broadcastable,
                             context_name=ctx_name)
        x = as_gpuarray_variable(x, ctx_name)
        return gof.Apply(self, [x] + rval.inputs[1:], [otype()])
class GpuAdvancedBooleanSubtensor(HideC, BaseGpuAdvancedSubtensor, tensor.AdvancedBooleanSubtensor):
    """
    AdvancedBooleanSubtensor on the GPU.

    Same as :class:`GpuAdvancedSubtensor` but built from the boolean
    variant of the CPU op, so boolean mask indices are accepted.  The
    computation is the inherited ``BaseGpuAdvancedSubtensor.perform``.
    """
    def make_node(self, x, *inputs):
        # Build the node with the CPU op first to obtain the output
        # type, then move the indexed tensor to the GPU context.
        ctx_name = infer_context_name(x)
        rval = tensor.AdvancedBooleanSubtensor.make_node(self, x, *inputs)
        otype = GpuArrayType(dtype=rval.outputs[0].type.dtype,
                             broadcastable=rval.outputs[0].type.broadcastable,
                             context_name=ctx_name)
        x = as_gpuarray_variable(x, ctx_name)
        return gof.Apply(self, [x] + rval.inputs[1:], [otype()])
class BaseGpuAdvancedIncSubtensor(object):
def perform(self, node, inp, out_): def perform(self, node, inp, out_):
out, = out_ out, = out_
x = inp[0] x = inp[0]
...@@ -629,6 +668,9 @@ class GpuAdvancedIncSubtensor(HideC, tensor.AdvancedIncSubtensor): ...@@ -629,6 +668,9 @@ class GpuAdvancedIncSubtensor(HideC, tensor.AdvancedIncSubtensor):
if isinstance(idx[i], gpuarray.GpuArray): if isinstance(idx[i], gpuarray.GpuArray):
idx[i] = np.asarray(idx[i]) idx[i] = np.asarray(idx[i])
# convert boolean masks to index arrays
idx = check_and_convert_boolean_masks(x, idx)
# Insert axes for None indexing # Insert axes for None indexing
nidx = [] nidx = []
nshp = list(x.shape) nshp = list(x.shape)
...@@ -725,6 +767,38 @@ class GpuAdvancedIncSubtensor(HideC, tensor.AdvancedIncSubtensor): ...@@ -725,6 +767,38 @@ class GpuAdvancedIncSubtensor(HideC, tensor.AdvancedIncSubtensor):
out[0] = x_ out[0] = x_
class GpuAdvancedIncSubtensor(HideC, BaseGpuAdvancedIncSubtensor, tensor.AdvancedIncSubtensor):
    """
    Implement AdvancedIncSubtensor on the gpu.

    The computation is the inherited
    ``BaseGpuAdvancedIncSubtensor.perform``.
    """
    def make_node(self, x, y, *inputs):
        # Build the node with the CPU op to obtain the output type,
        # then move both x and the increment y to the same GPU context.
        ctx_name = infer_context_name(x, y)
        rval = tensor.AdvancedIncSubtensor.make_node(self, x, y, *inputs)
        otype = GpuArrayType(dtype=rval.outputs[0].type.dtype,
                             broadcastable=rval.outputs[0].type.broadcastable,
                             context_name=ctx_name)
        x = as_gpuarray_variable(x, ctx_name)
        y = as_gpuarray_variable(y, ctx_name)
        return gof.Apply(self, [x, y] + rval.inputs[2:], [otype()])
class GpuAdvancedBooleanIncSubtensor(HideC, BaseGpuAdvancedIncSubtensor, tensor.AdvancedBooleanIncSubtensor):
    """
    Implement AdvancedBooleanIncSubtensor on the gpu.

    Same as :class:`GpuAdvancedIncSubtensor` but built from the boolean
    variant of the CPU op, so boolean mask indices are accepted.  The
    computation is the inherited
    ``BaseGpuAdvancedIncSubtensor.perform``.
    """
    def make_node(self, x, y, *inputs):
        # Build the node with the CPU op to obtain the output type,
        # then move both x and the increment y to the same GPU context.
        ctx_name = infer_context_name(x, y)
        rval = tensor.AdvancedBooleanIncSubtensor.make_node(self, x, y, *inputs)
        otype = GpuArrayType(dtype=rval.outputs[0].type.dtype,
                             broadcastable=rval.outputs[0].type.broadcastable,
                             context_name=ctx_name)
        x = as_gpuarray_variable(x, ctx_name)
        y = as_gpuarray_variable(y, ctx_name)
        return gof.Apply(self, [x, y] + rval.inputs[2:], [otype()])
class GpuAdvancedIncSubtensor1(Op): class GpuAdvancedIncSubtensor1(Op):
""" """
Implement AdvancedIncSubtensor1 on the gpu. Implement AdvancedIncSubtensor1 on the gpu.
......
...@@ -13,6 +13,7 @@ from ..elemwise import GpuDimShuffle ...@@ -13,6 +13,7 @@ from ..elemwise import GpuDimShuffle
from ..subtensor import (GpuIncSubtensor, GpuSubtensor, from ..subtensor import (GpuIncSubtensor, GpuSubtensor,
GpuAdvancedSubtensor1, GpuAdvancedSubtensor1,
GpuAdvancedSubtensor, GpuAdvancedSubtensor,
GpuAdvancedBooleanSubtensor,
GpuAdvancedIncSubtensor, GpuAdvancedIncSubtensor,
GpuAdvancedIncSubtensor1, GpuAdvancedIncSubtensor1,
GpuAdvancedIncSubtensor1_dev20, GpuAdvancedIncSubtensor1_dev20,
...@@ -39,6 +40,8 @@ class G_subtensor(test_subtensor.T_subtensor): ...@@ -39,6 +40,8 @@ class G_subtensor(test_subtensor.T_subtensor):
inc_sub=GpuIncSubtensor, inc_sub=GpuIncSubtensor,
adv_sub1=GpuAdvancedSubtensor1, adv_sub1=GpuAdvancedSubtensor1,
adv_incsub1=GpuAdvancedIncSubtensor1, adv_incsub1=GpuAdvancedIncSubtensor1,
adv_sub=GpuAdvancedSubtensor,
adv_bool_sub=GpuAdvancedBooleanSubtensor,
dimshuffle=GpuDimShuffle, dimshuffle=GpuDimShuffle,
mode=mode_with_gpu, mode=mode_with_gpu,
# avoid errors with limited devices # avoid errors with limited devices
...@@ -66,6 +69,8 @@ class G_subtensorF16(test_subtensor.T_subtensor): ...@@ -66,6 +69,8 @@ class G_subtensorF16(test_subtensor.T_subtensor):
inc_sub=GpuIncSubtensor, inc_sub=GpuIncSubtensor,
adv_sub1=GpuAdvancedSubtensor1, adv_sub1=GpuAdvancedSubtensor1,
adv_incsub1=GpuAdvancedIncSubtensor1, adv_incsub1=GpuAdvancedIncSubtensor1,
adv_sub=GpuAdvancedSubtensor,
adv_bool_sub=GpuAdvancedBooleanSubtensor,
dimshuffle=GpuDimShuffle, dimshuffle=GpuDimShuffle,
mode=mode_with_gpu, mode=mode_with_gpu,
# avoid errors with limited devices # avoid errors with limited devices
......
差异被折叠。
...@@ -460,23 +460,16 @@ class _tensor_py_operators(object): ...@@ -460,23 +460,16 @@ class _tensor_py_operators(object):
# SLICING/INDEXING # SLICING/INDEXING
def __getitem__(self, args): def __getitem__(self, args):
        def includes_bool(args_el):
            """Return True if args_el is, or recursively contains, a
            boolean scalar or a bool-dtyped array; False otherwise.

            Theano Variables are not recursed into — only plain Python
            iterables (lists, tuples, ...) are searched element-wise.
            """
            if (isinstance(args_el, (np.bool_, bool)) or
                    (hasattr(args_el, 'dtype') and args_el.dtype == 'bool')):
                return True
            if (not isinstance(args_el, theano.tensor.Variable) and
                    isinstance(args_el, collections.Iterable)):
                for el in args_el:
                    if includes_bool(el):
                        return True
            return False
if (isinstance(args, list) and if (isinstance(args, list) and
any([isinstance(a, slice) for a in args])): any([isinstance(a, slice) for a in args])):
...@@ -484,22 +477,48 @@ class _tensor_py_operators(object): ...@@ -484,22 +477,48 @@ class _tensor_py_operators(object):
elif not isinstance(args, tuple): elif not isinstance(args, tuple):
args = args, args = args,
# Count the dimensions, check for bools and find ellipses.
ellipses = []
index_dim_count = 0
for i, arg in enumerate(args):
if arg is np.newaxis:
# no increase in index_dim_count
pass
elif arg is Ellipsis:
# no increase in index_dim_count
ellipses.append(i)
elif (isinstance(arg, (np.ndarray, theano.tensor.Variable)) and
hasattr(arg, 'dtype') and arg.dtype == 'bool'):
index_dim_count += arg.ndim
else:
# Python arrays can contain a mixture of bools and integers,
# which requires complex rules to handle all special cases.
# These rules differ slightly between NumPy versions.
# Since earlier versions of Theano did not support any boolean
# indexing, it is safe to throw an error if we encounter
# any of these difficult cases.
if includes_bool(arg):
raise TypeError('TensorType does not support Python bools '
'for indexing, such as tensor[[True, False]]. '
'To use a boolean mask, convert the mask to '
'a NumPy array first, e.g., '
'tensor[numpy.array([True, False])].')
index_dim_count += 1
# Check if the number of dimensions isn't too large.
if self.ndim < index_dim_count:
raise IndexError('too many indices for array')
# Convert an Ellipsis if provided into an appropriate number of # Convert an Ellipsis if provided into an appropriate number of
# slice(None). # slice(None).
ellipses = [i
for i, index in enumerate(args)
if index is Ellipsis]
if len(ellipses) > 1: if len(ellipses) > 1:
raise IndexError( raise IndexError(
"an index can only have a single Ellipsis (`...`)") "an index can only have a single Ellipsis (`...`)")
elif len(ellipses) == 1: elif len(ellipses) == 1:
new_axes = sum(1
for index in args
if index is np.newaxis) # numpy.newaxis is None
ellipsis_at = ellipses[0] ellipsis_at = ellipses[0]
args = list(args) args = list(args)
args[ellipsis_at: ellipsis_at + 1] = ( args[ellipsis_at: ellipsis_at + 1] = (
[slice(None)] * (self.ndim - (len(args) - 1 - new_axes))) [slice(None)] * (self.ndim - index_dim_count))
# Force input to be int64 datatype if input is an empty list or tuple # Force input to be int64 datatype if input is an empty list or tuple
# Else leave it as is if it is a real number # Else leave it as is if it is a real number
...@@ -510,8 +529,10 @@ class _tensor_py_operators(object): ...@@ -510,8 +529,10 @@ class _tensor_py_operators(object):
# Determine if advanced indexing is needed or not # Determine if advanced indexing is needed or not
# The logic is already in Subtensor.convert: if it succeeds, # The logic is already in Subtensor.convert: if it succeeds,
# standard indexing is used; if it fails with # standard indexing is used; if it fails with
# AdvancedIndexingError, advanced indexing # AdvancedIndexingError, advanced indexing, or
# AdvancedBooleanIndexingError, advanced indexing with boolean masks
advanced = False advanced = False
advanced_boolean = False
axis = None axis = None
for i, arg in enumerate(args): for i, arg in enumerate(args):
try: try:
...@@ -524,13 +545,20 @@ class _tensor_py_operators(object): ...@@ -524,13 +545,20 @@ class _tensor_py_operators(object):
else: else:
advanced = True advanced = True
axis = i axis = i
except theano.tensor.subtensor.AdvancedBooleanIndexingError:
advanced = False
advanced_boolean = True
break
if advanced: if advanced_boolean:
return theano.tensor.subtensor.advanced_boolean_subtensor(self, *args)
elif advanced:
if (axis is not None and if (axis is not None and
all(isinstance(a, slice) and all(isinstance(a, slice) and
equal_slices(a, slice(None)) for a in args[:axis]) and equal_slices(a, slice(None)) for a in args[:axis]) and
all(isinstance(a, slice) and all(isinstance(a, slice) and
equal_slices(a, slice(None)) for a in args[axis + 1:]) and equal_slices(a, slice(None)) for a in args[axis + 1:]) and
(not hasattr(args[axis], 'dtype') or args[axis].dtype != 'bool') and
isinstance(args[axis], isinstance(args[axis],
(np.ndarray, list, (np.ndarray, list,
TensorVariable, TensorConstant, TensorVariable, TensorConstant,
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论