Commit a9a0d5aa authored by Gijs van Tulder

New op AdvancedBooleanSubtensor.

Parent 68162534
......@@ -1075,6 +1075,13 @@ def local_gpua_advanced_subtensor(op, context_name, inputs, outputs):
return GpuAdvancedSubtensor()
@register_opt('fast_compile')
@op_lifter([tensor.AdvancedBooleanSubtensor])
@register_opt2([tensor.AdvancedBooleanSubtensor], 'fast_compile')
def local_gpua_advanced_boolean_subtensor(op, context_name, inputs, outputs):
    """GPU lifter: replace ``AdvancedBooleanSubtensor`` by its GPU op.

    ``GpuAdvancedSubtensor`` serves both the plain and the boolean-mask
    advanced-subtensor cases (it derives from
    ``tensor.AdvancedBooleanSubtensor``), so the same GPU op is returned
    here as for the non-boolean lifter above.
    """
    return GpuAdvancedSubtensor()
@register_opt('fast_compile')
@op_lifter([tensor.AdvancedIncSubtensor1])
@register_opt2([tensor.AdvancedIncSubtensor1], 'fast_compile')
......@@ -1118,6 +1125,20 @@ def local_gpua_advanced_incsubtensor(op, context_name, inputs, outputs):
return False
# Deliberately NOT registered for now: enabling this optimization slows
# down execution by a lot in important cases.
# @register_opt('fast_compile')
# @op_lifter([tensor.AdvancedBooleanIncSubtensor])
# @register_opt2([tensor.AdvancedBooleanIncSubtensor], 'fast_compile')
def local_gpua_advanced_boolean_incsubtensor(op, context_name, inputs, outputs):
    """Lift ``AdvancedBooleanIncSubtensor`` to the GPU when supported.

    ``GpuAdvancedIncSubtensor`` only handles the single-boolean-mask form
    (exactly 3 inputs: x, y, mask) with increment semantics; anything
    fancier, or set-instead-of-inc, is left on the CPU.
    """
    # Guard clauses: bail out on the unsupported combinations first.
    if op.set_instead_of_inc:
        return False
    if len(inputs) != 3:
        return False
    return GpuAdvancedIncSubtensor()
@register_inplace()
@local_optimizer([GpuAdvancedIncSubtensor1, GpuAdvancedIncSubtensor1_dev20])
def local_advincsub1_gpua_inplace(node):
......
......@@ -512,13 +512,13 @@ def check_and_convert_boolean_masks(input, idx_list):
return out_idx_list
class GpuAdvancedSubtensor(HideC, tensor.AdvancedSubtensor):
class GpuAdvancedSubtensor(HideC, tensor.AdvancedBooleanSubtensor):
"""
AdvancedSubtensor On the GPU.
AdvancedBooleanSubtensor On the GPU.
"""
def make_node(self, x, *inputs):
ctx_name = infer_context_name(x)
rval = tensor.AdvancedSubtensor.make_node(self, x, *inputs)
rval = tensor.AdvancedBooleanSubtensor.make_node(self, x, *inputs)
otype = GpuArrayType(dtype=rval.outputs[0].type.dtype,
broadcastable=rval.outputs[0].type.broadcastable,
context_name=ctx_name)
......@@ -634,14 +634,14 @@ class GpuAdvancedSubtensor(HideC, tensor.AdvancedSubtensor):
out[0] = o
class GpuAdvancedIncSubtensor(HideC, tensor.AdvancedIncSubtensor):
class GpuAdvancedIncSubtensor(HideC, tensor.AdvancedBooleanIncSubtensor):
"""
Implement AdvancedIncSubtensor on the gpu.
Implement AdvancedBooleanIncSubtensor on the gpu.
"""
def make_node(self, x, y, *inputs):
ctx_name = infer_context_name(x, y)
rval = tensor.AdvancedIncSubtensor.make_node(self, x, y, *inputs)
rval = tensor.AdvancedBooleanIncSubtensor.make_node(self, x, y, *inputs)
otype = GpuArrayType(dtype=rval.outputs[0].type.dtype,
broadcastable=rval.outputs[0].type.broadcastable,
context_name=ctx_name)
......
......@@ -39,6 +39,8 @@ class G_subtensor(test_subtensor.T_subtensor):
inc_sub=GpuIncSubtensor,
adv_sub1=GpuAdvancedSubtensor1,
adv_incsub1=GpuAdvancedIncSubtensor1,
adv_sub=GpuAdvancedSubtensor,
adv_bool_sub=GpuAdvancedSubtensor,
dimshuffle=GpuDimShuffle,
mode=mode_with_gpu,
# avoid errors with limited devices
......@@ -66,6 +68,8 @@ class G_subtensorF16(test_subtensor.T_subtensor):
inc_sub=GpuIncSubtensor,
adv_sub1=GpuAdvancedSubtensor1,
adv_incsub1=GpuAdvancedIncSubtensor1,
adv_sub=GpuAdvancedSubtensor,
adv_bool_sub=GpuAdvancedSubtensor,
dimshuffle=GpuDimShuffle,
mode=mode_with_gpu,
# avoid errors with limited devices
......
from __future__ import absolute_import, print_function, division
import sys
from textwrap import dedent
import collections
import warnings
import logging
......@@ -40,6 +41,16 @@ class AdvancedIndexingError(TypeError):
TypeError.__init__(self, *args)
class AdvancedBooleanIndexingError(TypeError):
    """
    Raised when Subtensor is asked to perform advanced indexing
    with boolean masks.
    """

    def __init__(self, *args):
        # Forward everything to TypeError; this subclass only exists so
        # callers can distinguish boolean-mask indexing from other errors.
        super(AdvancedBooleanIndexingError, self).__init__(*args)
##########
# Helpful functions to deal with Subtensor and IncSubtensor
##########
......@@ -339,7 +350,7 @@ class Subtensor(Op):
if (isinstance(entry, (np.ndarray, theano.tensor.Variable)) and
hasattr(entry, 'dtype') and entry.dtype == 'bool'):
raise AdvancedIndexingError(Subtensor.e_indextype, entry)
raise AdvancedBooleanIndexingError(Subtensor.e_indextype, entry)
if (isinstance(entry, gof.Variable) and
(entry.type in invalid_scal_types or
......@@ -1105,6 +1116,13 @@ def inc_subtensor(x, y, inplace=False, set_instead_of_inc=False,
the_op = AdvancedIncSubtensor(inplace,
set_instead_of_inc=set_instead_of_inc)
return the_op(real_x, y, *ilist)
elif isinstance(x.owner.op, AdvancedBooleanSubtensor):
real_x = x.owner.inputs[0]
ilist = x.owner.inputs[1:]
the_op = AdvancedBooleanIncSubtensor(inplace,
set_instead_of_inc=set_instead_of_inc)
return the_op(real_x, y, *ilist)
elif isinstance(x.owner.op, DimShuffle):
inner_x = x.owner.inputs[0]
# In the dimshuffle case, there are in fact two dimshuffles:
......@@ -2122,14 +2140,32 @@ def check_advanced_indexing_dimensions(input, idx_list):
dim_seen += 1
class AdvancedSubtensor(Op):
def check_and_reject_bool(args_el):
    """Recursively reject boolean values used as advanced indices.

    Parameters
    ----------
    args_el
        A candidate index: a scalar, an array-like with a ``dtype``,
        a Theano ``Variable``, or a (possibly nested) iterable of these.

    Raises
    ------
    TypeError
        If ``args_el`` is a Python/NumPy boolean, has dtype ``'bool'``,
        or (for non-Variable iterables) contains such an element.
    """
    try:
        if (isinstance(args_el, (np.bool_, bool)) or
                args_el.dtype == 'bool'):
            raise TypeError('AdvancedSubtensor does not support boolean '
                            'masks for indexing. Use AdvancedBooleanSubtensor '
                            'instead. ')
    except AttributeError:
        # args_el has no .dtype: not an array-like; nothing to reject here.
        pass
    # `collections.Iterable` was removed in Python 3.10; use
    # `collections.abc.Iterable` when available (Python 3), falling back
    # to the old alias on Python 2.
    iterable_cls = getattr(collections, 'abc', collections).Iterable
    if (not isinstance(args_el, theano.tensor.Variable) and
            isinstance(args_el, iterable_cls)):
        for el in args_el:
            check_and_reject_bool(el)
class BaseAdvancedSubtensor(Op):
"""
Return a subtensor copy, using advanced indexing.
Abstract base class for AdvancedSubtensor and AdvancedBooleanSubtensor.
Implements advanced indexing with boolean masks.
"""
# Should be used by __getitem__ and __getslice__, as follow:
# AdvancedSubtensor()(self, *args),
# Should be used by __getitem__ and __getslice__, as follows:
# AdvancedSubtensor()(self, *args) or
# AdvancedBooleanSubtensor()(self, *args),
# if args contains and advanced indexing pattern
__props__ = ()
......@@ -2149,19 +2185,6 @@ class AdvancedSubtensor(Op):
return self.make_node(eval_points[0], *inputs[1:]).outputs
def infer_shape(self, node, ishapes):
# Really special case
if len(ishapes) == 3:
xshp, ind1shp, ind2shp = ishapes
if (len(xshp) == 2 and
ind1shp is not None and len(ind1shp) == 1 and
ind2shp is not None and len(ind2shp) == 1):
# if the graph is correct, we can assume ind1shp[0] and
# ind2shp[0] will have the same value.
# Try to return the one closest to the graph input.
if node.inputs[2].owner is None:
return [ind2shp]
else:
return [ind1shp]
# Default case, we don't know
raise theano.tensor.basic.ShapeError("case not implemented")
......@@ -2192,11 +2215,71 @@ class AdvancedSubtensor(Op):
return [advanced_inc_subtensor(theano.tensor.zeros_like(x), gz,
*rest)] + \
[DisconnectedType()()] * len(rest)
class AdvancedSubtensor(BaseAdvancedSubtensor):
    """
    Return a subtensor copy, using advanced indexing.

    Boolean masks are rejected here; use AdvancedBooleanSubtensor for those.
    """
    # Should be used by __getitem__ and __getslice__, as follows:
    # AdvancedSubtensor()(self, *args),
    # if args contains an advanced indexing pattern

    def make_node(self, x, *index):
        # Raise early if any index is a boolean scalar/array.
        check_and_reject_bool(index)
        return super(AdvancedSubtensor, self).make_node(x, *index)

    def infer_shape(self, node, ishapes):
        # Really special case: x[ind1, ind2] on a matrix with two 1-d
        # index vectors -- the output shape equals the (shared) index shape.
        if len(ishapes) == 3:
            xshp, ind1shp, ind2shp = ishapes
            if (len(xshp) == 2 and
                    ind1shp is not None and len(ind1shp) == 1 and
                    ind2shp is not None and len(ind2shp) == 1):
                # if the graph is correct, we can assume ind1shp[0] and
                # ind2shp[0] will have the same value.
                # Try to return the one closest to the graph input.
                if node.inputs[2].owner is None:
                    return [ind2shp]
                else:
                    return [ind1shp]
        # Default case: delegate to the base class (which raises ShapeError
        # for patterns it cannot handle).
        return super(AdvancedSubtensor, self).infer_shape(node, ishapes)

    def grad(self, inputs, grads):
        # Gradient w.r.t. x scatters gz into a zero tensor shaped like x;
        # the index inputs receive no gradient (disconnected).
        gz, = grads
        x = inputs[0]
        rest = inputs[1:]
        return [advanced_inc_subtensor(theano.tensor.zeros_like(x), gz,
                                       *rest)] + \
            [DisconnectedType()()] * len(rest)


advanced_subtensor = AdvancedSubtensor()
class AdvancedIncSubtensor(Op):
class AdvancedBooleanSubtensor(BaseAdvancedSubtensor):
    """
    Return a subtensor copy, using advanced indexing with boolean masks.
    """
    # Should be used by __getitem__ and __getslice__, as follows:
    # AdvancedBooleanSubtensor()(self, *args),
    # if args contains an advanced indexing pattern with boolean masks

    def grad(self, inputs, grads):
        # Scatter the output gradient back into a zero tensor shaped
        # like x; the index inputs themselves get no gradient.
        gz, = grads
        x, rest = inputs[0], inputs[1:]
        gx = advanced_boolean_inc_subtensor(
            theano.tensor.zeros_like(x), gz, *rest)
        return [gx] + [DisconnectedType()()] * len(rest)


advanced_boolean_subtensor = AdvancedBooleanSubtensor()
class BaseAdvancedIncSubtensor(Op):
"""
Base class for AdvancedIncSubtensor and AdvancedBooleanIncSubtensor.
Increments a subtensor using advanced indexing.
"""
......@@ -2262,6 +2345,22 @@ class AdvancedIncSubtensor(Op):
return rval
def R_op(self, inputs, eval_points):
if None in eval_points[:2]:
return [None]
return self.make_node(eval_points[0], eval_points[1],
*inputs[2:]).outputs
class AdvancedIncSubtensor(BaseAdvancedIncSubtensor):
    """
    Increments a subtensor using advanced indexing.

    Boolean masks are rejected here; use AdvancedBooleanIncSubtensor
    for those.
    """

    def make_node(self, x, y, *inputs):
        # Raise a TypeError early if any index is a boolean scalar/array.
        check_and_reject_bool(inputs)
        return super(AdvancedIncSubtensor, self).make_node(x, y, *inputs)
def grad(self, inpt, output_gradients):
x, y = inpt[:2]
idxs = inpt[2:]
......@@ -2289,16 +2388,46 @@ class AdvancedIncSubtensor(Op):
gy = _sum_grad_over_bcasted_dims(y, gy)
return [gx, gy] + \
[DisconnectedType()() for _ in idxs]
    def R_op(self, inputs, eval_points):
        # The R-operator needs evaluation points for both x and y;
        # the index inputs (inputs[2:]) are passed through unchanged.
        if None in eval_points[:2]:
            return [None]
        return self.make_node(eval_points[0], eval_points[1],
                              *inputs[2:]).outputs


# Module-level singletons: increment vs. overwrite (set) semantics.
advanced_inc_subtensor = AdvancedIncSubtensor()
advanced_set_subtensor = AdvancedIncSubtensor(set_instead_of_inc=True)
class AdvancedBooleanIncSubtensor(BaseAdvancedIncSubtensor):
    """
    Increments a subtensor using advanced indexing with boolean masks.
    """

    def grad(self, inpt, output_gradients):
        # Gradients w.r.t. x and y; the index inputs are disconnected.
        x, y = inpt[:2]
        idxs = inpt[2:]
        outgrad, = output_gradients
        if x.dtype in theano.tensor.discrete_dtypes:
            # The output dtype is the same as x
            gx = x.zeros_like(dtype=theano.config.floatX)
            if y.dtype in theano.tensor.discrete_dtypes:
                gy = y.zeros_like(dtype=theano.config.floatX)
            else:
                gy = y.zeros_like()
        elif x.dtype in theano.tensor.complex_dtypes:
            raise NotImplementedError("No support for complex grad yet")
        else:
            if self.set_instead_of_inc:
                # Set semantics: the overwritten positions of x contribute
                # nothing, so zero them out of the incoming gradient.
                gx = advanced_set_subtensor(
                    outgrad,
                    y.zeros_like(),
                    *idxs)
            else:
                gx = outgrad
            gy = advanced_boolean_subtensor(outgrad, *idxs)
        # Make sure to sum gy over the dimensions of y that have been
        # added or broadcasted
        gy = _sum_grad_over_bcasted_dims(y, gy)
        return [gx, gy] + \
            [DisconnectedType()() for _ in idxs]


# Module-level singletons: increment vs. overwrite (set) semantics.
advanced_boolean_inc_subtensor = AdvancedBooleanIncSubtensor()
advanced_boolean_set_subtensor = AdvancedBooleanIncSubtensor(set_instead_of_inc=True)
def take(a, indices, axis=None, mode='raise'):
a = theano.tensor.as_tensor_variable(a)
indices = theano.tensor.as_tensor_variable(indices)
......
......@@ -55,6 +55,8 @@ class T_subtensor(unittest.TestCase, utt.TestOptimizationMixin):
adv_sub1=tensor.AdvancedSubtensor1,
adv_incsub1=tensor.AdvancedIncSubtensor1,
adv_sub=tensor.AdvancedSubtensor,
adv_bool_sub=tensor.AdvancedBooleanSubtensor,
adv_bool_inc_sub=tensor.AdvancedBooleanIncSubtensor,
mode=None,
dtype=theano.config.floatX,
type=tensor.TensorType,
......@@ -66,6 +68,8 @@ class T_subtensor(unittest.TestCase, utt.TestOptimizationMixin):
self.adv_sub1 = adv_sub1
self.adv_incsub1 = adv_incsub1
self.adv_sub = adv_sub
self.adv_bool_sub = adv_bool_sub
self.adv_bool_inc_sub = adv_bool_inc_sub
self.dimshuffle = dimshuffle
if mode is None:
mode = theano.compile.mode.get_default_mode()
......@@ -75,7 +79,8 @@ class T_subtensor(unittest.TestCase, utt.TestOptimizationMixin):
self.type = type
self.ignore_topo = ignore_topo
self.fast_compile = theano.config.mode == 'FAST_COMPILE'
self.ops = (sub, inc_sub, adv_sub1, adv_incsub1)
self.ops = (sub, inc_sub, adv_sub1, adv_incsub1,
adv_bool_sub, adv_bool_inc_sub)
return super(T_subtensor, self).__init__(name)
def function(self, inputs, outputs, accept_inplace=False,
......@@ -361,19 +366,25 @@ class T_subtensor(unittest.TestCase, utt.TestOptimizationMixin):
numpy_n = np.arange(6, dtype=self.dtype).reshape((2, 3))
n = self.shared(numpy_n)
# indexing with a comparison (should translate to a boolean mask)
assert_array_equal(numpy_n[numpy_n > 2], n[n > 2].eval())
assert_array_equal(numpy_n[[0], numpy_n[0] > 2], n[[0], n[0] > 2].eval())
assert_array_equal(numpy_n[[1], numpy_n[0] > 2], n[[1], n[0] > 2].eval())
# indexing with a mask for some dimensions
mask = np.array([True, False])
assert_array_equal(numpy_n[mask], n[mask].eval())
assert_array_equal(numpy_inc_subtensor(numpy_n, mask, 1),
inc_subtensor(n[mask], 1).eval())
val = self.eval_output_and_check(n[mask], op_type=self.adv_bool_sub)
assert_array_equal(numpy_n[mask], val)
val = self.eval_output_and_check(inc_subtensor(n[mask], 1),
op_type=self.adv_bool_inc_sub)
assert_array_equal(numpy_inc_subtensor(numpy_n, mask, 1), val)
assert_array_equal(numpy_inc_subtensor(numpy_n, mask, numpy_n[mask]),
inc_subtensor(n[mask], n[mask]).eval())
# test gradient
utt.verify_grad(lambda m: m[mask], [numpy_n])
utt.verify_grad(lambda m: inc_subtensor(m[mask], 1), [numpy_n])
# indexing with a comparison (should translate to a boolean mask)
assert_array_equal(numpy_n[numpy_n > 2], n[n > 2].eval())
assert_array_equal(numpy_n[[0], numpy_n[0] > 2], n[[0], n[0] > 2].eval())
assert_array_equal(numpy_n[[1], numpy_n[0] > 2], n[[1], n[0] > 2].eval())
# indexing with a mask for the second dimension
mask = np.array([True, False, True])
assert_array_equal(numpy_n[0, mask], n[0, mask].eval())
......@@ -383,6 +394,8 @@ class T_subtensor(unittest.TestCase, utt.TestOptimizationMixin):
assert_array_equal(numpy_n[:1, mask], n[:1, mask].eval())
assert_array_equal(numpy_n[1:, mask, np.newaxis], n[1:, mask, np.newaxis].eval())
assert_array_equal(numpy_n[np.newaxis, 1:, mask], n[np.newaxis, 1:, mask].eval())
assert_array_equal(numpy_inc_subtensor(numpy_n, [0, mask], 1),
inc_subtensor(n[(0,) + mask.nonzero()], 1).eval())
assert_array_equal(numpy_inc_subtensor(numpy_n, [0, mask], 1),
inc_subtensor(n[0, mask], 1).eval())
assert_array_equal(numpy_inc_subtensor(numpy_n, [slice(None), mask], 1),
......
......@@ -529,8 +529,10 @@ class _tensor_py_operators(object):
# Determine if advanced indexing is needed or not
# The logic is already in Subtensor.convert: if it succeeds,
# standard indexing is used; if it fails with
# AdvancedIndexingError, advanced indexing
# AdvancedIndexingError, advanced indexing, or
# AdvancedBooleanIndexingError, advanced indexing with boolean masks
advanced = False
advanced_boolean = False
axis = None
for i, arg in enumerate(args):
try:
......@@ -543,8 +545,14 @@ class _tensor_py_operators(object):
else:
advanced = True
axis = i
if advanced:
except theano.tensor.subtensor.AdvancedBooleanIndexingError:
advanced = False
advanced_boolean = True
break
if advanced_boolean:
return theano.tensor.subtensor.advanced_boolean_subtensor(self, *args)
elif advanced:
if (axis is not None and
all(isinstance(a, slice) and
equal_slices(a, slice(None)) for a in args[:axis]) and
......
Markdown is supported
0%
You are about to add 0 people to this discussion. Proceed with caution.
Finish editing this comment first!
Register or sign in to comment