Commit c8216133 authored by goodfeli

Merge pull request #992 from nouiz/grad_switch

Grad switch
@@ -25,7 +25,7 @@ This is a release candidate for a major version, with lots of new
 features, bug fixes, and some interface changes (deprecated or
 potentially misleading features were removed).
-The upgrade is recommended for developpers who want to help test and
+The upgrade is recommended for developers who want to help test and
 report bugs, or want to use new features now. If you have updated
 to 0.5rc1, you are highly encouraged to update to 0.5rc2.
@@ -106,18 +106,14 @@ http://deeplearning.net/tutorial/
 Acknowledgments
 ---------------
 I would like to thank all contributors of Theano. For this particular
 release, many people have helped, notably (in alphabetical order):
-Hani Almousli, Frédéric Bastien, Justin Bayer, Arnaud Bergeron, James
-Bergstra, Valentin Bisson, Josh Bleecher Snyder, Yann Dauphin, Olivier
-Delalleau, Guillaume Desjardins, Sander Dieleman, Xavier Glorot, Ian
-Goodfellow, Philippe Hamel, Pascal Lamblin, Eric Laufer, Grégoire
-Mesnil, Razvan Pascanu, Matthew Rocklin, Graham Taylor, Sebastian Urban,
-David Warde-Farley, and Yao Li.
+[Generate the list of commiters: git shortlog -s <previous_tag>...| cut -c8-]
 I would also like to thank users who submitted bug reports, notably:
-Nicolas Boulanger-Lewandowski, Olivier Chapelle, Michael Forbes, Timothy
-Lillicrap, and John Salvatier.
+[TODO]
 Also, thank you to all NumPy and Scipy developers as Theano builds on
 their strengths.
@@ -986,6 +986,11 @@ Condition
     x,y = T.dmatrices('x','y')
     z = T.switch(T.lt(a,b), x, y)
+.. function:: where(cond, ift, iff)
+   Alias for `switch`. where is the numpy name.
 .. function:: clip(x, min, max)
     Return a variable representing x, but with all elements greater than
@@ -1051,7 +1051,12 @@ class Switch(ScalarOp):
         else:
             second_part = None
-        return (None, first_part, second_part)
+        # cond does affect the elements of the output so it is connected.
+        # For the sake of making the gradient convenient we assume that
+        # condition + epsilon always triggers the same branch as condition
+        condition_grad = cond.zeros_like().astype(theano.config.floatX)
+        return (condition_grad, first_part, second_part)
     def output_types(self, (cond_t, ift_t, iff_t)):
         return upcast_out(ift_t, iff_t)
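In effect, differentiating a switch with respect to its condition now yields a symbolic zero rather than None (disconnected), under the assumption spelled out in the comment: perturbing the condition by epsilon does not change which branch is taken. A minimal sketch of the new behaviour, assuming a period-correct Theano install (the variable names are illustrative):

    import theano
    import theano.tensor as T

    c = T.dscalar('c')
    x = T.dscalar('x')
    y = T.dscalar('y')
    z = T.switch(T.lt(c, 0), x, y)
    # After this change, the gradient w.r.t. the condition is a symbolic
    # zero instead of None, so T.grad accepts `c` as a wrt input.
    gc, gx, gy = T.grad(z, [c, x, y])
    f = theano.function([c, x, y], [gc, gx, gy])
    print(f(1.0, 2.0, 3.0))  # gradients (0., 0., 1.): c < 0 is false, so z == y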
@@ -1527,7 +1527,7 @@ class SpSumTester(utt.InferShapeTester):
         for format in sparse.sparse_formats:
             for axis in self.possible_axis:
                 variable, data = sparse_random_inputs(format,
-                                                      shape=(10, 10))
+                                                      shape=(9, 10))
                 self._compile_and_check(variable,
                                         [self.op(variable[0], axis=axis)],
                                         data,
@@ -1538,7 +1538,7 @@ class SpSumTester(utt.InferShapeTester):
             for axis in self.possible_axis:
                 for struct in [True, False]:
                     variable, data = sparse_random_inputs(format,
-                                                          shape=(10, 10))
+                                                          shape=(9, 10))
                     verify_grad_sparse(
                         self.op_class(axis=axis, sparse_grad=struct),
                         data,
@@ -1744,7 +1744,7 @@ class Remove0Tester(utt.InferShapeTester):
         assert result.size == target.size, msg
     def test_infer_shape(self):
-        mat = (numpy.arange(9) + 1).reshape((3, 3))
+        mat = (numpy.arange(12) + 1).reshape((4, 3))
         mat[0, 1] = mat[1, 0] = mat[2, 2] = 0
         x_csc = theano.sparse.csc_matrix(dtype=theano.config.floatX)
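The shape changes in the test hunks above ((10, 10) to (9, 10), and (3, 3) to (4, 3)) share one rationale: with a square input, an op that accidentally transposes or mixes up axes still returns a correctly shaped result, so shape-inference checks pass silently. A plain-NumPy sketch of the idea; buggy_sum is a hypothetical stand-in for such a bug, not Theano code:

    import numpy

    def buggy_sum(x, axis):
        # Hypothetical bug: the axes are swapped.
        return x.sum(axis=1 - axis)

    square = numpy.ones((10, 10))
    rect = numpy.ones((9, 10))

    # Square input: the buggy result has the same shape as the correct one.
    assert buggy_sum(square, axis=0).shape == square.sum(axis=0).shape
    # Rectangular input: the shape mismatch exposes the bug.
    assert buggy_sum(rect, axis=0).shape != rect.sum(axis=0).shape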
@@ -2605,11 +2605,11 @@ def isinf(a):
 # Condition
 ##########################
-@_scal_elemwise
+@_scal_elemwise_with_nfunc('where', 3, 1)
 def switch(cond, ift, iff):
     """if cond then ift else iff"""
+where = switch
 ##########################
 # Bit-wise
 ##########################
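With the nfunc decorator and the module-level alias, theano.tensor.where now mirrors the three-argument form of numpy.where. A small usage sketch, assuming a period-correct Theano install:

    import numpy
    import theano
    import theano.tensor as T

    cond = T.matrix('cond')
    x = T.matrix('x')
    y = T.matrix('y')
    f = theano.function([cond, x, y], T.where(cond, x, y))

    c = numpy.array([[1, 0], [0, 1]], dtype=theano.config.floatX)
    a = numpy.ones((2, 2), dtype=theano.config.floatX)
    b = numpy.zeros((2, 2), dtype=theano.config.floatX)
    # The compiled graph agrees with NumPy's where.
    assert numpy.allclose(f(c, a, b), numpy.where(c, a, b))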
@@ -39,7 +39,7 @@ from theano.tensor import (_shared, wvector, bvector, autocast_float_as,
     tile, patternbroadcast, Eye, Shape, Default, Dot, PermuteRowElements,
     ScalarFromTensor, TensorFromScalar, dtensor4, Rebroadcast, Alloc,
     dtensor3, SpecifyShape, Mean, IncSubtensor, AdvancedIncSubtensor1,
-    itensor3, Tile, AdvancedIncSubtensor)
+    itensor3, Tile, AdvancedIncSubtensor, switch)
 from theano.tests import unittest_tools as utt
 from theano.printing import debugprint
@@ -618,6 +618,36 @@ SubInplaceTester = makeBroadcastTester(op=inplace.sub_inplace,
                                        grad = _grad_broadcast_binary_normal,
                                        inplace = True)
+SwitchTester = makeBroadcastTester(
+    op=switch,
+    expected=numpy.where,
+    good=dict(all_true=(numpy.asarray(1, dtype=config.floatX),
+                        rand(4, 5), rand(4, 5)),
+              false_true=(numpy.asarray(0, dtype=config.floatX),
+                          rand(4, 5), rand(4, 5)),
+              mixed=(randint_ranged(0, 1, (4, 5)),
+                     rand(4, 5), rand(4, 5))
+              ),
+    bad_build=dict(all_true=(numpy.asarray(1, dtype=config.floatX),
+                             rand(4, 5))),
+    bad_runtime=dict(all_true=(numpy.asarray(1, dtype=config.floatX),
+                               rand(3, 5), rand(4, 5)),
+                     false_true=(numpy.asarray(0, dtype=config.floatX),
+                                 rand(4, 6), rand(4, 5)),
+                     ),
+    # We suppose that cond+eps do not switch branch in switch.grad()
+    # So we can't call verify_grad with cond 0.
+    grad=dict(all_true=(numpy.asarray(1, dtype=config.floatX),
+                        rand(4, 5), rand(4, 5)),
+              # false_true=(numpy.asarray(0, dtype=config.floatX),
+              #             rand(4, 5), rand(4, 5)),
+              # mixed=(randint_ranged(0, 1, (4, 5)).astype(config.floatX),
+              #        rand(4, 5), rand(4, 5))
+              ),
+    )
 MaximumTester = makeBroadcastTester(op=maximum,
                                     expected = lambda *inputs: check_floatX(inputs, numpy.maximum(*inputs)),
                                     good = _good_broadcast_binary_normal,
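The commented-out grad cases follow directly from the gradient just defined for Switch: the symbolic gradient with respect to cond is zero, but verify_grad estimates gradients by finite differences, and a cond of 0 perturbed by eps jumps to the other branch, producing a huge numeric "gradient" that can never match the symbolic zero. A plain-NumPy sketch of the mismatch (the switch helper below only mimics the op's semantics, it is not Theano's implementation):

    import numpy

    def switch(cond, ift, iff):
        # Reference semantics of the op under test.
        return numpy.where(cond, ift, iff)

    eps = 1e-4
    cond, ift, iff = 0.0, 5.0, 3.0
    # Finite-difference estimate of d(switch)/d(cond).
    numeric = (switch(cond + eps, ift, iff) - switch(cond, ift, iff)) / eps
    print(numeric)  # 20000.0: the eps perturbation flips the branch from iff to ift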
@@ -242,7 +242,8 @@ class TestRepeatOp(utt.InferShapeTester):
     def test_infer_shape(self):
         for ndim in range(4):
             x = T.TensorType(config.floatX, [False] * ndim)()
-            a = np.random.random((10, ) * ndim).astype(config.floatX)
+            shp = (numpy.arange(ndim) + 1) * 5
+            a = np.random.random(shp).astype(config.floatX)
             for axis in self._possible_axis(ndim):
                 for dtype in tensor.discrete_dtypes:
@@ -261,6 +262,9 @@
                     if axis is None:
                         r = np.random.random_integers(
                             5, size=a.size).astype(dtype)
+                    elif a.size > 0:
+                        r = np.random.random_integers(
+                            5, size=a.shape[axis]).astype(dtype)
                     else:
                         r = np.random.random_integers(
                             5, size=(10,)).astype(dtype)
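For reference, the new shape formula gives every dimension a distinct extent, so a repeats vector built for the wrong axis no longer has a compatible length by accident, and the new elif branch can size it to the chosen axis. A quick plain-NumPy illustration of the shapes the formula generates:

    import numpy as np

    for ndim in range(4):
        shp = (np.arange(ndim) + 1) * 5
        print(ndim, tuple(int(s) for s in shp))
    # 0 ()
    # 1 (5,)
    # 2 (5, 10)
    # 3 (5, 10, 15)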