merge.

6287e9f6 · Pascal Lamblin · 56d03140 · a5eb5b3c · 6287e9f6 · 6287e9f6
--- a/theano/tensor/basic.py
+++ b/theano/tensor/basic.py
@@ -3209,8 +3209,8 @@ class AdvancedIncSubtensor(Op):
                            broadcastable = x.type.broadcastable)])
            raise NotImplementedError('Advanced indexing increment of x (of dimension %i) by y (of dimension %i) with these argument dimensions (%s) not supported yet'\
                    % (x.ndim, y.ndim, ','.join(str(input.ndim) for input in inputs)))
-        raise NotImplementedError('Advanced indexing increment of x by y with arguments (%s) not supported yet'\
+        raise NotImplementedError('Advanced indexing increment of x (of dim %i) by y (of dim %i) with arguments (%s) not supported yet'\
-                % ','.join(str(input) for input in inputs))
+                % (x.ndim, y.ndim, ','.join(str(input) for input in inputs)))
    def perform(self, node, inputs, (out,)):
        # TODO: same thing as in AdvancedSubtensor's perform TODO
@@ -3590,7 +3590,7 @@ class numeric_grad:
 def verify_grad(op, pt, n_tests=2, rng=None, eps=None, tol=None, mode=None, cast_to_output_type=False):
    """ WRITEME
    Raises an Exception if the difference between the analytic gradient and
    numerical gradient (computed through the Finite Difference Method) exceeds
    the given tolerance.
@@ -3607,7 +3607,7 @@ def verify_grad(op, pt, n_tests=2, rng=None, eps=None, tol=None, mode=None, cast
           try to make it a SMALL graph.  Often verify grad is run in
           debug mode, which can be very slow if it has to verify a lot
           of intermediate computations.
    """
    pt = [numpy.array(p) for p in pt]
@@ -3619,9 +3619,8 @@ def verify_grad(op, pt, n_tests=2, rng=None, eps=None, tol=None, mode=None, cast
        tol = __builtin__.max(_type_tol[str(p.dtype)] for p in pt)
    if rng is None:
-        rng = numpy.random
+        raise TypeError('rng should be a valid instance of numpy.random.RandomState.',
-        from theano import tests as theano_tests # TODO This is an ugly import. Fix?
+                'You may want to use theano.tests.unittest_tools.verify_grad instead of theano.tensor.verify_grad.')
-        theano_tests.unittest_tools.seed_rng()
    def function(inputs, output):
        if mode is None:
@@ -3633,9 +3632,9 @@ def verify_grad(op, pt, n_tests=2, rng=None, eps=None, tol=None, mode=None, cast
    for test_num in xrange(n_tests):
        tensor_pt = [value(p.copy(), name='input %i'%i) for i,p in enumerate(pt)]
        #op can be either a function or an actual Op instance
-        o_output = op(*tensor_pt) 
+        o_output = op(*tensor_pt)
        if isinstance(o_output,list) > 1:
            raise NotImplementedError('cant (yet) autotest gradient of op with multiple outputs')

--- a/theano/tensor/nnet/nnet.py
+++ b/theano/tensor/nnet/nnet.py
@@ -708,9 +708,14 @@ class CrossentropySoftmax1HotWithBiasDx (gof.Op):
        # advanced indexing is not working yet. When it works, do it to avoid
        # potentially misleading behavior in gradient computations! (although
        # typically we should not need the gradient w.r.t. dy).
-        # y_idx_range = tensor.arange(y_idx.shape[0])
+        y_idx_range = tensor.arange(y_idx.shape[0])
-        # return [g_dx * tensor.AdvancedIncSubtensor((y_idx_range, y_idx))(sm, -1, y_idx_range, y_idx), dy.dimshuffle(0, 'x') * g_dx, None]
+        g_dy = tensor.sum(
-        return [None, dy.dimshuffle(0, 'x') * g_dx, None]
+                g_dx * tensor.AdvancedIncSubtensor((y_idx_range, y_idx))(
+                    sm, tensor.fill(dy, -1), y_idx_range, y_idx),
+                axis=1)
+        g_sm = dy.dimshuffle(0, 'x') * g_dx
+        g_y_idx = None
+        return [g_dy, g_sm, g_y_idx]
    def c_code_cache_version(self):
        return (2,)
    def c_code(self, node, name, (dnll, sm, y_idx), (dx,), sub):

--- a/theano/tensor/nnet/tests/test_nnet.py
+++ b/theano/tensor/nnet/tests/test_nnet.py
@@ -95,6 +95,16 @@ class T_CrossentropySoftmax1HotWithBiasDx(unittest.TestCase):
        softmax_output = numpy.random.rand(10, 5)
        softmax_output /= softmax_output.sum(axis=1).reshape(10, 1)
        utt.verify_grad(f, [softmax_output])
+    def test1(self):
+        rng = numpy.random.RandomState(utt.fetch_seed())
+        softmax_output = rng.rand(10, 5)
+        softmax_output /= softmax_output.sum(axis=1).reshape(10, 1)
+        def f(dy):
+            return (theano.tensor.nnet.crossentropy_softmax_1hot_with_bias_dx(
+                dy,
+                softmax_output,
+                rng.randint(low=0, high=5, size=10)))
+        utt.verify_grad(f, [rng.rand(10)])
 class T_CrossentropySoftmaxArgmax1HotWithBias(unittest.TestCase):
    def setUp(self):