merge

6b16e20c · James Bergstra · f9dd5a84 · a9792022 · 6b16e20c · 6b16e20c
--- a/theano/misc/latence_gpu_transfert.py
+++ b/theano/misc/latence_gpu_transfert.py
+"""Time calls that move float32 vectors in and out of a Theano shared variable.
+
+For each vector size, one call to ``f1`` stores a host vector into the
+shared variable ``x`` and one call to ``f2`` reads ``x`` back through
+``host_from_gpu``. Both wall-clock durations are printed in microseconds.
+
+NOTE(review): ``x`` presumably lives on the GPU only when Theano is
+configured to use one -- confirm the intended configuration before
+interpreting the numbers as transfer latencies.
+"""
+import time
+
+import numpy
+
+import theano
+
+y = theano.tensor.fvector()
+x = theano.shared(numpy.zeros(1, dtype='float32'))
+# f1 has no output: its only effect is to replace the content of x with y.
+f1 = theano.function([y], updates={x: y})
+# f2 takes no input and returns x passed through host_from_gpu.
+f2 = theano.function([], theano.sandbox.cuda.host_from_gpu(x))
+# Print the compiled graphs so the ops actually being timed are visible.
+print f1.maker.env.toposort()
+print f2.maker.env.toposort()
+for i in [1, 10, 100, 1000, 10000, 100000, 1000000, 10000000]:
+    o = numpy.zeros(i, dtype='float32')
+
+    t0 = time.time()
+    f1(o)
+    tf1 = time.time() - t0
+
+    t0 = time.time()
+    f2()
+    tf2 = time.time() - t0
+
+    # time.time() is in seconds, so scaling by 1e6 yields microseconds;
+    # the label used to say "ns", which was off by a factor of 1000.
+    print "%8i %6.1f us %7.1f us" % (i, tf1 * 1e6, tf2 * 1e6)
--- a/theano/sandbox/rng_mrg.py
+++ b/theano/sandbox/rng_mrg.py
@@ -815,11 +815,11 @@ class MRG_RandomStreams(object):
        else:
            final_samples = normal_samples[:prod(size)]

-        final_samples = avg + std * final_samples
-
        if size:
            final_samples = final_samples.reshape(size)

+        final_samples = avg + std * final_samples
+
        return final_samples

 @local_optimizer([None])

--- a/theano/sandbox/test_rng_mrg.py
+++ b/theano/sandbox/test_rng_mrg.py
@@ -294,21 +294,29 @@ def basictest(f, steps, sample_size, prefix="", allow_01=False, inputs=[],
        ival = numpy.asarray(ival)
        if i == 0:
            mean = numpy.array(ival, copy=True)
-            avg_std = numpy.std(ival)
+            #avg_std = numpy.std(ival)
+            avg_std = numpy.sqrt(numpy.mean((ival - target_avg)**2))
            min_ = ival.min()
            max_ = ival.max()
        else:
            alpha = 1.0 / (1+i)
            mean = alpha * ival + (1-alpha)*mean
-            avg_std = alpha * numpy.std(ival) + (1-alpha)*avg_std
+            #avg_std = alpha * numpy.std(ival) + (1-alpha)*avg_std
+            avg_std = alpha * numpy.sqrt(numpy.mean((ival - target_avg)**2)) + (1-alpha)*avg_std
            min_ = min(min_,ival.min())
            max_ = max(max_,ival.max())
        if not allow_01:
            assert min_ > 0
            assert max_ < 1

-    print prefix, 'mean', numpy.mean(mean)
-    assert abs(numpy.mean(mean) - target_avg) < mean_rtol, 'bad mean? %f %f'%(numpy.mean(mean), target_avg)
+    if hasattr(target_avg, 'shape'):  # array target: compare element-wise, then average the gaps
+        diff = numpy.mean(abs(mean - target_avg))
+        print prefix, 'mean diff with mean', diff
+        assert diff < mean_rtol, 'bad mean? %s %s' % (mean, target_avg)  # %s: both are arrays here, %f would raise TypeError
+    else:  # scalar target: averaging `mean` over all elements gives a more precise estimate
+        mean = numpy.mean(mean)
+        print prefix, 'mean', mean
+        assert abs(mean - target_avg) < mean_rtol, 'bad mean? %f %f' % (mean, target_avg)
    print prefix, 'std', avg_std
    if target_std is not None:
        assert abs(avg_std - target_std) < .01, 'bad std? %f %f'%(avg_std, target_std)
@@ -450,30 +458,32 @@ def test_binomial():
 def test_normal0():

    steps = 50
+    std = 2.
    if mode in ['DEBUG_MODE','DebugMode','FAST_COMPILE']:
        sample_size = (25,30)
-        rtol=.02
+        default_rtol=.02
    else:
        sample_size = (999,50)
-        rtol=.01
+        default_rtol=.01
    sample_size_odd = (sample_size[0],sample_size[1]-1)
    x = tensor.matrix()
-    for size, const_size, var_input, input in [
-            (sample_size, sample_size, [], []),
-            (x.shape, sample_size, [x], [numpy.zeros(sample_size, dtype=config.floatX)]),
-            (sample_size_odd, sample_size_odd, [], []),#test odd value
-            (x.shape, sample_size_odd, [x], [numpy.zeros(sample_size_odd, dtype=config.floatX)]),#test odd value
+    for size, const_size, var_input, input, avg, rtol in [
+            (sample_size, sample_size, [], [], -5., default_rtol),
+            (x.shape, sample_size, [x], [numpy.zeros(sample_size, dtype=config.floatX)], -5., default_rtol),
+            (sample_size_odd, sample_size_odd, [], [], -5., default_rtol),#test odd value
+            (x.shape, sample_size_odd, [x], [numpy.zeros(sample_size_odd, dtype=config.floatX)], -5., default_rtol),#test odd value
+            (sample_size, sample_size, [], [], numpy.arange(numpy.prod(sample_size), dtype='float32').reshape(sample_size), 10.*std/numpy.sqrt(steps)),
            ]:
        print ''
        print 'ON CPU:'

        R = MRG_RandomStreams(234, use_cuda=False)
-        n = R.normal(size=size, avg=-5.0, std=2.0)
+        n = R.normal(size=size, avg=avg, std=std)
        f = theano.function(var_input, n, mode=mode)
        theano.printing.debugprint(f)
        out  = f(*input)
        print 'random?[:10]\n', out[0,0:10]
-        basictest(f, steps, const_size, target_avg=-5.0, target_std=2.0, prefix='mrg ', allow_01=True, inputs=input, mean_rtol=rtol)
+        basictest(f, steps, const_size, target_avg=avg, target_std=std, prefix='mrg ', allow_01=True, inputs=input, mean_rtol=rtol)

        sys.stdout.flush()

@@ -481,7 +491,7 @@ def test_normal0():
            print ''
            print 'ON GPU:'
            R = MRG_RandomStreams(234, use_cuda=True)
-            n = R.normal(size=size, avg=-5.0, std=2.0, dtype='float32')
+            n = R.normal(size=size, avg=avg, std=std, dtype='float32')
            assert n.dtype == 'float32' #well, it's really that this test w GPU doesn't make sense otw
            f = theano.function(var_input, theano.Out(
                theano.sandbox.cuda.basic_ops.gpu_from_host(n),
@@ -493,7 +503,7 @@ def test_normal0():
            print 'random?[:10]\n', gpu_out[0,0:10]
            print '----'
            sys.stdout.flush()
-            basictest(f, steps, const_size, target_avg=-5.0, target_std=2.0, prefix='gpu mrg ', allow_01=True, inputs=input, mean_rtol=rtol)
+            basictest(f, steps, const_size, target_avg=avg, target_std=std, prefix='gpu mrg ', allow_01=True, inputs=input, mean_rtol=rtol)
            # Need to allow some rounding error as their is float
            # computation that are done on the gpu vs cpu
            assert numpy.allclose(out, gpu_out, rtol=5e-6, atol=5e-6)
@@ -503,10 +513,10 @@ def test_normal0():
        print 'ON CPU w NUMPY:'
        RR = theano.tensor.shared_randomstreams.RandomStreams(234)

-        nn = RR.normal(size=size, avg=-5.0, std=2.0)
+        nn = RR.normal(size=size, avg=avg, std=std)
        ff = theano.function(var_input, nn)

-        basictest(ff, steps, const_size, target_avg=-5.0, target_std=2.0, prefix='numpy ', allow_01=True, inputs=input, mean_rtol=rtol)
+        basictest(ff, steps, const_size, target_avg=avg, target_std=std, prefix='numpy ', allow_01=True, inputs=input, mean_rtol=rtol)

 def basic_multinomialtest(f, steps, sample_size, target_pvals, prefix="", mean_rtol=0.04):


--- a/theano/tensor/basic.py
+++ b/theano/tensor/basic.py
@@ -4676,7 +4676,8 @@ outer = Outer()
 # Gradient
 #########################

-def grad(cost, wrt, g_cost=None, consider_constant=[], warn_type=False):
+def grad(cost, wrt, g_cost=None, consider_constant=[], warn_type=False,
+         assume_continuously_differentiable = False):
    """
    :type cost: Scalar (0-dimensional) `Variable`
    :type wrt: `Variable` or list of `Variable`s.
@@ -4688,6 +4689,14 @@ def grad(cost, wrt, g_cost=None, consider_constant=[], warn_type=False):
    :param warn_type: a value of True will cause warnings to be logged for any Op that emits a
        gradient that does not match its input type.

+    :param assume_continuously_differentiable: controls what happens when an
+        element of ``wrt`` has no gradient, either because some op does not
+        know how to compute the gradient with respect to it or because it is
+        not part of the computational graph of the cost. If False (the
+        default), ``grad`` is strict and raises a ``ValueError``. If True,
+        ``grad`` returns zeros like that element instead (i.e. it makes the
+        assumption that the gradient should be 0).
+
    :rtype: `Variable` or list of `Variable`s (depending upon `wrt`)

    :return: symbolic expression of gradient of `cost` with respect to `wrt`.
@@ -4729,12 +4738,13 @@ def grad(cost, wrt, g_cost=None, consider_constant=[], warn_type=False):
        wrt = [wrt]
    ret = []
    for p in wrt:
-        if p not in gmap:
+        if p not in gmap and not assume_continuously_differentiable:
            raise ValueError(("grad method was asked to compute the graident "
                             "with respect to a variable that is not part of "
-                             "the computational graph of the cost"),p)
+                             "the computational graph of the cost or is used "
+                             "by a non-differentiable operator "),p)
        else:
-            ret.append(gmap[p])
+            ret.append(gmap.get(p, zeros_like(p)))

    if len(ret) == 1:
        return ret[0]
@@ -5008,7 +5018,8 @@ def verify_grad(fun, pt, n_tests=2, rng=None, eps=None, abs_tol=None, rel_tol=No
    if cast_to_output_type:
        g_cost = cast(g_cost, o_output.dtype)

-    symbolic_grad = grad(cost, tensor_pt, g_cost)
+    symbolic_grad = grad(cost, tensor_pt, g_cost,
+                         assume_continuously_differentiable = True)
    #if o_output.dtype in ['float32','float64']:
    #    assert all([x.dtype == o_output.dtype for x in symbolic_grad]),("Expected grad of type %s, got %s "%( symbolic_grad.dtype, o_output.dtyp))


--- a/theano/tensor/tests/test_basic.py
+++ b/theano/tensor/tests/test_basic.py
@@ -3234,7 +3234,8 @@ class test_grad(unittest.TestCase):
        """grad: Test returning a single zero value from grad"""
        o = test_grad.O()
        a1 = o.make_node()
-        g = grad(a1.outputs[0], a1.outputs[1])
+        g = grad(a1.outputs[0], a1.outputs[1],
+                 assume_continuously_differentiable = True)
        self.assertTrue(g.owner.op == fill)
        self.assertTrue(g.owner.inputs[1].data == 0)
        try:
@@ -3247,7 +3248,8 @@ class test_grad(unittest.TestCase):
        """grad: Test returning some zero value from grad"""
        o = test_grad.O()
        a1 = o.make_node()
-        g0,g1,g2 = grad(a1.outputs[0], a1.inputs + [scalar('z')])
+        g0,g1,g2 = grad(a1.outputs[0], a1.inputs + [scalar('z')],
+                        assume_continuously_differentiable = True)
        self.assertTrue(o.gval0 is g0)
        self.assertTrue(o.gval1 is g1)
        self.assertTrue(g2.owner.op == fill)
@@ -3256,7 +3258,8 @@ class test_grad(unittest.TestCase):
    def test_zero_gradient_shape(self):
        """Ensure that a zero gradient has the proper shape."""
        x = dmatrix()
-        f = theano.function([x], grad(dscalar(), x))
+        f = theano.function([x], grad(dscalar(), x,
+                                      assume_continuously_differentiable= True))
        a = numpy.ones((3, 7))
        self.assertTrue((f(a) == 0).all())  # Zero gradient.
        self.assertTrue(a.shape == f(a).shape)  # With proper shape.