Commit e96ae62e authored by Frédéric Bastien

Merge pull request #1382 from lamblin/fix_gpu_grad

Make GPU Ops accept TensorType output gradients (transfer them to GPU)
...@@ -282,7 +282,8 @@ if cuda_available: ...@@ -282,7 +282,8 @@ if cuda_available:
ftensor3, ftensor4, ftensor3, ftensor4,
scalar, vector, matrix, row, col, scalar, vector, matrix, row, col,
tensor3, tensor4) tensor3, tensor4)
from basic_ops import host_from_gpu, gpu_from_host, as_cuda_array from basic_ops import (host_from_gpu, gpu_from_host,
as_cuda_array, as_cuda_ndarray_variable)
import opt import opt
import cuda_ndarray import cuda_ndarray
from rng_curand import CURAND_RandomStreams from rng_curand import CURAND_RandomStreams
......
...@@ -135,6 +135,7 @@ class GpuFromHost(GpuOp): ...@@ -135,6 +135,7 @@ class GpuFromHost(GpuOp):
def grad(self, inputs, grads): def grad(self, inputs, grads):
gz, = grads gz, = grads
gz = as_cuda_ndarray_variable(gz)
return [host_from_gpu(gz)] return [host_from_gpu(gz)]
def R_op(self, inputs, eval_points): def R_op(self, inputs, eval_points):
...@@ -2921,6 +2922,7 @@ class GpuContiguous(GpuOp): ...@@ -2921,6 +2922,7 @@ class GpuContiguous(GpuOp):
x, = inputs x, = inputs
dout, = dout dout, = dout
dout = as_cuda_ndarray_variable(dout)
return [dout] return [dout]
......
import unittest
import theano
from theano import tensor
from theano.sandbox import cuda
# Skip test if cuda_ndarray is not available.
from nose.plugins.skip import SkipTest
import theano.sandbox.cuda as cuda_ndarray
# Skip the whole module when the optional CUDA backend is not usable.
# Idiom fix: test truthiness directly instead of comparing to False with
# `==` (PEP 8: "Don't compare boolean values to True or False using ==").
if not cuda_ndarray.cuda_available:
    raise SkipTest('Optional package cuda disabled')
class TestGradient(unittest.TestCase):
    """Regression tests for gradients involving GPU transfer Ops."""

    # Set nonzero to dump debugprint output of the graphs under test.
    verbose = 0

    def test_gpu_out_multiple_clients(self):
        """Gradient must work when gpu_from_host feeds more than one Op.

        Regression test: GpuFromHost.grad used to assume its output
        gradient already lived on the GPU, but when several clients
        consume the transferred value, their incoming gradients are
        summed on the CPU first.
        """
        inp = tensor.fmatrix('x')
        on_gpu = cuda.gpu_from_host(inp)
        # Two independent consumers of the same GPU variable.
        sig = tensor.nnet.sigmoid(on_gpu)
        gram = tensor.dot(on_gpu, on_gpu.T)
        cost = sig.sum() + gram.sum()
        grad_wrt_inp = theano.grad(cost, inp)
        if self.verbose:
            theano.printing.debugprint(cost, print_type=True)
            theano.printing.debugprint(grad_wrt_inp, print_type=True)
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论