Commit e96ae62e authored by Frédéric Bastien

Merge pull request #1382 from lamblin/fix_gpu_grad

Make GPU Ops accept TensorType output gradients (transfer them to GPU)
...@@ -282,7 +282,8 @@ if cuda_available: ...@@ -282,7 +282,8 @@ if cuda_available:
ftensor3, ftensor4, ftensor3, ftensor4,
scalar, vector, matrix, row, col, scalar, vector, matrix, row, col,
tensor3, tensor4) tensor3, tensor4)
from basic_ops import host_from_gpu, gpu_from_host, as_cuda_array from basic_ops import (host_from_gpu, gpu_from_host,
as_cuda_array, as_cuda_ndarray_variable)
import opt import opt
import cuda_ndarray import cuda_ndarray
from rng_curand import CURAND_RandomStreams from rng_curand import CURAND_RandomStreams
......
...@@ -135,6 +135,7 @@ class GpuFromHost(GpuOp): ...@@ -135,6 +135,7 @@ class GpuFromHost(GpuOp):
def grad(self, inputs, grads): def grad(self, inputs, grads):
gz, = grads gz, = grads
gz = as_cuda_ndarray_variable(gz)
return [host_from_gpu(gz)] return [host_from_gpu(gz)]
def R_op(self, inputs, eval_points): def R_op(self, inputs, eval_points):
...@@ -2921,6 +2922,7 @@ class GpuContiguous(GpuOp): ...@@ -2921,6 +2922,7 @@ class GpuContiguous(GpuOp):
x, = inputs x, = inputs
dout, = dout dout, = dout
dout = as_cuda_ndarray_variable(dout)
return [dout] return [dout]
......
import unittest
import theano
from theano import tensor
from theano.sandbox import cuda
# Skip test if cuda_ndarray is not available.
from nose.plugins.skip import SkipTest
import theano.sandbox.cuda as cuda_ndarray
# Skip the whole module when the optional CUDA backend is not usable.
# Idiom fix: test truthiness directly instead of comparing to False with
# `==` (PEP 8: "Don't compare boolean values to True or False using ==").
if not cuda_ndarray.cuda_available:
    raise SkipTest('Optional package cuda disabled')
class TestGradient(unittest.TestCase):
    """Regression tests for gradients involving GPU transfer Ops."""

    # Set nonzero to dump debugprint output of the graphs under test.
    verbose = 0

    def test_gpu_out_multiple_clients(self):
        """Gradient must work when gpu_from_host feeds more than one Op.

        Regression test: GpuFromHost.grad used to assume its output
        gradient already lived on the GPU, but when several clients
        consume the transferred value, their incoming gradients are
        summed on the CPU first.
        """
        inp = tensor.fmatrix('x')
        on_gpu = cuda.gpu_from_host(inp)
        # Two independent consumers of the same GPU variable.
        sig = tensor.nnet.sigmoid(on_gpu)
        gram = tensor.dot(on_gpu, on_gpu.T)
        cost = sig.sum() + gram.sum()
        grad_wrt_inp = theano.grad(cost, inp)
        if self.verbose:
            theano.printing.debugprint(cost, print_type=True)
            theano.printing.debugprint(grad_wrt_inp, print_type=True)
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论