提交 df0292e6，作者：abergeron

Merge pull request #4600 from nouiz/gpu_to_gpu

Fix GpuToGpu
......@@ -529,15 +529,22 @@ class GpuToGpu(Op):
def c_code(self, node, name, inputs, outputs, sub):
    """Generate the C code for a GPU-to-GPU transfer.

    Instead of a single cross-context ``pygpu_transfer`` call, first
    allocate an empty array on the destination context (preserving the
    input's order: C-contiguous stays C order, otherwise F order), then
    copy the data into it.  This two-step form is what fixes GpuToGpu
    for non-default contexts / strided inputs.

    Parameters follow the Theano ``Op.c_code`` convention:
    ``inputs``/``outputs`` are C variable names, ``sub['params']`` is
    the destination context, ``sub['fail']`` is the failure snippet.

    Returns the C code fragment as a string.
    """
    return """
    Py_XDECREF(%(out)s);
    %(out)s = pygpu_empty(%(inp)s->ga.nd,
                          %(inp)s->ga.dimensions,
                          %(inp)s->ga.typecode,
                          GpuArray_IS_C_CONTIGUOUS(&(%(inp)s->ga)) ? GA_C_ORDER : GA_F_ORDER,
                          %(ctx)s, Py_None);
    if (%(out)s == NULL) {
        %(fail)s
    }
    /* pygpu_transfer returns non-zero on failure. */
    if (pygpu_transfer(%(out)s, %(inp)s)) {
        %(fail)s
    }
    """ % {'inp': inputs[0], 'ctx': sub['params'],
           'out': outputs[0], 'fail': sub['fail']}
def c_code_cache_version(self):
    """Return the cache version of the generated C code.

    Bumped from (0,) to (1,) because ``c_code`` changed (two-step
    allocate-then-transfer instead of a single ``pygpu_transfer``
    call); the stale compiled module must not be reused.
    """
    return (1,)
class GpuAlloc(HideC, Alloc):
......
......@@ -18,7 +18,7 @@ from theano.tests import unittest_tools as utt
from ..type import (GpuArrayType, get_context,
gpuarray_shared_constructor)
from ..basic_ops import (
host_from_gpu, HostFromGpu, GpuFromHost, GpuReshape,
host_from_gpu, HostFromGpu, GpuFromHost, GpuReshape, GpuToGpu,
GpuAlloc, GpuAllocEmpty, GpuContiguous,
gpu_join, GpuJoin, GpuSplit, GpuEye, gpu_contiguous)
from ..subtensor import GpuSubtensor
......@@ -182,6 +182,21 @@ def test_transfer_cpu_gpu():
assert numpy.all(fv == av)
def test_transfer_gpu_gpu():
    """Check that GpuToGpu survives compilation and copies values intact."""
    # Symbolic float32 matrix on the test context.
    g = GpuArrayType(dtype='float32', broadcastable=(False, False),
                     context_name=test_ctx_name)()
    av = numpy.asarray(rng.rand(5, 4), dtype='float32')
    gv = gpuarray.array(av, context=get_context(test_ctx_name))
    # Exclude the transfer-cutting optimizations so the GpuToGpu node
    # is not optimized out of the graph.
    mode = mode_with_gpu.excluding('cut_gpua_host_transfers',
                                   'local_cut_gpua_host_gpua')
    f = theano.function([g], GpuToGpu(test_ctx_name)(g), mode=mode)
    topo = f.maker.fgraph.toposort()
    # The compiled graph must consist of exactly the GpuToGpu op.
    assert len(topo) == 1
    assert isinstance(topo[0].op, GpuToGpu)
    fv = f(gv)
    assert GpuArrayType.values_eq(fv, gv)
def test_transfer_strided():
# This is just to ensure that it works in theano
# libgpuarray has a much more comprehensive suit of tests to
......
Markdown 格式
0%
您将 0 人添加到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论