提交 1296be25 authored 作者: Frederic's avatar Frederic

Do the same on the new gpu back-end

上级 4f06e78d
...@@ -20,7 +20,8 @@ from theano.gof.python25 import all, any ...@@ -20,7 +20,8 @@ from theano.gof.python25 import all, any
from theano.tensor.nnet.conv import ConvOp from theano.tensor.nnet.conv import ConvOp
from theano.sandbox.gpuarray.type import GpuArrayType from theano.sandbox.gpuarray.type import GpuArrayType
from theano.sandbox.gpuarray.basic_ops import ( from theano.sandbox.gpuarray.basic_ops import (
host_from_gpu, gpu_from_host, HostFromGpu, GpuSplit, host_from_gpu, gpu_from_host, HostFromGpu, GpuFromHost,
GpuSplit,
gpu_alloc, GpuAlloc, GpuReshape, GpuEye, gpu_join, GpuJoin, gpu_alloc, GpuAlloc, GpuReshape, GpuEye, gpu_join, GpuJoin,
) )
from theano.sandbox.gpuarray.blas import gpu_dot22, GpuGemv, GpuGemm, GpuGer from theano.sandbox.gpuarray.blas import gpu_dot22, GpuGemv, GpuGemm, GpuGer
...@@ -342,6 +343,21 @@ def local_gpua_split(node): ...@@ -342,6 +343,21 @@ def local_gpua_split(node):
@register_opt('fast_compile')
@op_lifter([tensor.Subtensor])
def local_gpua_subtensor(node):
    """Choose between a host ``Subtensor`` and a ``GpuSubtensor`` for ``node``.

    The default result is ``GpuSubtensor(node.op.idx_list)``.  A special
    case applies when the indexed variable is produced by ``HostFromGpu``
    applied to the result of ``GpuFromHost`` applied to a variable with no
    owner (a shared variable or a graph input), and that host variable has
    exactly one client:

    * if any client of the subtensor output is ``'output'`` or has an
      input/output whose type is a ``GpuArrayType``, return ``None``;
    * otherwise return a one-element list holding the subtensor output
      moved to the GPU and back to the host.

    NOTE(review): how ``op_lifter`` treats the ``None`` and list return
    values is not visible in this block -- confirm against its definition.
    """
    src = node.inputs[0]
    if src.owner and isinstance(src.owner.op, HostFromGpu):
        on_gpu = src.owner.inputs[0]
        fed_from_host = (on_gpu.owner and
                         isinstance(on_gpu.owner.op, GpuFromHost))
        # ``not ...owner`` means the transferred variable is a shared
        # variable or an input of the graph.
        if (fed_from_host and
                not on_gpu.owner.inputs[0].owner and
                len(src.clients) == 1):

            def touches_gpu(client):
                # 'output' must be tested first: the string has no
                # ``inputs``/``outputs`` attributes.
                if client == 'output':
                    return True
                return any([isinstance(var.type, GpuArrayType)
                            for var in client.inputs + client.outputs])

            if any([touches_gpu(client)
                    for client, _ in node.outputs[0].clients]):
                return
            return [host_from_gpu(gpu_from_host(node.outputs[0]))]

    return GpuSubtensor(node.op.idx_list)
......
...@@ -10,6 +10,7 @@ from theano.sandbox.gpuarray.basic_ops import ( ...@@ -10,6 +10,7 @@ from theano.sandbox.gpuarray.basic_ops import (
GpuAlloc, GpuReshape, gpu_alloc, gpu_from_host, host_from_gpu) GpuAlloc, GpuReshape, gpu_alloc, gpu_from_host, host_from_gpu)
from theano.sandbox.gpuarray.elemwise import ( from theano.sandbox.gpuarray.elemwise import (
GpuCAReduceCuda, GpuCAReduceCPY, GpuElemwise) GpuCAReduceCuda, GpuCAReduceCPY, GpuElemwise)
from theano.sandbox.gpuarray.subtensor import GpuSubtensor
from theano.sandbox.gpuarray.tests.test_basic_ops import ( from theano.sandbox.gpuarray.tests.test_basic_ops import (
rand_gpuarray, mode_with_gpu, mode_without_gpu rand_gpuarray, mode_with_gpu, mode_without_gpu
) )
...@@ -164,3 +165,44 @@ def test_local_gpu_elemwise_careduce(): ...@@ -164,3 +165,44 @@ def test_local_gpu_elemwise_careduce():
assert len(topo) == 3 assert len(topo) == 3
assert topo[1].op.pre_scalar_op == theano.scalar.sqr assert topo[1].op.pre_scalar_op == theano.scalar.sqr
f(numpy.random.rand(3, 4).astype(theano.config.floatX)) f(numpy.random.rand(3, 4).astype(theano.config.floatX))
def test_local_gpu_subtensor():
    # Compile several small graphs with ``mode_with_gpu`` and check, for
    # each one, whether the compiled graph contains a host ``Subtensor``
    # or a ``GpuSubtensor``.

    def compiled_ops(inputs, outputs):
        # Ops of the compiled function, in toposort order.
        fn = theano.function(inputs, outputs, mode=mode_with_gpu)
        return [apply_node.op for apply_node in fn.maker.fgraph.toposort()]

    def has_host_subtensor(ops):
        # Exact type match on purpose: subclasses must not count.
        return any([type(op) is tensor.Subtensor for op in ops])

    def has_gpu_subtensor(ops):
        return any([isinstance(op, GpuSubtensor) for op in ops])

    # Test shared forced on CPU.
    shared_vec = tensor._shared(numpy.zeros(20, "float32"))
    ops = compiled_ops([], shared_vec[3:4])
    assert has_host_subtensor(ops)
    assert not has_gpu_subtensor(ops)

    # Test graph input.
    mat = tensor.fmatrix()
    ops = compiled_ops([mat], mat[3:4])
    assert has_host_subtensor(ops)
    assert not has_gpu_subtensor(ops)

    # Test multiple use of the input.
    # We want the subtensor to be on the GPU to prevent multiple transfers.
    mat = tensor.fmatrix()
    ops = compiled_ops([mat], [mat[3:4], mat + 1])
    assert not has_host_subtensor(ops)
    assert has_gpu_subtensor(ops)

    # Test multiple use of the input + input as output.
    # We want the subtensor to be on the GPU to prevent multiple transfers.
    mat = tensor.fmatrix()
    ops = compiled_ops([mat], [mat[3:4], mat + 1, mat])
    assert not has_host_subtensor(ops)
    assert has_gpu_subtensor(ops)

    # Test shared forced on CPU and we do computation on the output of
    # the subtensor.
    shared_vec = tensor._shared(numpy.zeros(20, "float32"))
    ops = compiled_ops([], shared_vec[3:4] + 1)
    assert has_host_subtensor(ops)
    assert not has_gpu_subtensor(ops)
    assert any([isinstance(op, GpuElemwise) for op in ops])
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论