提交 fbc384cf authored 作者: Frederic Bastien's avatar Frederic Bastien 提交者: sentient07

Special Alloc and AllocEmpty to move them to the GPU in one pass.

上级 12fa1c5b
......@@ -323,7 +323,20 @@ class GraphToGPU(NavigatorOptimizer):
context_name = i.type.context_name
move_to_GPU = True
break
if (not move_to_GPU and
        isinstance(node.op, (theano.tensor.Alloc,
                             theano.tensor.AllocEmpty))):
    # If the Alloc[Empty] has a client that will be moved
    # to the GPU, we should move the Alloc* to the GPU too.
    # We approximate this by supposing that if we have an
    # optimization registered for one of the client ops,
    # then that client will be moved to the GPU.
    for c, _ in node.outputs[0].clients:
        if (c != 'output' and
                # Default both lookups to [] so a missing key
                # doesn't yield None and raise TypeError on `+`.
                (self.local_optimizers_map.get(c.op, []) +
                 self.local_optimizers_map.get(type(c.op), []))):
            move_to_GPU = True
            # One GPU-bound client is enough; stop scanning.
            break
new_ops = None
outputs = []
# Apply the lifter
......
......@@ -138,11 +138,21 @@ def test_local_gpualloc_memset_0():
ones = numpy.ones((2,), dtype='float32')
# Test with 0 from CPU op.
# Should not be transferred as the only client is the output
a = tensor.alloc(z, i)
f = theano.function([i], a, mode=mode_with_gpu)
topo = f.maker.fgraph.toposort()
assert len(topo) == 2
assert isinstance(topo[0].op, GpuAlloc) and topo[0].op.memset_0
assert len(topo) == 1
assert isinstance(topo[0].op, theano.tensor.Alloc)
assert (numpy.asarray(f(6)) == 0).all()
# Test with 0 from CPU op.
# Should be transferred as it is used by another op.
a = tensor.alloc(z, i)
f = theano.function([i], a.cumsum(), mode=mode_with_gpu)
topo = f.maker.fgraph.toposort()
assert len(topo) == 3
assert isinstance(topo[0].op, GpuAlloc)
assert (numpy.asarray(f(6)) == 0).all()
# Test with 0
......@@ -177,19 +187,30 @@ def test_local_gpualloc_empty():
ii = theano.tensor.iscalar()
# Test with vector
# Should not be moved as the only client is the output
a = tensor.AllocEmpty('float32')(i)
f = theano.function([i], a, mode=mode_with_gpu)
topo = f.maker.fgraph.toposort()
assert len(topo) == 2
assert len(topo) == 1
assert isinstance(topo[0].op, theano.tensor.AllocEmpty)
# This returns uninitialized data, so we can only check the shape
assert f(3).shape == (3,)
# Test with vector
# Should be moved
a = tensor.AllocEmpty('float32')(i)
f = theano.function([i], a.cumsum(), mode=mode_with_gpu)
topo = f.maker.fgraph.toposort()
assert len(topo) == 3
assert isinstance(topo[0].op, GpuAllocEmpty)
# This returns uninitialized data, so we can only check the shape
assert f(3).shape == (3,)
# Test with matrix
a = tensor.AllocEmpty('float32')(i, ii)
f = theano.function([i, ii], a, mode=mode_with_gpu)
f = theano.function([i, ii], a.cumsum(axis=0), mode=mode_with_gpu)
topo = f.maker.fgraph.toposort()
assert len(topo) == 2
assert len(topo) == 3
assert isinstance(topo[0].op, GpuAllocEmpty)
# This returns uninitialized data, so we can only check the shape
assert f(3, 4).shape == (3, 4)
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论