提交 fbc384cf authored 作者: Frederic Bastien's avatar Frederic Bastien 提交者: sentient07

Special Alloc and AllocEmpty to move them to the GPU in one pass.

上级 12fa1c5b
...@@ -323,7 +323,20 @@ class GraphToGPU(NavigatorOptimizer): ...@@ -323,7 +323,20 @@ class GraphToGPU(NavigatorOptimizer):
context_name = i.type.context_name context_name = i.type.context_name
move_to_GPU = True move_to_GPU = True
break break
if (not move_to_GPU and
    isinstance(node.op, (theano.tensor.Alloc,
                         theano.tensor.AllocEmpty))):
    # If the Alloc[Empty] have a client that will be moved
    # to the GPU, we should move the Alloc* on the GPU.
    # We approximate this by supposing that if we have an
    # optimization for one of the clients op, then we will
    # move the client to the GPU.
    for c, _ in node.outputs[0].clients:
        # Both lookups must default to [] — without the default,
        # a client op type missing from local_optimizers_map makes
        # .get() return None and `list + None` raises TypeError.
        if (c != 'output' and
            (self.local_optimizers_map.get(c.op, []) +
             self.local_optimizers_map.get(type(c.op), []))):
            move_to_GPU = True
new_ops = None new_ops = None
outputs = [] outputs = []
# Apply the lifter # Apply the lifter
......
...@@ -138,11 +138,21 @@ def test_local_gpualloc_memset_0(): ...@@ -138,11 +138,21 @@ def test_local_gpualloc_memset_0():
ones = numpy.ones((2,), dtype='float32') ones = numpy.ones((2,), dtype='float32')
# Test with 0 from CPU op. # Test with 0 from CPU op.
# Should not be transferred as the only client is the output
a = tensor.alloc(z, i) a = tensor.alloc(z, i)
f = theano.function([i], a, mode=mode_with_gpu) f = theano.function([i], a, mode=mode_with_gpu)
topo = f.maker.fgraph.toposort() topo = f.maker.fgraph.toposort()
assert len(topo) == 2 assert len(topo) == 1
assert isinstance(topo[0].op, GpuAlloc) and topo[0].op.memset_0 assert isinstance(topo[0].op, theano.tensor.Alloc)
assert (numpy.asarray(f(6)) == 0).all()
# Test with 0 from CPU op.
# Should be transferred as it is used by another op.
a = tensor.alloc(z, i)
f = theano.function([i], a.cumsum(), mode=mode_with_gpu)
topo = f.maker.fgraph.toposort()
assert len(topo) == 3
assert isinstance(topo[0].op, GpuAlloc)
assert (numpy.asarray(f(6)) == 0).all() assert (numpy.asarray(f(6)) == 0).all()
# Test with 0 # Test with 0
...@@ -177,19 +187,30 @@ def test_local_gpualloc_empty(): ...@@ -177,19 +187,30 @@ def test_local_gpualloc_empty():
ii = theano.tensor.iscalar() ii = theano.tensor.iscalar()
# Test with vector # Test with vector
# Should not be moved as the only client is the output
a = tensor.AllocEmpty('float32')(i) a = tensor.AllocEmpty('float32')(i)
f = theano.function([i], a, mode=mode_with_gpu) f = theano.function([i], a, mode=mode_with_gpu)
topo = f.maker.fgraph.toposort() topo = f.maker.fgraph.toposort()
assert len(topo) == 2 assert len(topo) == 1
assert isinstance(topo[0].op, theano.tensor.AllocEmpty)
# This returns uninitialized data, so we can only check the shape
assert f(3).shape == (3,)
# Test with vector
# Should be moved
a = tensor.AllocEmpty('float32')(i)
f = theano.function([i], a.cumsum(), mode=mode_with_gpu)
topo = f.maker.fgraph.toposort()
assert len(topo) == 3
assert isinstance(topo[0].op, GpuAllocEmpty) assert isinstance(topo[0].op, GpuAllocEmpty)
# This return not initilized data, so we can only check the shape # This return not initilized data, so we can only check the shape
assert f(3).shape == (3,) assert f(3).shape == (3,)
# Test with matrix # Test with matrix
a = tensor.AllocEmpty('float32')(i, ii) a = tensor.AllocEmpty('float32')(i, ii)
f = theano.function([i, ii], a, mode=mode_with_gpu) f = theano.function([i, ii], a.cumsum(axis=0), mode=mode_with_gpu)
topo = f.maker.fgraph.toposort() topo = f.maker.fgraph.toposort()
assert len(topo) == 2 assert len(topo) == 3
assert isinstance(topo[0].op, GpuAllocEmpty) assert isinstance(topo[0].op, GpuAllocEmpty)
# This return not initilized data, so we can only check the shape # This return not initilized data, so we can only check the shape
assert f(3, 4).shape == (3, 4) assert f(3, 4).shape == (3, 4)
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论