提交 a569f569 authored 作者: Frederic Bastien's avatar Frederic Bastien

make the Print op more gpu friendly.

上级 258dcb6c
...@@ -383,6 +383,16 @@ def local_gpu_rebroadcast(node): ...@@ -383,6 +383,16 @@ def local_gpu_rebroadcast(node):
gpu_x = x.owner.inputs[0] gpu_x = x.owner.inputs[0]
return [host_from_gpu(node.op(gpu_x))] return [host_from_gpu(node.op(gpu_x))]
@register_opt()
@local_optimizer([])
def local_print_op(node):
    """Lift a Print op onto the GPU side of a transfer.

    Rewrites ``Print(host_from_gpu(x))`` into
    ``host_from_gpu(Print(x))`` so that a Print in the middle of an
    otherwise-GPU graph no longer forces the surrounding computation
    back onto the host.
    """
    if not isinstance(node.op, tensor.printing.Print):
        return False
    printed, = node.inputs
    producer = printed.owner
    if producer is not None and producer.op == host_from_gpu:
        gpu_value, = producer.inputs
        # Re-apply the same Print instance to the GPU variable, then
        # transfer the (unchanged) value back to the host.
        return [host_from_gpu(node.op(gpu_value))]
    return False
def cast(x, dtype): def cast(x, dtype):
stype = scal.Scalar(dtype) stype = scal.Scalar(dtype)
cast_op = theano.tensor.Elemwise(scal.Identity(scal.specific_out(stype))) cast_op = theano.tensor.Elemwise(scal.Identity(scal.specific_out(stype)))
......
...@@ -21,8 +21,6 @@ else: ...@@ -21,8 +21,6 @@ else:
mode_with_gpu = theano.compile.mode.get_default_mode().including('gpu') mode_with_gpu = theano.compile.mode.get_default_mode().including('gpu')
mode_without_gpu = theano.compile.mode.get_default_mode().excluding('gpu') mode_without_gpu = theano.compile.mode.get_default_mode().excluding('gpu')
import theano.sandbox.cuda as cuda
def test_no_shared_var_graph(): def test_no_shared_var_graph():
"""Test that the InputToGpuOptimizer optimizer make graph that don't have shared variable compiled too. """Test that the InputToGpuOptimizer optimizer make graph that don't have shared variable compiled too.
...@@ -125,6 +123,20 @@ def test_opt_gpujoin_joinvectors_elemwise_then_minusone(): ...@@ -125,6 +123,20 @@ def test_opt_gpujoin_joinvectors_elemwise_then_minusone():
assert numpy.allclose(numpy.asarray(f()), concat) assert numpy.allclose(numpy.asarray(f()), concat)
def test_print_op():
    """Test that Print ops don't block GPU optimization.

    Builds ``Print(b) * 2`` and checks that the optimizer rewrites it so
    the Print sits between GpuFromHost and GpuElemwise, i.e. the whole
    computation stays on the GPU with the print applied to the GPU value.
    """
    b = tensor.fmatrix()
    f = theano.function([b], theano.printing.Print()(b) * 2,
                        mode=mode_with_gpu)
    # Expected graph shape:
    # [GpuFromHost(b), Print(GpuFromHost.0),
    #  GpuElemwise{mul}(CudaNdarray{[[2.]]}, Print.0),
    #  HostFromGpu(GpuElemwise{mul}.0)]
    topo = f.maker.env.toposort()
    assert topo[0].op == cuda.gpu_from_host
    assert isinstance(topo[1].op, theano.printing.Print)
    assert isinstance(topo[2].op, cuda.GpuElemwise)
    assert topo[3].op == cuda.host_from_gpu
    # fmatrix is float32; numpy.random.random returns float64 and Theano
    # refuses the implicit float64 -> float32 input downcast, so cast
    # explicitly before calling the compiled function.
    f(numpy.random.random((5, 5)).astype('float32'))
def test_elemwise_fusion(): def test_elemwise_fusion():
""" Test the the GpuElemwise fusion work correctly""" """ Test the the GpuElemwise fusion work correctly"""
shape = (3,4) shape = (3,4)
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论