Commit c6d5ee22 authored by Frederic Bastien

optimize Alloc to GpuAlloc more often.

Parent 3c51a2f1
......@@ -598,15 +598,13 @@ else:
@local_optimizer([tensor.Alloc])
def local_gpualloc(node):
    """Move a host-side ``Alloc`` to the GPU as ``GpuAlloc``.

    Returns ``[host_from_gpu(gpu_alloc(*node.inputs))]`` — the same value,
    allocated on the GPU and transferred back so the graph interface is
    unchanged — when one of these holds, otherwise ``None`` (no change):

      1. the value being broadcast already lives on the GPU (its owner is
         ``host_from_gpu``);
      2. every client of the Alloc output is a ``gpu_from_host`` transfer
         (so the result is only ever consumed on the GPU);
      3. every client is a ``join`` whose data inputs are all either on
         the GPU or are allocs themselves, so the whole join can move.
    """
    if node.op == tensor.alloc:
        def replacement():
            # Allocate on the GPU, then transfer back to keep the output
            # a host variable for the rest of the graph.
            return [host_from_gpu(gpu_alloc(*node.inputs))]

        # Case 1: the input to fill with was computed on the GPU.
        if node.inputs[0].owner and node.inputs[0].owner.op == host_from_gpu:
            return replacement()

        clients = node.outputs[0].clients
        # Case 2: all clients immediately move the result to the GPU.
        # NOTE(review): `all` over an empty client list is True — this
        # assumes the optimizer only visits nodes that have clients.
        if all(c != 'output' and c.op == gpu_from_host
               for c, idx in clients):
            return replacement()

        # Case 3: every client is a join whose non-axis inputs (inputs[0]
        # is the join axis) are on the GPU or are allocs, so the join
        # itself is expected to be moved to the GPU.
        if all(c != 'output' and c.op == tensor.join and
               all(i.owner and i.owner.op in (host_from_gpu, tensor.alloc)
                   for i in c.inputs[1:])
               for c, idx in clients):
            return replacement()
......@@ -628,6 +628,20 @@ def test_gpujoin_no_rebroadcast():
assert not any([isinstance(x.op,T.Rebroadcast) for x in l])
def test_gpualloc_input_on_gpu():
    """ones_like of a GPU-shared tensor plus a scalar: the CPU graph keeps
    exactly one Alloc, the GPU graph exactly one GpuAlloc, and both modes
    compute the same values."""
    a_val = numpy.asarray(numpy.random.rand(4, 5), dtype='float32')
    a = tcn.shared_constructor(a_val)
    b = T.fscalar()

    expr = T.ones_like(a) + b
    f = theano.function([b], expr, mode=mode_without_gpu)
    f_gpu = theano.function([b], expr, mode=mode_with_gpu)

    cpu_allocs = [n for n in f.maker.env.toposort() if n.op == T.alloc]
    gpu_allocs = [n for n in f_gpu.maker.env.toposort()
                  if n.op == B.gpu_alloc]
    assert len(cpu_allocs) == 1
    assert len(gpu_allocs) == 1

    assert numpy.allclose(numpy.ones(a.value.shape) + 9, f_gpu(9))
    assert numpy.allclose(f(5), f_gpu(5))
def test_gpujoin_gpualloc():
a = T.fmatrix('a')
a_val = numpy.asarray(numpy.random.rand(4,5),dtype='float32')
......@@ -646,12 +660,24 @@ def test_gpujoin_gpualloc():
assert sum([node.op == B.gpu_join for node in f_gpu2.maker.env.toposort()])==1
assert numpy.allclose(f(a_val,b_val),f_gpu2(a_val,b_val))
#print f.maker.env.toposort()
#print f_gpu.maker.env.toposort()
#print f_gpu2.maker.env.toposort()
#print f(a_val,b_val)
#print f_gpu(a_val,b_val)
#print f_gpu2(a_val,b_val)
def test_gpualloc_output_to_gpu():
    """When the alloc result is explicitly sent to the GPU
    (gpu_from_host around ones_like), the optimizer should still emit a
    single GpuAlloc in the GPU graph, and results must match the CPU run."""
    a_val = numpy.asarray(numpy.random.rand(4, 5), dtype='float32')
    a = tcn.shared_constructor(a_val)
    b = T.fscalar()

    f = theano.function([b], T.ones_like(a) + b, mode=mode_without_gpu)
    f_gpu = theano.function([b], B.gpu_from_host(T.ones_like(a)) + b,
                            mode=mode_with_gpu)

    # Debug output disabled, matching the convention of the other tests
    # in this file (see test_gpujoin_gpualloc).
    #print f.maker.env.toposort()
    #print f_gpu.maker.env.toposort()
    #print f(2)
    #print f_gpu(2)

    assert sum([node.op == T.alloc
                for node in f.maker.env.toposort()]) == 1
    assert sum([node.op == B.gpu_alloc
                for node in f_gpu.maker.env.toposort()]) == 1
    assert numpy.allclose(numpy.ones(a.value.shape) + 9, f_gpu(9))
    assert numpy.allclose(f(5), f_gpu(5))
if __name__ == '__main__':
test_gpujoin_twomatrices_joincolumns()
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论