Commit 170abace authored by fsavard

Added optimization for GpuJoin (one case), plus one unit test for it

Parent commit: b9d6183a
......@@ -465,3 +465,66 @@ def local_gpu_downsample_factor_max_grad(node):
gpu_ds_grad = GpuDownsampleFactorMaxGrad(node.op.ds, node.op.ignore_border)
return [host_from_gpu(gpu_ds_grad(x.owner.inputs[0], gpu_from_host(z), gpu_from_host(gz)))]
from theano.sandbox.cuda.basic_ops import gpu_join
@register_opt()
@local_optimizer([])
def local_gpu_join(node):
    """
    Move a Join onto the GPU when every joined tensor already lives there.

    Inspired by the optimization for conv ops.  In loose notation, the
    subgraphs concerned first look like::

        [array of HostTensor] -> HostToGpu -> GpuToHost
                              -> Join -> HostToGpu -> GpuToHost

    First we apply this optimization::

        join(host_from_gpu) -> host_from_gpu(gpu_join)

    As an intermediate result there should then be::

        host_from_gpu(gpu_join) -> HostToGpu -> GpuToHost

    and the unnecessary GpuToHost -> HostToGpu pair is removed by other
    optimizations, leaving us with::

        host_from_gpu(gpu_join)

    For intermediate places in the graph not covered by the first
    transformation, the following could be useful (not implemented yet)::

        gpu_from_host(join) -> gpu_join(gpu_from_host)

    :param node: an Apply node; only ``tensor.Join`` nodes are rewritten.
    :return: a one-element replacement list, or (implicitly) None when the
        pattern does not match, which leaves the graph unchanged.
    """
    if isinstance(node.op, tensor.Join):
        # optimizing this case:
        # join(host_from_gpu) -> host_from_gpu(gpu_join)
        # node.inputs is [axis, tensor1, tensor2, ...]
        axis_and_tensors = node.inputs
        tensors = axis_and_tensors[1:]
        # Rewrite only if every joined tensor is the output of a transfer
        # from the GPU, i.e. the data already lives on the device.
        # (builtin all() short-circuits; no need to build a list and call
        # numpy.all on it)
        if all(t.owner is not None and t.owner.op == host_from_gpu
               for t in tensors):
            # The extra gpu_from_host transfers introduced here cancel
            # against the existing host_from_gpu nodes and will be removed
            # by further optimizations.
            new_tensors = [gpu_from_host(t) for t in tensors]
            new_a_and_t = [axis_and_tensors[0]] + new_tensors
            return [host_from_gpu(gpu_join(*new_a_and_t))]
......@@ -386,12 +386,13 @@ def test_zeros_basic_3d_tensor():
assert numpy.allclose(numpy.asarray(_a), numpy.zeros((3,4,5)))
def test_zeros_basic_vector():
    """Check that CudaNdarray.zeros builds a zero-filled 1-d array.

    The shape must be passed as a real tuple: ``(300,)``, not ``(300)``,
    which is just the int 300 wrapped in parentheses (the pre-fix form
    left over in the diff is dropped here).
    """
    _a = cuda_ndarray.CudaNdarray.zeros((300,))
    assert numpy.allclose(numpy.asarray(_a), numpy.zeros((300,)))
# Allow running the CudaNdarray tests directly, without a test runner.
if __name__ == '__main__':
    test_zeros_basic_3d_tensor()
    test_zeros_basic_vector()
    test_setitem_matrixvector1()
    test_setitem_matrix_tensor3()
    test_setitem_broadcast_must_fail()
......
......@@ -71,3 +71,29 @@ def test_softmax_with_bias():
xv=numpy.random.rand(7,8)
bv=numpy.random.rand(8)
assert numpy.allclose(f(xv,bv),f2(xv,bv))
def test_opt_gpujoin_onlyajoin():
    """The local_gpu_join optimization should turn a plain Join of two
    GPU-resident shared variables into a GpuJoin followed by a single
    transfer back to the host, and the result must match numpy's
    concatenate."""
    left = numpy.asarray([[1, 2], [3, 4]], dtype='float32')
    right = numpy.asarray([[5, 6, 7], [8, 9, 10]], dtype='float32')
    shared_left = theano.shared(left)
    shared_right = theano.shared(right)
    joined = tensor.join(1, shared_left, shared_right)
    fn = theano.function([], joined)
    #theano.printing.debugprint(fn)
    nodes = fn.maker.env.toposort()
    # The compiled graph should end with GpuJoin -> HostFromGpu.
    assert isinstance(nodes[-1].op, cuda.HostFromGpu)
    assert isinstance(nodes[-2].op, cuda.GpuJoin)
    assert numpy.all(fn() == numpy.concatenate([left, right], axis=1))
# Allow running the GpuJoin optimization test directly.
if __name__ == '__main__':
    test_opt_gpujoin_onlyajoin()
Markdown formatting is supported
0%
You are adding 0 people to this discussion. Please proceed with caution.
Please finish editing this comment first!
Register or sign in to post a comment