提交 fe7f1d09 authored 作者: James Bergstra's avatar James Bergstra

modified cuda opt to insert GpuOuter as special case of GpuDot22

上级 5d652817
...@@ -423,23 +423,19 @@ def local_gpu_gemm(node): ...@@ -423,23 +423,19 @@ def local_gpu_gemm(node):
@local_optimizer([])
def local_gpu_outer(node):
    """Rewrite a GpuDot22 of a column by a row as a GpuOuter.

    gpu_dot22(col, row) -> gpu_outer

    A dot product between an (N, 1) column and a (1, M) row is exactly an
    outer product of the two underlying vectors, so the specialized
    GpuOuter op can be used instead of the general GpuDot22.

    :param node: an Apply node from the graph being optimized.
    :returns: a one-element list with the replacement GpuOuter output when
        the pattern matches, otherwise False (no replacement).
    """
    if node.op == gpu_dot22:
        l, r = node.inputs
        # The pattern only matches when the left operand is a column
        # (broadcastable in dim 1) and the right operand is a row
        # (broadcastable in dim 0).
        if l.type.broadcastable[1] and r.type.broadcastable[0]:
            # TODO: we would like to remove the double-dimshuffle when l or r
            # is already the output of a GpuDimshuffle. To do this, refactor
            # the logic in tensor/opt.py that collapses dimshuffle chains so
            # that we can call it from here.
            # Drop the broadcastable dimension to get plain vectors.
            lvec = GpuDimShuffle(l.broadcastable, [0])(l)
            rvec = GpuDimShuffle(r.broadcastable, [1])(r)
            return [gpu_outer(lvec, rvec)]
    return False
@register_opt() @register_opt()
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论