提交 bb466e6d 作者: James Bergstra

Made local_gpualloc pad the shape of the fill value so that the GPU copy works.

上级 607102c6
......@@ -638,16 +638,20 @@ compile.optdb.register('gpu_inplace_opt', gpu_insert_inplace_optimizer, 75, 'fas
@register_opt()
@local_optimizer([tensor.Alloc])
def local_gpualloc(node):
    """Replace a host ``tensor.alloc`` node by ``gpu_alloc`` where applicable.

    The replacement is made when ``node.op == tensor.alloc`` and at least
    one of the following holds:

    * the fill value (``node.inputs[0]``) is produced by ``host_from_gpu``;
    * every client of the output is a ``gpu_from_host`` node;
    * every client of the output is a ``tensor.join`` whose inputs after
      the first are all produced by ``host_from_gpu`` or ``tensor.alloc``.

    Before building the replacement, the fill value is left-padded with
    ``tensor.shape_padleft`` so that it has as many dimensions as the
    requested shape (``node.inputs[1:]``).

    :param node: the apply node being examined by the optimizer.
    :returns: ``[host_from_gpu(gpu_alloc(padded_value, *shape))]`` when the
        node is replaced; otherwise the function falls through and returns
        ``None``.
    """
    replace = False
    if node.op == tensor.alloc:
        fill_value = node.inputs[0]
        clients = node.outputs[0].clients
        # NOTE(review): ``all`` over an empty client list is True, so an
        # alloc without clients is also moved -- confirm this is intended.

        # The fill value was on the GPU.
        if fill_value.owner and fill_value.owner.op == host_from_gpu:
            replace = True

        # All clients are on the GPU.
        if all(c != 'output' and c.op == gpu_from_host
               for c, idx in clients):
            replace = True

        # All clients are joins whose joined inputs come from the GPU or
        # from another alloc.
        if all(c != 'output' and c.op == tensor.join and
               all(i.owner and
                   i.owner.op in [host_from_gpu, tensor.alloc]
                   for i in c.inputs[1:])
               for c, idx in clients):
            replace = True

    if replace:
        val = node.inputs[0]
        shp = node.inputs[1:]
        # Pad the fill value on the left up to the rank of the requested
        # shape so that the gpu copy works.
        val2 = tensor.shape_padleft(val, len(shp) - val.ndim)
        new_node = host_from_gpu(gpu_alloc(val2, *shp))
        return [new_node]
@register_opt()
@local_optimizer([])
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论