提交 8c20dd95 作者: James Bergstra

Fixed optimization in cuda/opt - tightened the check for replacing Elemwise with GpuElemwise
上级 febb463d
...@@ -73,13 +73,11 @@ gpu_cut_copies.register('cut_gpu_constant_transfers', tensor.opt.constant_foldin ...@@ -73,13 +73,11 @@ gpu_cut_copies.register('cut_gpu_constant_transfers', tensor.opt.constant_foldin
def local_gpu_elemwise_0(node): def local_gpu_elemwise_0(node):
if isinstance(node.op, tensor.Elemwise): if isinstance(node.op, tensor.Elemwise):
if numpy.any([hasattr(i.owner, 'op') and isinstance(i.owner.op, HostFromGpu) for i in node.inputs]): if numpy.any([hasattr(i.owner, 'op') and isinstance(i.owner.op, HostFromGpu) for i in node.inputs]):
if numpy.any([o.type.dtype == 'float64' for o in node.outputs]): if numpy.all([i.type.dtype == 'float32' for i in node.inputs]):
print 'WARNING: THERE ARE STILL float64s in your graph local_gpu_elemwise_0', node if numpy.all([o.type.dtype == 'float32' for o in node.outputs]):
else: new_op = GpuElemwise(node.op.scalar_op, node.op.inplace_pattern)
# move the add to a GpuAdd #TODO: change this when fusion makes Elemwise with multiple outputs
new_op = GpuElemwise(node.op.scalar_op, node.op.inplace_pattern) return [host_from_gpu(new_op(*(gpu_from_host(i) for i in node.inputs)))]
return [host_from_gpu(new_op(*(gpu_from_host(i) for i in node.inputs)))]
return False
@register_opt() @register_opt()
@local_optimizer([]) @local_optimizer([])
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论