提交 b9b10ced authored 作者: Frederic Bastien's avatar Frederic Bastien

Add an optional optimization that converts all *AllocEmpty ops to *Alloc.

上级 ef8ee794
...@@ -2540,6 +2540,19 @@ def local_gpu_allocempty(node): ...@@ -2540,6 +2540,19 @@ def local_gpu_allocempty(node):
return False return False
# Don't register by default.
@gof.local_optimizer([GpuAllocEmpty])
def local_gpu_alloc_empty_to_zeros(node):
    """Swap a GpuAllocEmpty node for a gpu_alloc filled with float32 zeros.

    Returns a one-element replacement list when ``node.op`` is a
    GpuAllocEmpty, and None otherwise. The inputs of the original node
    are forwarded unchanged after the zero fill value.
    """
    if not isinstance(node.op, GpuAllocEmpty):
        return None
    zero = theano.tensor.constant(0, dtype='float32')
    return [gpu_alloc(zero, *node.inputs)]


# Position 49.3: after move to gpu and merge2, before inplace.
optdb.register('local_gpu_alloc_empty_to_zeros',
               local_gpu_alloc_empty_to_zeros,
               49.3,
               'alloc_empty_to_zeros')
def typeInfer(node): def typeInfer(node):
return typeConstructor return typeConstructor
......
...@@ -300,6 +300,19 @@ def local_gpualloc_memset_0(node): ...@@ -300,6 +300,19 @@ def local_gpualloc_memset_0(node):
return [new_op(*node.inputs)] return [new_op(*node.inputs)]
# Don't register by default.
@gof.local_optimizer([GpuAllocEmpty])
def local_gpua_alloc_empty_to_zeros(node):
    """Replace a GpuAllocEmpty node with a GpuAlloc filled with zeros.

    The zero fill value is built with the dtype of the output being
    replaced, so the replacement keeps the dtype of the original
    GpuAllocEmpty instead of always producing float32.

    Returns a one-element replacement list when ``node.op`` is a
    GpuAllocEmpty, and None otherwise.
    """
    if isinstance(node.op, GpuAllocEmpty):
        # Match the dtype of the output we are replacing rather than
        # hard-coding float32.
        zero = theano.tensor.constant(0, dtype=node.outputs[0].dtype)
        return [GpuAlloc()(zero, *node.inputs)]
    return None


optdb.register('local_gpua_alloc_empty_to_zeros',
               local_gpua_alloc_empty_to_zeros,
               # After move to gpu and merge2, before inplace.
               49.3,
               'alloc_empty_to_zeros',)
@register_opt() @register_opt()
@local_optimizer([GpuContiguous]) @local_optimizer([GpuContiguous])
def local_gpu_contiguous_gpu_contiguous(node): def local_gpu_contiguous_gpu_contiguous(node):
......
...@@ -1766,6 +1766,26 @@ def local_useless_alloc(node): ...@@ -1766,6 +1766,26 @@ def local_useless_alloc(node):
return [node.inputs[0]] return [node.inputs[0]]
# Don't register by default.
@gof.local_optimizer([T.AllocEmpty])
def local_alloc_empty_to_zeros(node):
    """Convert an AllocEmpty into an Alloc of zeros.

    This helps when investigating NaN with NanGuardMode. It is not
    registered by default. To activate it, use the Theano flag
    optimizer_including=alloc_empty_to_zeros. This also enables the
    GPU versions of this optimization.

    Returns a one-element replacement list when ``node.op`` is a
    T.AllocEmpty, and None otherwise.
    """
    if not isinstance(node.op, T.AllocEmpty):
        return None
    out_dtype = node.outputs[0].dtype
    return [T.zeros(node.inputs, dtype=out_dtype)]


# Position 49.3: after move to gpu and merge2, before inplace.
compile.optdb.register('local_alloc_empty_to_zeros',
                       local_alloc_empty_to_zeros,
                       49.3,
                       'alloc_empty_to_zeros')
@register_specialize @register_specialize
@register_canonicalize @register_canonicalize
@gof.local_optimizer([T.shape]) @gof.local_optimizer([T.shape])
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论