提交 1dfceb5d authored 作者: Frederic Bastien's avatar Frederic Bastien

Fix the optimization that converts alloc_empty to zeros. Sometimes it tried to apply too frequently and crashed.

上级 2c9f2c4d
...@@ -21,6 +21,7 @@ from theano.sandbox.cuda import dnn_version as version ...@@ -21,6 +21,7 @@ from theano.sandbox.cuda import dnn_version as version
from theano.sandbox.cuda.basic_ops import (as_cuda_ndarray_variable, from theano.sandbox.cuda.basic_ops import (as_cuda_ndarray_variable,
host_from_gpu, host_from_gpu,
gpu_contiguous, HostFromGpu, gpu_contiguous, HostFromGpu,
gpu_alloc, GpuAlloc,
gpu_alloc_empty, GpuAllocEmpty, gpu_alloc_empty, GpuAllocEmpty,
GpuElemwise) GpuElemwise)
from theano.sandbox.cuda.blas import (GpuConv, GpuDownsampleFactorMax, from theano.sandbox.cuda.blas import (GpuConv, GpuDownsampleFactorMax,
...@@ -2246,9 +2247,13 @@ if True: ...@@ -2246,9 +2247,13 @@ if True:
inputs = list(node.inputs) inputs = list(node.inputs)
dest = inputs[2] dest = inputs[2]
if (dest.owner and if (dest.owner and
isinstance(dest.owner.op, GpuAllocEmpty) and type(dest.owner.op) is GpuAllocEmpty and
len(dest.clients) > 1): len(dest.clients) > 1):
inputs[2] = gpu_alloc_empty(*dest.owner.inputs) inputs[2] = gpu_alloc_empty(*dest.owner.inputs)
elif (dest.owner and
type(dest.owner.op) is GpuAlloc and
len(dest.clients) > 1):
inputs[2] = gpu_alloc(*dest.owner.inputs)
return [GpuDnnConv(algo=node.op.algo, inplace=True)(*inputs)] return [GpuDnnConv(algo=node.op.algo, inplace=True)(*inputs)]
@local_optimizer([GpuDnnConvGradW], inplace=True) @local_optimizer([GpuDnnConvGradW], inplace=True)
...@@ -2258,9 +2263,13 @@ if True: ...@@ -2258,9 +2263,13 @@ if True:
inputs = list(node.inputs) inputs = list(node.inputs)
dest = inputs[2] dest = inputs[2]
if (dest.owner and if (dest.owner and
isinstance(dest.owner.op, GpuAllocEmpty) and type(dest.owner.op) is GpuAllocEmpty and
len(dest.clients) > 1): len(dest.clients) > 1):
inputs[2] = gpu_alloc_empty(*dest.owner.inputs) inputs[2] = gpu_alloc_empty(*dest.owner.inputs)
elif (dest.owner and
type(dest.owner.op) is GpuAlloc and
len(dest.clients) > 1):
inputs[2] = gpu_alloc(*dest.owner.inputs)
return [GpuDnnConvGradW(inplace=True)(*inputs)] return [GpuDnnConvGradW(inplace=True)(*inputs)]
@local_optimizer([GpuDnnConvGradI], inplace=True) @local_optimizer([GpuDnnConvGradI], inplace=True)
...@@ -2270,9 +2279,13 @@ if True: ...@@ -2270,9 +2279,13 @@ if True:
inputs = list(node.inputs) inputs = list(node.inputs)
dest = inputs[2] dest = inputs[2]
if (dest.owner and if (dest.owner and
isinstance(dest.owner.op, GpuAllocEmpty) and type(dest.owner.op) is GpuAllocEmpty and
len(dest.clients) > 1): len(dest.clients) > 1):
inputs[2] = gpu_alloc_empty(*dest.owner.inputs) inputs[2] = gpu_alloc_empty(*dest.owner.inputs)
elif (dest.owner and
type(dest.owner.op) is GpuAlloc and
len(dest.clients) > 1):
inputs[2] = gpu_alloc(*dest.owner.inputs)
return [GpuDnnConvGradI(inplace=True)(*inputs)] return [GpuDnnConvGradI(inplace=True)(*inputs)]
optdb.register('local_dnn_conv_inplace', optdb.register('local_dnn_conv_inplace',
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论