提交 350f5d55 · 作者: abergeron

Merge pull request #4222 from nouiz/opt_fix

Opt fix
@@ -21,6 +21,7 @@ from theano.sandbox.cuda import dnn_version as version
 from theano.sandbox.cuda.basic_ops import (as_cuda_ndarray_variable,
                                            host_from_gpu,
                                            gpu_contiguous, HostFromGpu,
+                                           gpu_alloc, GpuAlloc,
                                            gpu_alloc_empty, GpuAllocEmpty,
                                            GpuElemwise)
 from theano.sandbox.cuda.blas import (GpuConv, GpuDownsampleFactorMax,
@@ -2246,9 +2247,13 @@ if True:
         inputs = list(node.inputs)
         dest = inputs[2]
         if (dest.owner and
-                isinstance(dest.owner.op, GpuAllocEmpty) and
+                type(dest.owner.op) is GpuAllocEmpty and
                 len(dest.clients) > 1):
             inputs[2] = gpu_alloc_empty(*dest.owner.inputs)
+        elif (dest.owner and
+                type(dest.owner.op) is GpuAlloc and
+                len(dest.clients) > 1):
+            inputs[2] = gpu_alloc(*dest.owner.inputs)
         return [GpuDnnConv(algo=node.op.algo, inplace=True)(*inputs)]

     @local_optimizer([GpuDnnConvGradW], inplace=True)
@@ -2258,9 +2263,13 @@ if True:
         inputs = list(node.inputs)
         dest = inputs[2]
         if (dest.owner and
-                isinstance(dest.owner.op, GpuAllocEmpty) and
+                type(dest.owner.op) is GpuAllocEmpty and
                 len(dest.clients) > 1):
             inputs[2] = gpu_alloc_empty(*dest.owner.inputs)
+        elif (dest.owner and
+                type(dest.owner.op) is GpuAlloc and
+                len(dest.clients) > 1):
+            inputs[2] = gpu_alloc(*dest.owner.inputs)
         return [GpuDnnConvGradW(inplace=True)(*inputs)]

     @local_optimizer([GpuDnnConvGradI], inplace=True)
@@ -2270,9 +2279,13 @@ if True:
         inputs = list(node.inputs)
         dest = inputs[2]
         if (dest.owner and
-                isinstance(dest.owner.op, GpuAllocEmpty) and
+                type(dest.owner.op) is GpuAllocEmpty and
                 len(dest.clients) > 1):
             inputs[2] = gpu_alloc_empty(*dest.owner.inputs)
+        elif (dest.owner and
+                type(dest.owner.op) is GpuAlloc and
+                len(dest.clients) > 1):
+            inputs[2] = gpu_alloc(*dest.owner.inputs)
         return [GpuDnnConvGradI(inplace=True)(*inputs)]

     optdb.register('local_dnn_conv_inplace',
......
@@ -1942,7 +1942,9 @@ def local_gpu_downsample_factor_max_grad_grad(node):
     if isinstance(node.op, pool.DownsampleFactorMaxGradGrad):
         assert node.op.__props__ == ('ds', 'ignore_border', 'st',
                                      'padding', 'mode')
-        if node.op.padding != (0, 0) or node.op.mode != 'max':
+        if (node.op.padding != (0, 0) or
+                node.op.mode != 'max' or
+                node.op.st != (1, 1)):
             return
         x, z, gx = node.inputs
         if (x.owner and isinstance(x.owner.op, HostFromGpu)):
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论