提交 76aa8972 authored 作者: Alexander Matyasko's avatar Alexander Matyasko

Add op lifter for gpu pool grad grad

上级 03c9bbe2
...@@ -29,6 +29,7 @@ from theano.tensor.nnet.abstract_conv import (BaseAbstractConv, ...@@ -29,6 +29,7 @@ from theano.tensor.nnet.abstract_conv import (BaseAbstractConv,
AbstractConv3d, AbstractConv3d,
AbstractConv3d_gradWeights, AbstractConv3d_gradWeights,
AbstractConv3d_gradInputs) AbstractConv3d_gradInputs)
import theano.tensor.signal.pool as pool
from theano.tests.breakpoint import PdbBreakpoint from theano.tests.breakpoint import PdbBreakpoint
...@@ -46,7 +47,8 @@ from .blas import (gpu_dot22, GpuGemm, GpuGer, GpuGemmBatch, ...@@ -46,7 +47,8 @@ from .blas import (gpu_dot22, GpuGemm, GpuGer, GpuGemmBatch,
gpugemmbatch_no_inplace, gpugemmbatch_no_inplace,
gpugemv_no_inplace, gpugemv_inplace, gpugemv_no_inplace, gpugemv_inplace,
GpuCorrMM, GpuCorrMM_gradInputs, GpuCorrMM_gradWeights, GpuCorrMM, GpuCorrMM_gradInputs, GpuCorrMM_gradWeights,
GpuCorr3dMM, GpuCorr3dMM_gradInputs, GpuCorr3dMM_gradWeights) GpuCorr3dMM, GpuCorr3dMM_gradInputs, GpuCorr3dMM_gradWeights,
GpuDownsampleFactorMaxGradGrad)
from .blocksparse import (GpuSparseBlockGemv, GpuSparseBlockOuter, from .blocksparse import (GpuSparseBlockGemv, GpuSparseBlockOuter,
gpu_sparse_block_outer, gpu_sparse_block_outer,
gpu_sparse_block_outer_inplace, gpu_sparse_block_outer_inplace,
...@@ -62,7 +64,7 @@ from .subtensor import (GpuIncSubtensor, GpuSubtensor, ...@@ -62,7 +64,7 @@ from .subtensor import (GpuIncSubtensor, GpuSubtensor,
GpuAdvancedSubtensor1, GpuAdvancedSubtensor1,
GpuAdvancedIncSubtensor1, GpuAdvancedIncSubtensor1,
GpuAdvancedIncSubtensor1_dev20) GpuAdvancedIncSubtensor1_dev20)
from .opt_util import alpha_merge, output_merge from .opt_util import alpha_merge, output_merge, pad_dims, unpad_dims
_logger = logging.getLogger("theano.gpuarray.opt") _logger = logging.getLogger("theano.gpuarray.opt")
...@@ -1589,6 +1591,34 @@ def local_gpua_lift_abstractconv_graph(op, context_name, inputs, outputs): ...@@ -1589,6 +1591,34 @@ def local_gpua_lift_abstractconv_graph(op, context_name, inputs, outputs):
return [op(*inps)] return [op(*inps)]
@register_opt()
@op_lifter([pool.DownsampleFactorMaxGradGrad])
@register_opt2([pool.DownsampleFactorMaxGradGrad])
def local_gpu_downsample_factor_max_grad_grad(op, ctx_name, inputs, outputs):
    """Lift a CPU ``DownsampleFactorMaxGradGrad`` to its GPU equivalent.

    Only rewrites the node when the pooling parameters are compile-time
    constants, the pooling is 2D with no padding, and the mode is 'max' —
    the only configuration ``GpuDownsampleFactorMaxGradGrad`` supports.
    Returns None (no rewrite) in every other case.
    """
    from theano.sandbox.cuda.opt import _check_constant_args_pool

    # Sanity check: this lifter reads exactly these Op properties.
    assert op.__props__ == ('ignore_border', 'mode', 'ndim')
    x, y, ggx, ws, st, pad = inputs
    nd = op.ndim
    constant_args = _check_constant_args_pool(nd, ws, st, pad, op)
    if constant_args is None:
        # Window size / stride / padding are not constants: cannot lift.
        return
    ws, st, pad = constant_args
    # The GPU kernel handles only unpadded 2D max pooling.
    if nd != 2 or max(pad) != 0 or op.mode != 'max':
        return
    x = gpu_contiguous(as_gpuarray_variable(x, ctx_name))
    y = gpu_contiguous(as_gpuarray_variable(y, ctx_name))
    ggx = gpu_contiguous(as_gpuarray_variable(ggx, ctx_name))
    gpu_op = GpuDownsampleFactorMaxGradGrad(ws, st, op.ignore_border)
    if x.ndim == nd + 2:
        # Input already has the expected (b, c, r, c) 4D layout.
        return gpu_op(x, y, ggx)
    # Otherwise collapse leading dims to 4D, pool, then restore the rank.
    x4, y4, ggx4 = (pad_dims(t, 2, 2) for t in (x, y, ggx))
    return unpad_dims(gpu_op(x4, y4, ggx4), x, 2, 2)
@register_opt("low_memory") @register_opt("low_memory")
@local_optimizer([GpuCAReduceCuda]) @local_optimizer([GpuCAReduceCuda])
def local_gpu_elemwise_careduce(node): def local_gpu_elemwise_careduce(node):
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论