提交 e181b34f authored 作者: f0k's avatar f0k

conv_gemm optimizer chooses between GpuCorrMM and GpuCorrMM_gradWeights for valid convolution

上级 c46a0243
...@@ -1362,8 +1362,24 @@ def local_conv_gemm(node): ...@@ -1362,8 +1362,24 @@ def local_conv_gemm(node):
if (border_mode == 'valid'): if (border_mode == 'valid'):
# need to flip the kernel for valid convolution # need to flip the kernel for valid convolution
kern = kern[:, :, ::-1, ::-1] kern = kern[:, :, ::-1, ::-1]
# call GpuCorrMM # call GpuCorrMM or GpuCorrMM_gradWeights
# TODO: call GpuCorrMM_gradWeights instead if appropriate # (GpuCorrMM seems faster if batchsize * kernelHeight * kernelWidth
# is smaller than inputChannels * outputHeight * outputWidth.
# GpuConv does not store information on the batchsize and not always
# on the channels, so we only use what information we have.)
if ((subsample == (1,1)) and
(node.op.imshp is not None) and
(None not in node.op.imshp[-2:]) and
(node.op.kshp is not None) and
(None not in node.op.kshp) and
(node.op.kshp[0] * node.op.kshp[1] >
(node.op.imshp[-2] - node.op.kshp[0] + 1) *
(node.op.imshp[-1] - node.op.kshp[1] + 1))):
return [gpu_contiguous(GpuCorrMM_gradWeights('valid', subsample, pad)(
gpu_contiguous(img.dimshuffle(1, 0, 2, 3)),
gpu_contiguous(kern.dimshuffle(1, 0, 2, 3))
).dimshuffle(1, 0, 2, 3))]
else:
return [GpuCorrMM('valid', subsample, pad)( return [GpuCorrMM('valid', subsample, pad)(
gpu_contiguous(img), gpu_contiguous(kern))] gpu_contiguous(img), gpu_contiguous(kern))]
elif (border_mode == 'full'): elif (border_mode == 'full'):
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论