提交 641c325b authored 作者: Frederic

Fix each opt individually instead of doing it in the meta conv.

上级 d524d461
......@@ -898,14 +898,7 @@ class LocalMetaOptimizer(LocalOptimizer):
timings.sort()
if self.verbose:
print "= %s" % timings[0][2]
ret = timings[0][1]
for i in range(len(ret)):
if ret[i].type != node.outputs[i].type:
# If the type is different, it is probably due to
# the broadcast pattern. Try to fix that.
ret[i] = theano.tensor.patternbroadcast(
ret[i], node.outputs[i].broadcastable)
return ret
return timings[0][1]
return
def provide_inputs(self, node, inputs):
......
......@@ -1331,9 +1331,13 @@ if True:
border_mode = node.op.border_mode
subsample = node.op.subsample
direction_hint = node.op.direction_hint
return [dnn_conv(img, kern,
border_mode=border_mode, subsample=subsample,
direction_hint=direction_hint)]
rval = dnn_conv(img, kern,
border_mode=border_mode, subsample=subsample,
direction_hint=direction_hint)
if node.outputs[0].broadcastable != rval.broadcastable:
rval = tensor.patternbroadcast(
rval, node.outputs[0].type.broadcastable)
return [rval]
# This optimizer is registered in opt.py as part of the meta-optimizer.
# It tries exactly the opposite code path of what local_conv_dnn() uses,
......@@ -1360,9 +1364,13 @@ if True:
direction_hint = 'forward'
else:
direction_hint = 'bprop weights'
return [dnn_conv(img, kern,
border_mode=border_mode, subsample=subsample,
direction_hint=direction_hint)]
rval = dnn_conv(img, kern,
border_mode=border_mode, subsample=subsample,
direction_hint=direction_hint)
if node.outputs[0].broadcastable != rval.broadcastable:
rval = tensor.patternbroadcast(
rval, node.outputs[0].type.broadcastable)
return [rval]
@register_opt('cudnn')
@local_optimizer([GpuDownsampleFactorMax])
......
......@@ -1148,10 +1148,11 @@ def _gpu_conv_to_fftconv(node):
(node.op.imshp[0] is not None)):
kwargs['filter_shape'] = (node.op.nkern, node.op.imshp[0]) + node.op.kshp
rval = conv2d_fft(node.inputs[0], node.inputs[1], **kwargs)
if ('image_shape' in kwargs) or ('filter_shape' in kwargs):
if node.outputs[0].broadcastable != rval.broadcastable:
# With given shape information, conv2d_fft may return a different
# broadcast pattern than GpuConv. This is forbidden, so we fix it.
rval = tensor.patternbroadcast(rval, node.outputs[0].type.broadcastable)
rval = tensor.patternbroadcast(
rval, node.outputs[0].type.broadcastable)
return rval
......@@ -1292,7 +1293,11 @@ def local_conv_gemm(node):
if (border_mode == 'valid'):
# need to flip the kernel for valid convolution
kern = kern[:, :, ::-1, ::-1]
# call GpuCorrMM or GpuCorrMM_gradWeights
# By default use GpuCorrMM
rval = GpuCorrMM('valid', subsample, pad)(
gpu_contiguous(img), gpu_contiguous(kern))
# call GpuCorrMM_gradWeights if good
# (the latter is faster if batchsize * kernelHeight * kernelWidth
# is larger than inputChannels * outputHeight * outputWidth.
# GpuConv does not always store information on the batchsize and
......@@ -1317,21 +1322,23 @@ def local_conv_gemm(node):
# (we need to wrap the result in as_cuda_ndarray_variable,
# because we are not allowed to replace a CudaNdarray with
# a DimShuffle instance in a graph optimization)
return [theano.sandbox.cuda.as_cuda_ndarray_variable(
GpuCorrMM_gradWeights('valid', subsample, pad)(
rval = theano.sandbox.cuda.as_cuda_ndarray_variable(
GpuCorrMM_gradWeights('valid', subsample, pad)(
gpu_contiguous(img.dimshuffle(1, 0, 2, 3)),
gpu_contiguous(kern.dimshuffle(1, 0, 2, 3))
).dimshuffle(1, 0, 2, 3))]
# use GpuCorrMM if we did not choose GpuCorrMM_gradWeights above
return [GpuCorrMM('valid', subsample, pad)(
gpu_contiguous(img), gpu_contiguous(kern))]
).dimshuffle(1, 0, 2, 3))
elif (border_mode == 'full'):
# need to dimshuffle the kernel for full convolution
kern = kern.dimshuffle(1, 0, 2, 3)
# call GpuCorrMM_gradInputs
return [GpuCorrMM_gradInputs('valid', subsample, pad)(
gpu_contiguous(kern), gpu_contiguous(img))]
rval = GpuCorrMM_gradInputs('valid', subsample, pad)(
gpu_contiguous(kern), gpu_contiguous(img))
if node.outputs[0].broadcastable != rval.broadcastable:
# The GpuCorrMM-based op may return a different broadcast
# pattern than GpuConv. This is forbidden, so we fix it.
rval = tensor.patternbroadcast(
rval, node.outputs[0].type.broadcastable)
return [rval]
# First we register the optimizer that moves convolutions to the GPU.
register_opt()(local_gpu_conv)
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论