提交 641c325b authored 作者: Frederic's avatar Frederic

Fix the broadcast pattern in each conv optimization individually instead of doing it in the conv meta-optimizer.

上级 d524d461
...@@ -898,14 +898,7 @@ class LocalMetaOptimizer(LocalOptimizer): ...@@ -898,14 +898,7 @@ class LocalMetaOptimizer(LocalOptimizer):
timings.sort() timings.sort()
if self.verbose: if self.verbose:
print "= %s" % timings[0][2] print "= %s" % timings[0][2]
ret = timings[0][1] return timings[0][1]
for i in range(len(ret)):
if ret[i].type != node.outputs[i].type:
# If the type is different, it is probably due to
# the broadcast pattern. Try to fix that.
ret[i] = theano.tensor.patternbroadcast(
ret[i], node.outputs[i].broadcastable)
return ret
return return
def provide_inputs(self, node, inputs): def provide_inputs(self, node, inputs):
......
...@@ -1331,9 +1331,13 @@ if True: ...@@ -1331,9 +1331,13 @@ if True:
border_mode = node.op.border_mode border_mode = node.op.border_mode
subsample = node.op.subsample subsample = node.op.subsample
direction_hint = node.op.direction_hint direction_hint = node.op.direction_hint
return [dnn_conv(img, kern, rval = dnn_conv(img, kern,
border_mode=border_mode, subsample=subsample, border_mode=border_mode, subsample=subsample,
direction_hint=direction_hint)] direction_hint=direction_hint)
if node.outputs[0].broadcastable != rval.broadcastable:
rval = tensor.patternbroadcast(
rval, node.outputs[0].type.broadcastable)
return [rval]
# This optimizer is registered in opt.py as part of the meta-optimizer. # This optimizer is registered in opt.py as part of the meta-optimizer.
# It tries exactly the opposite code path of what local_conv_dnn() uses, # It tries exactly the opposite code path of what local_conv_dnn() uses,
...@@ -1360,9 +1364,13 @@ if True: ...@@ -1360,9 +1364,13 @@ if True:
direction_hint = 'forward' direction_hint = 'forward'
else: else:
direction_hint = 'bprop weights' direction_hint = 'bprop weights'
return [dnn_conv(img, kern, rval = dnn_conv(img, kern,
border_mode=border_mode, subsample=subsample, border_mode=border_mode, subsample=subsample,
direction_hint=direction_hint)] direction_hint=direction_hint)
if node.outputs[0].broadcastable != rval.broadcastable:
rval = tensor.patternbroadcast(
rval, node.outputs[0].type.broadcastable)
return [rval]
@register_opt('cudnn') @register_opt('cudnn')
@local_optimizer([GpuDownsampleFactorMax]) @local_optimizer([GpuDownsampleFactorMax])
......
...@@ -1148,10 +1148,11 @@ def _gpu_conv_to_fftconv(node): ...@@ -1148,10 +1148,11 @@ def _gpu_conv_to_fftconv(node):
(node.op.imshp[0] is not None)): (node.op.imshp[0] is not None)):
kwargs['filter_shape'] = (node.op.nkern, node.op.imshp[0]) + node.op.kshp kwargs['filter_shape'] = (node.op.nkern, node.op.imshp[0]) + node.op.kshp
rval = conv2d_fft(node.inputs[0], node.inputs[1], **kwargs) rval = conv2d_fft(node.inputs[0], node.inputs[1], **kwargs)
if ('image_shape' in kwargs) or ('filter_shape' in kwargs): if node.outputs[0].broadcastable != rval.broadcastable:
# With given shape information, conv2d_fft may return a different # With given shape information, conv2d_fft may return a different
# broadcast pattern than GpuConv. This is forbidden, so we fix it. # broadcast pattern than GpuConv. This is forbidden, so we fix it.
rval = tensor.patternbroadcast(rval, node.outputs[0].type.broadcastable) rval = tensor.patternbroadcast(
rval, node.outputs[0].type.broadcastable)
return rval return rval
...@@ -1292,7 +1293,11 @@ def local_conv_gemm(node): ...@@ -1292,7 +1293,11 @@ def local_conv_gemm(node):
if (border_mode == 'valid'): if (border_mode == 'valid'):
# need to flip the kernel for valid convolution # need to flip the kernel for valid convolution
kern = kern[:, :, ::-1, ::-1] kern = kern[:, :, ::-1, ::-1]
# call GpuCorrMM or GpuCorrMM_gradWeights # By default use GpuCorrMM
rval = GpuCorrMM('valid', subsample, pad)(
gpu_contiguous(img), gpu_contiguous(kern))
# call GpuCorrMM_gradWeights if good
# (the latter is faster if batchsize * kernelHeight * kernelWidth # (the latter is faster if batchsize * kernelHeight * kernelWidth
# is larger than inputChannels * outputHeight * outputWidth. # is larger than inputChannels * outputHeight * outputWidth.
# GpuConv does not always store information on the batchsize and # GpuConv does not always store information on the batchsize and
...@@ -1317,21 +1322,23 @@ def local_conv_gemm(node): ...@@ -1317,21 +1322,23 @@ def local_conv_gemm(node):
# (we need to wrap the result in as_cuda_ndarray_variable, # (we need to wrap the result in as_cuda_ndarray_variable,
# because we are not allowed to replace a CudaNdarray with # because we are not allowed to replace a CudaNdarray with
# a DimShuffle instance in a graph optimization) # a DimShuffle instance in a graph optimization)
return [theano.sandbox.cuda.as_cuda_ndarray_variable( rval = theano.sandbox.cuda.as_cuda_ndarray_variable(
GpuCorrMM_gradWeights('valid', subsample, pad)( GpuCorrMM_gradWeights('valid', subsample, pad)(
gpu_contiguous(img.dimshuffle(1, 0, 2, 3)), gpu_contiguous(img.dimshuffle(1, 0, 2, 3)),
gpu_contiguous(kern.dimshuffle(1, 0, 2, 3)) gpu_contiguous(kern.dimshuffle(1, 0, 2, 3))
).dimshuffle(1, 0, 2, 3))] ).dimshuffle(1, 0, 2, 3))
# use GpuCorrMM if we did not choose GpuCorrMM_gradWeights above
return [GpuCorrMM('valid', subsample, pad)(
gpu_contiguous(img), gpu_contiguous(kern))]
elif (border_mode == 'full'): elif (border_mode == 'full'):
# need to dimshuffle the kernel for full convolution # need to dimshuffle the kernel for full convolution
kern = kern.dimshuffle(1, 0, 2, 3) kern = kern.dimshuffle(1, 0, 2, 3)
# call GpuCorrMM_gradInputs # call GpuCorrMM_gradInputs
return [GpuCorrMM_gradInputs('valid', subsample, pad)( rval = GpuCorrMM_gradInputs('valid', subsample, pad)(
gpu_contiguous(kern), gpu_contiguous(img))] gpu_contiguous(kern), gpu_contiguous(img))
if node.outputs[0].broadcastable != rval.broadcastable:
 # With given shape information, the GpuCorrMM ops may return a different
 # broadcast pattern than GpuConv. This is forbidden, so we fix it.
rval = tensor.patternbroadcast(
rval, node.outputs[0].type.broadcastable)
return [rval]
# First we register the optimizer that moves convolutions to the GPU. # First we register the optimizer that moves convolutions to the GPU.
register_opt()(local_gpu_conv) register_opt()(local_gpu_conv)
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论