Commit c907bf60 authored by abergeron

Merge pull request #2455 from nouiz/meta_conv

In the meta optimizer, try to fix broadcast pattern change.
...@@ -948,16 +948,17 @@ class ModuleCache(object): ...@@ -948,16 +948,17 @@ class ModuleCache(object):
if (key[0] and not key_broken and if (key[0] and not key_broken and
self.check_for_broken_eq): self.check_for_broken_eq):
self.check_key(key, key_data.key_pkl) self.check_key(key, key_data.key_pkl)
self._update_mappings(key, key_data, module.__file__) self._update_mappings(key, key_data, module.__file__, check_in_keys=not key_broken)
return module return module
else: else:
return None return None
def _update_mappings(self, key, key_data, name): def _update_mappings(self, key, key_data, name, check_in_keys):
all_keys = key_data.keys all_keys = key_data.keys
if not all_keys: if not all_keys:
all_keys = [key] all_keys = [key]
assert key in all_keys if check_in_keys:
assert key in all_keys
for k in all_keys: for k in all_keys:
if k in self.entry_from_key: if k in self.entry_from_key:
assert self.entry_from_key[k] == name assert self.entry_from_key[k] == name
...@@ -988,10 +989,10 @@ class ModuleCache(object): ...@@ -988,10 +989,10 @@ class ModuleCache(object):
key_pkl=key_pkl, key_pkl=key_pkl,
entry=name) entry=name)
key_broken = False
if key[0]: if key[0]:
try: try:
key_data.save_pkl() key_data.save_pkl()
key_broken = False
except cPickle.PicklingError: except cPickle.PicklingError:
key_broken = True key_broken = True
key_data.remove_key(key) key_data.remove_key(key)
...@@ -1006,7 +1007,7 @@ class ModuleCache(object): ...@@ -1006,7 +1007,7 @@ class ModuleCache(object):
" following op(s) implement" " following op(s) implement"
" c_code_cache_version(). This makes them" " c_code_cache_version(). This makes them"
" recompiled for each process." + str(ops)) " recompiled for each process." + str(ops))
self._update_mappings(key, key_data, module.__file__) self._update_mappings(key, key_data, module.__file__, not key_broken)
return key_data return key_data
def module_from_key(self, key, lnk=None, keep_lock=False): def module_from_key(self, key, lnk=None, keep_lock=False):
......
...@@ -595,3 +595,14 @@ if (py_%(name)s == NULL) { %(freefunc)s(%(name)s); } ...@@ -595,3 +595,14 @@ if (py_%(name)s == NULL) { %(freefunc)s(%(name)s); }
def __str__(self): def __str__(self):
return "%s{%s}" % (self.__class__.__name__, self.ctype) return "%s{%s}" % (self.__class__.__name__, self.ctype)
class CDataTypeConstant(graph.Constant):
    """Graph Constant wrapping an opaque C data pointer for ``CDataType``."""
    def signature(self):
        # The Op.c_code* methods can't access the data, so the
        # generated C code cannot depend on it, and there is no need
        # to put it in the signature.  Also, under Python 2, PyCObject
        # isn't picklable, so using the PyCObject in the signature
        # would disable the C code cache for ops that have one as an
        # input.  Hence only the type participates in the signature.
        return (self.type,)
CDataType.Constant = CDataTypeConstant
...@@ -1331,9 +1331,13 @@ if True: ...@@ -1331,9 +1331,13 @@ if True:
border_mode = node.op.border_mode border_mode = node.op.border_mode
subsample = node.op.subsample subsample = node.op.subsample
direction_hint = node.op.direction_hint direction_hint = node.op.direction_hint
return [dnn_conv(img, kern, rval = dnn_conv(img, kern,
border_mode=border_mode, subsample=subsample, border_mode=border_mode, subsample=subsample,
direction_hint=direction_hint)] direction_hint=direction_hint)
if node.outputs[0].broadcastable != rval.broadcastable:
rval = tensor.patternbroadcast(
rval, node.outputs[0].type.broadcastable)
return [rval]
# This optimizer is registered in opt.py as part of the meta-optimizer. # This optimizer is registered in opt.py as part of the meta-optimizer.
# It tries exactly the opposite code path of what local_conv_dnn() uses, # It tries exactly the opposite code path of what local_conv_dnn() uses,
...@@ -1360,9 +1364,13 @@ if True: ...@@ -1360,9 +1364,13 @@ if True:
direction_hint = 'forward' direction_hint = 'forward'
else: else:
direction_hint = 'bprop weights' direction_hint = 'bprop weights'
return [dnn_conv(img, kern, rval = dnn_conv(img, kern,
border_mode=border_mode, subsample=subsample, border_mode=border_mode, subsample=subsample,
direction_hint=direction_hint)] direction_hint=direction_hint)
if node.outputs[0].broadcastable != rval.broadcastable:
rval = tensor.patternbroadcast(
rval, node.outputs[0].type.broadcastable)
return [rval]
@register_opt('cudnn') @register_opt('cudnn')
@local_optimizer([GpuDownsampleFactorMax]) @local_optimizer([GpuDownsampleFactorMax])
......
...@@ -1148,10 +1148,11 @@ def _gpu_conv_to_fftconv(node): ...@@ -1148,10 +1148,11 @@ def _gpu_conv_to_fftconv(node):
(node.op.imshp[0] is not None)): (node.op.imshp[0] is not None)):
kwargs['filter_shape'] = (node.op.nkern, node.op.imshp[0]) + node.op.kshp kwargs['filter_shape'] = (node.op.nkern, node.op.imshp[0]) + node.op.kshp
rval = conv2d_fft(node.inputs[0], node.inputs[1], **kwargs) rval = conv2d_fft(node.inputs[0], node.inputs[1], **kwargs)
if ('image_shape' in kwargs) or ('filter_shape' in kwargs): if node.outputs[0].broadcastable != rval.broadcastable:
# With given shape information, conv2d_fft may return a different # With given shape information, conv2d_fft may return a different
# broadcast pattern than GpuConv. This is forbidden, so we fix it. # broadcast pattern than GpuConv. This is forbidden, so we fix it.
rval = tensor.patternbroadcast(rval, node.outputs[0].type.broadcastable) rval = tensor.patternbroadcast(
rval, node.outputs[0].type.broadcastable)
return rval return rval
...@@ -1292,7 +1293,11 @@ def local_conv_gemm(node): ...@@ -1292,7 +1293,11 @@ def local_conv_gemm(node):
if (border_mode == 'valid'): if (border_mode == 'valid'):
# need to flip the kernel for valid convolution # need to flip the kernel for valid convolution
kern = kern[:, :, ::-1, ::-1] kern = kern[:, :, ::-1, ::-1]
# call GpuCorrMM or GpuCorrMM_gradWeights # By default use GpuCorrMM
rval = GpuCorrMM('valid', subsample, pad)(
gpu_contiguous(img), gpu_contiguous(kern))
# call GpuCorrMM_gradWeights if good
# (the latter is faster if batchsize * kernelHeight * kernelWidth # (the latter is faster if batchsize * kernelHeight * kernelWidth
# is larger than inputChannels * outputHeight * outputWidth. # is larger than inputChannels * outputHeight * outputWidth.
# GpuConv does not always store information on the batchsize and # GpuConv does not always store information on the batchsize and
...@@ -1317,21 +1322,23 @@ def local_conv_gemm(node): ...@@ -1317,21 +1322,23 @@ def local_conv_gemm(node):
# (we need to wrap the result in as_cuda_ndarray_variable, # (we need to wrap the result in as_cuda_ndarray_variable,
# because we are not allowed to replace a CudaNdarray with # because we are not allowed to replace a CudaNdarray with
# a DimShuffle instance in a graph optimization) # a DimShuffle instance in a graph optimization)
return [theano.sandbox.cuda.as_cuda_ndarray_variable( rval = theano.sandbox.cuda.as_cuda_ndarray_variable(
GpuCorrMM_gradWeights('valid', subsample, pad)( GpuCorrMM_gradWeights('valid', subsample, pad)(
gpu_contiguous(img.dimshuffle(1, 0, 2, 3)), gpu_contiguous(img.dimshuffle(1, 0, 2, 3)),
gpu_contiguous(kern.dimshuffle(1, 0, 2, 3)) gpu_contiguous(kern.dimshuffle(1, 0, 2, 3))
).dimshuffle(1, 0, 2, 3))] ).dimshuffle(1, 0, 2, 3))
# use GpuCorrMM if we did not choose GpuCorrMM_gradWeights above
return [GpuCorrMM('valid', subsample, pad)(
gpu_contiguous(img), gpu_contiguous(kern))]
elif (border_mode == 'full'): elif (border_mode == 'full'):
# need to dimshuffle the kernel for full convolution # need to dimshuffle the kernel for full convolution
kern = kern.dimshuffle(1, 0, 2, 3) kern = kern.dimshuffle(1, 0, 2, 3)
# call GpuCorrMM_gradInputs # call GpuCorrMM_gradInputs
return [GpuCorrMM_gradInputs('valid', subsample, pad)( rval = GpuCorrMM_gradInputs('valid', subsample, pad)(
gpu_contiguous(kern), gpu_contiguous(img))] gpu_contiguous(kern), gpu_contiguous(img))
if node.outputs[0].broadcastable != rval.broadcastable:
# With given shape information, conv2d_fft may return a different
# broadcast pattern than GpuConv. This is forbidden, so we fix it.
rval = tensor.patternbroadcast(
rval, node.outputs[0].type.broadcastable)
return [rval]
# First we register the optimizer that moves convolutions to the GPU. # First we register the optimizer that moves convolutions to the GPU.
register_opt()(local_gpu_conv) register_opt()(local_gpu_conv)
......
Markdown format
0%
You are adding 0 people to this discussion. Please proceed with caution.
Please finish editing this comment first!
Register or sign in to post a comment