Commit ee36aaae, authored by Pascal Lamblin

Merge pull request #2221 from nouiz/cleanup

Cleanup
...@@ -823,68 +823,6 @@ class LocalOptimizer(object): ...@@ -823,68 +823,6 @@ class LocalOptimizer(object):
(' ' * level), self.__class__.__name__, id(self)) (' ' * level), self.__class__.__name__, id(self))
class LocalSeqOptimizer(LocalOptimizer, list):
    """Try a group of local optimizers in sequence.

    The first sub-optimizer whose ``transform`` returns a truthy
    replacement wins; the remaining ones are not tried for that node.
    """
    # inherit from Optimizer first to get Optimizer.__hash__

    def __init__(self, *opts, **kw):
        # Accept either LocalSeqOptimizer(opt1, opt2, ...) or a single
        # list/tuple of optimizers: LocalSeqOptimizer([opt1, opt2]).
        if len(opts) == 1 and isinstance(opts[0], (list, tuple)):
            opts = opts[0]
        self[:] = opts
        # Optional callback invoked on optimizer failure; None disables it.
        self.failure_callback = kw.pop('failure_callback', None)

    def tracks(self):
        # Union of the tracks of all sub-optimizers: the Ops/patterns
        # this sequence may apply to. Falsy track lists are skipped.
        t = []
        for l in self:
            tt = l.tracks()
            if tt:
                t.extend(tt)
        return t

    def transform(self, node):
        """Transform a subgraph whose output is `node`.

        Subclasses should implement this function so that it returns one
        of these kinds of things:
        - False to indicate that no optimization can be applied to this
          `node`; or
        - <list of variables> to use in place of `node`'s outputs in the
          greater graph.
        - dict(old variables -> new variables). A dictionary that maps
          from old variables to new variables to replace.

        :type node: an Apply instance
        """
        # Stop at the first sub-optimizer that returns a replacement.
        for l in self:
            ret = l.transform(node)
            if ret:
                return ret
        # NOTE(review): falls through to an implicit None (not False)
        # when no sub-optimizer applies; callers treat both as falsy.

    def add_requirements(self, fgraph):
        """Forward fgraph requirement registration to every sub-optimizer.

        If this local optimization wants to add some requirements to the
        fgraph, this is the place to do it.
        """
        for l in self:
            l.add_requirements(fgraph)

    def print_summary(self, stream=sys.stdout, level=0, depth=-1):
        # Print this optimizer, then recurse into sub-optimizers with
        # extra indentation. (Python 2 print-to-stream syntax.)
        name = getattr(self, 'name', None)
        print >> stream, "%s%s %s id=%i" % (
            (' ' * level), self.__class__.__name__, name, id(self))
        # This way, -1 will do all depth
        if depth != 0:
            depth -= 1
            for opt in self:
                opt.print_summary(stream, level=(level + 2), depth=depth)
class FromFunctionLocalOptimizer(LocalOptimizer): class FromFunctionLocalOptimizer(LocalOptimizer):
"""WRITEME""" """WRITEME"""
def __init__(self, fn, tracks=None, requirements=()): def __init__(self, fn, tracks=None, requirements=()):
...@@ -934,6 +872,9 @@ class LocalOptGroup(LocalOptimizer): ...@@ -934,6 +872,9 @@ class LocalOptGroup(LocalOptimizer):
"""WRITEME""" """WRITEME"""
def __init__(self, *optimizers): def __init__(self, *optimizers):
if len(optimizers) == 1 and isinstance(optimizers[0], list):
# This happen when created by LocalGroupDB.
optimizers = tuple(optimizers[0])
self.opts = optimizers self.opts = optimizers
self.reentrant = any(getattr(opt, 'reentrant', True) self.reentrant = any(getattr(opt, 'reentrant', True)
for opt in optimizers) for opt in optimizers)
...@@ -942,8 +883,16 @@ class LocalOptGroup(LocalOptimizer): ...@@ -942,8 +883,16 @@ class LocalOptGroup(LocalOptimizer):
def __str__(self): def __str__(self):
return getattr(self, '__name__', return getattr(self, '__name__',
('<theano.gof.opt.LocalOptGroup instance>' + ('LocalOptGroup(%s)' %
str([str(o) for o in self.opts]))) ','.join([str(o) for o in self.opts])))
def tracks(self):
    """Return the concatenated track lists of all sub-optimizers.

    Sub-optimizers whose ``tracks()`` is falsy contribute nothing.
    """
    tracked = []
    for sub_opt in self.opts:
        sub_tracks = sub_opt.tracks()
        if sub_tracks:
            tracked.extend(sub_tracks)
    return tracked
def transform(self, node): def transform(self, node):
for opt in self.opts: for opt in self.opts:
......
...@@ -257,7 +257,10 @@ class SequenceDB(DB): ...@@ -257,7 +257,10 @@ class SequenceDB(DB):
# the order we want. # the order we want.
opts.sort(key=lambda obj: obj.name) opts.sort(key=lambda obj: obj.name)
opts.sort(key=lambda obj: self.__position__[obj.name]) opts.sort(key=lambda obj: self.__position__[obj.name])
ret = self.seq_opt(opts, failure_callback=self.failure_callback) kwargs = {}
if self.failure_callback:
kwargs["failure_callback"] = self.failure_callback
ret = self.seq_opt(opts, **kwargs)
if hasattr(tags[0], 'name'): if hasattr(tags[0], 'name'):
ret.name = tags[0].name ret.name = tags[0].name
return ret return ret
...@@ -280,11 +283,17 @@ class SequenceDB(DB): ...@@ -280,11 +283,17 @@ class SequenceDB(DB):
return sio.getvalue() return sio.getvalue()
class LocalSequenceDB(SequenceDB): class LocalGroupDB(SequenceDB):
""" """This generate a local optimizer of type LocalOptGroup instead
This generate a local optimizer instead of a global optimizer. of a global optimizer.
It support the tracks, to only get applied to some Op.
""" """
seq_opt = opt.LocalSeqOptimizer seq_opt = opt.LocalOptGroup
def __init__(self, failure_callback=opt.SeqOptimizer.warn):
super(LocalGroupDB, self).__init__()
self.failure_callback = None
class ProxyDB(DB): class ProxyDB(DB):
......
...@@ -1108,9 +1108,10 @@ def local_gpu_softmax_with_bias(node): ...@@ -1108,9 +1108,10 @@ def local_gpu_softmax_with_bias(node):
# Convolution, maxpooling # Convolution, maxpooling
from theano.tensor.nnet import conv from theano.tensor.nnet import conv
# We need a fixed order for the user interface. # We need a fixed order for the user interface.
conv_seqopt = theano.gof.optdb.LocalSequenceDB() conv_groupopt = theano.gof.optdb.LocalGroupDB()
conv_seqopt.__name__ = "nnn" conv_groupopt.__name__ = "gpu_conv_opts"
register_opt('fast_compile', 'fast_run', 'gpu')(conv_seqopt) register_opt('fast_compile', 'fast_run', 'gpu')(conv_groupopt)
def _gpu_conv_to_fftconv(node): def _gpu_conv_to_fftconv(node):
# shared helper function for local_conv_fft_valid and local_conv_fft_full. # shared helper function for local_conv_fft_valid and local_conv_fft_full.
...@@ -1142,7 +1143,7 @@ def _gpu_conv_to_fftconv(node): ...@@ -1142,7 +1143,7 @@ def _gpu_conv_to_fftconv(node):
return rval return rval
@local_optimizer([gpu_from_host, conv.ConvOp, GpuConv]) @local_optimizer([GpuConv])
def local_conv_fft_valid(node): def local_conv_fft_valid(node):
if isinstance(node.op, GpuConv): if isinstance(node.op, GpuConv):
if (node.op.border_mode == 'valid' and if (node.op.border_mode == 'valid' and
...@@ -1151,25 +1152,8 @@ def local_conv_fft_valid(node): ...@@ -1151,25 +1152,8 @@ def local_conv_fft_valid(node):
return [_gpu_conv_to_fftconv(node)] return [_gpu_conv_to_fftconv(node)]
return False return False
repl = local_gpu_conv_legacy.transform(node)
if repl:
if isinstance(node.op, GpuFromHost):
gpu_conv = repl[0].owner
else:
gpu_conv = repl[0].owner.inputs[0].owner
assert isinstance(gpu_conv.op, GpuConv)
if (gpu_conv.op.border_mode == 'valid' and
gpu_conv.op.subsample == (1, 1) and
gpu_conv.op.fft_opt):
ret = _gpu_conv_to_fftconv(gpu_conv)
if ret:
if isinstance(node.op, GpuFromHost):
return [ret]
else:
return [host_from_gpu(ret)]
@local_optimizer([GpuConv])
@local_optimizer([gpu_from_host, conv.ConvOp, GpuConv])
def local_conv_fft_full(node): def local_conv_fft_full(node):
if isinstance(node.op, GpuConv): if isinstance(node.op, GpuConv):
if (node.op.border_mode == 'full' and if (node.op.border_mode == 'full' and
...@@ -1178,47 +1162,21 @@ def local_conv_fft_full(node): ...@@ -1178,47 +1162,21 @@ def local_conv_fft_full(node):
return [_gpu_conv_to_fftconv(node)] return [_gpu_conv_to_fftconv(node)]
return return
repl = local_gpu_conv_legacy.transform(node)
if repl:
if isinstance(node.op, GpuFromHost):
gpu_conv = repl[0].owner
else:
gpu_conv = repl[0].owner.inputs[0].owner
assert isinstance(gpu_conv.op, GpuConv)
if (gpu_conv.op.border_mode == 'full' and
gpu_conv.op.subsample == (1, 1) and
gpu_conv.op.fft_opt):
ret = _gpu_conv_to_fftconv(gpu_conv)
if ret:
if isinstance(node.op, GpuFromHost):
return [ret]
else:
return [host_from_gpu(ret)]
# Needs to be registered before local_gpu_conv_legacy. Otherwise, it # Needs to be registered before local_gpu_conv_legacy. Otherwise, it
# will have priority over this optimization. We want, if cudnn is # will have priority over this optimization. We want, if cudnn is
# available and the GPU supports it, to use it. Otherwise, the gemm # available and the GPU supports it, to use it. Otherwise, the gemm
# version should be used. If the users want the legacy convolution, # version should be used. If the users want the legacy convolution,
# they should use the Theano flag to disable the dnn and/or gemm version. # they should use the Theano flag to disable the dnn and/or gemm version.
@local_optimizer([gpu_from_host, conv.ConvOp]) @local_optimizer([GpuConv])
def local_gpu_conv(node): def local_gpu_conv(node):
""" """
If cudnn is available, use it. Otherwise, use the gemm version. If cudnn is available, use it. Otherwise, use the gemm version.
""" """
if theano.sandbox.cuda.dnn.dnn_available(): if (isinstance(node.op, GpuConv) and
repl = local_gpu_conv_legacy.transform(node) theano.sandbox.cuda.dnn.dnn_available()):
if repl: return theano.sandbox.cuda.dnn.local_conv_dnn.transform(node)
if isinstance(node.op, GpuFromHost):
gpu_conv = repl[0].owner
else:
gpu_conv = repl[0].owner.inputs[0].owner
assert isinstance(gpu_conv.op, GpuConv)
ret = theano.sandbox.cuda.dnn.local_conv_dnn.transform(gpu_conv)
if ret:
if isinstance(node.op, GpuFromHost):
return ret
else:
return [host_from_gpu(ret[0])]
# If dnn isn't avail, the local_gpu_conv_legacy wil introduce the # If dnn isn't avail, the local_gpu_conv_legacy wil introduce the
# legacy opt. Then the local_conv_gemm will convert it to gemm # legacy opt. Then the local_conv_gemm will convert it to gemm
# opt. # opt.
...@@ -1381,20 +1339,20 @@ def local_conv_gemm(node): ...@@ -1381,20 +1339,20 @@ def local_conv_gemm(node):
gpu_contiguous(kern), gpu_contiguous(img))] gpu_contiguous(kern), gpu_contiguous(img))]
# fft optimization not enabled by default. Need to be registered # Legacy opt first, as this is the only that move to the GPU.
# before the default convolution optimization. If the user ask fft, as # Then fft, as disabled dy default. So if use enable it, it have prio
# this isn't the default, it should have higher prio then the default. # Then default, use dnn if avail
conv_seqopt.register("conv_fft_valid", local_conv_fft_valid, 1) # Then default, use gemm if dnn or fft didn't worked.
conv_seqopt.register("conv_fft_full", local_conv_fft_full, 1) # Normally, gemm should catch all case, so the legacy should never run.
# default gpu conv optimization conv_groupopt.register('local_gpu_conv_legacy', local_gpu_conv_legacy, 0,
conv_seqopt.register('local_gpu_conv', local_gpu_conv, 10, 'fast_compile', 'fast_run')
'fast_compile', 'fast_run', "dnn") conv_groupopt.register("conv_fft_valid", local_conv_fft_valid, 1)
# Legacy convolution, after default conv_groupopt.register("conv_fft_full", local_conv_fft_full, 1)
conv_seqopt.register('local_gpu_conv_legacy', local_gpu_conv_legacy, 11, # Use dnn if avail, so have the dnn tag to be able to disable it.
'fast_compile', 'fast_run', "dnn") conv_groupopt.register('local_gpu_conv', local_gpu_conv, 10,
# conv gemm after legacy, as it convert legacy to gemm version 'fast_compile', 'fast_run', 'dnn')
conv_seqopt.register('local_conv_gemm', local_conv_gemm, 12, conv_groupopt.register('local_conv_gemm', local_conv_gemm, 12,
'fast_compile', 'fast_run', "dnn") 'fast_compile', 'fast_run')
@local_optimizer([Conv3D]) @local_optimizer([Conv3D])
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论