提交 c5773332 authored 作者: Pascal Lamblin's avatar Pascal Lamblin

Merge pull request #2212 from nouiz/fft_conv

[BUG] Fix a bug introduced yesterday, repair the FFT conv optimization, and apply a small pydotprint fix.
...@@ -152,7 +152,7 @@ def inplace_optimizer(f): ...@@ -152,7 +152,7 @@ def inplace_optimizer(f):
class SeqOptimizer(Optimizer, list): class SeqOptimizer(Optimizer, list):
#inherit from Optimizer first to get Optimizer.__hash__ # inherit from Optimizer first to get Optimizer.__hash__
"""WRITEME """WRITEME
Takes a list of L{Optimizer} instances and applies them Takes a list of L{Optimizer} instances and applies them
sequentially. sequentially.
...@@ -823,6 +823,68 @@ class LocalOptimizer(object): ...@@ -823,6 +823,68 @@ class LocalOptimizer(object):
(' ' * level), self.__class__.__name__, id(self)) (' ' * level), self.__class__.__name__, id(self))
class LocalSeqOptimizer(LocalOptimizer, list):
    """Try a group of LocalOptimizer instances in sequence.

    As soon as one of them transforms the node, we return its result
    without trying the following ones.
    """
    # inherit from LocalOptimizer first to get LocalOptimizer.__hash__
    def __init__(self, *opts, **kw):
        """
        :param opts: the local optimizers to try, in order.  They can be
            given either as separate arguments or as a single list/tuple.
        :param failure_callback: optional keyword argument, stored on the
            instance (may be None).
        """
        if len(opts) == 1 and isinstance(opts[0], (list, tuple)):
            # A single list/tuple argument holds the optimizers.
            opts = opts[0]
        self[:] = opts
        self.failure_callback = kw.pop('failure_callback', None)

    def tracks(self):
        # Union of the ops tracked by each contained optimizer.
        t = []
        for l in self:
            tt = l.tracks()
            if tt:
                t.extend(tt)
        return t

    def transform(self, node):
        """Transform a subgraph whose output is `node`.

        Returns one of:

        - False to indicate that no optimization can be applied to this
          `node`;
        - <list of variables> to use in place of `node`'s outputs in the
          greater graph;
        - dict(old variables -> new variables), a dictionary that maps
          from old variables to new variables to replace.

        :type node: an Apply instance
        """
        for l in self:
            ret = l.transform(node)
            if ret:
                return ret
        # Be explicit: nothing applied.  The docstring promises False,
        # and the implicit None return did not honor that contract.
        return False

    def add_requirements(self, fgraph):
        """
        If any contained local optimization wants to add some
        requirements to the fgraph, this is the place to do it.
        """
        for l in self:
            l.add_requirements(fgraph)

    def print_summary(self, stream=sys.stdout, level=0, depth=-1):
        # Print this optimizer, then recurse into the contained ones
        # with an increased indentation level.
        name = getattr(self, 'name', None)
        print >> stream, "%s%s %s id=%i" % (
            (' ' * level), self.__class__.__name__, name, id(self))
        # This way, -1 will do all depth
        if depth != 0:
            depth -= 1
            for opt in self:
                opt.print_summary(stream, level=(level + 2), depth=depth)
class FromFunctionLocalOptimizer(LocalOptimizer): class FromFunctionLocalOptimizer(LocalOptimizer):
"""WRITEME""" """WRITEME"""
def __init__(self, fn, tracks=None, requirements=()): def __init__(self, fn, tracks=None, requirements=()):
......
...@@ -223,6 +223,7 @@ class SequenceDB(DB): ...@@ -223,6 +223,7 @@ class SequenceDB(DB):
other tags) fast_run and fast_compile optimizers are drawn is a SequenceDB. other tags) fast_run and fast_compile optimizers are drawn is a SequenceDB.
""" """
seq_opt = opt.SeqOptimizer
def __init__(self, failure_callback=opt.SeqOptimizer.warn): def __init__(self, failure_callback=opt.SeqOptimizer.warn):
super(SequenceDB, self).__init__() super(SequenceDB, self).__init__()
...@@ -256,13 +257,13 @@ class SequenceDB(DB): ...@@ -256,13 +257,13 @@ class SequenceDB(DB):
# the order we want. # the order we want.
opts.sort(key=lambda obj: obj.name) opts.sort(key=lambda obj: obj.name)
opts.sort(key=lambda obj: self.__position__[obj.name]) opts.sort(key=lambda obj: self.__position__[obj.name])
ret = opt.SeqOptimizer(opts, failure_callback=self.failure_callback) ret = self.seq_opt(opts, failure_callback=self.failure_callback)
if hasattr(tags[0], 'name'): if hasattr(tags[0], 'name'):
ret.name = tags[0].name ret.name = tags[0].name
return ret return ret
def print_summary(self, stream=sys.stdout): def print_summary(self, stream=sys.stdout):
print >> stream, "SequenceDB (id %i)" % id(self) print >> stream, self.__class__.__name__ + " (id %i)" % id(self)
positions = self.__position__.items() positions = self.__position__.items()
def c(a, b): def c(a, b):
...@@ -279,6 +280,13 @@ class SequenceDB(DB): ...@@ -279,6 +280,13 @@ class SequenceDB(DB):
return sio.getvalue() return sio.getvalue()
class LocalSequenceDB(SequenceDB):
    """
    A SequenceDB that generates a local optimizer instead of a global
    optimizer when queried.
    """
    # Override the class attribute that SequenceDB.query() uses to build
    # the optimizer it returns.
    seq_opt = opt.LocalSeqOptimizer
class ProxyDB(DB): class ProxyDB(DB):
""" """
Wrap an existing proxy. Wrap an existing proxy.
......
...@@ -992,7 +992,7 @@ def pydotprint_variables(vars, ...@@ -992,7 +992,7 @@ def pydotprint_variables(vars,
if nd.owner: if nd.owner:
plot_apply(nd.owner, depth) plot_apply(nd.owner, depth)
try: try:
g.write_png(outfile, prog='dot') g.write(outfile, prog='dot', format=format)
except pd.InvocationException, e: except pd.InvocationException, e:
# Some version of pydot are bugged/don't work correctly with # Some version of pydot are bugged/don't work correctly with
# empty label. Provide a better user error message. # empty label. Provide a better user error message.
...@@ -1006,6 +1006,7 @@ def pydotprint_variables(vars, ...@@ -1006,6 +1006,7 @@ def pydotprint_variables(vars,
" Theano. Using another version of pydot could" " Theano. Using another version of pydot could"
" fix this problem. The pydot error is: " + " fix this problem. The pydot error is: " +
e.message) e.message)
raise
print 'The output file is available at', outfile print 'The output file is available at', outfile
......
差异被折叠。
...@@ -83,7 +83,7 @@ class TestConv2dFFT(unittest.TestCase): ...@@ -83,7 +83,7 @@ class TestConv2dFFT(unittest.TestCase):
# make sure we inserted the fft trickery # make sure we inserted the fft trickery
topo = f_fft.maker.fgraph.toposort() topo = f_fft.maker.fgraph.toposort()
assert sum(isinstance(n.op, theano.sandbox.cuda.fftconv.CuFFTOp) assert sum(isinstance(n.op, theano.sandbox.cuda.fftconv.CuFFTOp)
for n in topo) == 2 for n in topo) == 2, topo
res_ref = f_ref() res_ref = f_ref()
...@@ -112,7 +112,7 @@ class TestConv2dFFT(unittest.TestCase): ...@@ -112,7 +112,7 @@ class TestConv2dFFT(unittest.TestCase):
# make sure we inserted the fft trickery # make sure we inserted the fft trickery
topo = f_fft.maker.fgraph.toposort() topo = f_fft.maker.fgraph.toposort()
assert sum(isinstance(n.op, theano.sandbox.cuda.fftconv.CuFFTOp) assert sum(isinstance(n.op, theano.sandbox.cuda.fftconv.CuFFTOp)
for n in topo) == 2 for n in topo) == 2, topo
res_ref = f_ref() res_ref = f_ref()
res_fft = f_fft() res_fft = f_fft()
......
...@@ -306,6 +306,7 @@ def test_consistency_GPU_parallel(): ...@@ -306,6 +306,7 @@ def test_consistency_GPU_parallel():
samples = numpy.array(samples).flatten() samples = numpy.array(samples).flatten()
assert(numpy.allclose(samples, java_samples)) assert(numpy.allclose(samples, java_samples))
def test_GPU_nstreams_limit(): def test_GPU_nstreams_limit():
"""Verify that a ValueError is raised when n_streams """Verify that a ValueError is raised when n_streams
is greater than 2**20 on GPU. This is the value of is greater than 2**20 on GPU. This is the value of
...@@ -318,11 +319,18 @@ def test_GPU_nstreams_limit(): ...@@ -318,11 +319,18 @@ def test_GPU_nstreams_limit():
R = MRG_RandomStreams(seed=seed, use_cuda=True) R = MRG_RandomStreams(seed=seed, use_cuda=True)
def eval_uniform(size, nstreams): def eval_uniform(size, nstreams):
return R.uniform(size=size, nstreams=nstreams, dtype='float32').eval() if theano.config.mode == "FAST_COMPILE":
mode = "FAST_RUN"
else:
mode = None
out = R.uniform(size=size, nstreams=nstreams, dtype='float32')
f = theano.function([], out, mode=mode)
return f()
eval_uniform((10,), 2**20) eval_uniform((10,), 2**20)
assert_raises(ValueError, eval_uniform, (10,), 2**20 + 1) assert_raises(ValueError, eval_uniform, (10,), 2**20 + 1)
def test_consistency_GPUA_serial(): def test_consistency_GPUA_serial():
'''Verify that the random numbers generated by GPUA_mrg_uniform, serially, '''Verify that the random numbers generated by GPUA_mrg_uniform, serially,
are the same as the reference (Java) implementation by L'Ecuyer et al. are the same as the reference (Java) implementation by L'Ecuyer et al.
......
...@@ -2016,26 +2016,26 @@ def zeros_like(model, dtype=None): ...@@ -2016,26 +2016,26 @@ def zeros_like(model, dtype=None):
return fill(model, constant(0.0, dtype=dtype)) return fill(model, constant(0.0, dtype=dtype))
def zeros(shp, dtype=None): def zeros(shape, dtype=None):
""" """
Create a Tensor filled with zeros, closer to Numpy's syntax than ``alloc``. Create a Tensor filled with zeros, closer to Numpy's syntax than ``alloc``.
""" """
if not isinstance(shp, (list, tuple, TensorVariable)): if not isinstance(shape, (list, tuple, TensorVariable)):
shp = [shp] shape = [shape]
if dtype is None: if dtype is None:
dtype = config.floatX dtype = config.floatX
return alloc(numpy.array(0, dtype=dtype), *shp) return alloc(numpy.array(0, dtype=dtype), *shape)
def ones(shp, dtype=None): def ones(shape, dtype=None):
""" """
Create a Tensor filled with ones, closer to Numpy's syntax than ``alloc``. Create a Tensor filled with ones, closer to Numpy's syntax than ``alloc``.
""" """
if not isinstance(shp, (list, tuple, TensorVariable)): if not isinstance(shape, (list, tuple, TensorVariable)):
shp = [shp] shape = [shape]
if dtype is None: if dtype is None:
dtype = config.floatX dtype = config.floatX
return alloc(numpy.array(1, dtype=dtype), *shp) return alloc(numpy.array(1, dtype=dtype), *shape)
class Nonzero(gof.Op): class Nonzero(gof.Op):
......
...@@ -182,10 +182,20 @@ class DimShuffle(Op): ...@@ -182,10 +182,20 @@ class DimShuffle(Op):
input = as_tensor_variable(_input) input = as_tensor_variable(_input)
ib = tuple(input.type.broadcastable) ib = tuple(input.type.broadcastable)
if not ib == self.input_broadcastable: if not ib == self.input_broadcastable:
if len(ib) != len(self.input_broadcastable):
raise TypeError(( raise TypeError((
"The number of dimensions and/or broadcastable pattern of the " "The number of dimensions of the "
"input is incorrect for this op. Expected %s, got %s." "input is incorrect for this op. Expected %s, got %s."
% (self.input_broadcastable, ib))) % (self.input_broadcastable, ib)))
for expected, b in zip(self.input_broadcastable, ib):
if expected is True and b is False:
raise TypeError((
"The broadcastable pattern of the "
"input is incorrect for this op. Expected %s, got %s."
% (self.input_broadcastable, ib)))
#else, expected == b or expected is False and b is True
# Both case are good.
ob = [] ob = []
for value in self.new_order: for value in self.new_order:
if value == 'x': if value == 'x':
......
...@@ -1639,7 +1639,18 @@ def local_alloc_elemwise(node): ...@@ -1639,7 +1639,18 @@ def local_alloc_elemwise(node):
*[T.eq(i.shape[idx], cmp_op.shape[idx]) *[T.eq(i.shape[idx], cmp_op.shape[idx])
for idx in xrange(i.type.ndim) for idx in xrange(i.type.ndim)
if not i.type.broadcastable[idx]]) if not i.type.broadcastable[idx]])
new_i.append(i.owner.inputs[0].owner.inputs[0]) alloc_input = i.owner.inputs[0].owner.inputs[0]
if alloc_input.ndim != i.owner.inputs[0].ndim:
# The alloc can add dimension to the value
# We add a dimshuffle to add them.
# We let later optimization merge the multiple dimshuffle
nb_dim_to_add = i.owner.inputs[0].ndim - alloc_input.ndim
alloc_input = alloc_input.dimshuffle(['x'] * nb_dim_to_add +
range(alloc_input.ndim))
# We need to keep the dimshuffle. It could swap axes or
# add dimensions anywhere.
new_i.append(i.owner.op(alloc_input))
else: else:
new_i.append(i) new_i.append(i)
new_i[assert_op_idx] = assert_op new_i[assert_op_idx] = assert_op
......
...@@ -2687,6 +2687,21 @@ class Test_local_alloc_elemwise(unittest.TestCase): ...@@ -2687,6 +2687,21 @@ class Test_local_alloc_elemwise(unittest.TestCase):
self._verify_alloc_count(func, 1) self._verify_alloc_count(func, 1)
self._verify_assert_count(func, 1) self._verify_assert_count(func, 1)
def test_error(self):
t3fft = theano.tensor.tensor(dtype=self.dtype,
broadcastable=(False, False, True))
row = theano.tensor.row(dtype=self.dtype)
o = T.alloc(row, 5, 5).dimshuffle(0, 1, 'x') + t3fft
func = function(
[t3fft, row],
o,
mode='FAST_RUN'
)
self._verify_alloc_count(func, 0)
self._verify_assert_count(func, 1)
d = numpy.random.rand(5, 5, 1).astype(self.dtype)
r = numpy.random.rand(1, 5).astype(self.dtype)
func(d, r)
def test_local_subtensor_of_alloc(): def test_local_subtensor_of_alloc():
......
...@@ -11,6 +11,7 @@ import theano.tensor as tensor ...@@ -11,6 +11,7 @@ import theano.tensor as tensor
from theano.printing import min_informative_str, debugprint from theano.printing import min_informative_str, debugprint
from theano.compat.six import StringIO from theano.compat.six import StringIO
def test_pydotprint_cond_highlight(): def test_pydotprint_cond_highlight():
""" """
This is a REALLY PARTIAL TEST. This is a REALLY PARTIAL TEST.
...@@ -44,6 +45,37 @@ def test_pydotprint_cond_highlight(): ...@@ -44,6 +45,37 @@ def test_pydotprint_cond_highlight():
' is no IfElse node in the graph\n') ' is no IfElse node in the graph\n')
def test_pydotprint_variables():
    """
    This is a REALLY PARTIAL TEST.

    It was written to help debug stuff.

    It only makes sure the code runs.
    """
    # Skip test if pydot is not available.
    if not theano.printing.pydot_imported:
        raise SkipTest('pydot not available')

    x = tensor.dvector()

    # Temporarily redirect Theano logging into a buffer.  The original
    # code duplicated the removeHandler/addHandler pair, which left a
    # second copy of new_handler installed after the finally block ran.
    s = StringIO()
    new_handler = logging.StreamHandler(s)
    new_handler.setLevel(logging.DEBUG)
    orig_handler = theano.logging_default_handler

    theano.theano_logger.removeHandler(orig_handler)
    theano.theano_logger.addHandler(new_handler)
    try:
        theano.printing.pydotprint_variables(x * 2)
    finally:
        theano.theano_logger.addHandler(orig_handler)
        theano.theano_logger.removeHandler(new_handler)
def test_pydotprint_long_name(): def test_pydotprint_long_name():
"""This is a REALLY PARTIAL TEST. """This is a REALLY PARTIAL TEST.
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论