提交 c5773332 authored 作者: Pascal Lamblin's avatar Pascal Lamblin

Merge pull request #2212 from nouiz/fft_conv

[BUG] Fix a bug introduced yesterday, repair the FFT conv optimization, and apply a small pydotprint fix.
...@@ -152,7 +152,7 @@ def inplace_optimizer(f): ...@@ -152,7 +152,7 @@ def inplace_optimizer(f):
class SeqOptimizer(Optimizer, list): class SeqOptimizer(Optimizer, list):
#inherit from Optimizer first to get Optimizer.__hash__ # inherit from Optimizer first to get Optimizer.__hash__
"""WRITEME """WRITEME
Takes a list of L{Optimizer} instances and applies them Takes a list of L{Optimizer} instances and applies them
sequentially. sequentially.
...@@ -823,6 +823,68 @@ class LocalOptimizer(object): ...@@ -823,6 +823,68 @@ class LocalOptimizer(object):
(' ' * level), self.__class__.__name__, id(self)) (' ' * level), self.__class__.__name__, id(self))
class LocalSeqOptimizer(LocalOptimizer, list):
    """Try a group of LocalOptimizer instances in sequence.

    As soon as one of them transforms the node, we return its result
    without trying the following ones.
    """
    # inherit from LocalOptimizer first to get LocalOptimizer.__hash__
    def __init__(self, *opts, **kw):
        """
        :param opts: the local optimizers to try, in order.  They can be
            given either as separate arguments or as a single list/tuple.
        :param failure_callback: optional keyword argument, stored on the
            instance (may be None).
        """
        if len(opts) == 1 and isinstance(opts[0], (list, tuple)):
            # A single list/tuple argument holds the optimizers.
            opts = opts[0]
        self[:] = opts
        self.failure_callback = kw.pop('failure_callback', None)

    def tracks(self):
        # Union of the ops tracked by each contained optimizer.
        t = []
        for l in self:
            tt = l.tracks()
            if tt:
                t.extend(tt)
        return t

    def transform(self, node):
        """Transform a subgraph whose output is `node`.

        Returns one of:

        - False to indicate that no optimization can be applied to this
          `node`;
        - <list of variables> to use in place of `node`'s outputs in the
          greater graph;
        - dict(old variables -> new variables), a dictionary that maps
          from old variables to new variables to replace.

        :type node: an Apply instance
        """
        for l in self:
            ret = l.transform(node)
            if ret:
                return ret
        # Be explicit: nothing applied.  The docstring promises False,
        # and the implicit None return did not honor that contract.
        return False

    def add_requirements(self, fgraph):
        """
        If any contained local optimization wants to add some
        requirements to the fgraph, this is the place to do it.
        """
        for l in self:
            l.add_requirements(fgraph)

    def print_summary(self, stream=sys.stdout, level=0, depth=-1):
        # Print this optimizer, then recurse into the contained ones
        # with an increased indentation level.
        name = getattr(self, 'name', None)
        print >> stream, "%s%s %s id=%i" % (
            (' ' * level), self.__class__.__name__, name, id(self))
        # This way, -1 will do all depth
        if depth != 0:
            depth -= 1
            for opt in self:
                opt.print_summary(stream, level=(level + 2), depth=depth)
class FromFunctionLocalOptimizer(LocalOptimizer): class FromFunctionLocalOptimizer(LocalOptimizer):
"""WRITEME""" """WRITEME"""
def __init__(self, fn, tracks=None, requirements=()): def __init__(self, fn, tracks=None, requirements=()):
......
...@@ -223,6 +223,7 @@ class SequenceDB(DB): ...@@ -223,6 +223,7 @@ class SequenceDB(DB):
other tags) fast_run and fast_compile optimizers are drawn is a SequenceDB. other tags) fast_run and fast_compile optimizers are drawn is a SequenceDB.
""" """
seq_opt = opt.SeqOptimizer
def __init__(self, failure_callback=opt.SeqOptimizer.warn): def __init__(self, failure_callback=opt.SeqOptimizer.warn):
super(SequenceDB, self).__init__() super(SequenceDB, self).__init__()
...@@ -256,13 +257,13 @@ class SequenceDB(DB): ...@@ -256,13 +257,13 @@ class SequenceDB(DB):
# the order we want. # the order we want.
opts.sort(key=lambda obj: obj.name) opts.sort(key=lambda obj: obj.name)
opts.sort(key=lambda obj: self.__position__[obj.name]) opts.sort(key=lambda obj: self.__position__[obj.name])
ret = opt.SeqOptimizer(opts, failure_callback=self.failure_callback) ret = self.seq_opt(opts, failure_callback=self.failure_callback)
if hasattr(tags[0], 'name'): if hasattr(tags[0], 'name'):
ret.name = tags[0].name ret.name = tags[0].name
return ret return ret
def print_summary(self, stream=sys.stdout): def print_summary(self, stream=sys.stdout):
print >> stream, "SequenceDB (id %i)" % id(self) print >> stream, self.__class__.__name__ + " (id %i)" % id(self)
positions = self.__position__.items() positions = self.__position__.items()
def c(a, b): def c(a, b):
...@@ -279,6 +280,13 @@ class SequenceDB(DB): ...@@ -279,6 +280,13 @@ class SequenceDB(DB):
return sio.getvalue() return sio.getvalue()
class LocalSequenceDB(SequenceDB):
    """
    A SequenceDB that generates a local optimizer instead of a global
    optimizer when queried.
    """
    # Override the class attribute that SequenceDB.query() uses to build
    # the optimizer it returns.
    seq_opt = opt.LocalSeqOptimizer
class ProxyDB(DB): class ProxyDB(DB):
""" """
Wrap an existing proxy. Wrap an existing proxy.
......
...@@ -992,7 +992,7 @@ def pydotprint_variables(vars, ...@@ -992,7 +992,7 @@ def pydotprint_variables(vars,
if nd.owner: if nd.owner:
plot_apply(nd.owner, depth) plot_apply(nd.owner, depth)
try: try:
g.write_png(outfile, prog='dot') g.write(outfile, prog='dot', format=format)
except pd.InvocationException, e: except pd.InvocationException, e:
# Some version of pydot are bugged/don't work correctly with # Some version of pydot are bugged/don't work correctly with
# empty label. Provide a better user error message. # empty label. Provide a better user error message.
...@@ -1006,6 +1006,7 @@ def pydotprint_variables(vars, ...@@ -1006,6 +1006,7 @@ def pydotprint_variables(vars,
" Theano. Using another version of pydot could" " Theano. Using another version of pydot could"
" fix this problem. The pydot error is: " + " fix this problem. The pydot error is: " +
e.message) e.message)
raise
print 'The output file is available at', outfile print 'The output file is available at', outfile
......
差异被折叠。
...@@ -83,7 +83,7 @@ class TestConv2dFFT(unittest.TestCase): ...@@ -83,7 +83,7 @@ class TestConv2dFFT(unittest.TestCase):
# make sure we inserted the fft trickery # make sure we inserted the fft trickery
topo = f_fft.maker.fgraph.toposort() topo = f_fft.maker.fgraph.toposort()
assert sum(isinstance(n.op, theano.sandbox.cuda.fftconv.CuFFTOp) assert sum(isinstance(n.op, theano.sandbox.cuda.fftconv.CuFFTOp)
for n in topo) == 2 for n in topo) == 2, topo
res_ref = f_ref() res_ref = f_ref()
...@@ -112,7 +112,7 @@ class TestConv2dFFT(unittest.TestCase): ...@@ -112,7 +112,7 @@ class TestConv2dFFT(unittest.TestCase):
# make sure we inserted the fft trickery # make sure we inserted the fft trickery
topo = f_fft.maker.fgraph.toposort() topo = f_fft.maker.fgraph.toposort()
assert sum(isinstance(n.op, theano.sandbox.cuda.fftconv.CuFFTOp) assert sum(isinstance(n.op, theano.sandbox.cuda.fftconv.CuFFTOp)
for n in topo) == 2 for n in topo) == 2, topo
res_ref = f_ref() res_ref = f_ref()
res_fft = f_fft() res_fft = f_fft()
......
...@@ -306,6 +306,7 @@ def test_consistency_GPU_parallel(): ...@@ -306,6 +306,7 @@ def test_consistency_GPU_parallel():
samples = numpy.array(samples).flatten() samples = numpy.array(samples).flatten()
assert(numpy.allclose(samples, java_samples)) assert(numpy.allclose(samples, java_samples))
def test_GPU_nstreams_limit(): def test_GPU_nstreams_limit():
"""Verify that a ValueError is raised when n_streams """Verify that a ValueError is raised when n_streams
is greater than 2**20 on GPU. This is the value of is greater than 2**20 on GPU. This is the value of
...@@ -318,11 +319,18 @@ def test_GPU_nstreams_limit(): ...@@ -318,11 +319,18 @@ def test_GPU_nstreams_limit():
R = MRG_RandomStreams(seed=seed, use_cuda=True) R = MRG_RandomStreams(seed=seed, use_cuda=True)
def eval_uniform(size, nstreams): def eval_uniform(size, nstreams):
return R.uniform(size=size, nstreams=nstreams, dtype='float32').eval() if theano.config.mode == "FAST_COMPILE":
mode = "FAST_RUN"
else:
mode = None
out = R.uniform(size=size, nstreams=nstreams, dtype='float32')
f = theano.function([], out, mode=mode)
return f()
eval_uniform((10,), 2**20) eval_uniform((10,), 2**20)
assert_raises(ValueError, eval_uniform, (10,), 2**20 + 1) assert_raises(ValueError, eval_uniform, (10,), 2**20 + 1)
def test_consistency_GPUA_serial(): def test_consistency_GPUA_serial():
'''Verify that the random numbers generated by GPUA_mrg_uniform, serially, '''Verify that the random numbers generated by GPUA_mrg_uniform, serially,
are the same as the reference (Java) implementation by L'Ecuyer et al. are the same as the reference (Java) implementation by L'Ecuyer et al.
......
...@@ -2016,26 +2016,26 @@ def zeros_like(model, dtype=None): ...@@ -2016,26 +2016,26 @@ def zeros_like(model, dtype=None):
return fill(model, constant(0.0, dtype=dtype)) return fill(model, constant(0.0, dtype=dtype))
def zeros(shp, dtype=None): def zeros(shape, dtype=None):
""" """
Create a Tensor filled with zeros, closer to Numpy's syntax than ``alloc``. Create a Tensor filled with zeros, closer to Numpy's syntax than ``alloc``.
""" """
if not isinstance(shp, (list, tuple, TensorVariable)): if not isinstance(shape, (list, tuple, TensorVariable)):
shp = [shp] shape = [shape]
if dtype is None: if dtype is None:
dtype = config.floatX dtype = config.floatX
return alloc(numpy.array(0, dtype=dtype), *shp) return alloc(numpy.array(0, dtype=dtype), *shape)
def ones(shp, dtype=None): def ones(shape, dtype=None):
""" """
Create a Tensor filled with ones, closer to Numpy's syntax than ``alloc``. Create a Tensor filled with ones, closer to Numpy's syntax than ``alloc``.
""" """
if not isinstance(shp, (list, tuple, TensorVariable)): if not isinstance(shape, (list, tuple, TensorVariable)):
shp = [shp] shape = [shape]
if dtype is None: if dtype is None:
dtype = config.floatX dtype = config.floatX
return alloc(numpy.array(1, dtype=dtype), *shp) return alloc(numpy.array(1, dtype=dtype), *shape)
class Nonzero(gof.Op): class Nonzero(gof.Op):
......
...@@ -182,10 +182,20 @@ class DimShuffle(Op): ...@@ -182,10 +182,20 @@ class DimShuffle(Op):
input = as_tensor_variable(_input) input = as_tensor_variable(_input)
ib = tuple(input.type.broadcastable) ib = tuple(input.type.broadcastable)
if not ib == self.input_broadcastable: if not ib == self.input_broadcastable:
if len(ib) != len(self.input_broadcastable):
raise TypeError(( raise TypeError((
"The number of dimensions and/or broadcastable pattern of the " "The number of dimensions of the "
"input is incorrect for this op. Expected %s, got %s." "input is incorrect for this op. Expected %s, got %s."
% (self.input_broadcastable, ib))) % (self.input_broadcastable, ib)))
for expected, b in zip(self.input_broadcastable, ib):
if expected is True and b is False:
raise TypeError((
"The broadcastable pattern of the "
"input is incorrect for this op. Expected %s, got %s."
% (self.input_broadcastable, ib)))
#else, expected == b or expected is False and b is True
# Both case are good.
ob = [] ob = []
for value in self.new_order: for value in self.new_order:
if value == 'x': if value == 'x':
......
...@@ -1639,7 +1639,18 @@ def local_alloc_elemwise(node): ...@@ -1639,7 +1639,18 @@ def local_alloc_elemwise(node):
*[T.eq(i.shape[idx], cmp_op.shape[idx]) *[T.eq(i.shape[idx], cmp_op.shape[idx])
for idx in xrange(i.type.ndim) for idx in xrange(i.type.ndim)
if not i.type.broadcastable[idx]]) if not i.type.broadcastable[idx]])
new_i.append(i.owner.inputs[0].owner.inputs[0]) alloc_input = i.owner.inputs[0].owner.inputs[0]
if alloc_input.ndim != i.owner.inputs[0].ndim:
# The alloc can add dimension to the value
# We add a dimshuffle to add them.
# We let later optimization merge the multiple dimshuffle
nb_dim_to_add = i.owner.inputs[0].ndim - alloc_input.ndim
alloc_input = alloc_input.dimshuffle(['x'] * nb_dim_to_add +
range(alloc_input.ndim))
# We need to keep the dimshuffle. It could swap axes or
# add dimensions anywhere.
new_i.append(i.owner.op(alloc_input))
else: else:
new_i.append(i) new_i.append(i)
new_i[assert_op_idx] = assert_op new_i[assert_op_idx] = assert_op
......
...@@ -2687,6 +2687,21 @@ class Test_local_alloc_elemwise(unittest.TestCase): ...@@ -2687,6 +2687,21 @@ class Test_local_alloc_elemwise(unittest.TestCase):
self._verify_alloc_count(func, 1) self._verify_alloc_count(func, 1)
self._verify_assert_count(func, 1) self._verify_assert_count(func, 1)
def test_error(self):
t3fft = theano.tensor.tensor(dtype=self.dtype,
broadcastable=(False, False, True))
row = theano.tensor.row(dtype=self.dtype)
o = T.alloc(row, 5, 5).dimshuffle(0, 1, 'x') + t3fft
func = function(
[t3fft, row],
o,
mode='FAST_RUN'
)
self._verify_alloc_count(func, 0)
self._verify_assert_count(func, 1)
d = numpy.random.rand(5, 5, 1).astype(self.dtype)
r = numpy.random.rand(1, 5).astype(self.dtype)
func(d, r)
def test_local_subtensor_of_alloc(): def test_local_subtensor_of_alloc():
......
...@@ -11,6 +11,7 @@ import theano.tensor as tensor ...@@ -11,6 +11,7 @@ import theano.tensor as tensor
from theano.printing import min_informative_str, debugprint from theano.printing import min_informative_str, debugprint
from theano.compat.six import StringIO from theano.compat.six import StringIO
def test_pydotprint_cond_highlight(): def test_pydotprint_cond_highlight():
""" """
This is a REALLY PARTIAL TEST. This is a REALLY PARTIAL TEST.
...@@ -44,6 +45,37 @@ def test_pydotprint_cond_highlight(): ...@@ -44,6 +45,37 @@ def test_pydotprint_cond_highlight():
' is no IfElse node in the graph\n') ' is no IfElse node in the graph\n')
def test_pydotprint_variables():
    """
    This is a REALLY PARTIAL TEST.

    It was written to help debug stuff.

    It only makes sure the code runs.
    """
    # Skip test if pydot is not available.
    if not theano.printing.pydot_imported:
        raise SkipTest('pydot not available')

    x = tensor.dvector()

    # Temporarily redirect Theano logging into a buffer.  The original
    # code duplicated the removeHandler/addHandler pair, which left a
    # second copy of new_handler installed after the finally block ran.
    s = StringIO()
    new_handler = logging.StreamHandler(s)
    new_handler.setLevel(logging.DEBUG)
    orig_handler = theano.logging_default_handler

    theano.theano_logger.removeHandler(orig_handler)
    theano.theano_logger.addHandler(new_handler)
    try:
        theano.printing.pydotprint_variables(x * 2)
    finally:
        theano.theano_logger.addHandler(orig_handler)
        theano.theano_logger.removeHandler(new_handler)
def test_pydotprint_long_name(): def test_pydotprint_long_name():
"""This is a REALLY PARTIAL TEST. """This is a REALLY PARTIAL TEST.
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论