提交 2bc77c39 authored 作者: Razvan Pascanu's avatar Razvan Pascanu

merge

...@@ -44,7 +44,13 @@ precise inspection of what's being computed where, when, and how, see the ...@@ -44,7 +44,13 @@ precise inspection of what's being computed where, when, and how, see the
How do I print a graph before or after compilation? How do I print a graph before or after compilation?
---------------------------------------------------------- ----------------------------------------------------------
Theano provides a function to print a graph before and after compilation: Theano provides two functions to print a graph to the terminal before or after
compilation. It can print graph that only have one output. If you have multiple
output, call once for each output. Their is another one that create a png image
of the function. It support multiple output.
1) The first is ``theano.pp``. It hides some Ops added by the compiler, such as
the *DimShuffle* Op used for broadcasting.
>>> x = T.dscalar('x') >>> x = T.dscalar('x')
>>> y = x**2 >>> y = x**2
...@@ -55,13 +61,63 @@ Theano provides a function to print a graph before and after compilation: ...@@ -55,13 +61,63 @@ Theano provides a function to print a graph before and after compilation:
>>> pp(f.maker.env.outputs[0]) >>> pp(f.maker.env.outputs[0])
'(2.0 * x)' '(2.0 * x)'
The parameter in T.dscalar('x') in the first line is the name of this variable(in the graph, not in python). This name is reused when printing the graph. Otherwise the variable x is printed as its type as: <TensorType(float64, scalar)>. That is not the most comprehensible. The string 'x' can be any string, but to make the code more comprehensible, try to pass the same name or derivative of the name in python. The parameter in T.dscalar('x') in the first line is the name of this variable
in the graph. This name is used when printing the graph to make it more readable.
If no name is provided, the variable x is printed as its type, in this example
<TensorType(float64, scalar)>.
The name parameter can be any string; there is absolutely no restriction.
This means you can have many variables with the same name.
To make the code more comprehensible, try to give the name parameter the same
name as the one you use in the code.
2) The second function to print a graph is ``theano.printing.debugprint``(Variable, depth=-1).
You can use it on graph variables and compiled functions, as with pp. If the depth
parameter is provided, we limit the number of levels that we print.
>>> theano.printing.debugprint(gy)
Elemwise{mul} 46950804894224
Elemwise{mul} 46950804735120
Elemwise{second,no_inplace} 46950804626128
Elemwise{pow,no_inplace} 46950804625040
x 46950658736720
2 46950804039760
1.0 46950804625488
2 46950804039760
Elemwise{pow} 46950804737616
x 46950658736720
Elemwise{sub} 46950804736720
2 46950804039760
InplaceDimShuffle{} 46950804736016
1 46950804735760
<open file '<stdout>', mode 'w' at 0x2ab38d49f198>
>>> theano.printing.debugprint(gy, depth=2)
Elemwise{mul} 46950804894224
Elemwise{mul} 46950804735120
Elemwise{pow} 46950804737616
<open file '<stdout>', mode 'w' at 0x2ab38d49f198>
>>> theano.printing.debugprint(f.maker.env.outputs[0])
Elemwise{mul,no_inplace} 46950805397392
2.0 46950805310800
x 46950804895504
<open file '<stdout>', mode 'w' at 0x2ab38d49f198>
3) The function ``theano.printing.pydotprint(fct, file=SOME_DEFAULT_VALUE)`` will print a compiled theano function to a png file.
In the graph, boxes are Apply nodes (the execution of an op) and ellipses are variables.
If variables have names, those are used as the text (if multiple variables have the same name, they will be merged in the graph).
Otherwise, if the variable is a constant, we print its value; finally, we print the type plus a unique number to avoid having multiple variables merged.
We print the op of each apply in the Apply box, with a number that represents the toposort order of application of those Applies.
Green ellipses are inputs to the graph and blue ellipses are outputs of the graph.
The function I compiled is too slow, what's up? The function I compiled is too slow, what's up?
----------------------------------------------- -----------------------------------------------
First, make sure you're running in FAST_RUN mode, by passing ``mode='FAST_RUN'`` First, make sure you're running in FAST_RUN mode, by passing ``mode='FAST_RUN'``
to ``theano.function`` or ``theano.make`` or by setting to ``FAST_RUN`` to ``theano.function`` or ``theano.make`` or by setting to ``PROFILE_MODE``
the flag :attr:`config.mode`. Some the flags :attr:`config.mode`. Some
operations have excruciatingly slow Python implementations and that operations have excruciatingly slow Python implementations and that
can negatively effect the performance of FAST_COMPILE. can negatively effect the performance of FAST_COMPILE.
...@@ -80,7 +136,7 @@ Check out this one: ...@@ -80,7 +136,7 @@ Check out this one:
.. code-block:: python .. code-block:: python
class PrintEverythingMode(theano.Mode): class PrintEverythingMode(Mode):
def __init__(self): def __init__(self):
def print_eval(i, node, fn): def print_eval(i, node, fn):
print i, node, [input[0] for input in fn.inputs], print i, node, [input[0] for input in fn.inputs],
......
...@@ -961,6 +961,7 @@ class _Linker(gof.link.LocalLinker): ...@@ -961,6 +961,7 @@ class _Linker(gof.link.LocalLinker):
# transfer the initial values from the storage_map to the r_vals # transfer the initial values from the storage_map to the r_vals
debug("DEBUGMODE: transfer initial values") debug("DEBUGMODE: transfer initial values")
r_transfered_from_storage_map = []
for r in storage_map: for r in storage_map:
if (r.owner is None): if (r.owner is None):
if (storage_map[r][0] is None): if (storage_map[r][0] is None):
...@@ -969,6 +970,7 @@ class _Linker(gof.link.LocalLinker): ...@@ -969,6 +970,7 @@ class _Linker(gof.link.LocalLinker):
raise InvalidValueError(r, storage_map[r][0]) raise InvalidValueError(r, storage_map[r][0])
r_vals[r] = storage_map[r][0] r_vals[r] = storage_map[r][0]
storage_map[r][0] = None storage_map[r][0] = None
r_transfered_from_storage_map.append(r)
##### #####
# Precondition: the storage map is empty, transferred completely to r_vals # Precondition: the storage map is empty, transferred completely to r_vals
##### #####
...@@ -1122,7 +1124,7 @@ class _Linker(gof.link.LocalLinker): ...@@ -1122,7 +1124,7 @@ class _Linker(gof.link.LocalLinker):
else: else:
storage_map[r][0] = dr_vals[r][0] storage_map[r][0] = dr_vals[r][0]
except: except:
for r in storage_map: for r in r_transfered_from_storage_map:
if r in original_storage_map_keys: if r in original_storage_map_keys:
if storage_map[r][0] is None: if storage_map[r][0] is None:
storage_map[r][0] = r_vals[r] storage_map[r][0] = r_vals[r]
......
...@@ -283,6 +283,11 @@ class Function(object): ...@@ -283,6 +283,11 @@ class Function(object):
#def assign(c, v): #def assign(c, v):
#c.data = v #c.data = v
# Store the list of names of named inputs.
named_inputs = []
# Count the number of un-named inputs.
n_unnamed_inputs = 0
#setters = [] #setters = []
# Initialize the storage # Initialize the storage
# this loop works by modifying the elements (as variable c) of self.input_storage inplace. # this loop works by modifying the elements (as variable c) of self.input_storage inplace.
...@@ -312,6 +317,10 @@ class Function(object): ...@@ -312,6 +317,10 @@ class Function(object):
finder[input.name] = c finder[input.name] = c
else: else:
finder[input.name] = DUPLICATE finder[input.name] = DUPLICATE
if input.name is None:
n_unnamed_inputs += 1
else:
named_inputs.append(input.name)
#backport #backport
#finder[input.name] = c if input.name not in finder else DUPLICATE #finder[input.name] = c if input.name not in finder else DUPLICATE
# inv_finder maps the container to the input (useful for one error message) # inv_finder maps the container to the input (useful for one error message)
...@@ -378,7 +387,9 @@ class Function(object): ...@@ -378,7 +387,9 @@ class Function(object):
try: try:
s = finder[item] s = finder[item]
except KeyError: except KeyError:
raise TypeError("Unknown input or state: %s" % item) # Print informative error message.
msg = get_info_on_inputs(named_inputs, n_unnamed_inputs)
raise TypeError("Unknown input or state: %s. %s" % (item, msg))
if s is DUPLICATE: if s is DUPLICATE:
raise TypeError("Ambiguous name: %s - please check the names of the inputs of your function for duplicates." % item) raise TypeError("Ambiguous name: %s - please check the names of the inputs of your function for duplicates." % item)
if isinstance(s, gof.Container): if isinstance(s, gof.Container):
...@@ -1014,3 +1025,43 @@ def convert_function_input(input): ...@@ -1014,3 +1025,43 @@ def convert_function_input(input):
else: else:
raise TypeError("Unknown input type: %s, expected Variable instance" % type(input), input) raise TypeError("Unknown input type: %s, expected Variable instance" % type(input), input)
def get_info_on_inputs(named_inputs, n_unnamed_inputs):
    """Build a human-readable description of a function's inputs.

    :param named_inputs: list of names (strings) of the named inputs.
    :param n_unnamed_inputs: count of inputs that have no name.
    :return: a sentence describing how many named/unnamed inputs exist and,
        for unnamed ones, why they cannot be set via keyword arguments.
    """
    n_named_inputs = len(named_inputs)
    # Pluralization suffixes, computed once up front.
    plural_named = 's' if n_named_inputs > 1 else ''
    plural_unnamed = 's' if n_unnamed_inputs > 1 else ''

    if n_named_inputs == 0 and n_unnamed_inputs == 0:
        return 'The function is supposed to have no input.'

    if n_named_inputs == 0:
        if n_unnamed_inputs == 1:
            return ("The function has a single input variable which has no "
                    "name, and thus cannot be assigned through a keyword"
                    " argument (use 'name=...' in a Variable's "
                    "constructor to give it a name).")
        return ("The function has %s inputs, but none of them is named,"
                " and thus they cannot be assigned through keyword "
                "arguments (use 'name=...' in a Variable's "
                "constructor to give it a name)." % n_unnamed_inputs)

    if n_unnamed_inputs == 0:
        return ("The function has %s named input%s (%s)." % (
            n_named_inputs, plural_named, ', '.join(named_inputs)))

    return ("The function has %s named input%s (%s), and %s unnamed "
            "input%s which thus cannot be accessed through keyword "
            "argument%s (use 'name=...' in a variable's constructor "
            "to give it a name)." % (
                n_named_inputs, plural_named, ', '.join(named_inputs),
                n_unnamed_inputs, plural_unnamed, plural_unnamed))
...@@ -579,6 +579,14 @@ def std_libs(): ...@@ -579,6 +579,14 @@ def std_libs():
def std_lib_dirs(): def std_lib_dirs():
return std_lib_dirs_and_libs()[1] return std_lib_dirs_and_libs()[1]
def _detect_gcc_version():
    """Run ``gcc -dumpversion`` once and return the version string.

    Returns None when gcc cannot be executed, instead of letting the
    failure abort the whole module import.
    """
    try:
        p = subprocess.Popen(['gcc', '-dumpversion'],
                             stdout=subprocess.PIPE)
        # communicate() waits for the process AND drains the pipe; the
        # previous wait()-then-readline() order can deadlock if the child
        # fills the pipe buffer before exiting.
        out, _ = p.communicate()
    except OSError:
        # gcc is not installed or not on the PATH.
        return None
    if not isinstance(out, str):
        # A binary pipe yields bytes on Python 3; normalize to str.
        out = out.decode()
    return out.strip()

# Detected once at import time; other modules compare this value against
# known-buggy gcc releases (e.g. '4.3.0' in ConvOp.c_compile_args).
gcc_version_str = _detect_gcc_version()

def gcc_version():
    """Return the gcc version detected at import time (None if no gcc)."""
    return gcc_version_str
def gcc_module_compile_str(module_name, src_code, location=None, include_dirs=[], lib_dirs=[], libs=[], def gcc_module_compile_str(module_name, src_code, location=None, include_dirs=[], lib_dirs=[], libs=[],
preargs=[]): preargs=[]):
""" """
......
"""Pretty-printing graphs, and the 'Print' Op. """Pretty-printing (pprint()), the 'Print' Op, debugprint() and pydotprint().
They all allow different way to print a graph or the result of an Op in a graph(Print Op)
""" """
import gof import gof
from copy import copy from copy import copy
...@@ -7,6 +8,9 @@ from theano import config ...@@ -7,6 +8,9 @@ from theano import config
from gof import Op, Apply from gof import Op, Apply
from theano.gof.python25 import any from theano.gof.python25 import any
#We import the debugprint here to have all printing of graph available from this module
from theano.compile.debugmode import debugprint
class Print(Op): class Print(Op):
"""This identity-like Op has the side effect of printing a message followed by its inputs """This identity-like Op has the side effect of printing a message followed by its inputs
when it runs. Default behaviour is to print the __str__ representation. Optionally, one when it runs. Default behaviour is to print the __str__ representation. Optionally, one
...@@ -307,9 +311,12 @@ def pydotprint(fct, outfile=os.path.join(config.compiledir,'theano.pydotprint.pn ...@@ -307,9 +311,12 @@ def pydotprint(fct, outfile=os.path.join(config.compiledir,'theano.pydotprint.pn
:param fct: the theano fct returned by theano.function. :param fct: the theano fct returned by theano.function.
:param outfile: the output file where to put the graph. :param outfile: the output file where to put the graph.
In the graph, box are an Apply Node(the execution of an op) and elipse are variable. In the graph, box are an Apply Node(the execution of an op) and ellipse are variable.
If variable have name they are used as the text(if multiple var have the same name, they will be merged in the graph). Otherwise, if a constant, we print the value and finaly we print the type + an uniq number to don't have multiple var merged. If variable have name they are used as the text(if multiple var have the same name, they will be merged in the graph).
Otherwise, if the variable is constant, we print the value and finaly we print the type + an uniq number to don't have multiple var merged.
We print the op of the apply in the Apply box with a number that represent the toposort order of application of those Apply. We print the op of the apply in the Apply box with a number that represent the toposort order of application of those Apply.
green ellipse are input to the graph and blue ellipse are output of the graph.
""" """
import pydot as pd import pydot as pd
......
"""Provides Ops for FFT and DCT.
"""
from theano.gof import Op, Apply, generic
from theano import tensor
import numpy.fft
import numpy
class GradTodo(Op):
    """Stub Op standing in for a gradient that is not implemented yet."""

    def make_node(self, x):
        # Single output with the same type as the single input.
        out = x.type()
        return Apply(self, [x], [out])

    def perform(self, node, inputs, outputs):
        # Evaluating this placeholder is always an error.
        raise NotImplementedError('TODO')


# Shared instance used by grad() implementations below.
grad_todo = GradTodo()
class FFT(Op):
    """Fast Fourier Transform

    Wraps numpy.fft.fft (or numpy.fft.ifft when inverse=True) as a Theano Op
    operating on a matrix, along either rows or columns.

    .. TODO:
        The current implementation just works for matrix inputs, and permits taking a 1D FFT over
        either rows or columns. Add support for N-D FFTs as provided by either numpy or FFTW
        directly.
    .. TODO:
        Give the C code that uses FFTW.
    .. TODO:
        unit tests.
    """

    # Index of the spectrogram in the outputs list:
    # don't return the plan object in the 'buf' output
    default_output = 0

    half = False
    """Only return the first half (positive-valued) of the frequency components"""

    def __init__(self, half=False, inverse=False):
        # half: drop the redundant (second) half of the transform output.
        # inverse: apply the inverse FFT instead of the forward FFT.
        self.half = half
        self.inverse=inverse

    def __eq__(self, other):
        # Two FFT Ops are interchangeable iff both flags match.
        return type(self) == type(other) and (self.half == other.half) and (self.inverse == other.inverse)

    def __hash__(self, ):
        # 9828743 is an arbitrary salt distinguishing this Op's hash;
        # must stay consistent with __eq__ (type, half, inverse).
        return hash(type(self)) ^ hash(self.half) ^ 9828743 ^ (self.inverse)

    def __ne__(self, other):
        return not(self == other)

    def make_node(self, frames, n, axis):
        """ compute an n-point fft of frames along given axis """
        _frames = tensor.as_tensor(frames, ndim=2)
        _n = tensor.as_tensor(n, ndim=0)
        _axis = tensor.as_tensor(axis, ndim=0)
        if self.half and _frames.type.dtype.startswith('complex'):
            # A complex input is rejected in half mode.
            raise TypeError('Argument to HalfFFT must not be complex', frames)
        spectrogram = tensor.zmatrix()
        buf = generic()
        # The `buf` output is present for future work
        # when we call FFTW directly and re-use the 'plan' that FFTW creates.
        # In that case, buf would store a CObject encapsulating the plan.
        rval = Apply(self, [_frames, _n, _axis], [spectrogram, buf])
        return rval

    def perform(self, node, (frames, n, axis), (spectrogram, buf)):
        # NOTE(review): Python 2 tuple-unpacking parameters; this module
        # predates Python 3.
        if self.inverse:
            fft_fn = numpy.fft.ifft
        else:
            fft_fn = numpy.fft.fft
        fft = fft_fn(frames, int(n), int(axis))
        if self.half:
            M, N = fft.shape
            if axis == 0:
                # Only even-length transforms split cleanly in half.
                if (M % 2):
                    raise ValueError('halfFFT on odd-length vectors is undefined')
                spectrogram[0] = fft[0:M/2, :]
            elif axis==1:
                if (N % 2):
                    raise ValueError('halfFFT on odd-length vectors is undefined')
                spectrogram[0] = fft[:,0:N/2]
            else:
                # Matrix input: only axis 0 or 1 makes sense.
                raise NotImplementedError()
        else:
            spectrogram[0] = fft

    def grad(self, (frames, n, axis), (g_spectrogram, g_buf)):
        # Gradient not implemented: grad_todo raises if ever evaluated.
        # n and axis are integer parameters, hence no gradient (None).
        return [grad_todo(frames), None, None]

# Pre-built instances covering the four flag combinations.
fft = FFT(half=False, inverse=False)
half_fft = FFT(half=True, inverse=False)
ifft = FFT(half=False, inverse=True)
half_ifft = FFT(half=True, inverse=True)
def dct_matrix(rows, cols, unitary=True):
    """
    Return a (rows x cols) matrix implementing a discrete cosine transform.

    This algorithm is adapted from Dan Ellis' Rastamat
    spec2cep.m, lines 15 - 20.

    :param rows: number of rows (DCT coefficients) in the result.
    :param cols: number of columns (input samples) in the result.
    :param unitary: when True, rescale the first row (by sqrt(0.5)) so the
        transform matrix is unitary.
    :return: numpy array of shape (rows, cols), dtype float64.
    """
    # Vectorized outer-product form of the original per-row loop (this also
    # avoids the Python-2-only `xrange` builtin):
    #   rval[i, j] = cos(i * (2*j + 1) / (2*cols) * pi) * sqrt(2/cols)
    i = numpy.arange(rows).reshape(rows, 1)
    col_range = numpy.arange(cols)
    scale = numpy.sqrt(2.0 / cols)
    rval = numpy.cos(i * (col_range * 2 + 1) / (2.0 * cols) * numpy.pi) * scale
    if unitary:
        # Rescale the first row for the unitary variant of the transform.
        rval[0] *= numpy.sqrt(0.5)
    return rval
...@@ -29,8 +29,6 @@ ...@@ -29,8 +29,6 @@
""" """
__docformat__ = 'restructedtext en' __docformat__ = 'restructedtext en'
import numpy
import theano import theano
from theano.tensor import opt from theano.tensor import opt
from theano import gof from theano import gof
......
...@@ -3209,8 +3209,8 @@ class AdvancedIncSubtensor(Op): ...@@ -3209,8 +3209,8 @@ class AdvancedIncSubtensor(Op):
broadcastable = x.type.broadcastable)]) broadcastable = x.type.broadcastable)])
raise NotImplementedError('Advanced indexing increment of x (of dimension %i) by y (of dimension %i) with these argument dimensions (%s) not supported yet'\ raise NotImplementedError('Advanced indexing increment of x (of dimension %i) by y (of dimension %i) with these argument dimensions (%s) not supported yet'\
% (x.ndim, y.ndim, ','.join(str(input.ndim) for input in inputs))) % (x.ndim, y.ndim, ','.join(str(input.ndim) for input in inputs)))
raise NotImplementedError('Advanced indexing increment of x by y with arguments (%s) not supported yet'\ raise NotImplementedError('Advanced indexing increment of x (of dim %i) by y (of dim %i) with arguments (%s) not supported yet'\
% ','.join(str(input) for input in inputs)) % (x.ndim, y.ndim, ','.join(str(input) for input in inputs)))
def perform(self, node, inputs, (out,)): def perform(self, node, inputs, (out,)):
# TODO: same thing as in AdvancedSubtensor's perform TODO # TODO: same thing as in AdvancedSubtensor's perform TODO
...@@ -3452,8 +3452,7 @@ def grad(cost, wrt, g_cost=None, consider_constant=[], warn_type=False): ...@@ -3452,8 +3452,7 @@ def grad(cost, wrt, g_cost=None, consider_constant=[], warn_type=False):
:return: symbolic expression of gradient of `cost` with respect to `wrt`. :return: symbolic expression of gradient of `cost` with respect to `wrt`.
If `wrt` is a list, then return a list containing the gradient of `cost` wrt If `wrt` is a list, then return a list containing the gradient of `cost` wrt
each element of the list. If an element of `wrt` is not differentiable each element of the list. If an element of `wrt` is not differentiable
with respect to the output, then a `TensorConstant` with an appropriate with respect to the output, then a zero variable is returned.
kind of zero is returned.
This function is a wrapper around a the more general function This function is a wrapper around a the more general function
`theano.gradient.grad_sources_inputs``. `theano.gradient.grad_sources_inputs``.
...@@ -3473,21 +3472,13 @@ def grad(cost, wrt, g_cost=None, consider_constant=[], warn_type=False): ...@@ -3473,21 +3472,13 @@ def grad(cost, wrt, g_cost=None, consider_constant=[], warn_type=False):
gmap = gradient.grad_sources_inputs([(cost, g_cost)], inputs + consider_constant, gmap = gradient.grad_sources_inputs([(cost, g_cost)], inputs + consider_constant,
warn_type=warn_type) warn_type=warn_type)
def zero(p): # Note that it is important to use `zeros_like` when there is no gradient,
return TensorConstant( # instead of returning a scalar constant equal to zero. Otherwise we lose
TensorType(dtype = p.type.dtype, broadcastable = []), # the guarantee that the gradient has same shape as `wrt`.
theano._asarray(0, dtype=p.type.dtype))
#try:
#it = iter(wrt)
#except:
#it = None
#if it: #hasattr(wrt, '__iter__'): # isinstance(wrt, (list, tuple)):
if isinstance(wrt, (list, tuple)): if isinstance(wrt, (list, tuple)):
return [gmap.get(p, zero(p)) for p in wrt] return [gmap.get(p, zeros_like(p)) for p in wrt]
else: else:
return gmap.get(wrt, zero(wrt)) return gmap.get(wrt, zeros_like(wrt))
class numeric_grad: class numeric_grad:
"""WRITEME""" """WRITEME"""
...@@ -3590,7 +3581,7 @@ class numeric_grad: ...@@ -3590,7 +3581,7 @@ class numeric_grad:
def verify_grad(op, pt, n_tests=2, rng=None, eps=None, tol=None, mode=None, cast_to_output_type=False): def verify_grad(op, pt, n_tests=2, rng=None, eps=None, tol=None, mode=None, cast_to_output_type=False):
""" WRITEME """ WRITEME
Raises an Exception if the difference between the analytic gradient and Raises an Exception if the difference between the analytic gradient and
numerical gradient (computed through the Finite Difference Method) exceeds numerical gradient (computed through the Finite Difference Method) exceeds
the given tolerance. the given tolerance.
...@@ -3607,7 +3598,7 @@ def verify_grad(op, pt, n_tests=2, rng=None, eps=None, tol=None, mode=None, cast ...@@ -3607,7 +3598,7 @@ def verify_grad(op, pt, n_tests=2, rng=None, eps=None, tol=None, mode=None, cast
try to make it a SMALL graph. Often verify grad is run in try to make it a SMALL graph. Often verify grad is run in
debug mode, which can be very slow if it has to verify a lot debug mode, which can be very slow if it has to verify a lot
of intermediate computations. of intermediate computations.
""" """
pt = [numpy.array(p) for p in pt] pt = [numpy.array(p) for p in pt]
...@@ -3619,9 +3610,8 @@ def verify_grad(op, pt, n_tests=2, rng=None, eps=None, tol=None, mode=None, cast ...@@ -3619,9 +3610,8 @@ def verify_grad(op, pt, n_tests=2, rng=None, eps=None, tol=None, mode=None, cast
tol = __builtin__.max(_type_tol[str(p.dtype)] for p in pt) tol = __builtin__.max(_type_tol[str(p.dtype)] for p in pt)
if rng is None: if rng is None:
rng = numpy.random raise TypeError('rng should be a valid instance of numpy.random.RandomState.',
from theano import tests as theano_tests # TODO This is an ugly import. Fix? 'You may want to use theano.tests.unittest_tools.verify_grad instead of theano.tensor.verify_grad.')
theano_tests.unittest_tools.seed_rng()
def function(inputs, output): def function(inputs, output):
if mode is None: if mode is None:
...@@ -3633,9 +3623,9 @@ def verify_grad(op, pt, n_tests=2, rng=None, eps=None, tol=None, mode=None, cast ...@@ -3633,9 +3623,9 @@ def verify_grad(op, pt, n_tests=2, rng=None, eps=None, tol=None, mode=None, cast
for test_num in xrange(n_tests): for test_num in xrange(n_tests):
tensor_pt = [value(p.copy(), name='input %i'%i) for i,p in enumerate(pt)] tensor_pt = [value(p.copy(), name='input %i'%i) for i,p in enumerate(pt)]
#op can be either a function or an actual Op instance #op can be either a function or an actual Op instance
o_output = op(*tensor_pt) o_output = op(*tensor_pt)
if isinstance(o_output,list) > 1: if isinstance(o_output,list) > 1:
raise NotImplementedError('cant (yet) autotest gradient of op with multiple outputs') raise NotImplementedError('cant (yet) autotest gradient of op with multiple outputs')
......
...@@ -100,6 +100,21 @@ class ConvOp(Op): ...@@ -100,6 +100,21 @@ class ConvOp(Op):
'imshp_logical', 'kshp_logical', 'kshp_logical_top_aligned'] 'imshp_logical', 'kshp_logical', 'kshp_logical_top_aligned']
"""These attributes uniquely identify the behaviour of this op for given inputs""" """These attributes uniquely identify the behaviour of this op for given inputs"""
def c_compile_args(self):
    """Extra compiler flags for this op's generated C code.

    gcc 4.3.0 segfaults while compiling the generated code at -O3 when the
    kernel shape is (1, 1); -O2 does not trigger the crash, so force it.
    """
    needs_workaround = (theano.gof.cmodule.gcc_version() in ['4.3.0']
                        and self.kshp == (1, 1))
    if needs_workaround:
        return ['-O2']
    return []
def c_no_compile_args(self):
    """Compiler flags to strip from this op's compilation.

    Companion to c_compile_args: when gcc 4.3.0 would segfault at -O3 for
    a (1, 1) kernel shape, remove -O3 from the default flag set.
    """
    needs_workaround = (theano.gof.cmodule.gcc_version() in ['4.3.0']
                        and self.kshp == (1, 1))
    if needs_workaround:
        return ['-O3']
    return []
@staticmethod @staticmethod
def getOutputShape(inshp, kshp, (dx,dy)=(1,1), mode='valid'): def getOutputShape(inshp, kshp, (dx,dy)=(1,1), mode='valid'):
""" """
......
...@@ -708,9 +708,14 @@ class CrossentropySoftmax1HotWithBiasDx (gof.Op): ...@@ -708,9 +708,14 @@ class CrossentropySoftmax1HotWithBiasDx (gof.Op):
# advanced indexing is not working yet. When it works, do it to avoid # advanced indexing is not working yet. When it works, do it to avoid
# potentially misleading behavior in gradient computations! (although # potentially misleading behavior in gradient computations! (although
# typically we should not need the gradient w.r.t. dy). # typically we should not need the gradient w.r.t. dy).
# y_idx_range = tensor.arange(y_idx.shape[0]) y_idx_range = tensor.arange(y_idx.shape[0])
# return [g_dx * tensor.AdvancedIncSubtensor((y_idx_range, y_idx))(sm, -1, y_idx_range, y_idx), dy.dimshuffle(0, 'x') * g_dx, None] g_dy = tensor.sum(
return [None, dy.dimshuffle(0, 'x') * g_dx, None] g_dx * tensor.AdvancedIncSubtensor((y_idx_range, y_idx))(
sm, tensor.fill(dy, -1), y_idx_range, y_idx),
axis=1)
g_sm = dy.dimshuffle(0, 'x') * g_dx
g_y_idx = None
return [g_dy, g_sm, g_y_idx]
def c_code_cache_version(self): def c_code_cache_version(self):
return (2,) return (2,)
def c_code(self, node, name, (dnll, sm, y_idx), (dx,), sub): def c_code(self, node, name, (dnll, sm, y_idx), (dx,), sub):
......
...@@ -95,6 +95,16 @@ class T_CrossentropySoftmax1HotWithBiasDx(unittest.TestCase): ...@@ -95,6 +95,16 @@ class T_CrossentropySoftmax1HotWithBiasDx(unittest.TestCase):
softmax_output = numpy.random.rand(10, 5) softmax_output = numpy.random.rand(10, 5)
softmax_output /= softmax_output.sum(axis=1).reshape(10, 1) softmax_output /= softmax_output.sum(axis=1).reshape(10, 1)
utt.verify_grad(f, [softmax_output]) utt.verify_grad(f, [softmax_output])
def test1(self):
    """Check the gradient of crossentropy_softmax_1hot_with_bias_dx w.r.t. dy."""
    rng = numpy.random.RandomState(utt.fetch_seed())
    softmax_output = rng.rand(10, 5)
    # Normalize each row so it is a valid softmax output.
    softmax_output /= softmax_output.sum(axis=1).reshape(10, 1)

    def op_wrt_dy(dy):
        # Only dy varies; softmax output is fixed, targets are redrawn
        # from the same rng on each call.
        return (theano.tensor.nnet.crossentropy_softmax_1hot_with_bias_dx(
            dy, softmax_output,
            rng.randint(low=0, high=5, size=10)))

    utt.verify_grad(op_wrt_dy, [rng.rand(10)])
class T_CrossentropySoftmaxArgmax1HotWithBias(unittest.TestCase): class T_CrossentropySoftmaxArgmax1HotWithBias(unittest.TestCase):
def setUp(self): def setUp(self):
......
...@@ -1664,12 +1664,12 @@ class test_grad(unittest.TestCase): ...@@ -1664,12 +1664,12 @@ class test_grad(unittest.TestCase):
self.failUnless(o.gval1 is g1) self.failUnless(o.gval1 is g1)
def test_1None_rval(self): def test_1None_rval(self):
"""grad: Test returning a single None from grad""" """grad: Test returning a single zero value from grad"""
o = test_grad.O() o = test_grad.O()
a1 = o.make_node() a1 = o.make_node()
g = grad(a1.outputs[0], a1.outputs[1]) g = grad(a1.outputs[0], a1.outputs[1])
self.failUnless(isinstance(g, TensorConstant)) self.failUnless(g.owner.op == fill)
self.failUnless(g.data == 0) self.failUnless(g.owner.inputs[1].data == 0)
try: try:
grad(a1.outputs[0], 'wtf') grad(a1.outputs[0], 'wtf')
except AttributeError, e: except AttributeError, e:
...@@ -1677,14 +1677,14 @@ class test_grad(unittest.TestCase): ...@@ -1677,14 +1677,14 @@ class test_grad(unittest.TestCase):
self.fail() self.fail()
def test_NNone_rval(self): def test_NNone_rval(self):
"""grad: Test returning some Nones from grad""" """grad: Test returning some zero value from grad"""
o = test_grad.O() o = test_grad.O()
a1 = o.make_node() a1 = o.make_node()
g0,g1,g2 = grad(a1.outputs[0], a1.inputs + [scalar('z')]) g0,g1,g2 = grad(a1.outputs[0], a1.inputs + [scalar('z')])
self.failUnless(o.gval0 is g0) self.failUnless(o.gval0 is g0)
self.failUnless(o.gval1 is g1) self.failUnless(o.gval1 is g1)
self.failUnless(isinstance(g2, TensorConstant)) self.failUnless(g2.owner.op == fill)
self.failUnless(g2.data == 0) self.failUnless(g2.owner.inputs[1].data == 0)
class T_op_cache(unittest.TestCase): class T_op_cache(unittest.TestCase):
def setUp(self): def setUp(self):
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论