testgroup / pytensor / Commits

Commit 94519309
Authored May 19, 2011 by Olivier Delalleau
Merged
Parents: 0f89d7fa, 6cdbe8d7

Showing 4 changed files with 120 additions and 36 deletions (+120 -36)
theano/printing.py                        +40 -8
theano/scan_module/__init__.py            +13 -0
theano/scan_module/tests/test_scan.py     +53 -27
theano/tensor/opt.py                      +14 -1
theano/printing.py
@@ -393,7 +393,7 @@ default_colorCodes = {'GpuFromHost' : 'red',
 def pydotprint(fct, outfile=None,
                compact=True, format='png', with_ids=False,
                high_contrast=False, cond_highlight=None, colorCodes=None,
-               max_label_size=50):
+               max_label_size=50, scan_graphs=False):
     """
     print to a file in png format the graph of op of a compile theano fct.
@@ -401,6 +401,8 @@ def pydotprint(fct, outfile=None,
     :param outfile: the output file where to put the graph.
     :param compact: if True, will remove intermediate var that don't have name.
     :param format: the file format of the output.
+    :param with_ids: Print the toposort index of the node in the node name.
+        and an index number in the variable ellipse.
     :param high_contrast: if true, the color that describes the respective
         node is filled with its corresponding color, instead of coloring
         the border
@@ -412,6 +414,11 @@ def pydotprint(fct, outfile=None,
         right branch, ops that are on both branches
         As an alternative you can provide the node that represents
         the lazy if
+    :param scan_graphs: if true it will plot the inner graph of each scan op
+        in files with the same name as the name given for the main
+        file to which the name of the scan op is concatenated and
+        the index in the toposort of the scan.
+        This index can be printed in the graph with the option with_ids.
     In the graph, box are an Apply Node(the execution of an op) and ellipse are variable.
     If variable have name they are used as the text(if multiple var have the same name, they will be merged in the graph).
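Usage note: together with the scan_graphs handling added near the end of pydotprint further down, a call that also dumps every scan op's inner graph could look like the sketch below. The toy graph and the output file name are illustrative only (they are not part of this commit), and pydot must be installed for pydotprint to work at all.

    import theano
    import theano.tensor as T

    x = T.vector('x')
    # Build a small function that contains a Scan op, so that
    # scan_graphs=True has an inner graph to plot.
    out, updates = theano.scan(lambda v: v * 2, sequences=x)
    f = theano.function([x], out)

    # Writes graph.png for the outer graph, plus one extra file per Scan op,
    # named after the main file, the scan op's name and its toposort index
    # (the same index that with_ids=True prints in the node labels).
    theano.printing.pydotprint(f, outfile='graph.png', with_ids=True,
                               scan_graphs=True)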
@@ -428,7 +435,6 @@ def pydotprint(fct, outfile=None,
     if colorCodes is None:
         colorCodes = default_colorCodes
     if outfile is None:
         outfile = os.path.join(config.compiledir, 'theano.pydotprint.' +
                                config.device + '.' + format)
@@ -499,8 +505,10 @@ def pydotprint(fct, outfile=None,
         #a var id is needed as otherwise var with the same type will be merged in the graph.
         varstr = str(var.type)
         if (varstr in all_strings) or with_ids:
-            varstr += ' id=' + str(len(var_str))
-        if len(varstr) > max_label_size:
+            idx = ' id=' + str(len(var_str))
+            if len(varstr) + len(idx) > max_label_size:
+                varstr = varstr[:max_label_size - 3 - len(idx)] + idx + '...'
+        elif len(varstr) > max_label_size:
             varstr = varstr[:max_label_size - 3] + '...'
         var_str[var] = varstr
         all_strings.add(varstr)
@@ -523,11 +531,14 @@ def pydotprint(fct, outfile=None,
             else:
                 pf = time * 100 / mode.fct_call_time[fct]
             prof_str = ' (%.3fs, %.3f%%, %.3f%%)' % (time, pt, pf)
         applystr = str(node.op).replace(':', '_')
-        if len(applystr) > max_label_size:
-            applystr = applystr[:max_label_size - 3] + '...'
-        if (applystr in all_strings) or with_ids:
-            applystr = applystr + ' id=' + str(topo.index(node))
         applystr += prof_str
+        if (applystr in all_strings) or with_ids:
+            idx = ' id=' + str(topo.index(node))
+            if len(applystr) + len(idx) > max_label_size:
+                applystr = applystr[:max_label_size - 3 - len(idx)] + idx + '...'
+        elif len(applystr) > max_label_size:
+            applystr = applystr[:max_label_size - 3] + '...'
         all_strings.add(applystr)
         apply_name_cache[node] = applystr
         return applystr
@@ -626,6 +637,27 @@ def pydotprint(fct, outfile=None,
     g.write(outfile, prog='dot', format=format)
     print 'The output file is available at', outfile
+    if scan_graphs:
+        scan_ops = [(idx, x) for idx, x in enumerate(fct_env.toposort())
+                    if isinstance(x.op, theano.scan_module.scan_op.Scan)]
+        path, fn = os.path.split(outfile)
+        basename = '.'.join(fn.split('.')[:-1])
+        # Safe way of doing things .. a file name may contain multiple .
+        ext = fn[len(basename):]
+        for idx, scan_op in scan_ops:
+            # is there a chance that name is not defined?
+            if hasattr(scan_op.op, 'name'):
+                new_name = basename + '_' + scan_op.op.name + '_' + str(idx)
+            else:
+                new_name = basename + '_' + str(idx)
+            new_name = os.path.join(path, new_name + ext)
+            pydotprint(scan_op.op.fn, new_name, compact, format, with_ids,
+                       high_contrast, cond_highlight, colorCodes,
+                       max_label_size, scan_graphs)
theano/scan_module/__init__.py
@@ -367,8 +367,21 @@ class ScanSaveMem(gof.Optimizer):
                 # If the memory for this output has been pre-allocated
                 # before going into the scan op (by an alloc node)
                 if idx < op.n_mit_sot + op.n_sit_sot:
-                    _nw_input = nw_inputs[offset + idx].owner.inputs[1]
-                    nw_input = scan_utils.expand(_nw_input, val - init_l[i])
+                    # In case the input is still an alloc node
+                    if nw_inputs[offset + idx].owner:
+                        _nw_input = nw_inputs[offset + idx].owner.inputs[1]
+                        nw_input = scan_utils.expand(_nw_input,
+                                                     val - init_l[i])
+                    # Else, if it was constant folded to a single value
+                    elif isinstance(nw_inputs[offset + idx], tensor.Constant):
+                        # The hope is that constant folding will fold
+                        # this as well
+                        nw_input = nw_inputs[offset + idx][:val]
+                    else:
+                        raise Exception(('Unforseen case. Please report'
+                                         ' to theano-dev with an example'
+                                         ' script for this case to be'
+                                         ' debuged'))
                     nw_inputs[offset + idx] = nw_input
                     replaced_outs.append(op.n_mit_mot + idx)
                     odx = op.n_mit_mot + idx
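For context on what the two branches above are matching, here is a rough sketch of how a pre-allocated output buffer typically enters a scan graph. The names and the toy step function are illustrative only, and whether the alloc node survives to the optimizer or is constant-folded into a Constant depends on whether its arguments are themselves constants:

    import theano
    import theano.tensor as T

    x0 = T.vector('x0')
    n_steps = T.iscalar('n_steps')

    # With outputs_info, scan pre-allocates a buffer (an alloc-like node
    # built before entering the scan op) that holds the initial state plus
    # one slot per step; ScanSaveMem then tries to shrink that buffer when
    # only the last few steps are actually used.
    out, updates = theano.scan(lambda prev: prev * 2,
                               outputs_info=x0,
                               n_steps=n_steps)
    f = theano.function([x0, n_steps], out[-1])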
theano/scan_module/tests/test_scan.py
@@ -6,6 +6,8 @@ import theano
 import theano.sandbox.rng_mrg
 from theano import tensor
 from theano.tests import unittest_tools as utt
+from theano.compile.pfunc import rebuild_collect_shared

 '''
 Questions and notes about scan that should be answered :
@@ -162,7 +164,29 @@ def scan_project_sum(*args, **kwargs):
 def asarrayX(value):
     return theano._asarray(value, dtype=theano.config.floatX)

+def clone_optimized_graph(f):
+    maker_ins = [x for x in f.maker.env.inputs
+                 if not isinstance(x, theano.tensor.sharedvar.SharedVariable)]
+    inps, outs, _ = rebuild_collect_shared(f.maker.env.outputs,
+                                           maker_ins,
+                                           copy_inputs_over=False)
+    ins = [x for x in inps
+           if not isinstance(x, theano.tensor.sharedvar.SharedVariable)]
+    return (ins, outs)
+
+
+def grab_scan_node(output):
+    if output.owner is None:
+        return None
+    if output.owner.op.__class__.__name__ == 'Scan':
+        return [output.owner]
+    rval = []
+    for i in output.owner.inputs:
+        ri = grab_scan_node(i)
+        if ri is not None:
+            rval += ri
+    if rval is []:
+        return None
+    else:
+        return rval
+
+
 class T_Scan(unittest.TestCase):
 #class T_Scan(object):
@@ -2044,16 +2068,16 @@ class T_Scan(unittest.TestCase):
        o, _ = theano.reduce(lambda v, acc: acc + v, x,
                   theano.tensor.constant(numpy.asarray(0., dtype=theano.config.floatX))
                   )
        #f1 = theano.function([],o)
        mode = theano.compile.mode.FAST_RUN
        mode = mode.excluding('inplace')
        f1 = theano.function([], o, mode=mode)
        inputs, outputs = clone_optimized_graph(f1)
        # Get the scan node
        #scan_node = [n for n in f1.maker.env.toposort()
        #             if n.op.__class__.__name__=='Scan'][0]
        scan_nodes = grab_scan_node(outputs[0])
        assert scan_nodes is not None
        scan_node = scan_nodes[0]
        # Check how much memory it uses
        # Can actually do that since things are hidden by the infershape
        # mechanism
        #assert scan_node.inputs[2].value.shape == ()
        f1 = theano.function(inputs, scan_node.inputs[2])
        assert f1().shape[0] == 1
        gx = theano.tensor.grad(o, x)
        f2 = theano.function([], gx)
        assert numpy.allclose(f2(), numpy.ones((10,)))
@@ -2067,15 +2091,16 @@ class T_Scan(unittest.TestCase):
                   theano.tensor.constant(numpy.asarray(0., dtype=theano.config.floatX))
                   )
        #f1 = theano.function([],o)
        mode = theano.compile.mode.FAST_RUN
        mode = mode.excluding('inplace')
        f1 = theano.function([], o, mode=mode)
        inputs, outputs = clone_optimized_graph(f1)
        # Get the scan node
        #scan_node = [n for n in f1.maker.env.toposort()
        #             if n.op.__class__.__name__=='Scan'][0]
        scan_nodes = grab_scan_node(outputs[0])
        assert scan_nodes is not None
        scan_node = scan_nodes[0]
        # Check how much memory it uses
        # Can actually do that since things are hidden by the infershape
        # mechanism
        #assert scan_node.inputs[2].value.shape == ()
        f1 = theano.function(inputs, scan_node.inputs[2])
        assert f1().shape[0] == 1
        gx = theano.tensor.grad(o, x)
        f2 = theano.function([], gx)
        assert numpy.allclose(f2(), numpy.ones((10,)))
@@ -2088,15 +2113,16 @@ class T_Scan(unittest.TestCase):
                   theano.tensor.constant(numpy.asarray(0., dtype=theano.config.floatX))
                   )
        #f1 = theano.function([],o)
        mode = theano.compile.mode.FAST_RUN
        mode = mode.excluding('inplace')
        f1 = theano.function([], o, mode=mode)
        inputs, outputs = clone_optimized_graph(f1)
        # Get the scan node
        #scan_node = [n for n in f1.maker.env.toposort()
        #             if n.op.__class__.__name__=='Scan'][0]
        scan_nodes = grab_scan_node(outputs[0])
        assert scan_nodes is not None
        scan_node = scan_nodes[0]
        # Check how much memory it uses
        # Can actually do that since things are hidden by the infershape
        # mechanism
        #assert scan_node.inputs[2].value.shape == ()
        f1 = theano.function(inputs, scan_node.inputs[2])
        assert f1().shape[0] == 1
        gx = theano.tensor.grad(o, x)
        f2 = theano.function([], gx)
        assert numpy.allclose(f2(), numpy.ones((10,)))
theano/tensor/opt.py
@@ -6,6 +6,7 @@
 import logging
 _logger = logging.getLogger('theano.tensor.opt')

+import copy
 import operator
 import itertools
 import sys
@@ -573,6 +574,11 @@ class ShapeFeature(object):
         if hasattr(r.type, "broadcastable") and r.type.broadcastable[i]:
             return self.lscalar_one
+        # If user provided size
+        elif (hasattr(r.tag, 'shape') and
+              r.tag.shape is not None and
+              r.tag.shape[i] is not None):
+            return T.constant(copy.copy(r.tag.shape[i]), dtype='int64')
         else:
             return Shape_i(i).make_node(r).outputs[0]
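The new elif branch lets ShapeFeature pick up a shape that was attached to a variable's tag, returning an int64 constant instead of inserting a Shape_i node. A minimal sketch of what that hint looks like from user code follows; the variable name and shape are made up for illustration, and how much of the graph actually gets simplified depends on which other optimizations run:

    import numpy
    import theano
    import theano.tensor as T

    x = T.matrix('x')
    # Promise the optimizer that x will always be fed with this shape;
    # this is the r.tag.shape attribute that the branch above consults.
    x.tag.shape = (128, 64)

    f = theano.function([x], x.shape[1])
    print(f(numpy.zeros((128, 64), dtype=x.dtype)))  # -> 64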
@@ -2740,7 +2746,14 @@ register_specialize(local_mul_specialize)
 @gof.local_optimizer([T.add])
 def local_add_specialize(node):
     def fill_chain(v):
-        return _fill_chain(v, node.inputs)
+        # Not sure why this happens .. but I did not had the time to look
+        # into it, it probably has something to do with the dtype I'm
+        # providing the tag.shape of my variable
+        out = _fill_chain(v, node.inputs)
+        if out[0].dtype != node.outputs[0].dtype:
+            return [T.cast(out[0], dtype=node.outputs[0].dtype)]
+        else:
+            return out

     #here, we are past the point of canonicalization, so we don't want to put in un-necessary fills.
     if node.op == T.add:
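The change to fill_chain above guards against the rewritten expression coming back with a different dtype than the output it replaces, casting it back when needed. The helper below is not part of this commit; it is only a sketch of the same idiom, written as a hypothetical standalone function so the intent is easier to see:

    import theano.tensor as T

    def match_dtype(new_out, old_out):
        # Hypothetical helper mirroring the guard added to fill_chain:
        # a local optimizer has to return a variable with the same dtype
        # as the output it replaces, otherwise the replacement is rejected
        # or silently changes numerical results.
        if new_out.dtype != old_out.dtype:
            return T.cast(new_out, dtype=old_out.dtype)
        return new_out

    # e.g. inside a local optimizer:
    #     return [match_dtype(simplified, node.outputs[0])]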