提交 941ea07c authored 作者: lamblin's avatar lamblin

Merge pull request #511 from nouiz/pep8

Pep8
......@@ -28,6 +28,14 @@ New Features
anything below an intermediate variable that has a name. Defaults to False.
* debugprint does not print anymore the "|" symbol in a column after the last input.
Sparse Sandbox Addition (Not reviewed/documented/tested, but used by some people)
* They are all in the theano.sparse.sandbox.sp2 module
* Op class: Cast, Poisson, Multinomial, EliminateZeros, Sum, Binomial
* Op class: SamplingDot, SamplingDotCsr(inserted automatically)
* Op function: structured_sigmoid, structured_exp, structured_pow, structured_minimum,
* Op class: StructuredAddSV, StrucutedAddSVCSR(inserted automatically)
* opt: local_sampling_dot_csr, local_structured_add_s_v
Internal changes
* Define new exceptions MissingInputError and UnusedInputError, and use them
in theano.function, instead of TypeError and ValueError. (Pascal L.)
......
......@@ -317,7 +317,48 @@ bindings to work only on Python files.
Emacs
~~~~~
WRITEME
There is an **execellent** system to configure emacs for python:
`emacs-for-python
<https://github.com/gabrielelanaro/emacs-for-python>`_. It gatter many
emacs config into one and modify them to behave together nicely. You
can use it to check for pep8 compliance and for python syntax errors.
To install it on linux, you can do like this:
.. code-block:: bash
cd
git clone https://github.com/gabrielelanaro/emacs-for-python.git .emacs.d/emacs-for-python
Then in your ``~/.emacs`` file, add this:
.. code-block:: bash
;; Mandatory
(load-file "~/.emacs.d/emacs-for-python/epy-init.el")
(add-to-list 'load-path "~/.emacs.d/emacs-for-python/") ;; tell where to load the various files
;; Each of them enable different part of the system
;; only the 2 first are needed for pep8, syntax check.
(require 'epy-setup) ;; It will setup other loads, it is required!
(require 'epy-python) ;; If you want the python facilities [optional]
(require 'epy-completion) ;; If you want the autocompletion settings [optional]
(require 'epy-editing) ;; For configurations related to editing [optional]
;; define f10 to previous error
;; define f11 to next error
(require 'epy-bindings) ;; For my suggested keybindings [optional]
;; some shortcut that don't collide with gnome-terminal
;; otherwise, "epy-bindings" define f10 and f11 for them.
(global-set-key [f2] 'flymake-goto-prev-error)
(global-set-key [f3] 'flymake-goto-next-error)
;; next two lines are the checks to do. You can add more if you wish
(epy-setup-checker "pyflakes %f") ;; for python syntax check
(epy-setup-checker "pep8 %f") ;; for pep8 check
Unit tests
......
......@@ -7,11 +7,11 @@
# TODO: ensure field width for string fields makes columns line up
# TODO: what to do about 'diff summary'? (ask Fred?)
#
__authors__ = "James Bergstra"
__reviewer__ = "Razvan Pascanu"
__authors__ = "James Bergstra"
__reviewer__ = "Razvan Pascanu"
__copyright__ = "(c) 2011, Universite de Montreal"
__license__ = "3-clause BSD License"
__contact__ = "theano-dev <theano-dev@googlegroups.com>"
__license__ = "3-clause BSD License"
__contact__ = "theano-dev <theano-dev@googlegroups.com>"
__docformat__ = "restructuredtext en"
import atexit
......@@ -22,7 +22,7 @@ import time
import numpy
import theano
from theano.configparser import AddConfigVar, StrParam, BoolParam
from theano.configparser import AddConfigVar, BoolParam
import_time = time.time()
config = theano.config
......@@ -46,8 +46,10 @@ def _atexit_print_fn():
if len(_atexit_print_list) > 1:
# Make a global profile
cum = copy.copy(_atexit_print_list[0])
cum.message = "Sum of all Theano functions"
cum.message = "Sum of all printed profile at exit"
for ps in _atexit_print_list[1:]:
# for ps in [ps for ps in _atexit_print_list[1:]
# if not isinstance(ps, ScanProfileStats)]:
for attr in ["compile_time", "fct_call_time", "fct_callcount",
"vm_call_time", "optimizer_time", "linker_time"]:
setattr(cum, attr, getattr(cum, attr) + getattr(ps, attr))
......@@ -62,6 +64,7 @@ def _atexit_print_fn():
atexit.register(_atexit_print_fn)
class ProfileStats(object):
"""
Object to store runtime and memory profiling information for all of
......@@ -119,6 +122,7 @@ class ProfileStats(object):
# time spent linking graph (FunctionMaker.create)
line_width = 140
# param is called flag_time_thunks because most other attributes with time
# in the name are times *of* something, rather than configuration flags.
def __init__(self, atexit_print=True, flag_time_thunks=None, **kwargs):
......@@ -185,20 +189,27 @@ class ProfileStats(object):
"""dict op -> total number of flops"""
# timing is stored by node, we compute timing by Op on demand
rval = {}
return rval #TODO: continue here
return rval # TODO: continue here
for node, count in self.apply_callcount.items():
rval.setdefault(node.op, 0)
rval[node.op] += 1
return rval
for a,t in op_time.items():
if hasattr(a,'flops'):
op_flops[a]=a.flops*op_call[a]/t/1e6
for a, t in self.op_time.items():
if hasattr(a, 'flops'):
op_flops[a] = a.flops * op_call[a] / t / 1e6
flops_msg=''
flops_msg = ''
if op_flops:
flops_msg=' <MFlops/s>'
print '\nHACK WARNING: we print the flops for some OP, but the logic don\' always work. You need to know the internal of Theano to make it work correctly. Otherwise don\'t use!'
print '\nOp-wise summary: <%% of local_time spent on this kind of Op> <cumulative %%> <self seconds> <cumulative seconds> <time per call> %s <nb_call> <nb apply> <Op name>'%(flops_msg)
flops_msg = ' <MFlops/s>'
print ('\nHACK WARNING: we print the flops for some OP, but the'
' logic don\' always work. You need to know the internal'
' of Theano to make it work correctly.'
' Otherwise don\'t use!')
print ('\nOp-wise summary:'
' <%% of local_time spent on this kind of Op>'
' <cumulative %%> <self seconds> <cumulative seconds>'
' <time per call> %s <nb_call> <nb apply> <Op name>' % (
flops_msg))
def summary_ops(self, file=sys.stderr, N=None):
if self.apply_time:
......@@ -216,19 +227,21 @@ class ProfileStats(object):
op_impl = self.op_impl()
if N is None:
N = len(self.op_flops)
otimes = [(t*100/local_time,
otimes = [(t * 100 / local_time,
t,
op,
op_impl.get(op, ' '),
op_call.get(op, 0),
op_apply.get(op,0))
op_apply.get(op, 0))
for op, t in op_time.items()]
otimes.sort()
otimes.reverse()
tot=0
tot = 0
print >> file, 'Ops'
print >> file, '---'
#print >> file, '<% time> <cumulative %%> <apply time> <cumulative seconds> <time per call> <nb_call> <Op name>'
#print >> file, '<% time> <cumulative %%> <apply time>,'
#print >>file, '<cumulative seconds> <time per call> <nb_call>'
#print >>file, '<Op name>'
hs = []
# formatting string
es = []
......@@ -263,14 +276,15 @@ class ProfileStats(object):
print >> file, header_str
for f,t,a,impl,nb_call,nb_apply in otimes[:N]:
for f, t, a, impl, nb_call, nb_apply in otimes[:N]:
if nb_call == 0:
assert t == 0
continue
tot+=t
ftot=tot*100/local_time
print >> file, format_str%(f,ftot,t,t/nb_call, impl, nb_call,
nb_apply, str(a)[:maxlen])
tot += t
ftot = tot * 100 / local_time
print >> file, format_str % (f, ftot, t, t / nb_call,
impl, nb_call,
nb_apply, str(a)[:maxlen])
# While this carries over less information, it is arranged such
# that it way more readeable that the previous output of the
# profiler
......@@ -281,7 +295,7 @@ class ProfileStats(object):
# print >>file, ' %4.1f%% %5.1f%% %5.3fs %5.3fs %.2es %s %5d %2d %s' % (
# f, ftot, t, tot, t/nb_call, impl, nb_call, nb_apply, a)
print >>file, ' ... (remaining %i Ops account for %6.2f%%(%.2fs) of the runtime)'\
%(max(0, len(otimes)-N),
% (max(0, len(otimes) - N),
sum(f for f, t, a, ci, nb_call, nb_op in otimes[N:]),
sum(t for f, t, a, ci, nb_call, nb_op in otimes[N:]))
print >> file, ''
......@@ -333,7 +347,7 @@ class ProfileStats(object):
print >> file, header_str
atimes = [(
t*100/local_time,
t * 100 / local_time,
t,
a,
a.env.toposort().index(a),
......@@ -341,13 +355,13 @@ class ProfileStats(object):
for a, t in self.apply_time.items()]
atimes.sort()
atimes.reverse()
tot=0
tot = 0
for (f, t, a, nd_id, nb_call) in atimes[:N]:
tot+=t
ftot=tot*100/local_time
if nb_call==0:
tot += t
ftot = tot * 100 / local_time
if nb_call == 0:
continue
print >> file, format_str %(f,ftot, t, t/nb_call, nb_call,
print >> file, format_str %(f, ftot, t, t / nb_call, nb_call,
nd_id,
str(a)[:maxlen])
# Same as before, this I've sacrificied some information making
......@@ -355,7 +369,7 @@ class ProfileStats(object):
#print >> file, ' %4.1f%% %5.1f%% %5.3fs %5.3fs %.2es %i %s'%(
# f, ftot, t, tot, t/nb_call,nb_call, str(a))
print >> file, ' ... (remaining %i Apply instances account for %.2f%%(%.2fs) of the runtime)'\
%(max(0, len(atimes)-N),
% (max(0, len(atimes) - N),
sum(f for f, t, a, nd_id, nb_call in atimes[N:]),
sum(t for f, t, a, nd_id, nb_call in atimes[N:]))
print >> file, ''
......@@ -363,31 +377,34 @@ class ProfileStats(object):
def summary_function(self, file):
print >> file, 'Function profiling'
print >> file, '=================='
print >> file, ' Message: %s'%self.message
print >> file, ' Message: %s' % self.message
print >> file, ' Time in %i calls to Function.__call__: %es' % (
self.fct_callcount, self.fct_call_time)
if self.fct_call_time>0:
print >> file, ' Time in Function.fn.__call__: %es (%.3f%%)' %(
self.vm_call_time, 100*self.vm_call_time / self.fct_call_time)
if self.fct_call_time > 0:
print >> file, ' Time in Function.fn.__call__: %es (%.3f%%)' % (
self.vm_call_time,
100 * self.vm_call_time / self.fct_call_time)
local_time = sum(self.apply_time.values())
if local_time > 0:
print >> file, ' Time in thunks: %es (%.3f%%)' %(
print >> file, ' Time in thunks: %es (%.3f%%)' % (
local_time, 100*local_time / self.fct_call_time)
print >> file, ' Total compile time: %es' % self.compile_time
print >> file, ' Theano Optimizer time: %es' % self.optimizer_time
print >> file, ' Theano Linker time (includes C, CUDA code generation/compiling): %es' % self.linker_time
print >> file, (' Theano Linker time (includes C,'
' CUDA code generation/compiling): %es' %
self.linker_time)
print >> file, ''
def summary(self, file=sys.stderr, n_ops_to_print=20, n_applies_to_print=20):
def summary(self, file=sys.stderr, n_ops_to_print=20,
n_applies_to_print=20):
self.summary_function(file)
local_time = sum(self.apply_time.values())
if local_time > 0:
self.summary_ops(file, n_ops_to_print)
self.summary_nodes(file, n_applies_to_print)
else:
print >> file, " No node time accumulated (hint: try config profiling.time_thunks=1)"
print >> file, (" No node time accumulated "
"(hint: try config profiling.time_thunks=1)")
if 0: # old code still to be ported from ProfileMode
......@@ -404,47 +421,50 @@ if 0: # old code still to be ported from ProfileMode
print ''
print 'ProfileMode.long_print()'
print 'name = %s'%fct_name
print 'msg = %s'%message
print 'name = %s' % fct_name
print 'msg = %s' % message
print '---------------------------'
print ''
print 'Total time spent running thunks: %.3fs'% local_time
print 'Total time spent running thunks: %.3fs' % local_time
sop_time={}
sop_call={}
sop_time = {}
sop_call = {}
sop_op = {}
sop_c={} #map each op class to Bool. True iff all applies were done in c.
for a,t in op_time.items():
#map each op class to Bool. True iff all applies were done in c.
sop_c = {}
for a, t in op_time.items():
typ = type(a)
sop_time.setdefault(typ,0)
sop_time[typ]+=t
sop_op.setdefault(typ,0)
sop_op[typ]+=1
sop_c.setdefault(typ,True)
sop_c[typ]=sop_c[typ] and op_cimpl.get(a, False)
sop_call[typ]=sop_call.get(typ,0)+op_call[a]
sop_time.setdefault(typ, 0)
sop_time[typ] += t
sop_op.setdefault(typ, 0)
sop_op[typ] += 1
sop_c.setdefault(typ, True)
sop_c[typ] = sop_c[typ] and op_cimpl.get(a, False)
sop_call[typ] = sop_call.get(typ, 0) + op_call[a]
print '\nSingle Op-wise summary: <% of local_time spent on this kind of Op> <cumulative %%> <self seconds> <cumulative seconds> <time per call> <nb_call> <nb_op> <nb_op> <Op name>'
sotimes = [(t*100/local_time, t, a, sop_c[a], sop_call[a], sop_op[a]) for a, t in sop_time.items()]
sotimes = [(t * 100 / local_time, t, a, sop_c[a],
sop_call[a], sop_op[a]) for a, t in sop_time.items()]
sotimes.sort()
sotimes.reverse()
tot=0
for f,t,a,ci, nb_call, nb_op in sotimes[:n_ops_to_print]:
tot = 0
for f, t, a, ci, nb_call, nb_op in sotimes[:n_ops_to_print]:
if nb_call == 0:
assert t == 0
continue
tot+=t
ftot=tot*100/local_time
tot += t
ftot = tot * 100 / local_time
if ci:
msg = '*'
else:
msg = ' '
print ' %4.1f%% %5.1f%% %5.3fs %5.3fs %.2es %s %5d %2d %s' % (f, ftot, t, tot, t/nb_call, msg, nb_call, nb_op, a)
print ' ... (remaining %i Ops account for %.2f%%(%.2fs) of the runtime)'\
%(max(0, len(sotimes)-n_ops_to_print),
sum(f for f, t, a, ci, nb_call, nb_op in sotimes[n_ops_to_print:]),
sum(t for f, t, a, ci, nb_call, nb_op in sotimes[n_ops_to_print:]))
% (max(0, len(sotimes) - n_ops_to_print),
sum(f for f, t, a, ci, nb_call, nb_op in
sotimes[n_ops_to_print:]),
sum(t for f, t, a, ci, nb_call, nb_op in
sotimes[n_ops_to_print:]))
total_time = time.time() - import_time
total_fct_time = sum(fct_call_time.values())
......@@ -453,24 +473,32 @@ if 0: # old code still to be ported from ProfileMode
print
print 'Theano fct summary: <% total fct time> <total time> <time per call> <nb call> <fct name>'
for key in fct_call.keys():
if fct_call[key]>0:
print ' %4.1f%% %.3fs %.2es %d %s'%(fct_call_time[key]/total_fct_time*100 ,fct_call_time[key],
fct_call_time[key]/fct_call[key], fct_call[key], key.name)
if fct_call[key] > 0:
print ' %4.1f%% %.3fs %.2es %d %s'%(
fct_call_time[key] / total_fct_time * 100,
fct_call_time[key],
fct_call_time[key] / fct_call[key],
fct_call[key], key.name)
else:
print ' NOT CALLED',key.name
if total_fct_time>0:
time_pr_in_fct=local_time/total_fct_time*100
time_per_call=total_fct_time/total_fct_call
if total_fct_time > 0:
time_pr_in_fct = local_time / total_fct_time * 100
time_per_call = total_fct_time / total_fct_call
else:
time_pr_in_fct=0
time_per_call=0
time_pr_in_fct = 0
time_per_call = 0
print
print 'Time since import %.3fs'%(total_time)
print 'Compile time: %.3fs %.1f%%'%(compile_time, compile_time/total_time*100)
print 'Theano fct call %.3fs %.1f%%'%(total_fct_time,total_fct_time/total_time*100)
print ' Theano Op time (included in fct call, Time spent running thunks) %.3fs %.1f%%(of total) %.1f%%(of fct call)'% (local_time,local_time/total_time*100, time_pr_in_fct)
print 'Time since import %.3fs' % (total_time)
print 'Compile time: %.3fs %.1f%%' % (compile_time,
compile_time / total_time * 100)
print 'Theano fct call %.3fs %.1f%%' % (total_fct_time,
total_fct_time / total_time *
100)
print ' Theano Op time (included in fct call, Time spent running thunks) %.3fs %.1f%%(of total) %.1f%%(of fct call)' % (local_time,
local_time / total_time * 100,
time_pr_in_fct)
print 'Other time since import %.3fs %.1f%%'%(other_time,other_time/total_time*100)
print '%i Theano fct call, %.3fs per call'%(total_fct_call, time_per_call)
......@@ -479,13 +507,13 @@ if 0: # old code still to be ported from ProfileMode
print '<Apply> <Apply position> <fct name> <inputs type> <outputs type>'
for fct in fct_call.keys():
for idx, node in enumerate(fct.maker.env.toposort()):
if any(hasattr(i,'dtype') and i.dtype=='float64' for i in node.outputs) and not any(hasattr(i,'dtype') and i.dtype=='float64' for i in node.inputs):
if any(hasattr(i, 'dtype') and i.dtype == 'float64' for i in node.outputs) and not any(hasattr(i, 'dtype') and i.dtype == 'float64' for i in node.inputs):
print str(node), idx, fct.name, str([getattr(i,'dtype',None) for i in node.inputs]),str([getattr(i,'dtype',None) for i in node.outputs])
if any([x[2].__name__.startswith("Gpu") for x in sotimes]):
cpu=[]
gpu=[]
trans=[]
cpu = []
gpu = []
trans = []
for so in sotimes:
if so[2].__name__ in ["HostFromGpu", "GpuFromHost"]:
trans.append(so)
......@@ -493,9 +521,9 @@ if 0: # old code still to be ported from ProfileMode
gpu.append(so)
else:
cpu.append(so)
sum_cpu=sum(so[1] for so in cpu)
sum_gpu=sum(so[1] for so in gpu)
sum_trans=sum(so[1] for so in trans)
sum_cpu = sum(so[1] for so in cpu)
sum_gpu = sum(so[1] for so in gpu)
sum_trans = sum(so[1] for so in trans)
print
print "Spent %.3fs(%.3f%%) in cpu Op, %.3fs(%.3f%%) in gpu Op and %.3fs(%.3f%%) transfert Op"%(
......@@ -505,7 +533,7 @@ if 0: # old code still to be ported from ProfileMode
print "<fct name> <input name> <input type> <str input>"
for fct in fct_call.keys():
for i in fct.input_storage:
if hasattr(i.type, 'dtype') and i.type.dtype=='float64':
if hasattr(i.type, 'dtype') and i.type.dtype == 'float64':
print fct.name, i.name, i.type, i
if outputs_size:
......
......@@ -4,17 +4,21 @@ Defines Linkers that deal with C implementations.
# Python imports
from copy import copy
import re #for set_compiledir
import os, sys, StringIO
import re # for set_compiledir
import os
import StringIO
import sys
from itertools import izip
if sys.version_info[:2] >= (2,5):
if sys.version_info[:2] >= (2, 5):
import hashlib
def hash_from_code(msg):
return hashlib.md5(msg).hexdigest()
else:
import md5
def hash_from_code(msg):
return md5.new(msg).hexdigest()
......@@ -46,14 +50,13 @@ from compilelock import get_lock, release_lock
import cmodule
import logging
_logger=logging.getLogger("theano.gof.cc")
_logger = logging.getLogger("theano.gof.cc")
_logger.setLevel(logging.WARN)
from theano.gof.callcache import CallCache
run_cthunk = None # Will be imported only when needed.
run_cthunk = None # Will be imported only when needed.
def get_module_cache(init_args=None):
......@@ -63,37 +66,47 @@ def get_module_cache(init_args=None):
"""
return cmodule.get_module_cache(config.compiledir, init_args=init_args)
_persistent_module_cache = None
def get_persistent_module_cache():
global _persistent_module_cache
if _persistent_module_cache is None:
_persistent_module_cache = CallCache(os.path.join(config.compiledir, 'persistent_cache'))
_persistent_module_cache = CallCache(os.path.join(config.compiledir,
'persistent_cache'))
return _persistent_module_cache
class CodeBlock:
"""WRITEME
Represents a computation unit composed of declare, behavior, and cleanup.
@ivar declare: C code that declares variables for use by the computation
@ivar behavior: C code that performs the computation
@ivar cleanup: C code that cleans up things allocated or incref-ed in behavior
@ivar cleanup: C code that cleans up things allocated or incref-ed
in behavior
"""
def __init__(self, declare, behavior, cleanup, sub):
"""
Initialize a L{CodeBlock} with templatized declare, behavior and cleanup.
The sub parameter will be used in the other arguments' templates. sub
should contain a key called 'id' that maps to an identifier for this block.
The identifier will be used to determine the failure code and a label
to jump to. It should also contain a key called 'failure_var' that contains
the name of the variable that contains the error code.
Initialize a L{CodeBlock} with templatized declare, behavior
and cleanup. The sub parameter will be used in the other
arguments' templates. sub should contain a key called 'id'
that maps to an identifier for this block.
The identifier will be used to determine the failure code and
a label to jump to. It should also contain a key called
'failure_var' that contains the name of the variable that
contains the error code.
"""
self.declare = declare
self.behavior = behavior
# the dummy is because gcc throws an error when a label's right next to a closing
# brace (maybe there's an ignore flag for that...)
# we need the label even if cleanup is empty because the behavior block jumps there
# on failure
self.cleanup = ("__label_%(id)i:\n"%sub + cleanup + "\ndouble __DUMMY_%(id)i;\n"%sub) #% sub
# the dummy is because gcc throws an error when a label's
# right next to a closing brace (maybe there's an ignore flag
# for that...)
# we need the label even if cleanup is empty because the
# behavior block jumps there on failure
self.cleanup = ("__label_%(id)i:\n" % sub + cleanup +
"\ndouble __DUMMY_%(id)i;\n" % sub) # % sub
def failure_code(sub):
......@@ -102,10 +115,10 @@ def failure_code(sub):
def code_gen(blocks):
"""WRITEME
From a list of L{CodeBlock} instances, returns a string that executes them
all in sequence. eg for C{(decl1, task1, cleanup1)} and C{(decl2, task2, cleanup2)}
the returned string will be of the form::
"""WRITEME From a list of L{CodeBlock} instances, returns a string
that executes them all in sequence. eg for C{(decl1, task1,
cleanup1)} and C{(decl2, task2, cleanup2)} the returned string
will be of the form::
decl1
decl2
......@@ -181,10 +194,11 @@ def struct_gen(args, struct_builders, blocks, sub):
args_names = ", ".join(args)
args_decl = ", ".join(["PyObject* %s" % arg for arg in args])
# The following code stores the exception data in __ERROR, which is a special
# field of the struct. __ERROR is a list of length 3 that holds the type, the
# value and the traceback. After storing the error, we return the failure code
# so we know which code block failed.
# The following code stores the exception data in __ERROR, which
# is a special field of the struct. __ERROR is a list of length 3
# that holds the type, the value and the traceback. After storing
# the error, we return the failure code so we know which code
# block failed.
do_return = """
if (%(failure_var)s) {
// When there is a failure, this code puts the exception
......@@ -213,8 +227,8 @@ def struct_gen(args, struct_builders, blocks, sub):
sub = dict(sub)
sub.update(locals())
# TODO: add some error checking to make sure storage_<x> are 1-element lists
# and __ERROR is a 3-elements list.
# TODO: add some error checking to make sure storage_<x> are
# 1-element lists and __ERROR is a 3-elements list.
struct_code = """
struct %(name)s {
PyObject* __ERROR;
......@@ -260,6 +274,7 @@ def get_nothing(r, name, sub):
"""WRITEME"""
return ""
def get_c_declare(r, name, sub):
"""WRITEME"""
pre = """
......@@ -267,6 +282,7 @@ def get_c_declare(r, name, sub):
""" % locals()
return pre + r.type.c_declare(name, sub)
def get_c_init(r, name, sub):
"""WRITEME"""
pre = "" """
......@@ -275,6 +291,7 @@ def get_c_init(r, name, sub):
""" % locals()
return pre + r.type.c_init(name, sub)
def get_c_extract(r, name, sub):
"""WRITEME"""
pre = """
......@@ -283,6 +300,7 @@ def get_c_extract(r, name, sub):
""" % locals()
return pre + r.type.c_extract(name, sub)
def get_c_cleanup(r, name, sub):
"""WRITEME"""
post = """
......@@ -290,6 +308,7 @@ def get_c_cleanup(r, name, sub):
""" % locals()
return r.type.c_cleanup(name, sub) + post
def get_c_sync(r, name, sub):
"""WRITEME"""
return """
......@@ -300,11 +319,13 @@ def get_c_sync(r, name, sub):
PyList_SET_ITEM(storage_%(name)s, 0, py_%(name)s);
{Py_XDECREF(old);}
}
""" % dict(sync = r.type.c_sync(name, sub), name = name, **sub)
""" % dict(sync=r.type.c_sync(name, sub), name=name, **sub)
def apply_policy(policy, r, name, sub):
"""WRITEME
@param policy: list of functions that map a L{Variable} to a string, or a single such function
@param policy: list of functions that map a L{Variable} to a string,
or a single such function
@type r: L{Variable}
@return: C{policy[0](r) + policy[1](r) + ...}
"""
......@@ -316,18 +337,22 @@ def apply_policy(policy, r, name, sub):
return policy(r, name, sub)
def struct_variable_codeblocks(variable, policies, id, symbol_table, sub):
"""WRITEME
variable -> a Variable
policies -> a pair of tuples ((declare_policy, behavior_policy, cleanup_policy), -- at construction
(declare_policy, behavior_policy, cleanup_policy)) -- at execution
the first list will produce an element of the 'struct_builders' argument in struct_gen
the second list will produce an element of the 'blocks' argument in struct_gen
policies -> a pair of tuples ((declare_policy, behavior_policy,
cleanup_policy), -- at construction
(declare_policy, behavior_policy,
cleanup_policy)) -- at execution
the first list will produce an element of the
'struct_builders' argument in struct_gen the second
list will produce an element of the 'blocks' argument
in struct_gen
id -> the id assigned to this variable's task in the computation
symbol_table -> a dict that maps variables to variable names. It is not read
by this function but a variable name for the variable is computed and added
to the table.
symbol_table -> a dict that maps variables to variable names. It
is not read by this function but a variable name for the
variable is computed and added to the table.
sub -> dictionary for use by L{CodeBlock}.
"""
......@@ -339,17 +364,20 @@ def struct_variable_codeblocks(variable, policies, id, symbol_table, sub):
sub['fail'] = failure_code(sub)
sub['py_ptr'] = "py_%s" % name
sub['stor_ptr'] = "storage_%s" % name
# struct_declare, struct_behavior, struct_cleanup, sub)
struct_builder = CodeBlock(*[apply_policy(policy, variable, name, sub)
for policy in policies[0]]+[sub]) # struct_declare, struct_behavior, struct_cleanup, sub)
for policy in policies[0]] + [sub])
sub['id'] = id + 1
sub['fail'] = failure_code(sub)
sub['py_ptr'] = "py_%s" % name
sub['stor_ptr'] = "storage_%s" % name
# run_declare, run_behavior, run_cleanup, sub)
block = CodeBlock(*[apply_policy(policy, variable, name, sub)
for policy in policies[1]]+[sub]) # run_declare, run_behavior, run_cleanup, sub)
for policy in policies[1]] + [sub])
return struct_builder, block
class CLinker(link.Linker):
"""WRITEME
......@@ -365,11 +393,12 @@ class CLinker(link.Linker):
def __init__(self):
self.env = None
def accept(self, env, no_recycling = []):
def accept(self, env, no_recycling=[]):
"""WRITEME"""
if self.env is not None and self.env is not env:
return type(self)().accept(env, no_recycling)
#raise Exception("Cannot accept from a Linker that is already tied to another Env.")
#raise Exception("Cannot accept from a Linker that is already"
# " tied to another Env.")
self.env = env
self.fetch_variables()
self.no_recycling = no_recycling
......@@ -377,15 +406,21 @@ class CLinker(link.Linker):
def fetch_variables(self):
"""WRITEME
Fills the inputs, outputs, variables, orphans, temps and node_order fields.
Fills the inputs, outputs, variables, orphans,
temps and node_order fields.
"""
env = self.env
self.inputs = env.inputs
self.outputs = env.outputs
self.variables = graph.variables(self.inputs, self.outputs) # list(env.variables)
# list(env.variables)
self.variables = graph.variables(self.inputs, self.outputs)
# The orphans field is listified to ensure a consistent order.
self.orphans = list(r for r in self.variables if isinstance(r, graph.Value) and r not in self.inputs) #list(env.orphans.difference(self.outputs))
self.temps = list(set(self.variables).difference(self.inputs).difference(self.outputs).difference(self.orphans))
#list(env.orphans.difference(self.outputs))
self.orphans = list(r for r in self.variables
if isinstance(r, graph.Value) and
r not in self.inputs)
self.temps = list(set(self.variables).difference(
self.inputs).difference(self.outputs).difference(self.orphans))
self.consts = []
self.node_order = env.toposort()
......@@ -408,8 +443,6 @@ class CLinker(link.Linker):
no_recycling = self.no_recycling
env = self.env
self.consts = []
c_support_code_apply = []
......@@ -429,62 +462,82 @@ class CLinker(link.Linker):
failure_var = "__failure"
id = 1
sub = dict(failure_var = failure_var)
sub = dict(failure_var=failure_var)
for variable in self.variables:
# it might be possible to inline constant variables as C literals
## if getattr(variable, 'constant', False):
# policy = [[what to declare in the struct, what to do at construction, what to do at destruction],
# [what to declare in each run, what to do at the beginning of each run, what to do at the end of each run]]
# policy = [[what to declare in the struct,
# what to do at construction,
# what to do at destruction],
# [what to declare in each run,
# what to do at the beginning of each run,
# what to do at the end of each run]]
if variable in self.inputs:
# we need to extract the new inputs at each run
# they do not need to be relayed to Python, so we don't sync
# if isinstance(variable, Constant):
# raise TypeError("Inputs to CLinker cannot be Constant.", variable)
# raise TypeError("Inputs to CLinker cannot be Constant.",
# variable)
policy = [[get_nothing, get_nothing, get_nothing],
[get_c_declare, get_c_extract, get_c_cleanup]]
elif variable in self.orphans:
if not isinstance(variable, graph.Value):
raise TypeError("All orphans to CLinker must be Value instances.", variable)
raise TypeError("All orphans to CLinker must be Value"
" instances.", variable)
if isinstance(variable, graph.Constant):
try:
symbol[variable] = "(" + variable.type.c_literal(variable.data) + ")"
symbol[variable] = ("(" + variable.type.c_literal(
variable.data) + ")")
self.consts.append(variable)
self.orphans.remove(variable)
continue
except (utils.MethodNotDefined, NotImplementedError):
pass
# orphans are not inputs so we'll just get fetch them when we initialize the struct and assume they stay the same
# orphans are not inputs so we'll just get fetch them
# when we initialize the struct and assume they stay
# the same
policy = [[get_c_declare, get_c_extract, get_c_cleanup],
[get_nothing, get_nothing, get_nothing]]
elif variable in self.temps:
# temps don't need to be extracted from Python, so we call c_init rather than c_extract
# they do not need to be relayed to Python, so we don't sync
# temps don't need to be extracted from Python, so we
# call c_init rather than c_extract they do not need
# to be relayed to Python, so we don't sync
if variable.type.c_is_simple() or variable in no_recycling:
policy = [[get_nothing, get_nothing, get_nothing],
[get_c_declare, get_c_init, get_c_cleanup]]
else:
# it is useful for complex temps to reuse storage at each run, so we only clean up in the destructor
# it is useful for complex temps to reuse storage
# at each run, so we only clean up in the
# destructor
policy = [[get_c_declare, get_c_init, get_c_cleanup],
[get_nothing, get_nothing, get_nothing]]
elif variable in self.outputs:
# outputs don't need to be extracted from Python, so we call c_init rather than c_extract
# outputs don't need to be extracted from Python, so
# we call c_init rather than c_extract
if variable.type.c_is_simple() or variable in no_recycling:
policy = [[get_nothing, get_nothing, get_nothing],
[get_c_declare, get_c_init, (get_c_sync, get_c_cleanup)]]
[get_c_declare, get_c_init,
(get_c_sync, get_c_cleanup)]]
else:
# it is useful for complex outputs to reuse storage at each run, so we only clean up in the destructor
# it is useful for complex outputs to reuse
# storage at each run, so we only clean up in the
# destructor
policy = [[get_c_declare, get_c_init, get_c_cleanup],
[get_nothing, get_nothing, get_c_sync]]
else:
raise Exception("what the fuck")
builder, block = struct_variable_codeblocks(variable, policy, id, symbol, sub)
builder, block = struct_variable_codeblocks(variable, policy,
id, symbol, sub)
# each Variable generates two CodeBlocks, one to declare/initialize/destroy struct variables
# and the other to declare/extract/cleanup each time the function is run.
# Typically, only one of the two actually does anything (see all the possible combinations above)
# each Variable generates two CodeBlocks, one to
# declare/initialize/destroy struct variables and the
# other to declare/extract/cleanup each time the function
# is run.
# Typically, only one of the two actually does anything
# (see all the possible combinations above)
init_tasks.append((variable, 'init', id))
init_blocks.append(builder)
......@@ -496,19 +549,23 @@ class CLinker(link.Linker):
for node_num, node in enumerate(self.node_order):
# We populate sub with a mapping from the variable names specified by the op's c_var_names
# method to the actual variable names that we will use.
# We populate sub with a mapping from the variable names
# specified by the op's c_var_names method to the actual
# variable names that we will use.
## ivnames, ovnames = op.c_var_names()
sub = dict(failure_var = failure_var)
## for variable, vname in zip(op.inputs + op.outputs, ivnames + ovnames):
sub = dict(failure_var=failure_var)
## for variable, vname in zip(op.inputs + op.outputs,
## ivnames + ovnames):
## sub[vname] = symbol[variable]
name = "node_%i" % node_num
isyms, osyms = [symbol[r] for r in node.inputs], [symbol[r] for r in node.outputs]
isyms = [symbol[r] for r in node.inputs]
osyms = [symbol[r] for r in node.outputs]
# c_validate_update is deprecated
if hasattr(node.op, 'c_validate_update'):
raise Exception("c_validate_update is deprecated, move contents to c_code", node.op)
raise Exception("c_validate_update is deprecated,"
" move contents to c_code", node.op)
# Make the CodeBlock for c_code
sub['id'] = id
......@@ -517,20 +574,23 @@ class CLinker(link.Linker):
op = node.op
# type-specific support code
try:
c_support_code_apply.append(op.c_support_code_apply(node, name))
c_support_code_apply.append(op.c_support_code_apply(node,
name))
except utils.MethodNotDefined:
pass
else:
# The following will be executed if the "try" block succeeds
assert isinstance(c_support_code_apply[-1], basestring), (
str(node.op)+" didn't returned a string for c_support_code_apply")
str(node.op) +
" didn't returned a string for c_support_code_apply")
# emit c_code
try:
behavior = op.c_code(node, name, isyms, osyms, sub)
except utils.MethodNotDefined:
raise NotImplementedError("%s cannot produce C code" % op)
assert isinstance(behavior, basestring), str(node.op)+" didn't returned a string for c_code"
assert isinstance(behavior, basestring), (
str(node.op) + " didn't returned a string for c_code")
try:
cleanup = op.c_code_cleanup(node, name, isyms, osyms, sub)
......@@ -543,18 +603,24 @@ class CLinker(link.Linker):
tasks.append((node, 'code', id))
id += 1
# List of arg names for use in struct_gen. Note the call to uniq: duplicate inputs
# must only be passed once because they are mapped to the same name.
# Duplicates are defined by (a is b), rather than (a==b) since Constant instances can
# List of arg names for use in struct_gen. Note the call to
# uniq: duplicate inputs must only be passed once because they
# are mapped to the same name. Duplicates are defined by (a
# is b), rather than (a==b) since Constant instances can
# compare equal to equivalent Constant instances.
args = []
args += ["storage_%s" % symbol[variable] for variable in utils.uniq(self.inputs + self.outputs + self.orphans)]
args += ["storage_%s" % symbol[variable] for variable
in utils.uniq(self.inputs + self.outputs + self.orphans)]
struct_code = struct_gen(args, init_blocks, blocks, dict(failure_var = failure_var, name = "<<<<NAME>>>>"))
struct_code = struct_gen(args, init_blocks, blocks,
dict(failure_var=failure_var,
name="<<<<NAME>>>>"))
# TODO: still needed? We do not use weave anymore.
# The hash calculated on the code identifies it so weave can cache properly.
# (the hash has to be used outside of the support code because weave does not consider changes in the support code)
# The hash calculated on the code identifies it so weave can
# cache properly. (the hash has to be used outside of the
# support code because weave does not consider changes in the
# support code)
hash = hash_from_code(struct_code)
struct_name = '__struct_compiled_op_%s' % hash
......@@ -570,7 +636,7 @@ class CLinker(link.Linker):
self.init_tasks = init_tasks
self.blocks = blocks
self.tasks = tasks
all = self.inputs + self.outputs + self.orphans
all_info = self.inputs + self.outputs + self.orphans
self.c_support_code_apply = c_support_code_apply
if (self.init_tasks, self.tasks) != self.get_init_tasks():
......@@ -582,7 +648,8 @@ class CLinker(link.Linker):
# List of indices that should be ignored when passing the arguments
# (basically, everything that the previous call to uniq eliminated)
self.dupidx = [i for i, x in enumerate(all) if all.count(x) > 1 and all.index(x) != i]
self.dupidx = [i for i, x in enumerate(all_info)
if all_info.count(x) > 1 and all_info.index(x) != i]
return self.struct_code
def support_code(self):
......@@ -595,9 +662,12 @@ class CLinker(link.Linker):
"""
ret = []
# generic support code
for x in [y.type for y in self.variables] + [y.op for y in self.node_order]:
try: ret.append(x.c_support_code())
except utils.MethodNotDefined: pass
for x in [y.type for y in self.variables] + [
y.op for y in self.node_order]:
try:
ret.append(x.c_support_code())
except utils.MethodNotDefined:
pass
return ret
def compile_args(self):
......@@ -608,33 +678,43 @@ class CLinker(link.Linker):
This might contain duplicates.
"""
ret = ["-O3"]
# this is the param the -ffast-math activate. I put the explicitly as FillMissing must disable some of them. Putting -ffast-math would make it disable all other parameter at the same time.
# this is the param the -ffast-math activate. I put the explicitly as
# FillMissing must disable some of them. Putting -ffast-math would
# make it disable all other parameter at the same time.
ret += ["-fno-math-errno",
#"-funsafe-math-optimizations",
#"-fno-signaling-nans",
#"-fcx-limited-range",
#"-fno-rounding-math",
#"-ffinite-math-only",
"-Wno-unused-label",#the current code generate label event if they are not used. Could use gcc attribute for those label only
"-Wno-unused-variable",#idem as the precedent
"-Wno-write-strings",#generated by our code generator...
#the current code generate label event if they are not used.
#Could use gcc attribute for those label only
"-Wno-unused-label",
"-Wno-unused-variable", # idem as the precedent
"-Wno-write-strings", # generated by our code generator...
]
for x in [y.type for y in self.variables] + [y.op for y in self.node_order]:
try: ret += x.c_compile_args()
except utils.MethodNotDefined: pass
for x in [y.type for y in self.variables] + [
y.op for y in self.node_order]:
try:
ret += x.c_compile_args()
except utils.MethodNotDefined:
pass
c_compiler = self.c_compiler()
ret += c_compiler.compile_args()
ret=list(set(ret))#to remove duplicate
for x in [y.type for y in self.variables] + [y.op for y in self.node_order]:
ret = list(set(ret)) # to remove duplicate
for x in [y.type for y in self.variables] + [
y.op for y in self.node_order]:
try:
for i in x.c_no_compile_args():
try:
ret.remove(i)
except ValueError:
pass# in case the value is not there
except utils.MethodNotDefined: pass
pass # in case the value is not there
except utils.MethodNotDefined:
pass
return ret
def headers(self):
......@@ -645,14 +725,18 @@ class CLinker(link.Linker):
The return value will not contain duplicates.
"""
ret = []
for x in [y.type for y in self.variables] + [y.op for y in self.node_order]:
try: ret += x.c_headers()
except utils.MethodNotDefined: pass
for x in [y.type for y in self.variables] + [
y.op for y in self.node_order]:
try:
ret += x.c_headers()
except utils.MethodNotDefined:
pass
return list(set(ret))
def c_compiler(self):
c_compiler = None
for x in [y.type for y in self.variables] + [y.op for y in self.node_order]:
for x in [y.type for y in self.variables] + [
y.op for y in self.node_order]:
if hasattr(x, 'c_compiler'):
x_compiler = x.c_compiler()
else:
......@@ -662,11 +746,13 @@ class CLinker(link.Linker):
c_compiler = x_compiler
else:
if x_compiler and (x_compiler != c_compiler):
raise Exception('Nodes have requested specific different compilers',
(c_compiler, x_compiler))
raise Exception('Nodes have requested specific'
' different compilers',
(c_compiler, x_compiler))
if (c_compiler is None):
return cmodule.GCC_compiler
else: return c_compiler
else:
return c_compiler
def header_dirs(self):
"""WRITEME
......@@ -676,9 +762,12 @@ class CLinker(link.Linker):
The return value will not contain duplicates.
"""
ret = []
for x in [y.type for y in self.variables] + [y.op for y in self.node_order]:
try: ret += x.c_header_dirs()
except utils.MethodNotDefined: pass
for x in [y.type for y in self.variables] + [
y.op for y in self.node_order]:
try:
ret += x.c_header_dirs()
except utils.MethodNotDefined:
pass
return list(set(ret))
def libraries(self):
......@@ -689,9 +778,12 @@ class CLinker(link.Linker):
The return value will not contain duplicates.
"""
ret = []
for x in [y.type for y in self.variables] + [y.op for y in self.node_order]:
try: ret += x.c_libraries()
except utils.MethodNotDefined: pass
for x in [y.type for y in self.variables] + [
y.op for y in self.node_order]:
try:
ret += x.c_libraries()
except utils.MethodNotDefined:
pass
return list(set(ret))
def lib_dirs(self):
......@@ -702,12 +794,16 @@ class CLinker(link.Linker):
The return value will not contain duplicates.
"""
ret = []
for x in [y.type for y in self.variables] + [y.op for y in self.node_order]:
try: ret += x.c_lib_dirs()
except utils.MethodNotDefined: pass
for x in [y.type for y in self.variables] + [
y.op for y in self.node_order]:
try:
ret += x.c_lib_dirs()
except utils.MethodNotDefined:
pass
return list(set(ret))
def __compile__(self, input_storage = None, output_storage = None, keep_lock=False):
def __compile__(self, input_storage=None,
output_storage=None, keep_lock=False):
"""WRITEME
Compiles this linker's env.
......@@ -737,33 +833,37 @@ class CLinker(link.Linker):
input_storage,
output_storage,
keep_lock=keep_lock)
return thunk, \
[link.Container(input, storage) for input, storage in izip(self.env.inputs, input_storage)], \
[link.Container(output, storage, True) for output, storage in izip(self.env.outputs, output_storage)], \
error_storage
return (thunk,
[link.Container(input, storage) for input, storage in
izip(self.env.inputs, input_storage)],
[link.Container(output, storage, True) for output, storage in
izip(self.env.outputs, output_storage)],
error_storage)
def get_init_tasks(self):
init_tasks = []
tasks = []
id=1
id = 1
for v in self.variables:
if v in self.consts:
continue
if v in self.orphans and isinstance(v, graph.Constant):
try:
v.type.c_literal(v.data) #constant will be inlined, no need to get
# constant will be inlined, no need to get
v.type.c_literal(v.data)
continue
except (utils.MethodNotDefined, NotImplementedError):
pass
init_tasks.append((v, 'init', id))
tasks.append((v, 'get', id+1))
tasks.append((v, 'get', id + 1))
id += 2
for node in self.node_order:
tasks.append((node, 'code', id))
id += 1
return init_tasks, tasks
def make_thunk(self, input_storage = None, output_storage = None, keep_lock=False):
def make_thunk(self, input_storage=None, output_storage=None,
keep_lock=False):
"""WRITEME
Compiles this linker's env and returns a function to perform the
computations, as well as lists of storage cells for both the
......@@ -787,16 +887,18 @@ class CLinker(link.Linker):
first_output = ostor[0].data
"""
init_tasks, tasks = self.get_init_tasks()
cthunk, in_storage, out_storage, error_storage = self.__compile__(input_storage, output_storage,
keep_lock=keep_lock)
res = _CThunk(cthunk, init_tasks, tasks, error_storage), in_storage, out_storage
return res
cthunk, in_storage, out_storage, error_storage = self.__compile__(
input_storage, output_storage,
keep_lock=keep_lock)
res = _CThunk(cthunk, init_tasks, tasks, error_storage)
return res, in_storage, out_storage
def cmodule_key(self):
"""Return a complete hashable signature of the module we compiled.
This function must have the property that no two programs that compute different things
yield the same key.
This function must have the property that no two programs that
compute different things yield the same key.
The key returned by this function is of the form (version, signature)
The signature has the following form:
......@@ -817,8 +919,9 @@ class CLinker(link.Linker):
It is followed by elements for every node in the
topological ordering of `self.env`.
If the Op of any Apply in the Env does not have c_code_cache_ok()==True, then this
function raises a KeyError exception.
If the Op of any Apply in the Env does not have
c_code_cache_ok()==True, then this function raises a KeyError
exception.
Input Signature
---------------
......@@ -828,10 +931,13 @@ class CLinker(link.Linker):
type of the node input, and the nature of that input in the
graph.
The nature of a typical variable is encoded by integer pairs ``((a,b),c)``:
``a`` is the topological position of the input's owner (-1 for graph inputs),
The nature of a typical variable is encoded by integer pairs
``((a,b),c)``:
``a`` is the topological position of the input's owner
(-1 for graph inputs),
``b`` is the index of the variable in the owner's output list.
``c`` is a flag indicating whether the variable is in the no_recycling set.
``c`` is a flag indicating whether the variable is in the
no_recycling set.
If a variable is also a graph output, then its position in the
outputs list is also bundled with this tuple (after the b).
......@@ -865,6 +971,7 @@ class CLinker(link.Linker):
libraries=self.libraries(),
header_dirs=self.header_dirs(),
)
@staticmethod
def cmodule_key_(env, no_recycling, compile_args=[], libraries=[],
header_dirs=[], insert_config_md5=True):
......@@ -876,13 +983,14 @@ class CLinker(link.Linker):
#set of variables that have been computed by nodes we have
# seen 'so far' in the loop below
env_computed_set = set()
env_inputs_dict = dict((i, (-1, pos)) for pos, i in enumerate(env.inputs))
env_inputs_dict = dict((i, (-1, pos)) for pos, i in
enumerate(env.inputs))
constant_ids = dict()
op_pos = {} # Apply -> topological position
op_pos = {} # Apply -> topological position
# First we put the header, compile_args, library names and config md5
# into the signature.
sig = ['CLinker.cmodule_key'] # will be cast to tuple on return
sig = ['CLinker.cmodule_key'] # will be cast to tuple on return
if compile_args is not None:
# We must sort it as the order from a set is not guaranteed.
# In particular, 2 sets with the same content can give different
......@@ -912,6 +1020,7 @@ class CLinker(link.Linker):
sig.append('md5: <omitted>')
error_on_play = [False]
def in_sig(i, topological_pos, i_idx):
# assert that every input to every node is one of'
# - an env input
......@@ -920,7 +1029,7 @@ class CLinker(link.Linker):
# It is important that a variable (i)
# yield a 'position' that reflects its role in code_gen()
if isinstance(i, graph.Constant): #orphans
if isinstance(i, graph.Constant): # orphans
if id(i) not in constant_ids:
isig = (i.signature(), topological_pos, i_idx)
# If the Theano constant provides a strong hash
......@@ -933,7 +1042,8 @@ class CLinker(link.Linker):
isig = (isig[0].theano_hash(), topological_pos, i_idx)
try:
hash(isig)
except Exception: #generic constants don't have a hashable signature
except Exception:
#generic constants don't have a hashable signature
error_on_play[0] = True
return None
constant_ids[id(i)] = isig
......@@ -941,20 +1051,22 @@ class CLinker(link.Linker):
isig = constant_ids[id(i)]
#print 'SIGNATURE', i.signature()
#return i.signature()
elif i in env_inputs_dict: #inputs
elif i in env_inputs_dict: # inputs
isig = env_inputs_dict[i]
else:
if i.owner is None:
assert all( all(out is not None for out in o.outputs) for o in order)
assert all( input.owner is None for input in env.inputs)
raise Exception('what is this?', (i, type(i), i.clients, env))
assert all(all(out is not None for out in o.outputs)
for o in order)
assert all(input.owner is None for input in env.inputs)
raise Exception('what is this?', (i, type(i), i.clients,
env))
if i in env.outputs:
isig = (op_pos[i.owner], # outputs
isig = (op_pos[i.owner], # outputs
i.owner.outputs.index(i),
env.outputs.index(i))
else:
isig = (op_pos[i.owner], i.owner.outputs.index(i)) # temps
isig = (op_pos[i.owner], i.owner.outputs.index(i)) # temps
return (isig, i in no_recycling)
version = []
......@@ -973,7 +1085,7 @@ class CLinker(link.Linker):
sig.append((
node.op,
tuple((i.type, in_sig(i, node_pos, ipos))
for ipos,i in enumerate(node.inputs)),
for ipos, i in enumerate(node.inputs)),
tuple(o in no_recycling for o in node.outputs)))
if error_on_play[0]:
......@@ -1026,7 +1138,9 @@ class CLinker(link.Linker):
if compiler_name == 'NVCC_compiler' and config.lib.amdlibm:
# This lib does not work correctly with nvcc in device code.
# and newer version of g++ as 4.5.1.
# example of errors: "/usr/lib/gcc/x86_64-redhat-linux/4.5.1/include/mmintrin.h(49): error: identifier "__builtin_ia32_emms" is undefined"
# example of errors: "/usr/lib/gcc/x86_64-redhat-linux/4.5.1/
# include/mmintrin.h(49): error: identifier
# "__builtin_ia32_emms" is undefined"
if '<amdlibm.h>' in mod.includes:
mod.includes.remove('<amdlibm.h>')
......@@ -1057,7 +1171,8 @@ class CLinker(link.Linker):
yield module
def build_dynamic_module(self):
"""Return a cmodule.DynamicModule instance full of the code for our env.
"""Return a cmodule.DynamicModule instance full of the code
for our env.
"""
self.code_gen()
module_name = self.hash
......@@ -1065,13 +1180,16 @@ class CLinker(link.Linker):
mod = cmodule.DynamicModule(module_name)
# The code of instantiate
code = self.instantiate_code(1+len(self.args)) #the 1 is for error_storage
instantiate = cmodule.ExtFunction('instantiate', code, method=cmodule.METH_VARARGS)
# the 1 is for error_storage
code = self.instantiate_code(1 + len(self.args))
instantiate = cmodule.ExtFunction('instantiate', code,
method=cmodule.METH_VARARGS)
#['error_storage'] + argnames,
#local_dict = d,
#global_dict = {})
# Static methods that can run and destroy the struct built by instantiate.
# Static methods that can run and destroy the struct built by
# instantiate.
static = """
int %(struct_name)s_executor(%(struct_name)s* self) {
return self->run();
......@@ -1086,7 +1204,7 @@ class CLinker(link.Linker):
//printf("done cleanup\\n");
//fflush(stdout);
}
""" % dict(struct_name = self.struct_name)
""" % dict(struct_name=self.struct_name)
# We add all the support code, compile args, headers and libs we need.
for support_code in self.support_code() + self.c_support_code_apply:
......@@ -1099,8 +1217,8 @@ class CLinker(link.Linker):
return mod
def cthunk_factory(self, error_storage, in_storage, out_storage, keep_lock=False):
def cthunk_factory(self, error_storage, in_storage, out_storage,
keep_lock=False):
"""WRITEME
error_storage -> list of length 3
in_storage -> list of lists of length 1, one per input
......@@ -1120,18 +1238,22 @@ class CLinker(link.Linker):
# If we can't get a key, then forget the cache mechanism.
module = self.compile_cmodule()
else:
module = get_module_cache().module_from_key(key=key, fn=self.compile_cmodule_by_step, keep_lock=keep_lock)
module = get_module_cache().module_from_key(
key=key, fn=self.compile_cmodule_by_step, keep_lock=keep_lock)
vars = self.inputs + self.outputs + self.orphans
# List of indices that should be ignored when passing the arguments
# (basically, everything that the previous call to uniq eliminated)
dupidx = [i for i, x in enumerate(vars) if vars.count(x) > 1 and vars.index(x) != i]
dupidx = [i for i, x in enumerate(vars)
if vars.count(x) > 1 and vars.index(x) != i]
out_storage = [x for i, x in enumerate(out_storage) if (i+len(in_storage)) not in dupidx]
out_storage = [x for i, x in enumerate(out_storage)
if (i + len(in_storage)) not in dupidx]
in_storage = [x for i, x in enumerate(in_storage) if i not in dupidx]
orphd = [[orphan.data] for orphan in self.orphans]
ret = module.instantiate(error_storage, *(in_storage + out_storage + orphd))
ret = module.instantiate(error_storage, *(in_storage + out_storage +
orphd))
return ret
......@@ -1150,6 +1272,7 @@ class CLinker(link.Linker):
print >> code, " return thunk; }"
return code.getvalue()
class _CThunk(object):
"""
A thunk with a C implementation
......@@ -1181,7 +1304,8 @@ class _CThunk(object):
n = len(self.init_tasks)
# note that the failure code is distributed in two lists
if failure_code < 2 * n:
return [self.init_tasks, self.tasks][failure_code % 2][failure_code/2]
return [self.init_tasks, self.tasks][
failure_code % 2][failure_code / 2]
else:
return self.tasks[failure_code - n]
......@@ -1199,19 +1323,16 @@ class _CThunk(object):
exc_value = exc_type(_exc_value, task, task.outputs)
else:
exc_value = exc_type(_exc_value, task)
exc_value.__thunk_trace__ = trace # this can be used to retrieve the location the Op was declared
# this can be used to retrieve the location the Op was declared
exc_value.__thunk_trace__ = trace
except Exception:
print >> sys.stderr, 'ERROR retrieving error_storage', self.error_storage
print >> sys.stderr, 'ERROR retrieving error_storage',
print >> sys.stderr, self.error_storage
raise
raise exc_type, exc_value, exc_trace
class OpWiseCLinker(link.LocalLinker):
"""WRITEME
Uses CLinker on the individual Ops that comprise an env and loops
......@@ -1227,27 +1348,30 @@ class OpWiseCLinker(link.LocalLinker):
If a Variable is in no_recycling, CLinker will clear the output storage
associated to it prior to computation (to avoid reusing it).
:note: This is in a sense the 'default' linker for Theano. The overhead of using the
OpWiseCLinker as compared with the CLinker is only noticeable for graphs of very small
tensors (such as 20 elements or less)
:note: This is in a sense the 'default' linker for Theano. The
overhead of using the OpWiseCLinker as compared with the CLinker
is only noticeable for graphs of very small tensors (such as 20
elements or less)
"""
__cache__ = {}
def __init__(self,
fallback_on_perform = True,
allow_gc = True,
nice_errors = True):
fallback_on_perform=True,
allow_gc=True,
nice_errors=True):
self.env = None
self.fallback_on_perform = fallback_on_perform
self.nice_errors = nice_errors
self.allow_gc = allow_gc
def accept(self, env, no_recycling = []):
def accept(self, env, no_recycling=[]):
if self.env is not None and self.env is not env:
return type(self)(self.fallback_on_perform).accept(env, no_recycling)
#raise Exception("Cannot accept from a Linker that is already tied to another Env.")
return type(self)(self.fallback_on_perform).accept(env,
no_recycling)
#raise Exception("Cannot accept from a Linker that is
#already tied to another Env.")
self.env = env
self.no_recycling = no_recycling
return self
......
......@@ -5,14 +5,12 @@ import logging
import sys
import time
import link
import traceback
from theano.gof.python25 import all
import theano
config = theano.config
from theano.configparser import config, AddConfigVar, BoolParam
from theano import config
logger = logging.getLogger(__name__)
......@@ -33,13 +31,13 @@ class VM(object):
number of times thunks[i] was called in the course of computations
performed by call_with_timers().
call_times - list of floats, one for each thunk. call_times[i] is the amount
of runtime spent on thunks[i] in the course of computations performed by
call_with_timers().
call_times - list of floats, one for each thunk. call_times[i] is
the amount of runtime spent on thunks[i] in the course of
computations performed by call_with_timers().
need_update_inputs - bool. True indicates that Function.__call__ must
implement the feedback from output storage to input storage. False means
it *must not* repeat that feedback.
need_update_inputs - bool. True indicates that Function.__call__
must implement the feedback from output storage to input
storage. False means it *must not* repeat that feedback.
"""
def __init__(self, nodes, thunks, pre_call_clear):
......@@ -58,8 +56,8 @@ class VM(object):
self.nodes = nodes
self.thunks = thunks
self.pre_call_clear = pre_call_clear
self.call_counts = [0]*len(nodes)
self.call_times = [0]*len(nodes)
self.call_counts = [0] * len(nodes)
self.call_times = [0] * len(nodes)
self.time_thunks = False
# This variable (self.need_update_inputs) is overshadowed by
......@@ -88,14 +86,15 @@ class VM(object):
def update_profile(self, profile):
# accumulate into the profile object
for node, thunk, t, c in zip(self.nodes, self.thunks, self.call_times, self.call_counts):
profile.apply_time.setdefault(node,0.0)
for node, thunk, t, c in zip(self.nodes, self.thunks,
self.call_times, self.call_counts):
profile.apply_time.setdefault(node, 0.0)
profile.apply_time[node] += t
profile.apply_callcount.setdefault(node,0)
profile.apply_callcount.setdefault(node, 0)
profile.apply_callcount[node] += c
profile.apply_cimpl[node] = hasattr(thunk,'cthunk')
profile.apply_cimpl[node] = hasattr(thunk, 'cthunk')
# clear the timer info out of the buffers
for i in xrange(len(self.call_times)):
......@@ -113,7 +112,8 @@ class Loop(VM):
for cont in self.pre_call_clear:
cont[0] = None
try:
for i, (thunk, node) in enumerate(zip(self.thunks, self.nodes)):
for i, (thunk, node) in enumerate(zip(self.thunks,
self.nodes)):
t0 = time.time()
thunk()
t1 = time.time()
......@@ -141,13 +141,16 @@ class LoopGC(VM):
self.post_thunk_clear = post_thunk_clear
if not (len(nodes) == len(thunks) == len(post_thunk_clear)):
raise ValueError()
def __call__(self):
if self.time_thunks:
for cont in self.pre_call_clear:
cont[0] = None
try:
i = 0
for thunk, node, old_storage in zip(self.thunks, self.nodes, self.post_thunk_clear):
for thunk, node, old_storage in zip(self.thunks,
self.nodes,
self.post_thunk_clear):
t0 = time.time()
thunk()
t1 = time.time()
......@@ -162,7 +165,8 @@ class LoopGC(VM):
for cont in self.pre_call_clear:
cont[0] = None
try:
for thunk, node, old_storage in zip(self.thunks, self.nodes, self.post_thunk_clear):
for thunk, node, old_storage in zip(self.thunks, self.nodes,
self.post_thunk_clear):
thunk()
for old_s in old_storage:
old_s[0] = None
......@@ -200,8 +204,8 @@ class Stack(VM):
for i, node in enumerate(self.nodes):
node_idx[node] = i
self.apply_time[node] = 0
self.outputs_size[node] = []
self.apply_time[node] = 0
self.outputs_size[node] = []
node.destroy_dependencies = []
if node in ords:
for prereq in ords[node]:
......@@ -217,9 +221,9 @@ class Stack(VM):
if cl[0] is not 'output':
ls += cl[0].outputs
dependencies[k] += ls
if config.profile:
self.memory_size_map = {"nt8": 1, "t16": 2, "t32": 4, "t64": 8, "128": 16}
self.memory_size_map = {"nt8": 1, "t16": 2, "t32": 4,
"t64": 8, "128": 16}
atexit.register(self.atexit_print_all)
def run_thunk_of_node(self, node):
......@@ -257,11 +261,13 @@ class Stack(VM):
last_apply_stack_len = -1
ls = []
while apply_stack:
# Make sure something happened last time round.
# This is just a safety check to make sure the op is written correctly
# apply_stack should either decrease in length by one (a thunk successfully applied), or
# increase in length (added dependencies over and above the original).
# NB: this doesn't catch cycles (would be too expensive/slow), just stalls.
# Make sure something happened last time round. This is
# just a safety check to make sure the op is written
# correctly apply_stack should either decrease in length
# by one (a thunk successfully applied), or increase in
# length (added dependencies over and above the original).
# NB: this doesn't catch cycles (would be too expensive/slow),
# just stalls.
apply_stack_len = len(apply_stack)
assert apply_stack_len != last_apply_stack_len
last_apply_stack_len = apply_stack_len
......@@ -289,8 +295,8 @@ class Stack(VM):
if not thunks[self.node_idx[current_apply]].lazy:
# Check if all inputs are in place
# If so compute thunk and remove it from the apply_stack
# If not leave it in, and add to the apply_stack those that will
# produce you those inputs
# If not leave it in, and add to the apply_stack those
# that will produce you those inputs
if computed_ins and not computed_outs:
try:
......@@ -302,22 +308,26 @@ class Stack(VM):
# ?? What about inplace .. if the op is inplace
# you don't actually ask for more memory!
size = []
for (idx,o) in enumerate(
thunks[self.node_idx[current_apply]].outputs):
if not hasattr(o[0],'size'):
for (idx, o) in enumerate(
thunks[self.node_idx[
current_apply]].outputs):
if not hasattr(o[0], 'size'):
size.append(-1)
continue
s=o[0].size
s = o[0].size
dtype = str(o[0].dtype)
dtype2 = dtype[-3:]
s *= self.memory_size_map[dtype2] # KeyError here: couldn't determine the dtype memory size
# KeyError here: couldn't determine
# the dtype memory size
s *= self.memory_size_map[dtype2]
size.append(s)
self.outputs_size[current_apply] = size
except Exception:
raise_with_op(current_apply)
for o in current_apply.outputs:
compute_map[o][0] = 1
# Garbage Collection -> check if anybody else uses this input
# Garbage Collection -> check if anybody else uses
# this input
if self.allow_gc:
for i in current_apply.inputs:
if (dependencies[i] and i.owner
......@@ -332,8 +342,11 @@ class Stack(VM):
elif not computed_ins:
apply_stack.append(current_apply)
apply_stack.extend(inp.owner for inp in current_apply.inputs if inp.owner)
apply_stack.extend(inp.owner for inp in current_apply.destroy_dependencies if inp.owner)
apply_stack.extend(inp.owner for inp
in current_apply.inputs if inp.owner)
apply_stack.extend(inp.owner for inp
in current_apply.destroy_dependencies
if inp.owner)
elif not computed_outs:
# Try and run it to see if it works
......@@ -346,22 +359,26 @@ class Stack(VM):
if requires:
for r in requires:
# We are not done with this op ..
# so we added back and see to get the inputs we are missing
# We are not done with this op .. so we added
# back and see to get the inputs we are
# missing
apply_stack.append(current_apply)
if current_apply.inputs[r].owner:
apply_stack.append(current_apply.inputs[r].owner)
else:
if config.profile:
size = []
for (idx,o) in enumerate(thunks[self.node_idx[current_apply]].outputs):
for (idx, o) in enumerate(thunks[
self.node_idx[current_apply]].outputs):
if not hasattr(o[0], 'size'):
size.append(-1)
continue
s=o[0].size
dtype = str(o[0].dtype)
dtype2 = dtype[-2:]
s *= self.memory_size_map[dtype2] # KeyError here: couldn't determine the dtype memory size
# KeyError here: couldn't determine the
# dtype memory size
s *= self.memory_size_map[dtype2]
size.append(s)
self.outputs_size[current_apply] = size
if self.allow_gc:
......@@ -379,6 +396,7 @@ class Stack(VM):
try:
import lazylinker_c
class CVM(lazylinker_c.CLazyLinker, VM):
def __init__(self, *args, **kwargs):
lazylinker_c.CLazyLinker.__init__(self, *args, **kwargs)
......@@ -394,9 +412,9 @@ class VM_Linker(link.LocalLinker):
def __init__(self, allow_gc=True, use_cloop=False, callback=None):
"""
allow_gc - force the virtual machine to clean up unnecessary references,
in order to allow garbage collection on intermediate values during
computation of a function.
allow_gc - force the virtual machine to clean up unnecessary
references, in order to allow garbage collection on
intermediate values during computation of a function.
use_cloop - use the C-based virtual machine if possible
......@@ -411,9 +429,10 @@ class VM_Linker(link.LocalLinker):
self.callback = callback
self.updated_vars = {}
def accept(self, env, no_recycling = []):
def accept(self, env, no_recycling=[]):
"""
:param env: a PerformLinker can have accepted one Env instance at a time.
:param env: a PerformLinker can have accepted one Env instance
at a time.
:param no_recycling: WRITEME
......@@ -464,9 +483,9 @@ class VM_Linker(link.LocalLinker):
nodes_idx_inv = {}
vars_idx_inv = {}
for (node,i) in nodes_idx.items():
for (node, i) in nodes_idx.items():
nodes_idx_inv[i] = node
for (var,i) in vars_idx.items():
for (var, i) in vars_idx.items():
vars_idx_inv[i] = var
# put storage_map and compute_map into a int-based scheme
......@@ -496,8 +515,8 @@ class VM_Linker(link.LocalLinker):
base_input_output_list.extend(outputs_idx)
# build the var owner array
var_owner = [None]*len(vars_idx)
for (var,i) in vars_idx.items():
var_owner = [None] * len(vars_idx)
for (var, i) in vars_idx.items():
if var.owner:
var_owner[i] = nodes_idx[var.owner]
......@@ -511,18 +530,18 @@ class VM_Linker(link.LocalLinker):
for i, node in enumerate(nodes):
node_output_size.append(0)
prereq_var_idxs = []
for prereq_node in ords.get(node,[]):
for prereq_node in ords.get(node, []):
prereq_var_idxs.extend(
[vars_idx[v] for v in prereq_node.outputs])
prereq_var_idxs = list(set(prereq_var_idxs))
prereq_var_idxs.sort() # TODO: why sort?
prereq_var_idxs.sort() # TODO: why sort?
node_prereqs.append(prereq_var_idxs)
update_storage = []
for (ivar, ovar) in updated_vars.items():
if ivar != ovar:
update_storage.append(vars_idx[ivar]) #dst
update_storage.append(vars_idx[ovar]) #src
update_storage.append(vars_idx[ivar]) # dst
update_storage.append(vars_idx[ovar]) # src
c0 = sys.getrefcount(node_n_inputs)
vm = CVM(
......@@ -530,8 +549,8 @@ class VM_Linker(link.LocalLinker):
thunks,
pre_call_clear,
allow_gc=self.allow_gc,
call_counts=[0]*len(nodes),
call_times=[0.0]*len(nodes),
call_counts=[0] * len(nodes),
call_times=[0.0] * len(nodes),
compute_map_list=compute_map_list,
storage_map_list=storage_map_list,
base_input_output_list=base_input_output_list,
......@@ -569,7 +588,7 @@ class VM_Linker(link.LocalLinker):
)
return vm
def make_all(self, profiler = None, input_storage = None,
def make_all(self, profiler=None, input_storage=None,
output_storage = None,
):
env = self.env
......@@ -617,4 +636,3 @@ class VM_Linker(link.LocalLinker):
for output, storage in zip(env.outputs, output_storage)],
thunks,
order)
......@@ -346,7 +346,9 @@ def handle_shared_float32(tf):
theano.compile.shared_constructor(float32_shared_constructor)
else:
raise NotImplementedError('removing our handler')
theano.compile.shared_constructor(float32_shared_constructor, True)
assert (float32_shared_constructor not in
theano.compile.shared.constructors)
# We can't test the driver during import here as this cause circular
# import dependency. So we also test it in the file theano/__init__.py
......
from theano.sparse.basic import * # To facilitate later merge into sparse module
from theano.sparse.basic import _is_sparse, _is_sparse_variable, \
_is_dense_variable, _is_sparse, _is_dense, _kmap_eq, _kmap_hash
from theano.sparse.basic import (
_is_sparse, _is_sparse_variable, _is_dense_variable,
_is_sparse, _is_dense, _kmap_eq, _kmap_hash)
class Cast(gof.op.Op):
def __init__(self, out_type):
self.out_type = out_type
def __eq__(self, other):
return (type(self) == type(other)) and self.out_type == other.out_type
def __hash__(self):
return hash(type(self)) ^ hash(self.out_type)
def make_node(self, x):
x = as_sparse_variable(x)
return gof.Apply(self, [x],
[SparseType(dtype=self.out_type, format=x.format).make_variable()])
def perform(self, node, (x, ), (out, )):
assert _is_sparse(x)
out[0] = x
......@@ -20,95 +26,118 @@ class Cast(gof.op.Op):
fcast = Cast('float32')
dcast = Cast('float64')
class Poisson(gof.op.Op):
def __eq__(self, other):
return (type(self) == type(other))
def __hash__(self):
return hash(type(self))
def make_node(self, x):
x = as_sparse_variable(x)
return gof.Apply(self, [x], [x.type()])
def perform(self, node, (x, ), (out, )):
assert _is_sparse(x)
out[0] = x.copy()
out[0].data = numpy.asarray(numpy.random.poisson(out[0].data), dtype=x.dtype)
out[0].data = numpy.asarray(numpy.random.poisson(out[0].data),
dtype=x.dtype)
out[0].eliminate_zeros()
poisson = Poisson()
class Multinomial(gof.op.Op):
def __eq__(self, other):
return (type(self) == type(other))
def __hash__(self):
return hash(type(self))
def make_node(self, n, p):
n = tensor.as_tensor_variable(n)
p = as_sparse_variable(p)
return gof.Apply(self, [n, p], [p.type()])
def perform(self, node, (n, p), (out, )):
assert _is_sparse(p)
if p.format != 'csr':
raise NotImplemented()
out[0] = p.copy()
for i in xrange(p.shape[0]):
k, l = p.indptr[i], p.indptr[i+1]
k, l = p.indptr[i], p.indptr[i + 1]
out[0].data[k:l] = numpy.random.multinomial(n[i], p.data[k:l])
multinomial = Multinomial()
class EliminateZeros(gof.op.Op):
def __eq__(self, other):
return (type(self) == type(other))
def __hash__(self):
return hash(type(self))
def make_node(self, x):
x = as_sparse_variable(x)
return gof.Apply(self, [x], [x.type()])
def perform(self, node, (x, ), (out, )):
assert _is_sparse(x)
out[0] = x.copy()
out[0].eliminate_zeros()
eliminate_zeros = EliminateZeros()
class Sum(gof.op.Op):
def __eq__(self, other):
return (type(self) == type(other))
def __hash__(self):
return hash(type(self))
def make_node(self, x, a):
x = as_sparse_variable(x)
a = tensor.as_tensor_variable(a)
return gof.Apply(self, [x, a], [tensor.TensorType(dtype = x.type.dtype,
broadcastable = (False,)).make_variable()])
return gof.Apply(self, [x, a], [tensor.TensorType(dtype=x.type.dtype,
broadcastable=(False,)).make_variable()])
def perform(self, node, (x, a), (out, )):
assert _is_sparse(x)
out[0] = numpy.asarray(x.sum(a), dtype=x.dtype).flatten()
sum = Sum()
class Binomial(gof.op.Op):
def __init__(self, format, dtype):
self.format = format
self.dtype = dtype
def __eq__(self, other):
return (type(self) == type(other)) and self.format == other.format and \
self.dtype == other.dtype
return ((type(self) == type(other)) and
self.format == other.format and
self.dtype == other.dtype)
def __hash__(self):
return hash(type(self)) ^ hash(self.format) ^ hash(self.dtype)
def make_node(self, n, p, shape):
n = tensor.as_tensor_variable(n)
p = tensor.as_tensor_variable(p)
shape = tensor.as_tensor_variable(shape)
return gof.Apply(self, [n, p, shape], [SparseType(dtype = self.dtype,
format = self.format).make_variable()])
return gof.Apply(self, [n, p, shape], [SparseType(dtype=self.dtype,
format=self.format).make_variable()])
def perform(self, node, (n, p, shape, ), (out, )):
N = n * p * shape[0] * shape[1]
data = numpy.ones(N, dtype=self.dtype)
row = numpy.random.randint(0, shape[0], N)
col = numpy.random.randint(0, shape[1], N)
res = scipy.sparse.coo_matrix((data, (row, col)), shape=shape)
out[0] = getattr(res, 'to' + self.format)()
out[0].data = numpy.ones_like(out[0].data)
csr_fbinomial = Binomial('csr', 'float32')
......@@ -116,16 +145,17 @@ csc_fbinomial = Binomial('csc', 'float32')
# Pre-built Binomial samplers for float64 CSR/CSC outputs.
csr_dbinomial = Binomial('csr', 'float64')
csc_dbinomial = Binomial('csc', 'float64')
def structured_sigmoid(x):
    """
    Element-wise sigmoid function only to the non-zero elements.
    """
    x = as_sparse_variable(x)
    data, indices, indptr, shape = csm_properties(x)
    # Apply sigmoid to the stored values only; the sparsity structure
    # (indices/indptr/shape) is passed through unchanged.
    return CSR(tensor.nnet.sigmoid(data), indices, indptr, shape)
......@@ -134,11 +164,11 @@ def structured_exp(x):
Element-wise exponential function to the non-zero elements.
"""
x = as_sparse_variable(x)
x_data, x_ind, x_ptr, x_shape = csm_properties(x)
x_data = tensor.exp(x_data)
return CSR(x_data, x_ind, x_ptr, x_shape)
......@@ -162,13 +192,13 @@ def structured_minimum(x, y):
Element-wise minimum function only to non-zero elements.
"""
x = as_sparse_variable(x)
y = tensor.as_tensor_variable(y)
x_data, x_ind, x_ptr, x_shape = csm_properties(x)
x_data = tensor.minimum(x_data, y)
return CSR(x_data, x_ind, x_ptr, x_shape)
......@@ -179,24 +209,28 @@ class StructuredAddSV(gof.op.Op):
matrix.'''
def __eq__(self, other):
    # Parameter-free Op: equality is identity of the Op class.
    return (type(self) == type(other))
def __hash__(self):
    # Consistent with __eq__: hash only on the Op class.
    return hash(type(self))
def make_node(self, x, y):
    # x: sparse matrix; y: dense 1-d vector added to x's nonzeros.
    x = as_sparse_variable(x)
    y = tensor.as_tensor_variable(y)

    assert y.type.ndim == 1

    if x.type.dtype != y.type.dtype:
        raise NotImplementedError()
    # The flattened diff duplicated the output-spec list, which is not
    # valid syntax; a single copy is kept.
    return gof.Apply(self,
                     [x, y],
                     [SparseType(dtype=x.type.dtype,
                                 format=x.type.format).make_variable()])
def perform(self, node, (x, y), (out, )):
assert _is_sparse(x) and not _is_sparse(y)
assert x.shape[1] == y.shape[0]
out[0] = x.__class__(x + (x.toarray() != 0) * y)
def grad(self, (x, y), (gz,)):
assert _is_sparse_variable(x) and _is_sparse_variable(y)
assert _is_sparse_variable(gz)
......@@ -207,14 +241,18 @@ structured_add_s_v = StructuredAddSV()
class StrucutedAddSVCSR(gof.Op):
def __eq__(self, other):
    # Parameter-free Op: equality is identity of the Op class.
    return (type(self) == type(other))
def __hash__(self):
    # Consistent with __eq__: hash only on the Op class.
    return hash(type(self))
def make_node(self, a_data, a_indices, a_indptr, b):
    # a_data/a_indices/a_indptr: CSR components of the sparse operand;
    # b: dense 1-d vector to add to the structural nonzeros.
    assert b.type.ndim == 1
    inputs = [a_data, a_indices, a_indptr, b]
    # Output is the updated data array: a dense vector with b's dtype.
    out_type = tensor.tensor(b.dtype, (False,))
    return gof.Apply(self, inputs, [out_type])
def c_code(self, node, name, (_data, _indices, _indptr, _b,), (_zout, ), sub):
def c_code(self, node, name, inputs, outputs, sub):
_data, _indices, _indptr, _b, = inputs
_zout, = outputs
if node.inputs[0].type.dtype in ('complex64', 'complex128'):
raise NotImplementedError('Complex types are not supported for a')
if node.inputs[3].type.dtype in ('complex64', 'complex128'):
......@@ -272,98 +310,105 @@ class StrucutedAddSVCSR(gof.Op):
}
}
"""% dict(locals(), **sub)
""" % dict(locals(), **sub)
structured_add_s_v_csr = StrucutedAddSVCSR()
@gof.local_optimizer([structured_add_s_v])
def local_structured_add_s_v(node):
    """Rewrite StructuredAddSV(sparse, vector) into the CSR-specialized
    StrucutedAddSVCSR applied to the CSR components.

    Returns a replacement list on success, False otherwise.
    (The flattened diff had duplicated both branches of the if/else and
    both the live and commented form of the y check; one copy of each
    is kept.)
    """
    if node.op == structured_add_s_v:
        x, y = node.inputs

        x_is_sparse_variable = _is_sparse_variable(x)
        #y_is_sparse_variable = _is_sparse_variable(y)

        # Exactly one input is sparse; the other is the dense vector.
        if x_is_sparse_variable:
            svar = x
            dvar = y
        else:
            svar = y
            dvar = x

        if dvar.type.ndim != 1:
            return False
        elif svar.type.format == 'csr':
            CSx = CSR
            structured_add_s_v_csx = structured_add_s_v_csr
        else:
            # Fixed: was `raise NotImplemented()` -- NotImplemented is
            # not an exception class and is not callable.
            raise NotImplementedError()

        s_val, s_ind, s_ptr, s_shape = csm_properties(svar)

        c_data = structured_add_s_v_csx(s_val, s_ind, s_ptr, dvar)

        return [CSx(c_data, s_ind, s_ptr, s_shape)]

    return False
register_specialize(local_structured_add_s_v)
class SamplingDot(gof.op.Op):
"""
Operand for calculating the dot product DOT(X, Y) = Z when you only want to calculate
a subset of Z. It is equivalent to P o (X . Y) where o is the element-wise product, X and Y operands of
the dot product and P is a matrix that contains 1 when the corresponding element of Z should be calculated
and 0 when it shouldn't. Note that SamplingDot has a different interface than DOT because SamplingDot
requires X to be a MxK matrix while Y is a NxK matrix instead of the usual KxN matrix.
It will work if the pattern is not binary value, but if the pattern doesn't have a high sparsity proportion
it will be slower then a more optimized dot followed by a normal elemwise multiplication.
Operand for calculating the dot product DOT(X, Y) = Z when you
only want to calculate a subset of Z. It is equivalent to P o (X
. Y) where o is the element-wise product, X and Y operands of the
dot product and P is a matrix that contains 1 when the
corresponding element of Z should be calculated and 0 when it
shouldn't. Note that SamplingDot has a different interface than
DOT because SamplingDot requires X to be a MxK matrix while Y is a
NxK matrix instead of the usual KxN matrix.
It will work if the pattern is not binary value, but if the
pattern doesn't have a high sparsity proportion it will be slower
then a more optimized dot followed by a normal elemwise
multiplication.
"""
def __eq__(self, other):
return type(self) == type(other)
def __hash__(self):
return hash(type(self))
def __str__(self):
return 'SamplingDot'
def make_node(self, x, y, p):
x = tensor.as_tensor_variable(x)
y = tensor.as_tensor_variable(y)
if not _is_sparse_variable(p):
raise TypeError(p)
dtype_out = scalar.upcast(x.type.dtype, y.type.dtype, p.type.dtype)
return gof.Apply(self, [x, y, p], [p.type()])
def perform(self, node, (x, y, p), (out,)):
if _is_sparse_variable(x):
raise TypeError(x)
if _is_sparse_variable(y):
raise TypeError(y)
if not _is_sparse(p):
raise TypeError(p)
rval = p.__class__(p.multiply(numpy.dot(x, y.T)))
out[0] = rval
def grad(self, (x, y, p), (gz,)):
rval = [
dot(gz, y),
dot(gz.T, x),
None
]
return rval
sampling_dot = SamplingDot()
class SamplingDotCsr(gof.Op):
"""
Optimized SamplingDot when the pattern P is a CSR matrix.
......@@ -374,13 +419,13 @@ class SamplingDotCsr(gof.Op):
"""
def __eq__(self, other):
    # Parameter-free Op: equality is identity of the Op class.
    return type(self) == type(other)
def __hash__(self):
    # Consistent with __eq__: hash only on the Op class.
    return hash(type(self))
def __str__(self):
    # Name shown when the graph is printed/debugged.
    return 'SamplingDot{Csr}'
def make_node(self, x, y, p_data, p_ind, p_ptr, p_ncols):
x = tensor.as_tensor_variable(x)
y = tensor.as_tensor_variable(y)
......@@ -388,12 +433,13 @@ class SamplingDotCsr(gof.Op):
p_ind = tensor.as_tensor_variable(p_ind)
p_ptr = tensor.as_tensor_variable(p_ptr)
p_ncols = tensor.as_tensor_variable(p_ncols)
assert p_ncols.dtype == 'int32'
dtype_out = scalar.upcast(x.type.dtype, y.type.dtype, p_data.type.dtype)
dtype_out = scalar.upcast(x.type.dtype, y.type.dtype,
p_data.type.dtype)
dot_out = scalar.upcast(x.type.dtype, y.type.dtype)
# We call blas ?dot function that take only param of the same type
x = tensor.cast(x, dot_out)
y = tensor.cast(y, dot_out)
......@@ -406,7 +452,7 @@ class SamplingDotCsr(gof.Op):
def c_support_code(self):
    # BLAS declarations needed by the ?dot_sub_ calls in c_code.
    return blas.blas_header_text()
def c_libraries(self):
    # Libraries to link the generated C code against (BLAS).
    # Removed leftover debugging breakpoint
    # (`import pdb; pdb.set_trace()`) that halted every compilation
    # of this Op.
    return blas.ldflags()
......@@ -416,19 +462,24 @@ class SamplingDotCsr(gof.Op):
def c_lib_dirs(self):
    # Directories to search for the BLAS libraries.
    return blas.ldflags(libs=False, libs_dir=True)
def c_header_dirs(self):
    # Directories to search for the BLAS headers.
    return blas.ldflags(libs=False, include_dir=True)
def c_code(self, node, name, (x, y, p_data, p_ind, p_ptr, p_ncols), (z_data, z_ind, z_ptr), sub):
def c_code(self, node, name, inputs, outputs, sub):
x, y, p_data, p_ind, p_ptr, p_ncols = inputs
z_data, z_ind, z_ptr = outputs
if node.inputs[0].type.dtype in ('complex64', 'complex128'):
raise NotImplementedError('Complex types are not supported for x')
if node.inputs[1].type.dtype in ('complex64', 'complex128'):
raise NotImplementedError('Complex types are not supported for y')
if node.inputs[2].type.dtype in ('complex64', 'complex128'):
raise NotImplementedError('Complex types are not supported for pattern')
dot_out = scalar.upcast(node.inputs[0].type.dtype, node.inputs[0].type.dtype)
raise NotImplementedError(
'Complex types are not supported for pattern')
# TODO: why 2 times the same inputs?
dot_out = scalar.upcast(node.inputs[0].type.dtype,
node.inputs[0].type.dtype)
if dot_out == "float32":
conv_type = "float"
......@@ -436,13 +487,17 @@ class SamplingDotCsr(gof.Op):
else:
conv_type = "double"
cdot = "ddot_sub_"
typenum_x = node.inputs[0].type.dtype_specs()[-1] # retrieve dtype number
typenum_y = node.inputs[1].type.dtype_specs()[-1] # retrieve dtype number
typenum_p = node.inputs[2].type.dtype_specs()[-1] # retrieve dtype number
typenum_zd = tensor.TensorType(node.outputs[0].dtype, []).dtype_specs()[-1] # retrieve dtype number
typenum_zi = tensor.TensorType(node.outputs[1].dtype, []).dtype_specs()[-1] # retrieve dtype number
typenum_zp = tensor.TensorType(node.outputs[2].dtype, []).dtype_specs()[-1] # retrieve dtype number
# retrieve dtype number
typenum_x = node.inputs[0].type.dtype_specs()[-1]
typenum_y = node.inputs[1].type.dtype_specs()[-1]
typenum_p = node.inputs[2].type.dtype_specs()[-1]
typenum_zd = tensor.TensorType(node.outputs[0].dtype,
[]).dtype_specs()[-1]
typenum_zi = tensor.TensorType(node.outputs[1].dtype,
[]).dtype_specs()[-1]
typenum_zp = tensor.TensorType(node.outputs[2].dtype,
[]).dtype_specs()[-1]
rval = """
if (%(x)s->nd != 2) {PyErr_SetString(PyExc_NotImplementedError, "rank(x) != 2"); %(fail)s;}
......@@ -529,13 +584,14 @@ class SamplingDotCsr(gof.Op):
Dzd[n_idx * Sdzd] *= Dpd[n_idx * Sdpd];
}
}
}
}
"""% dict(locals(), **sub)
""" % dict(locals(), **sub)
return rval
sampling_dot_csr = SamplingDotCsr()
# register a specialization to replace SamplingDot -> SamplingDotCsr
@gof.local_optimizer([sampling_dot])
def local_sampling_dot_csr(node):
......@@ -543,10 +599,10 @@ def local_sampling_dot_csr(node):
x, y, p = node.inputs
if p.type.format == 'csr':
p_data, p_ind, p_ptr, p_shape = csm_properties(p)
z_data, z_ind, z_ptr = sampling_dot_csr(x, y, p_data,
p_ind, p_ptr, p_shape[1])
return [CSR(z_data, z_ind, z_ptr, p_shape)]
return False
register_specialize(local_sampling_dot_csr, name='local_sampling_dot_csr')
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论