Merge pull request #630 from nouiz/pycuda_init

Pycuda init

Merge pull request #630 from nouiz/pycuda_init
42809e8b · lamblin · cbe15896 · 4d93ae26 · 42809e8b · 42809e8b
--- a/NEWS.txt
+++ b/NEWS.txt
@@ -22,6 +22,8 @@ Bug fixes
 * Fixed many subtle bugs involving mutable default arguments which may have
   led to unexpected behaviour, such as objects sharing instance variables
   they were not supposed to share. (David W-F)
+ * Correctly record the GPU device number used when we let the driver select it.
+   (Frederic B.)
 Documentation
 * Added in the tutorial documentation on how to extend Theano.

--- a/theano/compile/tests/test_builders.py
+++ b/theano/compile/tests/test_builders.py
@@ -22,9 +22,9 @@ class T_OpFromGraph(unittest.TestCase):
        xv = numpy.ones((2, 2), dtype=config.floatX)
        yv = numpy.ones((2, 2), dtype=config.floatX)*3
        zv = numpy.ones((2, 2), dtype=config.floatX)*5
-        print function, function.__module__
+        #print function, function.__module__
-        print fn.maker.env.toposort()
+        #print fn.maker.env.toposort()
-        print fn(xv, yv, zv)
+        fn(xv, yv, zv)
        assert numpy.all(8.0 == fn(xv, yv, zv))
        assert numpy.all(8.0 == fn(xv, yv, zv))

--- a/theano/compile/tests/test_debugmode.py
+++ b/theano/compile/tests/test_debugmode.py
@@ -13,7 +13,7 @@ import unittest
 def test0():
    x = theano.tensor.dvector()
    f = theano.function([x], ((2. * x) + 7) / 2., mode=debugmode.DebugMode())
-    print f([1, 2])
+    f([1, 2])
 class BROKEN_ON_PURPOSE_Add(gof.Op):
@@ -211,7 +211,7 @@ def test_badclinkeroutput():
    try:
        f_inconsistent([1.0, 2.0, 3.0], [2, 3, 4])
    except debugmode.BadCLinkerOutput, e:
-        print repr(e)
+        #print repr(e)
        assert e.r.owner.op is inconsistent
        return  # TEST PASS
@@ -490,7 +490,7 @@ class Test_ViewMap(unittest.TestCase):
            f([1, 2, 3, 4], [5, 6, 7, 8])
            assert False  # DebugMode should have caught the error
        except debugmode.BadViewMap, e:
-            print e
+            #print e
            pass
        # the situation can be rescued by picking one of the inputs and
@@ -554,7 +554,7 @@ class Test_check_isfinite(unittest.TestCase):
        #inf should go through
        infs = numpy.asarray([1.0, 1., 1.]) / 0
-        print infs
+        #print infs
        f(infs)
        return
@@ -576,11 +576,11 @@ class BrokenCImplementationAdd(gof.Op):
        return r
    def perform(self, node, inp, out_):
-        print 'executing python perform'
+        #print 'executing python perform'
        a, b = inp
        out, = out_
        z = a + b
-        print 'out[0] was:', out[0]
+        #print 'out[0] was:', out[0]
        out[0] = z
    def c_code_cache_version(self):
@@ -671,8 +671,8 @@ class Test_preallocated_output(unittest.TestCase):
            f = theano.function([a, b], out, mode='DEBUG_MODE')
            out_val = f(a_val, b_val)
-            print 'out_val =', out_val
+            #print 'out_val =', out_val
-            print out_val.strides
+            #print out_val.strides
            # Should work for now (0.4.0), because the C thunk does not care
            # at all of what is in storage_map initially.
@@ -682,8 +682,8 @@ class Test_preallocated_output(unittest.TestCase):
            f = theano.function([a, b], out, mode='DEBUG_MODE')
            out_val = f(a_val, b_val)
-            print 'out_val =', out_val
+            #print 'out_val =', out_val
-            print out_val.strides
+            #print out_val.strides
        finally:
            config.DebugMode.check_preallocated_output = init_conf_val
--- a/theano/compile/tests/test_function_module.py
+++ b/theano/compile/tests/test_function_module.py
@@ -307,7 +307,7 @@ class T_function(unittest.TestCase):
    def test_constant_output(self):
        # Test that if the output is a constant, we respect the theano memory interface
        f = theano.function([],theano.tensor.constant([4]))
-        print f.maker.env.toposort()
+        #print f.maker.env.toposort()
        out = f()
        assert (out==4).all()
        out[0]=3
@@ -318,7 +318,7 @@ class T_function(unittest.TestCase):
        # Test that if the output is a constant and borrow, we respect the theano memory interface
        f = theano.function([],Out(theano.tensor.constant([4]), borrow=True))
-        print f.maker.env.toposort()
+        #print f.maker.env.toposort()
        out = f()
        assert (out==4).all()
        out[0]=3
@@ -412,8 +412,8 @@ class T_picklefunction(unittest.TestCase):
        self.assertFalse(x in g.container)
        self.assertFalse(x in g.value)
        self.assertTrue(len(f.defaults) == len(g.defaults))
-        print 'f.defaults = %s' % (f.defaults, )
+        #print 'f.defaults = %s' % (f.defaults, )
-        print 'g.defaults = %s' % (g.defaults, )
+        #print 'g.defaults = %s' % (g.defaults, )
        self.assertTrue(all([f_req == g_req and f_feed == g_feed and
            f_val == g_val
            for ((f_req, f_feed, f_val), (g_req, g_feed, g_val)) in zip(

--- a/theano/compile/tests/test_inplace_opt_for_value.py
+++ b/theano/compile/tests/test_inplace_opt_for_value.py
@@ -187,7 +187,7 @@ class ExampleRNN(Module):
        self.minimizer = minimizer([x, y], self.cost, self.params)
    def _instance_initialize(self, obj):
-        print 'INITIALIZE EXAMPLE RNN'
+        #print 'INITIALIZE EXAMPLE RNN'
        n_vis = self.n_vis
        rng = N.random.RandomState(unittest_tools.fetch_seed(2342))
@@ -214,7 +214,7 @@ def test_example_rnn():
    LAG = 4
    y[LAG:] = x[:-LAG, 0:n_out]
-    if 1:
+    if 0:
        for i, node in enumerate(rnn.minimizer.step_cost.maker.env.toposort()):
            print i, node
@@ -223,9 +223,6 @@ def test_example_rnn():
        niter=30
    for i in xrange(niter):
-        if i % 100 == 0:
-            print i, rnn.minimizer.step_cost(x, y), rnn.minimizer.stepsize
-        else:
        rnn.minimizer.step_cost(x, y)
    if theano.config.mode=='DEBUG_MODE':
        assert rnn.minimizer.step_cost(x,y) < -.9 #it starts around -.28
@@ -258,7 +255,7 @@ def test_WEIRD_STUFF():
 #    rnn2 = rnn_module1.make(mode=Mode('c|py', 'fast_run').excluding("inplace_opt"))#work
 #    rnn2 = rnn_module1.make(mode=Mode('py', 'fast_run'))#fail
    m = Mode('py', 'fast_run')
-    for n in m.optimizer: print n.name
+#    for n in m.optimizer: print n.name
    if 0:
        topo1=rnn1.minimizer.step_cost.maker.env.toposort()
@@ -266,7 +263,7 @@ def test_WEIRD_STUFF():
        for i in range(len(topo1)):
            print '1',i, topo1[i]
            print '2',i, topo2[i]
-    if 1:
+    if 0:
        topo1=rnn1.minimizer.step.maker.env.toposort()
        topo2=rnn2.minimizer.step.maker.env.toposort()
        for i in range(len(topo1)):
@@ -274,10 +271,10 @@ def test_WEIRD_STUFF():
            print '2',i, topo2[i]
    import theano.printing
-    print len(rnn1.minimizer.step.maker.inputs)
+    #print len(rnn1.minimizer.step.maker.inputs)
-    print len(rnn2.minimizer.step.maker.inputs)
+    #print len(rnn2.minimizer.step.maker.inputs)
-    print rnn1.minimizer.step.maker.inputs
+    #print rnn1.minimizer.step.maker.inputs
-    print rnn2.minimizer.step.maker.inputs
+    #print rnn2.minimizer.step.maker.inputs
@@ -293,15 +290,15 @@ def test_WEIRD_STUFF():
    niter=3
    for i in xrange(niter):
-        print rnn1.minimizer.step_cost(x, y)
+        #print rnn1.minimizer.step_cost(x, y)
-        print rnn2.minimizer.step_cost(x, y)
+        #print rnn2.minimizer.step_cost(x, y)
    #    assert rnn1.n_vis != rnn2.n_vis or slef.n_hid != rnn2.n_hid or rnn1.n_out != rnn2.n_out
        assert (N.abs(rnn1.z0-rnn2.z0)<1e-8).all()
-        print (N.abs(rnn1.w-rnn2.w)<1e-8).all()
+        #print (N.abs(rnn1.w-rnn2.w)<1e-8).all()
-        print (N.abs(rnn1.w-rnn2.w))
+        #print (N.abs(rnn1.w-rnn2.w))
-        print rnn1.w
+        #print rnn1.w
-        print rnn2.w
+        #print rnn2.w
        assert (N.abs(rnn1.w-rnn2.w)<1e-8).all()
    #    assert b

--- a/theano/compile/tests/test_misc.py
+++ b/theano/compile/tests/test_misc.py
@@ -18,7 +18,7 @@ class NNet(object):
        self.lr = shared(lr, 'learning_rate')
        self.w1 = shared(numpy.zeros((n_hidden, n_input)), 'w1')
        self.w2 = shared(numpy.zeros((n_output, n_hidden)), 'w2')
-        print self.lr.type
+        #print self.lr.type
        self.hidden = sigmoid(tensor.dot(self.w1, self.input))
        self.output = tensor.dot(self.w2, self.hidden)
@@ -51,7 +51,7 @@ class TestNnet(unittest.TestCase):
                output, cost = nnet.sgd_step(input, target)
                mean_cost += cost
            mean_cost /= float(len(data))
-            print 'Mean cost at epoch %s: %s' % (epoch, mean_cost)
+            #print 'Mean cost at epoch %s: %s' % (epoch, mean_cost)
        self.assertTrue(abs(mean_cost - 0.20588975452) < 1e-6)
        # Just call functions to make sure they do not crash.
        out = nnet.compute_output(input)

--- a/theano/compile/tests/test_modes.py
+++ b/theano/compile/tests/test_modes.py
@@ -32,7 +32,7 @@ class T_bunch_of_modes(unittest.TestCase):
            # test that it runs something
            f([[1, 2], [3, 4]], [5, 6])
            linker_classes_involved.append(f.maker.mode.linker.__class__)
-            print 'MODE:', mode, f.maker.mode.linker, 'stop'
+#            print 'MODE:', mode, f.maker.mode.linker, 'stop'
        # regression check:
        # there should be
        # - VM_Linker

--- a/theano/compile/tests/test_module.py
+++ b/theano/compile/tests/test_module.py
@@ -146,7 +146,7 @@ class T_module(unittest.TestCase):
            #assign 4 and 5 to the two variables' containers in m
            m.l = [4, 5]
-            print 'm.f', m.f()
+            m.f()
            assert numpy.all(5 == m.f())
            assert numpy.all(4 == m.g())
@@ -189,9 +189,9 @@ class T_module(unittest.TestCase):
            assert 5 == m.f()
            assert 4 == m.g()
-        print 'dscalar test'
+        #print 'dscalar test'
        local_test(lambda:T.dscalar(),lambda:T.dscalar())
-        print 'value test'
+        #print 'value test'
        local_test(lambda:T.value(1),lambda:T.value(2))
@@ -494,9 +494,9 @@ class T_module(unittest.TestCase):
        M.a = [1,2,3]
        M.make()
        m = M.make()
-        print m.a
+        #print m.a
-        print m.a[0], type(m.a[0]), m.a[0] == 1
+        #print m.a[0], type(m.a[0]), m.a[0] == 1
-        print list(m.a)
+        #print list(m.a)
        assert list(m.a) == [1,2,3]
        assert m.a is not M.a
        try:
@@ -545,7 +545,8 @@ def test_multiple_references():
            self.sub_module = sub_module
        def _instance_initialize(self, obj):
-            print 'Initializing A'
+            pass
+            #print 'Initializing A'
    class B(theano.Module):
@@ -555,7 +556,8 @@ def test_multiple_references():
            self.sub_module = sub_module
        def _instance_initialize(self, obj):
-            print 'Initializing B'
+            pass
+            #print 'Initializing B'
    class C(theano.Module):
@@ -565,11 +567,11 @@ def test_multiple_references():
            self.value = theano.tensor.scalar()
        def _instance_initialize(self, obj):
-            print 'Initializing C'
+            #print 'Initializing C'
            obj.value = 0
        def _instance_set(self, obj, value):
-            print 'Setting C'
+            #print 'Setting C'
            obj.value = value
@@ -584,7 +586,7 @@ def test_multiple_references():
            self.bug = theano.tensor.scalar()
        def _instance_initialize(self, obj):
-            print 'Initializing D'
+            #print 'Initializing D'
            obj.c.set(1)

--- a/theano/compile/tests/test_pfunc.py
+++ b/theano/compile/tests/test_pfunc.py
@@ -369,7 +369,6 @@ class Test_pfunc(unittest.TestCase):
            z: (((x * 5) + y) ** z)})
        up()
-        print x.get_value(borrow=True)
        assert numpy.all(x.get_value() == 20)
        assert numpy.all(y.get_value() == 24)
        assert numpy.all(z.get_value() == (24 ** 2))
@@ -380,7 +379,6 @@ class Test_pfunc(unittest.TestCase):
        f = pfunc([], [x])
        f()
-        print x.get_value()
        assert x.get_value() == 1
        del x.default_update
@@ -399,32 +397,26 @@ class Test_pfunc(unittest.TestCase):
        # Test that the default update is taken into account in the right cases
        f1 = pfunc([], [x], no_default_updates=True)
        f1()
-        print x.get_value()
        assert x.get_value() == 0
        f2 = pfunc([], [x], no_default_updates=[x])
        f2()
-        print x.get_value()
        assert x.get_value() == 0
        f3 = pfunc([], [x], no_default_updates=[x, y])
        f3()
-        print x.get_value()
        assert x.get_value() == 0
        f4 = pfunc([], [x], no_default_updates=[y])
        f4()
-        print x.get_value()
        assert x.get_value() == 2
        f5 = pfunc([], [x], no_default_updates=[])
        f5()
-        print x.get_value()
        assert x.get_value() == 4
        f5 = pfunc([], [x], no_default_updates=False)
        f5()
-        print x.get_value()
        assert x.get_value() == 6
        self.assertRaises(TypeError, pfunc, [], [x], no_default_updates=(x))
@@ -435,32 +427,26 @@ class Test_pfunc(unittest.TestCase):
        # Mix explicit updates and no_default_updates
        g1 = pfunc([], [x], updates=[(x, (x - 1))], no_default_updates=True)
        g1()
-        print x.get_value()
        assert x.get_value() == 5
        g2 = pfunc([], [x], updates=[(x, (x - 1))], no_default_updates=[x])
        g2()
-        print x.get_value()
        assert x.get_value() == 4
        g3 = pfunc([], [x], updates=[(x, (x - 1))], no_default_updates=[x, y])
        g3()
-        print x.get_value()
        assert x.get_value() == 3
        g4 = pfunc([], [x], updates=[(x, (x - 1))], no_default_updates=[y])
        g4()
-        print x.get_value()
        assert x.get_value() == 2
        g5 = pfunc([], [x], updates=[(x, (x - 1))], no_default_updates=[])
        g5()
-        print x.get_value()
        assert x.get_value() == 1
        g5 = pfunc([], [x], updates=[(x, (x - 1))], no_default_updates=False)
        g5()
-        print x.get_value()
        assert x.get_value() == 0
    def test_default_updates_expressions(self):
@@ -473,17 +459,14 @@ class Test_pfunc(unittest.TestCase):
        f1 = pfunc([a], z)
        f1(12)
-        print x
        assert x.get_value() == 1
        f2 = pfunc([a], z, no_default_updates=True)
        assert f2(7) == 7
-        print x
        assert x.get_value() == 1
        f3 = pfunc([a], z, no_default_updates=[x])
        assert f3(9) == 9
-        print x
        assert x.get_value() == 1
    def test_default_updates_multiple(self):
@@ -524,7 +507,6 @@ class Test_pfunc(unittest.TestCase):
        f1 = pfunc([], [x])
        f1()
-        print x.get_value(), y.get_value(), z.get_value()
        assert x.get_value() == 1
        assert y.get_value() == -1
        assert z.get_value() == -2
@@ -598,10 +580,8 @@ class Test_pfunc(unittest.TestCase):
        b = 2 * a
        # Use only the tip of the graph, a is not used
        f = pfunc([b], b)
-        print 'a.get_value() =', a.get_value()
        assert a.get_value() == 0
        f(21)
-        print 'a.get_value() =', a.get_value()
        assert a.get_value() == 0
    def test_givens_replaces_shared_variable(self):
@@ -917,7 +897,7 @@ class Test_aliasing_rules(unittest.TestCase):
        data_of_b = data_of(B)
        f = pfunc([], [], updates=[(A, B[:, ::-1]), (B, A.T)])
-        theano.printing.debugprint(f)
+        #theano.printing.debugprint(f)
        f()
        # correctness (doesn't actually test the view...)
        assert numpy.all(data_of(A) == -.5)
@@ -938,7 +918,6 @@ class Test_aliasing_rules(unittest.TestCase):
            assert numpy.all(data_of(B) < 5)
            data_of_a += 10
-            print data_of(B)
            assert numpy.all(data_of(B) > 5)
            data_of_a -= 10

--- a/theano/gof/tests/test_cc.py
+++ b/theano/gof/tests/test_cc.py
@@ -195,8 +195,8 @@ def test_clinker_literal_inlining():
    fn = lnk.make_function()
    assert abs(fn(2.0, 2.0) + 0.12345678) < 1e-9
    code = lnk.code_gen()
-    print "=== Code generated ==="
+    #print "=== Code generated ==="
-    print code
+    #print code
    assert "4.12345678" in code  # we expect the number to be inlined

--- a/theano/gof/tests/test_destroyhandler.py
+++ b/theano/gof/tests/test_destroyhandler.py
@@ -110,22 +110,22 @@ class FailureWatch:
 def consistent(g):
-    print "Testing consistent:", g
+    #print "Testing consistent:", g
    try:
        assert g.consistent()
    except AssertionError:
        print "Test failed! The graph was marked as NOT consistent."
        raise
-    print "Test OK"
+    #print "Test OK"
 def inconsistent(g):
-    print "Testing NOT consistent:", g
+    #print "Testing NOT consistent:", g
    try:
        assert not g.consistent()
    except AssertionError:
        print "Test failed! The graph was marked as consistent."
        raise
-    print "Test OK"
+    #print "Test OK"

--- a/theano/gof/tests/test_vm.py
+++ b/theano/gof/tests/test_vm.py
@@ -74,10 +74,10 @@ def test_speed():
        numpy_version(x, steps_a)
        t0 = time.time()
-        print numpy_version(x, steps_a)
+        #print numpy_version(x, steps_a)
        t1 = time.time()
        t2 = time.time()
-        print numpy_version(x, steps_b)
+        #print numpy_version(x, steps_b)
        t3 = time.time()
        t_a = t1 - t0
        t_b = t3 - t2
@@ -103,15 +103,15 @@ def test_speed():
                #profile='f_b speed test %s'%name,
                )
-        print f_a([2.0, 3.0])
+        f_a([2.0, 3.0])
        t0 = time.time()
-        print f_a([2.0, 3.0])
+        f_a([2.0, 3.0])
        t1 = time.time()
-        print f_b([2.0, 3.0])
+        f_b([2.0, 3.0])
        t2 = time.time()
-        print f_b([2.0, 3.0])
+        f_b([2.0, 3.0])
        t3 = time.time()
        t_a = t1 - t0
@@ -155,15 +155,15 @@ def test_speed_lazy():
                #profile='f_b lazy ifelse %s'%name,
                )
-        print f_a([2.0])
+        f_a([2.0])
        t0 = time.time()
-        print f_a([2.0])
+        f_a([2.0])
        t1 = time.time()
-        print f_b([2.0])
+        f_b([2.0])
        t2 = time.time()
-        print f_b([2.0])
+        f_b([2.0])
        t3 = time.time()
        t_a = t1 - t0

--- a/theano/misc/pycuda_init.py
+++ b/theano/misc/pycuda_init.py
 import os
+import warnings
 import theano
 import theano.sandbox.cuda as cuda
+cuda_ndarray = cuda.cuda_ndarray.cuda_ndarray
-def select_gpu_from_theano():
+def set_gpu_from_theano():
-    # Transfer the theano gpu binding to pycuda, for consistency
+    """
-    theano_to_pycuda_device_map = {"cpu": "0",
+    Set the GPU used by PyCUDA to the same one that Theano uses.
-                                   "gpu0": "0",
+    """
-                                   "gpu1": "1",
+    #import pdb;pdb.set_trace()
-                                   "gpu2": "2",
+    if cuda.use.device_number is None:
-                                   "gpu3": "3"}
+        cuda.use("gpu",
-    dev = theano_to_pycuda_device_map.get(theano.config.device, "0")
+                 force=False,
-    if theano.config.device == 'gpu':
+                 default_to_move_computation_to_gpu=False,
-        dev = str(cuda.cuda_ndarray.cuda_ndarray.active_device_number())
+                 move_shared_float32_to_gpu=False,
-    os.environ["CUDA_DEVICE"] = dev
+                 enable_cuda=True,
+                 test_driver=True)
-select_gpu_from_theano()
+    assert cuda.use.device_number == cuda_ndarray.active_device_number()
+#    os.environ["CUDA_DEVICE"] = str(cuda.use.device_number)
+set_gpu_from_theano()
 pycuda_available = False
-try:
+if False:
+    try:
        import pycuda
        import pycuda.autoinit
        pycuda_available = True
-except ImportError:
+    except ImportError:
        # presumably, the user wanted to use pycuda, else they wouldn't have
        # imported this module, so issue a warning that the import failed.
-    import warnings
        warnings.warn("PyCUDA import failed in theano.misc.pycuda_init")
--- a/theano/sandbox/cuda/__init__.py
+++ b/theano/sandbox/cuda/__init__.py
@@ -313,11 +313,15 @@ def use(device,
                gpu_init(device)
                use.device_number = device
            else:
-                # This mean we let the driver select the GPU.
+                # This means the driver should select the GPU.  As we
-                # But default it is always number 0.
+                # need to get the device number now, we force the
-                # If the driver is in exclusive mode, it will always show
+                # selection of the GPU by the driver now and then we
-                # device 0 event if it use something else.
+                # query the active GPU. If we check the active GPU before
-                use.device_number = 0
+                # the device is initialized we will always receive 0
+                # even if another device is selected later.
+                cuda_ndarray.cuda_ndarray.CudaNdarray.zeros((2, 3))
+                use.device_number = active_device_number()
            if test_driver:
                import theano.sandbox.cuda.tests.test_driver
                theano.sandbox.cuda.tests.test_driver.test_nvidia_driver1()

--- a/theano/sandbox/cuda/tests/test_blas.py
+++ b/theano/sandbox/cuda/tests/test_blas.py
@@ -238,7 +238,7 @@ if 0:
                bval = numpy.arange(0,d0*d1).reshape(1,1,d0,d1)
                r = f(bval)[0]
    #            print bval, bval.shape, border
-                print r, r.shape
+                #print r, r.shape
                assert (ret==r).all()
@@ -284,7 +284,7 @@ def test_downsample():
            if float(shp[3]) / ds[1] > 512:
                continue
            for ignore_border in (True, False):
-                print 'test_downsample', shp, ds, ignore_border
+                #print 'test_downsample', shp, ds, ignore_border
                ds_op = DownsampleFactorMax(ds, ignore_border=ignore_border)
                a = tcn.shared_constructor(my_rand(*shp), 'a')

--- a/theano/sandbox/cuda/tests/test_cuda_ndarray.py
+++ b/theano/sandbox/cuda/tests/test_cuda_ndarray.py
@@ -30,7 +30,7 @@ def advantage(cpu_dt, gpu_dt):
        return cpu_dt / gpu_dt
 def test_host_to_device():
-    print >>sys.stdout, 'starting test_host_to_dev'
+    #print >>sys.stdout, 'starting test_host_to_dev'
    for shape in ((), (3,), (2,3), (3,4,5,6)):
        a = theano._asarray(numpy.random.rand(*shape), dtype='float32')
        b = cuda_ndarray.CudaNdarray(a)
@@ -84,7 +84,7 @@ def test_add_iadd_idiv():
            asum = a0 + a1
            t1 = time.time()
            cpu_dt = t1 - t0
-            print shape, 'adding ', a0.size, 'cpu', cpu_dt, 'advantage', advantage(cpu_dt, gpu_dt)
+            #print shape, 'adding ', a0.size, 'cpu', cpu_dt, 'advantage', advantage(cpu_dt, gpu_dt)
            assert numpy.allclose(asum,  numpy.asarray(bsum))
        #test not contiguous version.
@@ -122,7 +122,7 @@ def test_add_iadd_idiv():
        a0 += a1
        t1 = time.time()
        cpu_dt = t1 - t0
-        print shape, 'adding inplace', a0.size, 'cpu', cpu_dt, 'advantage', advantage(cpu_dt, gpu_dt)
+        #print shape, 'adding inplace', a0.size, 'cpu', cpu_dt, 'advantage', advantage(cpu_dt, gpu_dt)
        assert numpy.allclose(a0, numpy.asarray(b0))
        assert numpy.allclose(a0, a0_orig + a1)
@@ -144,7 +144,7 @@ def test_add_iadd_idiv():
        assert numpy.allclose(a0, ((a0_orig+a1)/a1+a1[..., ::-1])/a1[..., ::-1])
 def test_exp():
-    print >>sys.stdout, 'starting test_exp'
+    #print >>sys.stdout, 'starting test_exp'
    for shape in ((), (3,), (2,3), (1,10000000),(10,1000000), (100,100000),(1000,10000),(10000,1000)):
        a0 = theano._asarray(numpy.random.rand(*shape), dtype='float32')
        a1 = a0.copy()
@@ -158,26 +158,26 @@ def test_exp():
        asum = numpy.exp(a1)
        t1 = time.time()
        cpu_dt = t1 - t0
-        print shape, 'adding ', a0.size, 'cpu', cpu_dt, 'advantage', advantage(cpu_dt, gpu_dt)
+        #print shape, 'adding ', a0.size, 'cpu', cpu_dt, 'advantage', advantage(cpu_dt, gpu_dt)
        #c = numpy.asarray(b0+b1)
        if asum.shape:
            assert numpy.allclose(asum, numpy.asarray(bsum))
 def test_copy():
-    print >>sys.stdout, 'starting test_copy'
+    #print >>sys.stdout, 'starting test_copy'
    shape = (500,499)
    a = theano._asarray(numpy.random.rand(*shape), dtype='float32')
-    print >>sys.stdout, '.. creating device object'
+    #print >>sys.stdout, '.. creating device object'
    b = cuda_ndarray.CudaNdarray(a)
-    print >>sys.stdout, '.. copy'
+    #print >>sys.stdout, '.. copy'
    c = copy.copy(b)
-    print >>sys.stdout, '.. deepcopy'
+    #print >>sys.stdout, '.. deepcopy'
    d = copy.deepcopy(b)
-    print >>sys.stdout, '.. comparisons'
+    #print >>sys.stdout, '.. comparisons'
    assert numpy.allclose(a, numpy.asarray(b))
    assert numpy.allclose(a, numpy.asarray(c))
    assert numpy.allclose(a, numpy.asarray(d))
@@ -268,7 +268,7 @@ class test_DimShuffle(unittest.TestCase):
 def test_dot():
-    print >>sys.stdout, 'starting test_dot'
+    #print >>sys.stdout, 'starting test_dot'
    utt.seed_rng()
    rng = numpy.random.RandomState(utt.fetch_seed())
@@ -320,8 +320,8 @@ def test_sum():
    a0sum = a0.sum(axis=0)
    b0sum = b0.reduce_sum([1,0])
-    print 'asum\n',a0sum
+    #print 'asum\n',a0sum
-    print 'bsum\n',numpy.asarray(b0sum)
+    #print 'bsum\n',numpy.asarray(b0sum)
    assert numpy.allclose(a0.sum(axis=0), numpy.asarray(b0.reduce_sum([1,0])))
    assert numpy.allclose(a0.sum(axis=1), numpy.asarray(b0.reduce_sum([0,1])))
@@ -932,7 +932,7 @@ def test_base():
    c = a[0]
    d = c[:,0]
-    print d.shape
+    #print d.shape
    assert c.base is a
    assert d.base is a

--- a/theano/sandbox/cuda/tests/test_mlp.py
+++ b/theano/sandbox/cuda/tests/test_mlp.py
@@ -103,7 +103,7 @@ def run_nnet(use_gpu, n_batch=60, n_in=1024, n_hid=2048, n_out=10,
    mode = get_mode(use_gpu)
-    print 'building pfunc ...'
+    #print 'building pfunc ...'
    train = pfunc([x, y, lr], [loss], mode=mode,
                  updates=[(p, p - g) for p, g in izip(params, gparams)])
@@ -138,9 +138,9 @@ def test_run_nnet():
                    theano.gradient.numeric_grad.abs_rel_err(rval_gpu,
                                                             rval_cpu)
            max_abs_diff = abs_diff.max()
-            print "max abs diff=%e max rel diff=%e n_in=%d n_hid=%d" % (
+            #print "max abs diff=%e max rel diff=%e n_in=%d n_hid=%d" % (
-                max_abs_diff, rel_diff.max(), n_in, n_hid)
+            #    max_abs_diff, rel_diff.max(), n_in, n_hid)
-            print "time cpu: %f, time gpu: %f, speed up %f" % (tc, tg, tc / tg)
+            #print "time cpu: %f, time gpu: %f, speed up %f" % (tc, tg, tc / tg)
            rtol = 1e-4
            if n_in * n_hid >= 2048 * 4096:
                rtol = 7e-4
@@ -192,14 +192,14 @@ def run_conv_nnet1(use_gpu):
    hid_flat = hid.reshape((n_batch, n_hid))
    out = tensor.tanh(tensor.dot(hid_flat, v)+c)
    loss = tensor.sum(0.5 * (out-y)**2 * lr)
-    print 'loss type', loss.type
+    #print 'loss type', loss.type
    params = [w, b, v, c]
    gparams = tensor.grad(loss, params)
    mode = get_mode(use_gpu)
-    print 'building pfunc ...'
+    #print 'building pfunc ...'
    train = pfunc([x,y,lr], [loss], mode=mode, updates=[(p, p-g) for p,g in zip(params, gparams)])
 #    for i, n in enumerate(train.maker.env.toposort()):
@@ -211,7 +211,7 @@ def run_conv_nnet1(use_gpu):
    for i in xrange(n_train):
        rval = train(xval, yval, lr)
-    print 'training done'
+    #print 'training done'
    print_mode(mode)
    return rval
@@ -281,14 +281,14 @@ def run_conv_nnet2(use_gpu): # pretend we are training LeNet for MNIST
    hid_flat = hid1.reshape((n_batch, n_hid))
    out = tensor.tanh(tensor.dot(hid_flat, v)+c)
    loss = tensor.sum(0.5 * (out-y)**2 * lr)
-    print 'loss type', loss.type
+    #print 'loss type', loss.type
    params = [w0, b0, w1, b1, v, c]
    gparams = tensor.grad(loss, params)
    mode = get_mode(use_gpu)
-    print 'building pfunc ...'
+    #print 'building pfunc ...'
    train = pfunc([x,y,lr], [loss], mode=mode, updates=[(p, p-g) for p,g in zip(params, gparams)])
 #    for i, n in enumerate(train.maker.env.toposort()):
@@ -310,7 +310,7 @@ def test_conv_nnet2():
    if True:
        utt.seed_rng()
        rval_cpu = run_conv_nnet2(False)
-        print rval_cpu[0], rval_gpu[0],rval_cpu[0]-rval_gpu[0]
+        #print rval_cpu[0], rval_gpu[0],rval_cpu[0]-rval_gpu[0]
        assert numpy.allclose(rval_cpu, rval_gpu,rtol=1e-4,atol=1e-4)
@@ -350,9 +350,9 @@ def build_conv_nnet2_classif(use_gpu, isize, ksize, n_batch,
    v = shared_fn(0.01*my_randn(n_hid, n_out), 'v')
    c = shared_fn(my_zeros(n_out), 'c')
-    print 'ALLOCATING ARCH: w0 shape', w0.get_value(borrow=True).shape
+    #print 'ALLOCATING ARCH: w0 shape', w0.get_value(borrow=True).shape
-    print 'ALLOCATING ARCH: w1 shape', w1.get_value(borrow=True).shape
+    #print 'ALLOCATING ARCH: w1 shape', w1.get_value(borrow=True).shape
-    print 'ALLOCATING ARCH: v shape', v.get_value(borrow=True).shape
+    #print 'ALLOCATING ARCH: v shape', v.get_value(borrow=True).shape
    x = tensor.Tensor(dtype='float32', broadcastable=(0,1,0,0))('x')
    y = tensor.fmatrix('y')
@@ -375,14 +375,14 @@ def build_conv_nnet2_classif(use_gpu, isize, ksize, n_batch,
    hid_flat = hid1.reshape((n_batch, n_hid))
    out = tensor.nnet.softmax(tensor.dot(hid_flat, v)+c)
    loss = tensor.sum(tensor.nnet.crossentropy_categorical_1hot(out, tensor.argmax(y, axis=1)) * lr)
-    print 'loss type', loss.type
+    #print 'loss type', loss.type
    params = [w0, b0, w1, b1, v, c]
    gparams = tensor.grad(loss, params, warn_type=True)
    mode = get_mode(use_gpu, check_isfinite)
-    print 'building pfunc ...'
+    #print 'building pfunc ...'
    train = pfunc([x,y,lr], [loss], mode=mode, updates=[(p, p-g) for p,g in zip(params, gparams)])
    if verbose:
@@ -437,9 +437,9 @@ def run_conv_nnet2_classif(use_gpu, seed, isize, ksize, bsize,
        print pickle.dumps(mode)
        print "END %s profile mode dump" % device
-    print "%s time: %.3f" % (device, t1-t0)
+    #print "%s time: %.3f" % (device, t1-t0)
-    print "estimated time for one pass through MNIST with %s: %f" % (
+    #print "estimated time for one pass through MNIST with %s: %f" % (
-            device, (t1-t0) * (60000.0 / (n_train*bsize)))
+    #        device, (t1-t0) * (60000.0 / (n_train*bsize)))
 def cmp_run_conv_nnet2_classif(seed, isize, ksize, bsize,
@@ -465,7 +465,7 @@ def cmp_run_conv_nnet2_classif(seed, isize, ksize, bsize,
    orig_float32_atol = theano.tensor.basic.float32_atol
    try:
        if float_atol:
-            print "float_atol", float_atol
+            #print "float_atol", float_atol
            theano.tensor.basic.float32_atol = float_atol
        if gpu_only and cpu_only:
@@ -565,12 +565,12 @@ def cmp_run_conv_nnet2_classif(seed, isize, ksize, bsize,
            print pickle.dumps(gpu_mode)
            print "END GPU profile mode dump"
-    print "CPU time: %.3f, GPU time: %.3f, speed up %f" % (
+    #print "CPU time: %.3f, GPU time: %.3f, speed up %f" % (
-            (time_cpu, time_gpu, time_cpu/time_gpu))
+    #        (time_cpu, time_gpu, time_cpu/time_gpu))
-    print "Estimated time for one pass through MNIST with CPU: %f" % (
+    #print "Estimated time for one pass through MNIST with CPU: %f" % (
-            (time_cpu * (60000.0 / (n_train*bsize))))
+    #        (time_cpu * (60000.0 / (n_train*bsize))))
-    print "Estimated time for one pass through MNIST with GPU: %f" % (
+    #print "Estimated time for one pass through MNIST with GPU: %f" % (
-            (time_gpu * (60000.0 / (n_train*bsize))))
+    #        (time_gpu * (60000.0 / (n_train*bsize))))
 # Default parameters for all subsequent tests

--- a/theano/sparse/sandbox/sp2.py
+++ b/theano/sparse/sandbox/sp2.py
@@ -497,6 +497,13 @@ class StrucutedAddSVCSR(gof.Op):
        return hash(type(self))
    def make_node(self, a_data, a_indices, a_indptr, b):
+        b = tensor.as_tensor_variable(b)
+        a_data = tensor.as_tensor_variable(a_data)
+        a_indices = tensor.as_tensor_variable(a_indices)
+        a_indptr = tensor.as_tensor_variable(a_indptr)
+        assert a_data.type.ndim == 1
+        assert a_indices.type.ndim == 1
+        assert a_indptr.type.ndim == 1
        assert b.type.ndim == 1
        return gof.Apply(self, [a_data, a_indices, a_indptr, b],
                               [tensor.tensor(b.dtype, (False,))])

--- a/theano/tensor/nnet/tests/test_conv3d.py
+++ b/theano/tensor/nnet/tests/test_conv3d.py
@@ -335,7 +335,7 @@ class TestConv3D(unittest.TestCase):
        col_steps  = self.rng.randint(1,4)
        time_steps = self.rng.randint(1,4)
-        print (row_steps,col_steps,time_steps)
+        #print (row_steps,col_steps,time_steps)
        videoDur    = (time_steps-1)*dt+filterDur   + self.rng.randint(0,3)
        videoWidth  = (col_steps-1)*dc+filterWidth  + self.rng.randint(0,3)

--- a/theano/tensor/nnet/tests/test_nnet.py
+++ b/theano/tensor/nnet/tests/test_nnet.py
@@ -112,8 +112,8 @@ class T_SoftmaxWithBias(unittest.TestCase):
        assert softmax_with_bias not in ops
        assert softmax in ops
-        print f([0,1,0])
+        f([0,1,0])
-        print f.maker.env.toposort()
+        #print f.maker.env.toposort()
    def test_infer_shape(self):
        fff=theano.function([],outputs=softmax_with_bias(numpy.random.rand(3,4),numpy.random.rand(4)).shape)
@@ -299,20 +299,20 @@ class T_CrossentropyCategorical1Hot(unittest.TestCase):
                [op(softmax(x+b), one_of_n)])
        assert env.outputs[0].owner.op == op
-        print 'BEFORE'
+        #print 'BEFORE'
-        for node in env.toposort():
+        #for node in env.toposort():
-            print node.op
+        #    print node.op
-        print printing.pprint(node.outputs[0])
+        #print printing.pprint(node.outputs[0])
-        print '----'
+        #print '----'
        theano.compile.mode.optdb.query(
                theano.compile.mode.OPT_FAST_RUN).optimize(env)
-        print 'AFTER'
+        #print 'AFTER'
-        for node in env.toposort():
+        #for node in env.toposort():
-            print node.op
+        #    print node.op
-        print printing.pprint(node.outputs[0])
+        #print printing.pprint(node.outputs[0])
-        print '===='
+        #print '===='
        assert len(env.toposort()) == 2
        assert str(env.outputs[0].owner.op) == 'OutputGuard'
@@ -330,18 +330,18 @@ class T_CrossentropyCategorical1Hot(unittest.TestCase):
                [op(softmax(T.add(x,b,c)), one_of_n)])
        assert env.outputs[0].owner.op == op
-        print 'BEFORE'
+        #print 'BEFORE'
-        for node in env.toposort():
+        #for node in env.toposort():
-            print node.op
+        #    print node.op
-        print '----'
+        #print '----'
        theano.compile.mode.optdb.query(
                theano.compile.mode.OPT_FAST_RUN).optimize(env)
-        print 'AFTER'
+        #print 'AFTER'
-        for node in env.toposort():
+        #for node in env.toposort():
-            print node.op
+        #    print node.op
-        print '===='
+        #print '===='
        assert len(env.toposort()) == 3
        assert str(env.outputs[0].owner.op) == 'OutputGuard'
@@ -356,18 +356,18 @@ class T_CrossentropyCategorical1Hot(unittest.TestCase):
                [x, b, one_of_n],
                [op(softmax(x+b), one_of_n)])
        assert env.outputs[0].owner.op == op
-        print 'BEFORE'
+        #print 'BEFORE'
-        for node in env.toposort():
+        #for node in env.toposort():
-            print node.op
+        #    print node.op
-        print printing.pprint(node.outputs[0])
+        #print printing.pprint(node.outputs[0])
-        print '----'
+        #print '----'
        theano.compile.mode.optdb.query(
                theano.compile.mode.OPT_FAST_RUN).optimize(env)
-        print 'AFTER'
+        #print 'AFTER'
-        for node in env.toposort():
+        #for node in env.toposort():
-            print node.op
+        #    print node.op
-        print '===='
+        #print '===='
        assert len(env.toposort()) == 3
        assert str(env.outputs[0].owner.op) == 'OutputGuard'
        assert env.outputs[0].owner.inputs[0].owner.op == crossentropy_softmax_argmax_1hot_with_bias
@@ -385,16 +385,16 @@ class T_CrossentropyCategorical1Hot(unittest.TestCase):
                [x, one_of_n],
                [g_x])
-        print 'BEFORE'
+        #print 'BEFORE'
-        for node in env.toposort():
+        #for node in env.toposort():
-            print node.op, node.inputs
+        #    print node.op, node.inputs
-        print '----'
+        #print '----'
        theano.compile.mode.optdb.query(
                theano.compile.mode.OPT_FAST_RUN).optimize(env)
-        print 'AFTER'
+        #print 'AFTER'
-        for node in env.toposort():
+        #for node in env.toposort():
-            print node.op, node.inputs
+        #    print node.op, node.inputs
        # the function has 9 ops because the dimshuffle and elemwise{second} aren't getting
        # cleaned up as well as we'd like.
@@ -428,16 +428,16 @@ class T_CrossentropyCategorical1Hot(unittest.TestCase):
                [x, one_of_n],
                [g_x])
-        print 'BEFORE'
+        #print 'BEFORE'
-        for node in env.toposort():
+        #for node in env.toposort():
-            print node.op, node.inputs
+        #    print node.op, node.inputs
-        print '----'
+        #print '----'
        theano.compile.mode.optdb.query(
                theano.compile.mode.OPT_FAST_RUN).optimize(env)
-        print 'AFTER'
+        #print 'AFTER'
-        for node in env.toposort():
+        #for node in env.toposort():
-            print node.op, node.inputs
+        #    print node.op, node.inputs
        # the function has 9 ops because the dimshuffle and elemwise{second} aren't getting
        # cleaned up as well as we'd like.
@@ -1021,9 +1021,9 @@ class Test_softmax_opt:
        # test that function contains softmax and no div.
        f = theano.function([c],p_y, mode=self.mode)
        f_ops = [n.op for n in f.maker.env.toposort()]
-        print '--- f ='
+        #print '--- f ='
-        printing.debugprint(f)
+        #printing.debugprint(f)
-        print '==='
+        #print '==='
        assert len(f_ops) == 1
        assert softmax in f_ops
        f(self.rng.rand(3,4).astype(config.floatX))
@@ -1041,9 +1041,9 @@ class Test_softmax_opt:
        finally:
            config.warn.sum_div_dimshuffle_bug = backup
        g_ops = [n.op for n in g.maker.env.toposort()]
-        print '--- g ='
+        #print '--- g ='
-        printing.debugprint(g)
+        #printing.debugprint(g)
-        print '==='
+        #print '==='
        raise SkipTest('Optimization not enabled for the moment')
        assert len(g_ops) == 2
@@ -1058,7 +1058,7 @@ class Test_softmax_opt:
        # test that function contains softmax and no div.
        f = theano.function([c],p_y)
-        printing.debugprint(f)
+        #printing.debugprint(f)
        # test that function contains softmax and no div.
        backup = config.warn.sum_div_dimshuffle_bug
@@ -1067,7 +1067,7 @@ class Test_softmax_opt:
            g = theano.function([c],T.grad(p_y.sum(), c))
        finally:
            config.warn.sum_div_dimshuffle_bug = backup
-        printing.debugprint(g)
+        #printing.debugprint(g)
        raise SkipTest('Optimization not enabled for the moment')
    def test_1d_basic(self):
@@ -1077,7 +1077,7 @@ class Test_softmax_opt:
        # test that function contains softmax and no div.
        f = theano.function([c], p_y)
-        printing.debugprint(f)
+        #printing.debugprint(f)
        # test that function contains softmax and no div.
        backup = config.warn.sum_div_dimshuffle_bug
@@ -1086,7 +1086,7 @@ class Test_softmax_opt:
            g = theano.function([c], T.grad(p_y.sum(), c))
        finally:
            config.warn.sum_div_dimshuffle_bug = backup
-        printing.debugprint(g)
+        #printing.debugprint(g)
        raise SkipTest('Optimization not enabled for the moment')
    # REPEAT 3 CASES in presence of log(softmax) with the advanced indexing etc.

--- a/theano/tensor/randomstreams.py
+++ b/theano/tensor/randomstreams.py
-"""Define RandomStreams, providing random number variables for Theano graphs."""
+"""Define RandomStreams, providing random number variables for Theano
+graphs.
+"""
 __docformat__ = "restructuredtext en"
 import sys
@@ -8,6 +11,7 @@ from theano.compile import module, In, Component
 from theano.gof import Container
 from theano.tensor import raw_random
 class RandomStreamsInstance(object):
    """RandomStreamsInstance"""
    def __init__(self, random_streams, memo, default_seed):
@@ -18,24 +22,26 @@ class RandomStreamsInstance(object):
    def initialize(self, seed=None):
        """Initialize each random stream
-        :param seed: each random stream will be assigned a unique state that depends
+        :param seed: each random stream will be assigned a unique
-        deterministically on this value.
+        state that depends deterministically on this value.
        :type seed: None or integer in range 0 to 2**30
        :rtype: None
        """
        self.seed(seed)
    def seed(self, seed=None):
        """Re-initialize each random stream
-        :param seed: each random stream will be assigned a unique state that depends
+        :param seed: each random stream will be assigned a unique
-        deterministically on this value.
+        state that depends deterministically on this value.
        :type seed: None or integer in range 0 to 2**30
        :rtype: None
        """
        if seed is None:
            seed = self.default_seed
@@ -43,19 +49,24 @@ class RandomStreamsInstance(object):
        #seed = self.default_seed if seed is None else seed
        seedgen = numpy.random.RandomState(seed)
        for old_r, new_r in self.random_streams.random_state_variables:
-            old_r_seed = seedgen.randint(2**30)
+            old_r_seed = seedgen.randint(2 ** 30)
            old_r_container = self.memo[old_r].value
            if old_r_container.value is None:
-                #the cast to int here makes it work on 32bit machines, not sure why
+                #the cast to int here makes it work on 32bit machines,
-                old_r_container.value = numpy.random.RandomState(int(old_r_seed))
+                #not sure why
+                old_r_container.value = numpy.random.RandomState(
+                    int(old_r_seed))
            else:
-                #the cast to int here makes it work on 32bit machines, not sure why
+                #the cast to int here makes it work on 32bit machines,
+                #not sure why
                old_r_container.value.seed(int(old_r_seed))
    def __getitem__(self, item):
-        """Retrieve the numpy RandomState instance associated with a particular stream
+        """Retrieve the numpy RandomState instance associated with a
+        particular stream
-        :param item: a variable of type RandomStateType, associated with this RandomStream
+        :param item: a variable of type RandomStateType, associated
+        with this RandomStream
        :rtype: numpy RandomState (or None, before initialize)
@@ -67,9 +78,11 @@ class RandomStreamsInstance(object):
        raise KeyError(item)
    def __setitem__(self, item, val):
-        """Set the numpy RandomState instance associated with a particular stream
+        """Set the numpy RandomState instance associated with a
+        particular stream
-        :param item: a variable of type RandomStateType, associated with this RandomStream
+        :param item: a variable of type RandomStateType, associated
+        with this RandomStream
        :param val: the new value
        :type val: numpy RandomState
@@ -78,7 +91,8 @@ class RandomStreamsInstance(object):
        """
        if type(val) is not numpy.random.RandomState:
-            raise TypeError('only values of type RandomState are permitted', val)
+            raise TypeError('only values of type RandomState are permitted',
+                            val)
        for old_r, new_r in self.random_streams.random_state_variables:
            if item is old_r:
                container = self.memo[item].value
@@ -86,24 +100,34 @@ class RandomStreamsInstance(object):
                return
        raise KeyError(item)
 class RandomStreams(Component, raw_random.RandomStreamsBase):
-    """Module component with similar interface to numpy.random (numpy.random.RandomState)"""
+    """Module component with similar interface to numpy.random
+    (numpy.random.RandomState)
+    """
    random_state_variables = []
-    """A list of pairs of the form (input_r, output_r).  This will be over-ridden by the module
+    """A list of pairs of the form (input_r, output_r).  This will be
-    instance to contain stream generators.
+    over-ridden by the module instance to contain stream
+    generators.
    """
    default_instance_seed = None
-    """Instance variable should take None or integer value.  Used to seed the random number
+    """Instance variable should take None or integer value.  Used to
-    generator that provides seeds for member streams"""
+    seed the random number generator that provides seeds for member
+    streams
-    def __init__(self, seed=None):
    """
-        :type seed: None or int
-        :param seed: a default seed to initialize the RandomState instances after build.  See
+    def __init__(self, seed=None):
-        `RandomStreamsInstance.__init__` for more details.
+        """:type seed: None or int
+        :param seed: a default seed to initialize the RandomState
+        instances after build.  See `RandomStreamsInstance.__init__`
+        for more details.
        """
        super(RandomStreams, self).__init__()
        self.random_state_variables = []
@@ -124,7 +148,8 @@ class RandomStreams(Component, raw_random.RandomStreamsBase):
        """override `Component.build` """
        if self not in memo:
            print 'creating RandomStreamsInstance'
-            memo[self] = RandomStreamsInstance(self, memo, self.default_instance_seed)
+            memo[self] = RandomStreamsInstance(self, memo,
+                                               self.default_instance_seed)
        return memo[self]
    def gen(self, op, *args, **kwargs):
@@ -136,14 +161,15 @@ class RandomStreams(Component, raw_random.RandomStreamsBase):
        :param kwargs: interpreted by `op`
-        :returns: The symbolic random draw part of op()'s return value.  This function stores
+        :returns: The symbolic random draw part of op()'s return
-        the updated RandomStateType Variable for use at `build` time.
+        value.  This function stores the updated RandomStateType
+        Variable for use at `build` time.
        :rtype: TensorVariable
        """
        random_state_variable = raw_random.random_state_type()
        new_r, out = op(random_state_variable, *args, **kwargs)
        out.rng = random_state_variable
        self.random_state_variables.append((random_state_variable, new_r))
        return out
--- a/theano/tensor/raw_random.py
+++ b/theano/tensor/raw_random.py
@@ -2,14 +2,17 @@
 __docformat__ = "restructuredtext en"
 import sys
 from copy import copy
 import numpy
 #local imports
+import theano
 import basic as tensor
-import opt, theano
+import opt
 from theano import gof
 from theano.compile import optdb
 class RandomStateType(gof.Type):
    """A Type wrapper for numpy.RandomState
@@ -157,8 +160,8 @@ class RandomFunction(gof.Op):
            print >> sys.stderr, 'WARNING: RandomState instances should be in RandomStateType'
            if 0:
                raise TypeError('r must be RandomStateType instance', r)
-        # the following doesn't work because we want to ignore the broadcastable flags in
+        # the following doesn't work because we want to ignore the
-        # shape.type
+        # broadcastable flags in shape.type
        # assert shape.type == tensor.lvector
        # convert args to TensorType instances
@@ -173,7 +176,7 @@ class RandomFunction(gof.Op):
        r, shp = node.inputs[0:2]
        #if shp is a constant array of len 0, then it means 'automatic shape'
-        unknown_shape = len(getattr(shp, 'data', [0,1,2])) == 0
+        unknown_shape = len(getattr(shp, 'data', [0, 1, 2])) == 0
        # if ndim_added == 0 and shape != () then shape
        if self.ndim_added == 0 and not unknown_shape:
@@ -188,8 +191,8 @@ class RandomFunction(gof.Op):
    def perform(self, node, inputs, out_):
        rout, out = out_
        # Use self.fn to draw shape worth of random numbers.
-        # Numbers are drawn from r if self.inplace is True, and from a copy of r if
+        # Numbers are drawn from r if self.inplace is True, and from a
-        # self.inplace is False
+        # copy of r if self.inplace is False
        r, shape, args = inputs[0], inputs[1], inputs[2:]
        assert type(r) == numpy.random.RandomState, (type(r), r)
        r_orig = r
@@ -203,34 +206,44 @@ class RandomFunction(gof.Op):
        else:
            shape = tuple(shape)
-        if shape is not None and self.outtype.ndim != len(shape) + self.ndim_added:
+        if (shape is not None and
-            raise ValueError('Shape mismatch: self.outtype.ndim (%i) != len(shape) (%i) + self.ndim_added (%i)'\
+            self.outtype.ndim != len(shape) + self.ndim_added):
-                    %(self.outtype.ndim, len(shape), self.ndim_added))
+            raise ValueError('Shape mismatch: self.outtype.ndim (%i) !='
+                             ' len(shape) (%i) + self.ndim_added (%i)'
+                            % (self.outtype.ndim, len(shape), self.ndim_added))
        if not self.inplace:
            r = copy(r)
        rout[0] = r
        rval = self.fn(r, *(args + [shape]))
        if not isinstance(rval, numpy.ndarray) \
               or str(rval.dtype) != node.outputs[1].type.dtype:
-            rval = theano._asarray(rval, dtype = node.outputs[1].type.dtype)
+            rval = theano._asarray(rval, dtype=node.outputs[1].type.dtype)
        # When shape is None, numpy has a tendency to unexpectedly
        # return a scalar instead of a higher-dimension array containing
        # only one element. This value should be reshaped
        if shape is None and rval.ndim == 0 and self.outtype.ndim > 0:
-            rval = rval.reshape([1]*self.outtype.ndim)
+            rval = rval.reshape([1] * self.outtype.ndim)
        if len(rval.shape) != self.outtype.ndim:
-            raise ValueError('Shape mismatch: "out" should have dimension %i, but the value produced by "perform" has dimension %i'\
+            raise ValueError('Shape mismatch: "out" should have dimension %i,'
+                             ' but the value produced by "perform" has'
+                             ' dimension %i'
                             % (self.outtype.ndim, len(rval.shape)))
        # Check the output has the right shape
        if shape is not None:
            if self.ndim_added == 0 and shape != rval.shape:
-                raise ValueError('Shape mismatch: "out" should have shape %s, but the value produced by "perform" has shape %s'\
+                raise ValueError(
+                    'Shape mismatch: "out" should have shape %s, but the'
+                    ' value produced by "perform" has shape %s'
                    % (shape, rval.shape))
-            elif self.ndim_added > 0 and shape != rval.shape[:-self.ndim_added]:
+            elif (self.ndim_added > 0 and
-                raise ValueError('Shape mismatch: "out" should have shape starting with %s (plus %i extra dimensions), but the value produced by "perform" has shape %s'\
+                  shape != rval.shape[:-self.ndim_added]):
+                raise ValueError(
+                    'Shape mismatch: "out" should have shape starting with'
+                    ' %s (plus %i extra dimensions), but the value produced'
+                    ' by "perform" has shape %s'
                    % (shape, self.ndim_added, rval.shape))
        out[0] = rval
@@ -260,9 +273,11 @@ def _infer_ndim_bcast(ndim, shape, *args):
    # there is a convention that -1 means the corresponding shape of a
    # potentially-broadcasted symbolic arg
    if (isinstance(shape, (tuple, list))
-            and numpy.all(numpy.asarray(shape)>=0)):
+            and numpy.all(numpy.asarray(shape) >= 0)):
-        bcast = [(s==1) for s in shape]
+        bcast = [(s == 1) for s in shape]
-        v_shape = tensor.TensorConstant(type=tensor.lvector, data=theano._asarray(shape, dtype='int64'))
+        v_shape = tensor.TensorConstant(type=tensor.lvector,
+                                        data=theano._asarray(shape,
+                                                             dtype='int64'))
        shape_ndim = len(shape)
        if ndim is None:
            ndim = shape_ndim
@@ -278,21 +293,21 @@ def _infer_ndim_bcast(ndim, shape, *args):
        # This case combines together symbolic and non-symbolic shape
        # information
        if ndim is None:
-            ndim=args_ndim
+            ndim = args_ndim
        else:
            ndim = max(args_ndim, ndim)
        ndim = max(args_ndim, len(shape))
-        shape = [-1]*(ndim - len(shape))+list(shape)
+        shape = [-1] * (ndim - len(shape)) + list(shape)
        bcast = []
        pre_v_shape = []
-        for i,s in enumerate(shape):
+        for i, s in enumerate(shape):
            if hasattr(s, 'type'):  # s is symbolic
                bcast.append(False)  # todo - introspect further
                pre_v_shape.append(s)
            else:
                if s >= 0:
                    pre_v_shape.append(tensor.as_tensor_variable(s))
-                    bcast.append((s==1))
+                    bcast.append((s == 1))
                elif s == -1:
                    n_a_i = 0
                    for a in args:
@@ -301,7 +316,7 @@ def _infer_ndim_bcast(ndim, shape, *args):
                        #           i
                        if i >= ndim - a.ndim:
                            n_a_i += 1
-                            a_i = i + a.ndim -ndim
+                            a_i = i + a.ndim - ndim
                            if not a.broadcastable[a_i]:
                                pre_v_shape.append(a.shape[a_i])
                                bcast.append(False)
@@ -316,7 +331,8 @@ def _infer_ndim_bcast(ndim, shape, *args):
                            bcast.append(True)
                else:
                    ValueError('negative shape', s)
-        # post-condition: shape may still contain both symbolic and non-symbolic things
+        # post-condition: shape may still contain both symbolic and
+        # non-symbolic things
        v_shape = tensor.stack(*pre_v_shape)
    elif shape is None:
@@ -325,7 +341,7 @@ def _infer_ndim_bcast(ndim, shape, *args):
        if not args:
            raise TypeError(('_infer_ndim_bcast cannot infer shape without'
                ' either shape or args'))
-        template = reduce(lambda a,b:a+b, args)
+        template = reduce(lambda a, b: a + b, args)
        v_shape = template.shape
        bcast = template.broadcastable
        ndim = template.ndim
@@ -333,18 +349,22 @@ def _infer_ndim_bcast(ndim, shape, *args):
        v_shape = tensor.as_tensor_variable(shape)
        if ndim is None:
            ndim = tensor.get_vector_length(v_shape)
-        bcast = [False]*ndim
+        bcast = [False] * ndim
-    if not (v_shape.dtype.startswith('int') or v_shape.dtype.startswith('uint')):
+    if (not (v_shape.dtype.startswith('int') or
-        raise TypeError('shape must be an integer vector or list', v_shape.dtype)
+             v_shape.dtype.startswith('uint'))):
+        raise TypeError('shape must be an integer vector or list',
+                        v_shape.dtype)
    if args_ndim > ndim:
-        raise ValueError('ndim should be at least as big as required by args value',
+        raise ValueError(
+            'ndim should be at least as big as required by args value',
            (ndim, args_ndim), args)
    assert ndim == len(bcast)
    return ndim, tensor.cast(v_shape, 'int32'), tuple(bcast)
 def _generate_broadcasting_indices(out_shape, *shapes):
    '''
    Return indices over each shape that broadcast them to match out_shape.
@@ -359,11 +379,11 @@ def _generate_broadcasting_indices(out_shape, *shapes):
    '''
    all_shapes = (out_shape,) + shapes
    # Will contain the return value: a list of indices for each argument
-    ret_indices = [ [()] for shape in all_shapes ]
+    ret_indices = [[()] for shape in all_shapes]
    for dim in xrange(len(out_shape)):
        # Temporary list to generate the indices
-        _ret_indices = [ [] for shape in all_shapes ]
+        _ret_indices = [[] for shape in all_shapes]
        out_range = range(out_shape[dim])
@@ -373,11 +393,14 @@ def _generate_broadcasting_indices(out_shape, *shapes):
        for shape in shapes:
            if shape[dim] == out_shape[dim]:
                ranges.append(out_range)
-            elif shape[dim] == 1: #broadcast
+            elif shape[dim] == 1:  # broadcast
                ranges.append([0] * out_shape[dim])
            else:
-                raise ValueError('shape[%i] (%i) should be equal to out_shape[%i] (%i) or to 1'\
+                raise ValueError(
-                         % (dim, shape[dim], dim, out_shape[dim]), shape, out_shape, shapes)
+                    'shape[%i] (%i) should be equal to out_shape[%i] (%i) or'
+                    ' to 1'
+                    % (dim, shape[dim], dim, out_shape[dim]), shape,
+                    out_shape, shapes)
        for prev_index in zip(*ret_indices):
            for dim_index in zip(*ranges):
@@ -435,7 +458,8 @@ def normal(random_state, size=None, avg=0.0, std=1.0, ndim=None, dtype=None):
    return op(random_state, size, avg, std)
-def binomial(random_state, size=None, n=1, p=0.5, ndim=None, dtype='int64', prob=None):
+def binomial(random_state, size=None, n=1, p=0.5, ndim=None,
+             dtype='int64', prob=None):
    """
    Sample n times with probability of success prob for each trial,
    return the number of successes.
@@ -452,7 +476,7 @@ def binomial(random_state, size=None, n=1, p=0.5, ndim=None, dtype='int64', prob
    n = tensor.as_tensor_variable(n)
    p = tensor.as_tensor_variable(p)
    ndim, size, bcast = _infer_ndim_bcast(ndim, size, n, p)
-    if n.dtype=='int64':
+    if n.dtype == 'int64':
        ### THIS WORKS AROUND A NUMPY BUG on 32bit machine
        ###  Erase when the following works on a 32bit machine:
        ###  numpy.random.binomial(
@@ -460,9 +484,10 @@ def binomial(random_state, size=None, n=1, p=0.5, ndim=None, dtype='int64', prob
        #          p=numpy.asarray([.1, .2, .3], dtype='float64'))
        n = tensor.cast(n, 'int32')
    op = RandomFunction('binomial',
-            tensor.TensorType(dtype = dtype, broadcastable = (False,)*ndim) )
+            tensor.TensorType(dtype=dtype, broadcastable=(False,) * ndim))
    return op(random_state, size, n, p)
 def random_integers_helper(random_state, low, high, size):
    '''
    Helper function to draw random integers.
@@ -477,16 +502,19 @@ def random_integers_helper(random_state, low, high, size):
        out_ndim = max(low.ndim, high.ndim)
    # broadcast low and high to out_ndim dimensions
    if low.ndim > out_ndim:
-        raise ValueError('low.ndim (%i) should not be larger than len(size) (%i)' % (low.ndim, out_ndim),
+        raise ValueError(
+            'low.ndim (%i) should not be larger than len(size) (%i)'
+            % (low.ndim, out_ndim),
            low, size)
    if low.ndim < out_ndim:
-        low = low.reshape((1,)*(out_ndim-low.ndim) + low.shape)
+        low = low.reshape((1,) * (out_ndim - low.ndim) + low.shape)
    if high.ndim > out_ndim:
-        raise ValueError('high.ndim (%i) should not be larger than len(size) (%i)' % (high.ndim, out_ndim),
+        raise ValueError(
-                high, size)
+            'high.ndim (%i) should not be larger than len(size) (%i)'
+            % (high.ndim, out_ndim), high, size)
    if high.ndim < out_ndim:
-        high = high.reshape((1,)*(out_ndim-high.ndim) + high.shape)
+        high = high.reshape((1,) * (out_ndim - high.ndim) + high.shape)
    if size is not None:
        out_size = tuple(size)
@@ -498,14 +526,17 @@ def random_integers_helper(random_state, low, high, size):
    # Build the indices over which to loop
    out = numpy.ndarray(out_size)
-    broadcast_ind = _generate_broadcasting_indices(out_size, low.shape, high.shape)
+    broadcast_ind = _generate_broadcasting_indices(out_size, low.shape,
+                                                   high.shape)
    # Iterate over these indices, drawing one sample at a time from numpy
    for oi, li, hi in zip(*broadcast_ind):
-        out[oi] = random_state.random_integers(low = low[li], high = high[hi])
+        out[oi] = random_state.random_integers(low=low[li], high=high[hi])
    return out
-def random_integers(random_state, size=None, low=0, high=1, ndim=None, dtype='int64'):
+def random_integers(random_state, size=None, low=0, high=1, ndim=None,
+                    dtype='int64'):
    """
    Sample a random integer between low and high, both inclusive.
@@ -522,6 +553,7 @@ def random_integers(random_state, size=None, low=0, high=1, ndim=None, dtype='in
            tensor.TensorType(dtype=dtype, broadcastable=bcast))
    return op(random_state, size, low, high)
 def permutation_helper(random_state, n, shape):
    """Helper function to generate permutations from integers.
@@ -552,6 +584,7 @@ def permutation_helper(random_state, n, shape):
    #print 'RETURNING', out.shape
    return out
 def permutation(random_state, size=None, n=1, ndim=None, dtype='int64'):
    """
    Returns permutations of the integers between 0 and n-1, as many times
@@ -569,10 +602,11 @@ def permutation(random_state, size=None, n=1, ndim=None, dtype='int64'):
    ndim, size, bcast = _infer_ndim_bcast(ndim, size)
    #print "NDIM", ndim, size
    op = RandomFunction(permutation_helper,
-            tensor.TensorType(dtype=dtype, broadcastable=bcast+(False,)),
+            tensor.TensorType(dtype=dtype, broadcastable=bcast + (False,)),
            ndim_added=1)
    return op(random_state, size, n)
 def multinomial_helper(random_state, n, pvals, size):
    '''
    Helper function drawing from multinomial distributions.
@@ -586,21 +620,25 @@ def multinomial_helper(random_state, n, pvals, size):
    if size is not None:
        ndim = len(size)
    else:
-        ndim = max(n.ndim, pvals.ndim-1)
+        ndim = max(n.ndim, pvals.ndim - 1)
-    out_ndim = ndim+1
+    out_ndim = ndim + 1
    # broadcast n to ndim dimensions and pvals to ndim+1
    if n.ndim > ndim:
-        raise ValueError('n.ndim (%i) should not be larger than len(size) (%i)' % (n.ndim, ndim),
+        raise ValueError(
+            'n.ndim (%i) should not be larger than len(size) (%i)'
+            % (n.ndim, ndim),
                n, size)
    if n.ndim < ndim:
-        n = n.reshape((1,)*(ndim-n.ndim) + n.shape)
+        n = n.reshape((1,) * (ndim - n.ndim) + n.shape)
-    if pvals.ndim-1 > ndim:
+    if pvals.ndim - 1 > ndim:
-        raise ValueError('pvals.ndim-1 (%i) should not be larger than len(size) (%i)' % (pvals.ndim-1, ndim),
+        raise ValueError(
+            'pvals.ndim-1 (%i) should not be larger than len(size) (%i)'
+            % (pvals.ndim - 1, ndim),
            pvals, size)
-    if pvals.ndim-1 < ndim:
+    if pvals.ndim - 1 < ndim:
-        pvals = pvals.reshape((1,)*(ndim-pvals.ndim+1) + pvals.shape)
+        pvals = pvals.reshape((1,) * (ndim - pvals.ndim + 1) + pvals.shape)
    if size is not None:
        size = tuple(size)
@@ -609,14 +647,16 @@ def multinomial_helper(random_state, n, pvals, size):
        for dim in xrange(ndim):
            dim_len = max(n.shape[dim], pvals.shape[dim])
            size = size + (dim_len,)
-    out_size = size+(pvals.shape[-1],)
+    out_size = size + (pvals.shape[-1],)
    # Build the indices over which to loop
    # Note that here, the rows (inner-most 1D subtensors) of pvals and out
    # are indexed, not their individual elements
    out = numpy.ndarray(out_size)
-    broadcast_ind = _generate_broadcasting_indices(size, n.shape, pvals.shape[:-1])
+    broadcast_ind = _generate_broadcasting_indices(size, n.shape,
-    # Iterate over these indices, drawing from one multinomial at a time from numpy
+                                                   pvals.shape[:-1])
+    # Iterate over these indices, drawing from one multinomial at a
+    # time from numpy
    assert pvals.min() >= 0
    for mi, ni, pi in zip(*broadcast_ind):
        pvi = pvals[pi]
@@ -627,24 +667,24 @@ def multinomial_helper(random_state, n, pvals, size):
        # In  perfect arithmetic this would be correct, but in float32 or
        # float64 it is too strict.
        pisum = numpy.sum(pvi)
-        if 1.0 < pisum < 1.0+1e-5:#correct if we went a little over
+        if 1.0 < pisum < 1.0 + 1e-5:  # correct if we went a little over
            # because mtrand.pyx has a ValueError that will trigger if
            # sum(pvals[:-1]) > 1.0
            pvi = pvi * (1.0 - 5e-5)
            #pvi = pvi * .9
            pisum = numpy.sum(pvi)
-        elif pvi[-1]<5e-5: #will this even work?
+        elif pvi[-1] < 5e-5:  # will this even work?
            pvi = pvi * (1.0 - 5e-5)
            pisum = numpy.sum(pvi)
-        assert pisum<=1.0, pisum
+        assert pisum <= 1.0, pisum
        out[mi] = random_state.multinomial(n=n[ni],
                                           pvals=pvi.astype('float64'))
    return out
 def multinomial(random_state, size=None, n=1, pvals=[0.5, 0.5],
                ndim=None, dtype='int64'):
-    """
+    """Sample from one or more multinomial distributions defined by
-    Sample from one or more multinomial distributions defined by
    one-dimensional slices in pvals.
    :param pvals: a tensor of shape "nmulti+(L,)" describing each multinomial
@@ -657,15 +697,17 @@ def multinomial(random_state, size=None, n=1, pvals=[0.5, 0.5],
        right in nmulti. (See examples below.)
        Default ``None`` means size=nmulti.
-    :param n: the number of experiments to simulate for each multinomial. This
+    :param n: the number of experiments to simulate for each
-        can be a scalar, or tensor, it will be broadcasted to have shape "nmulti".
+        multinomial. This can be a scalar, or tensor, it will be
+        broadcasted to have shape "nmulti".
    :param dtype: the dtype of the return value (which will represent counts)
-    :returns: tensor of len(size)+1 dimensions, and shape[-1]==L, with the specified ``dtype``,
+    :returns: tensor of len(size)+1 dimensions, and shape[-1]==L, with
-        with the experiment counts.  See examples to understand the shape of the
+        the specified ``dtype``, with the experiment counts.  See
-        return value, which is derived from both size and pvals.shape.
+        examples to understand the shape of the return value, which is
-        In return value rval, "numpy.allclose(rval.sum(axis=-1), n)" will be true.
+        derived from both size and pvals.shape.  In return value rval,
+        "numpy.allclose(rval.sum(axis=-1), n)" will be true.
    For example, to simulate n experiments from each multinomial in a batch of
    size B:
@@ -685,11 +727,12 @@ def multinomial(random_state, size=None, n=1, pvals=[0.5, 0.5],
    Using size for broadcasting of pvals:
-        size=(10,1,-1), pvals.shape=(A,B,L)
+        size=(10, 1, -1), pvals.shape=(A, B, L)
        --> rval.shape=[10,1,B,L], and requires that A==1.
-        rval[l,k,i,j] is the count of possibility j in the distribution specified
+        rval[l,k,i,j] is the count of possibility j in the
-        by pvals[k,i], in the l'th of 10 draws.
+        distribution specified by pvals[k,i], in the l'th of 10
+        draws.
    """
    n = tensor.as_tensor_variable(n)
@@ -697,9 +740,9 @@ def multinomial(random_state, size=None, n=1, pvals=[0.5, 0.5],
    # until ellipsis is implemented (argh)
    tmp = pvals.T[0].T
    ndim, size, bcast = _infer_ndim_bcast(ndim, size, n, tmp)
-    bcast = bcast+(pvals.type.broadcastable[-1],)
+    bcast = bcast + (pvals.type.broadcastable[-1],)
    op = RandomFunction(multinomial_helper,
-            tensor.TensorType(dtype = dtype, broadcastable = bcast),
+            tensor.TensorType(dtype=dtype, broadcastable=bcast),
            ndim_added=1)
    return op(random_state, size, n, pvals)
@@ -708,17 +751,20 @@ def multinomial(random_state, size=None, n=1, pvals=[0.5, 0.5],
 def random_make_inplace(node):
    op = node.op
    if isinstance(op, RandomFunction) and not op.inplace:
-        new_op = RandomFunction(op.fn, op.outtype, inplace=True, ndim_added=op.ndim_added)
+        new_op = RandomFunction(op.fn, op.outtype, inplace=True,
+                                ndim_added=op.ndim_added)
        return new_op.make_node(*node.inputs).outputs
    return False
-optdb.register('random_make_inplace', opt.in2out(random_make_inplace, ignore_newtrees=True), 99, 'fast_run', 'inplace')
+optdb.register('random_make_inplace', opt.in2out(random_make_inplace,
+                                                 ignore_newtrees=True),
+               99, 'fast_run', 'inplace')
 class RandomStreamsBase(object):
-    def binomial(self, size=None, n=1, p=0.5, ndim=None, dtype='int64', prob=None):
+    def binomial(self, size=None, n=1, p=0.5, ndim=None, dtype='int64',
+                 prob=None):
        """
        Sample n times with probability of success prob for each trial,
        return the number of successes.
@@ -754,7 +800,8 @@ class RandomStreamsBase(object):
        """
        return self.gen(normal, size, avg, std, ndim=ndim, dtype=dtype)
-    def random_integers(self, size=None, low=0, high=1, ndim=None, dtype='int64'):
+    def random_integers(self, size=None, low=0, high=1, ndim=None,
+                        dtype='int64'):
        """
        Sample a random integer between low and high, both inclusive.
@@ -762,7 +809,8 @@ class RandomStreamsBase(object):
        ndim may be a plain integer to supplement the missing
        information.
        """
-        return self.gen(random_integers, size, low, high, ndim=ndim, dtype=dtype)
+        return self.gen(random_integers, size, low, high, ndim=ndim,
+                        dtype=dtype)
    def permutation(self, size=None, n=1, ndim=None, dtype='int64'):
        """
@@ -780,7 +828,8 @@ class RandomStreamsBase(object):
        """
        return self.gen(permutation, size, n, ndim=ndim, dtype=dtype)
-    def multinomial(self, size=None, n=1, pvals=[0.5, 0.5], ndim=None, dtype='int64'):
+    def multinomial(self, size=None, n=1, pvals=[0.5, 0.5], ndim=None,
+                    dtype='int64'):
        """
        Sample n times from a multinomial distribution defined by
        probabilities pvals, as many times as required by size. For
@@ -802,7 +851,8 @@ class RandomStreamsBase(object):
        This uses permutation random variable internally, available via
        the ``.permutation`` attribute of the return value.
        """
-        perm = self.permutation(size=input.shape[:-1], n=input.shape[-1], ndim=input.ndim-1)
+        perm = self.permutation(size=input.shape[:-1], n=input.shape[-1],
+                                ndim=input.ndim - 1)
        shuffled = tensor.permute_row_elements(input, perm)
        shuffled.permutation = perm
        return shuffled
--- a/theano/tensor/shared_randomstreams.py
+++ b/theano/tensor/shared_randomstreams.py
-"""Define RandomStreams, providing random number variables for Theano graphs."""
+"""Define RandomStreams, providing random number variables for Theano
+graphs.
+"""
 __docformat__ = "restructuredtext en"
-import copy, sys
+import copy
+import sys
 import numpy
 from theano.gof import Container
-from theano.compile.sharedvalue import SharedVariable, shared_constructor, shared
+from theano.compile.sharedvalue import (SharedVariable, shared_constructor,
+                                        shared)
 import raw_random
 class RandomStateSharedVariable(SharedVariable):
    pass
 @shared_constructor
-def randomstate_constructor(value, name=None, strict=False, allow_downcast=None, borrow=False):
+def randomstate_constructor(value, name=None, strict=False,
+                            allow_downcast=None, borrow=False):
    """SharedVariable Constructor for RandomState"""
    if not isinstance(value, numpy.random.RandomState):
        raise TypeError
@@ -25,17 +33,26 @@ def randomstate_constructor(value, name=None, strict=False, allow_downcast=None,
            strict=strict,
            allow_downcast=allow_downcast)
 class RandomStreams(raw_random.RandomStreamsBase):
-    """Module component with similar interface to numpy.random (numpy.random.RandomState)"""
+    """Module component with similar interface to numpy.random
+    (numpy.random.RandomState)
+    """
    state_updates = []
-    """A list of pairs of the form (input_r, output_r).  This will be over-ridden by the module
+    """A list of pairs of the form (input_r, output_r).  This will be
-    instance to contain stream generators.
+    over-ridden by the module instance to contain stream
+    generators.
    """
    default_instance_seed = None
-    """Instance variable should take None or integer value.  Used to seed the random number
+    """Instance variable should take None or integer value.  Used to
-    generator that provides seeds for member streams"""
+    seed the random number generator that provides seeds for member
+    streams
+    """
    gen_seedgen = None
    """numpy.RandomState instance that gen() uses to seed new streams.
@@ -48,8 +65,10 @@ class RandomStreams(raw_random.RandomStreamsBase):
        """
        :type seed: None or int
-        :param seed: a default seed to initialize the RandomState instances after build.  See
+        :param seed: a default seed to initialize the RandomState
-        `RandomStreamsInstance.__init__` for more details.
+        instances after build.  See `RandomStreamsInstance.__init__`
+        for more details.
        """
        super(RandomStreams, self).__init__()
        self.state_updates = []
@@ -59,47 +78,54 @@ class RandomStreams(raw_random.RandomStreamsBase):
    def seed(self, seed=None):
        """Re-initialize each random stream
-        :param seed: each random stream will be assigned a unique state that depends
+        :param seed: each random stream will be assigned a unique
-        deterministically on this value.
+        state that depends deterministically on this value.
        :type seed: None or integer in range 0 to 2**30
        :rtype: None
        """
        if seed is None:
            seed = self.default_instance_seed
        seedgen = numpy.random.RandomState(seed)
        for old_r, new_r in self.state_updates:
-            old_r_seed = seedgen.randint(2**30)
+            old_r_seed = seedgen.randint(2 ** 30)
            old_r.set_value(numpy.random.RandomState(int(old_r_seed)),
                    borrow=True)
    def __getitem__(self, item):
-        """Retrieve the numpy RandomState instance associated with a particular stream
+        """Retrieve the numpy RandomState instance associated with a
+        particular stream
-        :param item: a variable of type RandomStateType, associated with this RandomStream
+        :param item: a variable of type RandomStateType, associated
+        with this RandomStream
        :rtype: numpy RandomState (or None, before initialize)
-        :note: This is kept for compatibility with `tensor.randomstreams.RandomStreams`.  The
+        :note: This is kept for compatibility with
-        simpler syntax ``item.rng.get_value()`` is also valid.
+        `tensor.randomstreams.RandomStreams`.  The simpler syntax
+        ``item.rng.get_value()`` is also valid.
        """
        return item.get_value(borrow=True)
    def __setitem__(self, item, val):
-        """Set the numpy RandomState instance associated with a particular stream
+        """Set the numpy RandomState instance associated with a
+        particular stream
-        :param item: a variable of type RandomStateType, associated with this RandomStream
+        :param item: a variable of type RandomStateType, associated
+        with this RandomStream
        :param val: the new value
        :type val: numpy RandomState
        :rtype:  None
-        :note: This is kept for compatibility with `tensor.randomstreams.RandomStreams`.  The
+        :note: This is kept for compatibility with
-        simpler syntax ``item.rng.set_value(val)`` is also valid.
+        `tensor.randomstreams.RandomStreams`.  The simpler syntax
+        ``item.rng.set_value(val)`` is also valid.
        """
        item.set_value(val, borrow=True)
@@ -113,12 +139,14 @@ class RandomStreams(raw_random.RandomStreamsBase):
        :param kwargs: interpreted by `op`
-        :returns: The symbolic random draw part of op()'s return value.  This function stores
+        :returns: The symbolic random draw part of op()'s return
-        the updated RandomStateType Variable for use at `build` time.
+        value.  This function stores the updated RandomStateType
+        Variable for use at `build` time.
        :rtype: TensorVariable
        """
-        seed = int(self.gen_seedgen.randint(2**30))
+        seed = int(self.gen_seedgen.randint(2 ** 30))
        random_state_variable = shared(numpy.random.RandomState(seed))
        new_r, out = op(random_state_variable, *args, **kwargs)
        out.rng = random_state_variable

--- a/theano/tensor/signal/tests/test_downsample.py
+++ b/theano/tensor/signal/tests/test_downsample.py
@@ -50,8 +50,8 @@ class TestDownsampleFactorMax(unittest.TestCase):
        for maxpoolshp in maxpoolshps:
            for ignore_border in [True,False]:
-                print 'maxpoolshp =', maxpoolshp
+                #print 'maxpoolshp =', maxpoolshp
-                print 'ignore_border =', ignore_border
+                #print 'ignore_border =', ignore_border
                ## Pure Numpy computation
                numpy_output_val = self.numpy_max_pool_2d(imval, maxpoolshp, ignore_border)
@@ -74,8 +74,8 @@ class TestDownsampleFactorMax(unittest.TestCase):
        for maxpoolshp in maxpoolshps:
            for ignore_border in [True,False]:
-                print 'maxpoolshp =', maxpoolshp
+                #print 'maxpoolshp =', maxpoolshp
-                print 'ignore_border =', ignore_border
+                #print 'ignore_border =', ignore_border
                def mp(input):
                    return DownsampleFactorMax(maxpoolshp, ignore_border=ignore_border)(input)
                utt.verify_grad(mp, [imval], rng=rng)
@@ -89,8 +89,8 @@ class TestDownsampleFactorMax(unittest.TestCase):
        for maxpoolshp in maxpoolshps:
            for ignore_border in [True,False]:
-                print 'maxpoolshp =', maxpoolshp
+                #print 'maxpoolshp =', maxpoolshp
-                print 'ignore_border =', ignore_border
+                #print 'ignore_border =', ignore_border
                numpy_output_val = self.numpy_max_pool_2d(imval, maxpoolshp, ignore_border)
                output = max_pool_2d(images, maxpoolshp, ignore_border)
@@ -110,8 +110,8 @@ class TestDownsampleFactorMax(unittest.TestCase):
        for maxpoolshp in maxpoolshps:
            for ignore_border in [True,False]:
-                print 'maxpoolshp =', maxpoolshp
+                #print 'maxpoolshp =', maxpoolshp
-                print 'ignore_border =', ignore_border
+                #print 'ignore_border =', ignore_border
                numpy_output_val = self.numpy_max_pool_2d(imval, maxpoolshp, ignore_border)
                output = max_pool_2d(images, maxpoolshp, ignore_border)
@@ -144,8 +144,8 @@ class TestDownsampleFactorMax(unittest.TestCase):
        for maxpoolshp in maxpoolshps:
            for ignore_border in [True,False]:
-                print 'maxpoolshp =', maxpoolshp
+                #print 'maxpoolshp =', maxpoolshp
-                print 'ignore_border =', ignore_border
+                #print 'ignore_border =', ignore_border
                numpy_output_val = self.numpy_max_pool_2d(imval, maxpoolshp, ignore_border)
                output = max_pool_2d(images, maxpoolshp, ignore_border)

--- a/theano/tensor/tests/mlp_test.py
+++ b/theano/tensor/tests/mlp_test.py
@@ -264,7 +264,7 @@ def test_mlp():
    ######################
    # BUILD ACTUAL MODEL #
    ######################
-    print '... building the model'
+    #print '... building the model'
    # allocate symbolic variables for the data
    index = T.lscalar()    # index to a [mini]batch
@@ -302,8 +302,8 @@ def test_mlp():
                x:train_set_x[index*batch_size:(index+1)*batch_size],
                y:train_set_y[index*batch_size:(index+1)*batch_size]},
            mode=mode)
-    print 'MODEL 1'
+    #print 'MODEL 1'
-    theano.printing.debugprint(train_model, print_type=True)
+    #theano.printing.debugprint(train_model, print_type=True)
    assert any([isinstance(i.op,T.nnet.CrossentropySoftmax1HotWithBiasDx) for i in train_model.maker.env.toposort()])
    # Even without FeatureShape
@@ -313,9 +313,9 @@ def test_mlp():
            givens={
                x:train_set_x[index*batch_size:(index+1)*batch_size],
                y:train_set_y[index*batch_size:(index+1)*batch_size]})
-    print
+    #print
-    print 'MODEL 2'
+    #print 'MODEL 2'
-    theano.printing.debugprint(train_model, print_type=True)
+    #theano.printing.debugprint(train_model, print_type=True)
    assert any([isinstance(i.op,T.nnet.CrossentropySoftmax1HotWithBiasDx) for i in train_model.maker.env.toposort()])
 if __name__ == '__main__':

--- a/theano/tensor/tests/test_basic.py
+++ b/theano/tensor/tests/test_basic.py
@@ -3049,7 +3049,7 @@ class T_Join_and_Split(unittest.TestCase):
        s = stack(a, b, a, b)
        f = function([a, b], s, mode=self.mode)
        val = f(1, 2)
-        print val
+        #print val
        self.assertTrue(numpy.all(val == [1, 2, 1, 2]))
        topo = f.maker.env.toposort()
        assert len([n for n in topo if isinstance(n.op, opt.MakeVector)]) > 0
@@ -3588,8 +3588,8 @@ class T_add(unittest.TestCase):
                     ("/", lambda x,y: x/y))
            for s, fn in tests:
                f = inplace_func([a,b], fn(a, b))
-                print 'valid output:', fn(a.data, b.data)
+                #print 'valid output:', fn(a.data, b.data)
-                print 'theano output:', f(a.data, b.data)
+                #print 'theano output:', f(a.data, b.data)
                self.assertTrue(a.type.values_eq_approx(fn(a.data, b.data), f(a.data, b.data)))
    def test_grad_scalar_l(self):
@@ -4385,8 +4385,8 @@ class TestARange(unittest.TestCase):
        df = function([dstart, dstop], dout)
        assert dout.dtype == dstart.type.dtype
-        print df(0.2, 5.3)
+        #print df(0.2, 5.3)
-        print numpy.arange(0.2, 5.3)
+        #print numpy.arange(0.2, 5.3)
        assert numpy.all(df(0.2, 5.3) == numpy.arange(0.2, 5.3))
        assert numpy.all(df(0.8, 5.3) == numpy.arange(0.8, 5.3))
        assert numpy.all(df(-0.7, 5.3) == numpy.arange(-0.7, 5.3))
@@ -4957,8 +4957,8 @@ def test_var():
    f = function([a], var(a))
    a_val = numpy.arange(60).reshape(3,4,5)
-    print numpy.var(a_val)
+    #print numpy.var(a_val)
-    print f(a_val)
+    #print f(a_val)
    assert numpy.allclose(numpy.var(a_val), f(a_val))
    f = function([a], var(a, axis=0))
@@ -4994,9 +4994,9 @@ def test_default():
         "It is actually a problem of DEBUG_MODE, see #626."))
 def test_default_state():
    x, y = scalars('xy')
-    print config.floatX
+    #print config.floatX
-    print x.type
+    #print x.type
-    print y.type
+    #print y.type
    z = default(x, 3.8)
    new_x = y + z
    f = function([y, compile.In(x, update = new_x, value = 12.0)], new_x)

--- a/theano/tensor/tests/test_naacl09.py
+++ b/theano/tensor/tests/test_naacl09.py
@@ -185,7 +185,7 @@ class QuadraticDenoisingAA(module.Module):
        #self.validate = theano.Method(self.input, [self.cost, self.output])
    def _instance_initialize(self, obj, input_size, hidden_size, seed, lr, qfilter_relscale):
-        print 'QDAA init'
+        #print 'QDAA init'
        """
        qfilter_relscale is the initial range for any quadratic filters (relative to the linear
        filter's initial range)
@@ -454,11 +454,11 @@ class ConvolutionalMLP(module.FancyModule):
            i.initialize(input_size=self.input_size,
                    hidden_size=self.input_representation_size, noise_level=noise_level,
                    seed=int(R.random_integers(2**30)), lr=lr, qfilter_relscale=qfilter_relscale)
-            print type(i.w1)
+            #print type(i.w1)
            assert isinstance(i.w1, N.ndarray)
        for i in self.input_representations[1:]:
-            print type(i.w1)
+            #print type(i.w1)
            assert isinstance(i.w1, N.ndarray)
            assert (i.w1 == self.input_representations[0].w1).all()
            assert (i.w2 == self.input_representations[0].w2).all()
@@ -528,7 +528,7 @@ def create_realistic(window_size=3,#7,
 def test_naacl_model(iters_per_unsup=3, iters_per_sup=3,
        optimizer=None, realistic=False):
-    print "BUILDING MODEL"
+    #print "BUILDING MODEL"
    import time
    t = time.time()
@@ -545,7 +545,7 @@ def test_naacl_model(iters_per_unsup=3, iters_per_sup=3,
    else:
        m = create(compile_mode=mode)
-    print 'BUILD took %.3fs'%(time.time() - t)
+    #print 'BUILD took %.3fs'%(time.time() - t)
    prog_str = []
    idx_of_node = {}
    for i, node in enumerate(m.pretraining_update.maker.env.toposort()):
@@ -557,7 +557,7 @@ def test_naacl_model(iters_per_unsup=3, iters_per_sup=3,
    #print input_pretraining_gradients[4].owner.inputs[1].owner.inputs
    #sys.exit()
-    print "PROGRAM LEN %i HASH %i"% (len(m.pretraining_update.maker.env.nodes), reduce(lambda a, b: hash(a) ^ hash(b),prog_str))
+    #print "PROGRAM LEN %i HASH %i"% (len(m.pretraining_update.maker.env.nodes), reduce(lambda a, b: hash(a) ^ hash(b),prog_str))
    rng = N.random.RandomState(unittest_tools.fetch_seed(23904))
@@ -565,35 +565,35 @@ def test_naacl_model(iters_per_unsup=3, iters_per_sup=3,
    targets = N.asarray([0,3,4,2,3,4,4,2,1,0])
    #print inputs
-    print 'UNSUPERVISED PHASE'
+    #print 'UNSUPERVISED PHASE'
    t = time.time()
    for i in xrange(3):
        for j in xrange(iters_per_unsup):
            m.pretraining_update(*inputs)
        s0, s1 = [str(j) for j in m.pretraining_update(*inputs)]
-        print 'huh?', i, iters_per_unsup, iters_per_unsup * (i+1), s0, s1
+        #print 'huh?', i, iters_per_unsup, iters_per_unsup * (i+1), s0, s1
    if iters_per_unsup == 3:
        assert s0.startswith('0.927793')#'0.403044')
        assert s1.startswith('0.068035')#'0.074898')
-    print 'UNSUPERVISED took %.3fs'%(time.time() - t)
+    #print 'UNSUPERVISED took %.3fs'%(time.time() - t)
-    print 'FINETUNING GRAPH'
+    #print 'FINETUNING GRAPH'
-    print 'SUPERVISED PHASE COSTS (%s)'%optimizer
+    #print 'SUPERVISED PHASE COSTS (%s)'%optimizer
    t = time.time()
    for i in xrange(3):
        for j in xrange(iters_per_unsup):
            m.finetuning_update(*(inputs + [targets]))
        s0 = str(m.finetuning_update(*(inputs + [targets])))
-        print iters_per_sup * (i+1), s0
+        #print iters_per_sup * (i+1), s0
    if iters_per_sup == 10:
        s0f = float(s0)
        assert 19.7042 < s0f and s0f < 19.7043
-    print 'SUPERVISED took %.3fs'%( time.time() - t)
+    #print 'SUPERVISED took %.3fs'%( time.time() - t)
 def jtest_main():
    from theano import gof
    JTEST = theano.compile.mode.optdb.query(*sys.argv[2:])
-    print 'JTEST', JTEST
+    #print 'JTEST', JTEST
    theano.compile.register_optimizer('JTEST', JTEST)
    optimizer = eval(sys.argv[1])
    test_naacl_model(optimizer, 10, 10, realistic=False)