Merge pull request #630 from nouiz/pycuda_init

Pycuda init

Merge pull request #630 from nouiz/pycuda_init
42809e8b · lamblin · cbe15896 · 4d93ae26 · 42809e8b · 42809e8b
--- a/NEWS.txt
+++ b/NEWS.txt
@@ -22,6 +22,8 @@ Bug fixes
 * Fixed many subtle bugs involving mutable default arguments which may have
   led to unexpected behaviour, such as objects sharing instance variables
   they were not supposed to share. (David W-F)
+ * Correctly record the GPU device number used when we let the driver select it.
+   (Frederic B.)
 Documentation
 * Added in the tutorial documentation on how to extend Theano.

--- a/theano/compile/tests/test_builders.py
+++ b/theano/compile/tests/test_builders.py
@@ -22,9 +22,9 @@ class T_OpFromGraph(unittest.TestCase):
        xv = numpy.ones((2, 2), dtype=config.floatX)
        yv = numpy.ones((2, 2), dtype=config.floatX)*3
        zv = numpy.ones((2, 2), dtype=config.floatX)*5
-        print function, function.__module__
+        #print function, function.__module__
-        print fn.maker.env.toposort()
+        #print fn.maker.env.toposort()
-        print fn(xv, yv, zv)
+        fn(xv, yv, zv)
        assert numpy.all(8.0 == fn(xv, yv, zv))
        assert numpy.all(8.0 == fn(xv, yv, zv))

--- a/theano/compile/tests/test_debugmode.py
+++ b/theano/compile/tests/test_debugmode.py
@@ -13,7 +13,7 @@ import unittest
 def test0():
    x = theano.tensor.dvector()
    f = theano.function([x], ((2. * x) + 7) / 2., mode=debugmode.DebugMode())
-    print f([1, 2])
+    f([1, 2])
 class BROKEN_ON_PURPOSE_Add(gof.Op):
@@ -211,7 +211,7 @@ def test_badclinkeroutput():
    try:
        f_inconsistent([1.0, 2.0, 3.0], [2, 3, 4])
    except debugmode.BadCLinkerOutput, e:
-        print repr(e)
+        #print repr(e)
        assert e.r.owner.op is inconsistent
        return  # TEST PASS
@@ -490,7 +490,7 @@ class Test_ViewMap(unittest.TestCase):
            f([1, 2, 3, 4], [5, 6, 7, 8])
            assert False  # DebugMode should have caught the error
        except debugmode.BadViewMap, e:
-            print e
+            #print e
            pass
        # the situation can be rescued by picking one of the inputs and
@@ -554,7 +554,7 @@ class Test_check_isfinite(unittest.TestCase):
        #inf should go through
        infs = numpy.asarray([1.0, 1., 1.]) / 0
-        print infs
+        #print infs
        f(infs)
        return
@@ -576,11 +576,11 @@ class BrokenCImplementationAdd(gof.Op):
        return r
    def perform(self, node, inp, out_):
-        print 'executing python perform'
+        #print 'executing python perform'
        a, b = inp
        out, = out_
        z = a + b
-        print 'out[0] was:', out[0]
+        #print 'out[0] was:', out[0]
        out[0] = z
    def c_code_cache_version(self):
@@ -671,8 +671,8 @@ class Test_preallocated_output(unittest.TestCase):
            f = theano.function([a, b], out, mode='DEBUG_MODE')
            out_val = f(a_val, b_val)
-            print 'out_val =', out_val
+            #print 'out_val =', out_val
-            print out_val.strides
+            #print out_val.strides
            # Should work for now (0.4.0), because the C thunk does not care
            # at all of what is in storage_map initially.
@@ -682,8 +682,8 @@ class Test_preallocated_output(unittest.TestCase):
            f = theano.function([a, b], out, mode='DEBUG_MODE')
            out_val = f(a_val, b_val)
-            print 'out_val =', out_val
+            #print 'out_val =', out_val
-            print out_val.strides
+            #print out_val.strides
        finally:
            config.DebugMode.check_preallocated_output = init_conf_val
--- a/theano/compile/tests/test_function_module.py
+++ b/theano/compile/tests/test_function_module.py
@@ -307,7 +307,7 @@ class T_function(unittest.TestCase):
    def test_constant_output(self):
        # Test that if the output is a constant, we respect the theano memory interface
        f = theano.function([],theano.tensor.constant([4]))
-        print f.maker.env.toposort()
+        #print f.maker.env.toposort()
        out = f()
        assert (out==4).all()
        out[0]=3
@@ -318,7 +318,7 @@ class T_function(unittest.TestCase):
        # Test that if the output is a constant and borrow, we respect the theano memory interface
        f = theano.function([],Out(theano.tensor.constant([4]), borrow=True))
-        print f.maker.env.toposort()
+        #print f.maker.env.toposort()
        out = f()
        assert (out==4).all()
        out[0]=3
@@ -412,8 +412,8 @@ class T_picklefunction(unittest.TestCase):
        self.assertFalse(x in g.container)
        self.assertFalse(x in g.value)
        self.assertTrue(len(f.defaults) == len(g.defaults))
-        print 'f.defaults = %s' % (f.defaults, )
+        #print 'f.defaults = %s' % (f.defaults, )
-        print 'g.defaults = %s' % (g.defaults, )
+        #print 'g.defaults = %s' % (g.defaults, )
        self.assertTrue(all([f_req == g_req and f_feed == g_feed and
            f_val == g_val
            for ((f_req, f_feed, f_val), (g_req, g_feed, g_val)) in zip(

--- a/theano/compile/tests/test_inplace_opt_for_value.py
+++ b/theano/compile/tests/test_inplace_opt_for_value.py
@@ -187,7 +187,7 @@ class ExampleRNN(Module):
        self.minimizer = minimizer([x, y], self.cost, self.params)
    def _instance_initialize(self, obj):
-        print 'INITIALIZE EXAMPLE RNN'
+        #print 'INITIALIZE EXAMPLE RNN'
        n_vis = self.n_vis
        rng = N.random.RandomState(unittest_tools.fetch_seed(2342))
@@ -214,7 +214,7 @@ def test_example_rnn():
    LAG = 4
    y[LAG:] = x[:-LAG, 0:n_out]
-    if 1:
+    if 0:
        for i, node in enumerate(rnn.minimizer.step_cost.maker.env.toposort()):
            print i, node
@@ -223,9 +223,6 @@ def test_example_rnn():
        niter=30
    for i in xrange(niter):
-        if i % 100 == 0:
-            print i, rnn.minimizer.step_cost(x, y), rnn.minimizer.stepsize
-        else:
        rnn.minimizer.step_cost(x, y)
    if theano.config.mode=='DEBUG_MODE':
        assert rnn.minimizer.step_cost(x,y) < -.9 #it starts around -.28
@@ -258,7 +255,7 @@ def test_WEIRD_STUFF():
 #    rnn2 = rnn_module1.make(mode=Mode('c|py', 'fast_run').excluding("inplace_opt"))#work
 #    rnn2 = rnn_module1.make(mode=Mode('py', 'fast_run'))#fail
    m = Mode('py', 'fast_run')
-    for n in m.optimizer: print n.name
+#    for n in m.optimizer: print n.name
    if 0:
        topo1=rnn1.minimizer.step_cost.maker.env.toposort()
@@ -266,7 +263,7 @@ def test_WEIRD_STUFF():
        for i in range(len(topo1)):
            print '1',i, topo1[i]
            print '2',i, topo2[i]
-    if 1:
+    if 0:
        topo1=rnn1.minimizer.step.maker.env.toposort()
        topo2=rnn2.minimizer.step.maker.env.toposort()
        for i in range(len(topo1)):
@@ -274,10 +271,10 @@ def test_WEIRD_STUFF():
            print '2',i, topo2[i]
    import theano.printing
-    print len(rnn1.minimizer.step.maker.inputs)
+    #print len(rnn1.minimizer.step.maker.inputs)
-    print len(rnn2.minimizer.step.maker.inputs)
+    #print len(rnn2.minimizer.step.maker.inputs)
-    print rnn1.minimizer.step.maker.inputs
+    #print rnn1.minimizer.step.maker.inputs
-    print rnn2.minimizer.step.maker.inputs
+    #print rnn2.minimizer.step.maker.inputs
@@ -293,15 +290,15 @@ def test_WEIRD_STUFF():
    niter=3
    for i in xrange(niter):
-        print rnn1.minimizer.step_cost(x, y)
+        #print rnn1.minimizer.step_cost(x, y)
-        print rnn2.minimizer.step_cost(x, y)
+        #print rnn2.minimizer.step_cost(x, y)
    #    assert rnn1.n_vis != rnn2.n_vis or slef.n_hid != rnn2.n_hid or rnn1.n_out != rnn2.n_out
        assert (N.abs(rnn1.z0-rnn2.z0)<1e-8).all()
-        print (N.abs(rnn1.w-rnn2.w)<1e-8).all()
+        #print (N.abs(rnn1.w-rnn2.w)<1e-8).all()
-        print (N.abs(rnn1.w-rnn2.w))
+        #print (N.abs(rnn1.w-rnn2.w))
-        print rnn1.w
+        #print rnn1.w
-        print rnn2.w
+        #print rnn2.w
        assert (N.abs(rnn1.w-rnn2.w)<1e-8).all()
    #    assert b

--- a/theano/compile/tests/test_misc.py
+++ b/theano/compile/tests/test_misc.py
@@ -18,7 +18,7 @@ class NNet(object):
        self.lr = shared(lr, 'learning_rate')
        self.w1 = shared(numpy.zeros((n_hidden, n_input)), 'w1')
        self.w2 = shared(numpy.zeros((n_output, n_hidden)), 'w2')
-        print self.lr.type
+        #print self.lr.type
        self.hidden = sigmoid(tensor.dot(self.w1, self.input))
        self.output = tensor.dot(self.w2, self.hidden)
@@ -51,7 +51,7 @@ class TestNnet(unittest.TestCase):
                output, cost = nnet.sgd_step(input, target)
                mean_cost += cost
            mean_cost /= float(len(data))
-            print 'Mean cost at epoch %s: %s' % (epoch, mean_cost)
+            #print 'Mean cost at epoch %s: %s' % (epoch, mean_cost)
        self.assertTrue(abs(mean_cost - 0.20588975452) < 1e-6)
        # Just call functions to make sure they do not crash.
        out = nnet.compute_output(input)

--- a/theano/compile/tests/test_modes.py
+++ b/theano/compile/tests/test_modes.py
@@ -32,7 +32,7 @@ class T_bunch_of_modes(unittest.TestCase):
            # test that it runs something
            f([[1, 2], [3, 4]], [5, 6])
            linker_classes_involved.append(f.maker.mode.linker.__class__)
-            print 'MODE:', mode, f.maker.mode.linker, 'stop'
+#            print 'MODE:', mode, f.maker.mode.linker, 'stop'
        # regression check:
        # there should be
        # - VM_Linker

--- a/theano/compile/tests/test_module.py
+++ b/theano/compile/tests/test_module.py
@@ -146,7 +146,7 @@ class T_module(unittest.TestCase):
            #assign 4 and 5 to the two variables' containers in m
            m.l = [4, 5]
-            print 'm.f', m.f()
+            m.f()
            assert numpy.all(5 == m.f())
            assert numpy.all(4 == m.g())
@@ -189,9 +189,9 @@ class T_module(unittest.TestCase):
            assert 5 == m.f()
            assert 4 == m.g()
-        print 'dscalar test'
+        #print 'dscalar test'
        local_test(lambda:T.dscalar(),lambda:T.dscalar())
-        print 'value test'
+        #print 'value test'
        local_test(lambda:T.value(1),lambda:T.value(2))
@@ -494,9 +494,9 @@ class T_module(unittest.TestCase):
        M.a = [1,2,3]
        M.make()
        m = M.make()
-        print m.a
+        #print m.a
-        print m.a[0], type(m.a[0]), m.a[0] == 1
+        #print m.a[0], type(m.a[0]), m.a[0] == 1
-        print list(m.a)
+        #print list(m.a)
        assert list(m.a) == [1,2,3]
        assert m.a is not M.a
        try:
@@ -545,7 +545,8 @@ def test_multiple_references():
            self.sub_module = sub_module
        def _instance_initialize(self, obj):
-            print 'Initializing A'
+            pass
+            #print 'Initializing A'
    class B(theano.Module):
@@ -555,7 +556,8 @@ def test_multiple_references():
            self.sub_module = sub_module
        def _instance_initialize(self, obj):
-            print 'Initializing B'
+            pass
+            #print 'Initializing B'
    class C(theano.Module):
@@ -565,11 +567,11 @@ def test_multiple_references():
            self.value = theano.tensor.scalar()
        def _instance_initialize(self, obj):
-            print 'Initializing C'
+            #print 'Initializing C'
            obj.value = 0
        def _instance_set(self, obj, value):
-            print 'Setting C'
+            #print 'Setting C'
            obj.value = value
@@ -584,7 +586,7 @@ def test_multiple_references():
            self.bug = theano.tensor.scalar()
        def _instance_initialize(self, obj):
-            print 'Initializing D'
+            #print 'Initializing D'
            obj.c.set(1)

--- a/theano/compile/tests/test_pfunc.py
+++ b/theano/compile/tests/test_pfunc.py
@@ -369,7 +369,6 @@ class Test_pfunc(unittest.TestCase):
            z: (((x * 5) + y) ** z)})
        up()
-        print x.get_value(borrow=True)
        assert numpy.all(x.get_value() == 20)
        assert numpy.all(y.get_value() == 24)
        assert numpy.all(z.get_value() == (24 ** 2))
@@ -380,7 +379,6 @@ class Test_pfunc(unittest.TestCase):
        f = pfunc([], [x])
        f()
-        print x.get_value()
        assert x.get_value() == 1
        del x.default_update
@@ -399,32 +397,26 @@ class Test_pfunc(unittest.TestCase):
        # Test that the default update is taken into account in the right cases
        f1 = pfunc([], [x], no_default_updates=True)
        f1()
-        print x.get_value()
        assert x.get_value() == 0
        f2 = pfunc([], [x], no_default_updates=[x])
        f2()
-        print x.get_value()
        assert x.get_value() == 0
        f3 = pfunc([], [x], no_default_updates=[x, y])
        f3()
-        print x.get_value()
        assert x.get_value() == 0
        f4 = pfunc([], [x], no_default_updates=[y])
        f4()
-        print x.get_value()
        assert x.get_value() == 2
        f5 = pfunc([], [x], no_default_updates=[])
        f5()
-        print x.get_value()
        assert x.get_value() == 4
        f5 = pfunc([], [x], no_default_updates=False)
        f5()
-        print x.get_value()
        assert x.get_value() == 6
        self.assertRaises(TypeError, pfunc, [], [x], no_default_updates=(x))
@@ -435,32 +427,26 @@ class Test_pfunc(unittest.TestCase):
        # Mix explicit updates and no_default_updates
        g1 = pfunc([], [x], updates=[(x, (x - 1))], no_default_updates=True)
        g1()
-        print x.get_value()
        assert x.get_value() == 5
        g2 = pfunc([], [x], updates=[(x, (x - 1))], no_default_updates=[x])
        g2()
-        print x.get_value()
        assert x.get_value() == 4
        g3 = pfunc([], [x], updates=[(x, (x - 1))], no_default_updates=[x, y])
        g3()
-        print x.get_value()
        assert x.get_value() == 3
        g4 = pfunc([], [x], updates=[(x, (x - 1))], no_default_updates=[y])
        g4()
-        print x.get_value()
        assert x.get_value() == 2
        g5 = pfunc([], [x], updates=[(x, (x - 1))], no_default_updates=[])
        g5()
-        print x.get_value()
        assert x.get_value() == 1
        g5 = pfunc([], [x], updates=[(x, (x - 1))], no_default_updates=False)
        g5()
-        print x.get_value()
        assert x.get_value() == 0
    def test_default_updates_expressions(self):
@@ -473,17 +459,14 @@ class Test_pfunc(unittest.TestCase):
        f1 = pfunc([a], z)
        f1(12)
-        print x
        assert x.get_value() == 1
        f2 = pfunc([a], z, no_default_updates=True)
        assert f2(7) == 7
-        print x
        assert x.get_value() == 1
        f3 = pfunc([a], z, no_default_updates=[x])
        assert f3(9) == 9
-        print x
        assert x.get_value() == 1
    def test_default_updates_multiple(self):
@@ -524,7 +507,6 @@ class Test_pfunc(unittest.TestCase):
        f1 = pfunc([], [x])
        f1()
-        print x.get_value(), y.get_value(), z.get_value()
        assert x.get_value() == 1
        assert y.get_value() == -1
        assert z.get_value() == -2
@@ -598,10 +580,8 @@ class Test_pfunc(unittest.TestCase):
        b = 2 * a
        # Use only the tip of the graph, a is not used
        f = pfunc([b], b)
-        print 'a.get_value() =', a.get_value()
        assert a.get_value() == 0
        f(21)
-        print 'a.get_value() =', a.get_value()
        assert a.get_value() == 0
    def test_givens_replaces_shared_variable(self):
@@ -917,7 +897,7 @@ class Test_aliasing_rules(unittest.TestCase):
        data_of_b = data_of(B)
        f = pfunc([], [], updates=[(A, B[:, ::-1]), (B, A.T)])
-        theano.printing.debugprint(f)
+        #theano.printing.debugprint(f)
        f()
        # correctness (doesn't actually test the view...)
        assert numpy.all(data_of(A) == -.5)
@@ -938,7 +918,6 @@ class Test_aliasing_rules(unittest.TestCase):
            assert numpy.all(data_of(B) < 5)
            data_of_a += 10
-            print data_of(B)
            assert numpy.all(data_of(B) > 5)
            data_of_a -= 10

--- a/theano/gof/tests/test_cc.py
+++ b/theano/gof/tests/test_cc.py
@@ -195,8 +195,8 @@ def test_clinker_literal_inlining():
    fn = lnk.make_function()
    assert abs(fn(2.0, 2.0) + 0.12345678) < 1e-9
    code = lnk.code_gen()
-    print "=== Code generated ==="
+    #print "=== Code generated ==="
-    print code
+    #print code
    assert "4.12345678" in code  # we expect the number to be inlined

--- a/theano/gof/tests/test_destroyhandler.py
+++ b/theano/gof/tests/test_destroyhandler.py
@@ -110,22 +110,22 @@ class FailureWatch:
 def consistent(g):
-    print "Testing consistent:", g
+    #print "Testing consistent:", g
    try:
        assert g.consistent()
    except AssertionError:
        print "Test failed! The graph was marked as NOT consistent."
        raise
-    print "Test OK"
+    #print "Test OK"
 def inconsistent(g):
-    print "Testing NOT consistent:", g
+    #print "Testing NOT consistent:", g
    try:
        assert not g.consistent()
    except AssertionError:
        print "Test failed! The graph was marked as consistent."
        raise
-    print "Test OK"
+    #print "Test OK"

--- a/theano/gof/tests/test_vm.py
+++ b/theano/gof/tests/test_vm.py
@@ -74,10 +74,10 @@ def test_speed():
        numpy_version(x, steps_a)
        t0 = time.time()
-        print numpy_version(x, steps_a)
+        #print numpy_version(x, steps_a)
        t1 = time.time()
        t2 = time.time()
-        print numpy_version(x, steps_b)
+        #print numpy_version(x, steps_b)
        t3 = time.time()
        t_a = t1 - t0
        t_b = t3 - t2
@@ -103,15 +103,15 @@ def test_speed():
                #profile='f_b speed test %s'%name,
                )
-        print f_a([2.0, 3.0])
+        f_a([2.0, 3.0])
        t0 = time.time()
-        print f_a([2.0, 3.0])
+        f_a([2.0, 3.0])
        t1 = time.time()
-        print f_b([2.0, 3.0])
+        f_b([2.0, 3.0])
        t2 = time.time()
-        print f_b([2.0, 3.0])
+        f_b([2.0, 3.0])
        t3 = time.time()
        t_a = t1 - t0
@@ -155,15 +155,15 @@ def test_speed_lazy():
                #profile='f_b lazy ifelse %s'%name,
                )
-        print f_a([2.0])
+        f_a([2.0])
        t0 = time.time()
-        print f_a([2.0])
+        f_a([2.0])
        t1 = time.time()
-        print f_b([2.0])
+        f_b([2.0])
        t2 = time.time()
-        print f_b([2.0])
+        f_b([2.0])
        t3 = time.time()
        t_a = t1 - t0

--- a/theano/misc/pycuda_init.py
+++ b/theano/misc/pycuda_init.py
 import os
+import warnings
 import theano
 import theano.sandbox.cuda as cuda
+cuda_ndarray = cuda.cuda_ndarray.cuda_ndarray
-def select_gpu_from_theano():
+def set_gpu_from_theano():
-    # Transfer the theano gpu binding to pycuda, for consistency
+    """
-    theano_to_pycuda_device_map = {"cpu": "0",
+    This sets the GPU used by PyCUDA to the same one used by Theano.
-                                   "gpu0": "0",
+    """
-                                   "gpu1": "1",
+    #import pdb;pdb.set_trace()
-                                   "gpu2": "2",
+    if cuda.use.device_number is None:
-                                   "gpu3": "3"}
+        cuda.use("gpu",
-    dev = theano_to_pycuda_device_map.get(theano.config.device, "0")
+                 force=False,
-    if theano.config.device == 'gpu':
+                 default_to_move_computation_to_gpu=False,
-        dev = str(cuda.cuda_ndarray.cuda_ndarray.active_device_number())
+                 move_shared_float32_to_gpu=False,
-    os.environ["CUDA_DEVICE"] = dev
+                 enable_cuda=True,
+                 test_driver=True)
-select_gpu_from_theano()
+    assert cuda.use.device_number == cuda_ndarray.active_device_number()
+#    os.environ["CUDA_DEVICE"] = str(cuda.use.device_number)
+set_gpu_from_theano()
 pycuda_available = False
-try:
+if False:
+    try:
        import pycuda
        import pycuda.autoinit
        pycuda_available = True
-except ImportError:
+    except ImportError:
        # presumably, the user wanted to use pycuda, else they wouldn't have
        # imported this module, so issue a warning that the import failed.
-    import warnings
        warnings.warn("PyCUDA import failed in theano.misc.pycuda_init")
--- a/theano/sandbox/cuda/__init__.py
+++ b/theano/sandbox/cuda/__init__.py
@@ -313,11 +313,15 @@ def use(device,
                gpu_init(device)
                use.device_number = device
            else:
-                # This mean we let the driver select the GPU.
+                # This means the driver should select the GPU.  As we
-                # But default it is always number 0.
+                # need to get the device number now, we force the
-                # If the driver is in exclusive mode, it will always show
+                # selection of the GPU by the driver now and then we
-                # device 0 event if it use something else.
+                # query the active GPU. If we check the active GPU before
-                use.device_number = 0
+                # the device is initialized we will always receive 0
+                # even if another device is selected later.
+                cuda_ndarray.cuda_ndarray.CudaNdarray.zeros((2, 3))
+                use.device_number = active_device_number()
            if test_driver:
                import theano.sandbox.cuda.tests.test_driver
                theano.sandbox.cuda.tests.test_driver.test_nvidia_driver1()

--- a/theano/sandbox/cuda/tests/test_blas.py
+++ b/theano/sandbox/cuda/tests/test_blas.py
@@ -238,7 +238,7 @@ if 0:
                bval = numpy.arange(0,d0*d1).reshape(1,1,d0,d1)
                r = f(bval)[0]
    #            print bval, bval.shape, border
-                print r, r.shape
+                #print r, r.shape
                assert (ret==r).all()
@@ -284,7 +284,7 @@ def test_downsample():
            if float(shp[3]) / ds[1] > 512:
                continue
            for ignore_border in (True, False):
-                print 'test_downsample', shp, ds, ignore_border
+                #print 'test_downsample', shp, ds, ignore_border
                ds_op = DownsampleFactorMax(ds, ignore_border=ignore_border)
                a = tcn.shared_constructor(my_rand(*shp), 'a')

--- a/theano/sandbox/cuda/tests/test_cuda_ndarray.py
+++ b/theano/sandbox/cuda/tests/test_cuda_ndarray.py
@@ -30,7 +30,7 @@ def advantage(cpu_dt, gpu_dt):
        return cpu_dt / gpu_dt
 def test_host_to_device():
-    print >>sys.stdout, 'starting test_host_to_dev'
+    #print >>sys.stdout, 'starting test_host_to_dev'
    for shape in ((), (3,), (2,3), (3,4,5,6)):
        a = theano._asarray(numpy.random.rand(*shape), dtype='float32')
        b = cuda_ndarray.CudaNdarray(a)
@@ -84,7 +84,7 @@ def test_add_iadd_idiv():
            asum = a0 + a1
            t1 = time.time()
            cpu_dt = t1 - t0
-            print shape, 'adding ', a0.size, 'cpu', cpu_dt, 'advantage', advantage(cpu_dt, gpu_dt)
+            #print shape, 'adding ', a0.size, 'cpu', cpu_dt, 'advantage', advantage(cpu_dt, gpu_dt)
            assert numpy.allclose(asum,  numpy.asarray(bsum))
        #test not contiguous version.
@@ -122,7 +122,7 @@ def test_add_iadd_idiv():
        a0 += a1
        t1 = time.time()
        cpu_dt = t1 - t0
-        print shape, 'adding inplace', a0.size, 'cpu', cpu_dt, 'advantage', advantage(cpu_dt, gpu_dt)
+        #print shape, 'adding inplace', a0.size, 'cpu', cpu_dt, 'advantage', advantage(cpu_dt, gpu_dt)
        assert numpy.allclose(a0, numpy.asarray(b0))
        assert numpy.allclose(a0, a0_orig + a1)
@@ -144,7 +144,7 @@ def test_add_iadd_idiv():
        assert numpy.allclose(a0, ((a0_orig+a1)/a1+a1[..., ::-1])/a1[..., ::-1])
 def test_exp():
-    print >>sys.stdout, 'starting test_exp'
+    #print >>sys.stdout, 'starting test_exp'
    for shape in ((), (3,), (2,3), (1,10000000),(10,1000000), (100,100000),(1000,10000),(10000,1000)):
        a0 = theano._asarray(numpy.random.rand(*shape), dtype='float32')
        a1 = a0.copy()
@@ -158,26 +158,26 @@ def test_exp():
        asum = numpy.exp(a1)
        t1 = time.time()
        cpu_dt = t1 - t0
-        print shape, 'adding ', a0.size, 'cpu', cpu_dt, 'advantage', advantage(cpu_dt, gpu_dt)
+        #print shape, 'adding ', a0.size, 'cpu', cpu_dt, 'advantage', advantage(cpu_dt, gpu_dt)
        #c = numpy.asarray(b0+b1)
        if asum.shape:
            assert numpy.allclose(asum, numpy.asarray(bsum))
 def test_copy():
-    print >>sys.stdout, 'starting test_copy'
+    #print >>sys.stdout, 'starting test_copy'
    shape = (500,499)
    a = theano._asarray(numpy.random.rand(*shape), dtype='float32')
-    print >>sys.stdout, '.. creating device object'
+    #print >>sys.stdout, '.. creating device object'
    b = cuda_ndarray.CudaNdarray(a)
-    print >>sys.stdout, '.. copy'
+    #print >>sys.stdout, '.. copy'
    c = copy.copy(b)
-    print >>sys.stdout, '.. deepcopy'
+    #print >>sys.stdout, '.. deepcopy'
    d = copy.deepcopy(b)
-    print >>sys.stdout, '.. comparisons'
+    #print >>sys.stdout, '.. comparisons'
    assert numpy.allclose(a, numpy.asarray(b))
    assert numpy.allclose(a, numpy.asarray(c))
    assert numpy.allclose(a, numpy.asarray(d))
@@ -268,7 +268,7 @@ class test_DimShuffle(unittest.TestCase):
 def test_dot():
-    print >>sys.stdout, 'starting test_dot'
+    #print >>sys.stdout, 'starting test_dot'
    utt.seed_rng()
    rng = numpy.random.RandomState(utt.fetch_seed())
@@ -320,8 +320,8 @@ def test_sum():
    a0sum = a0.sum(axis=0)
    b0sum = b0.reduce_sum([1,0])
-    print 'asum\n',a0sum
+    #print 'asum\n',a0sum
-    print 'bsum\n',numpy.asarray(b0sum)
+    #print 'bsum\n',numpy.asarray(b0sum)
    assert numpy.allclose(a0.sum(axis=0), numpy.asarray(b0.reduce_sum([1,0])))
    assert numpy.allclose(a0.sum(axis=1), numpy.asarray(b0.reduce_sum([0,1])))
@@ -932,7 +932,7 @@ def test_base():
    c = a[0]
    d = c[:,0]
-    print d.shape
+    #print d.shape
    assert c.base is a
    assert d.base is a

--- a/theano/sandbox/cuda/tests/test_mlp.py
+++ b/theano/sandbox/cuda/tests/test_mlp.py
@@ -103,7 +103,7 @@ def run_nnet(use_gpu, n_batch=60, n_in=1024, n_hid=2048, n_out=10,
    mode = get_mode(use_gpu)
-    print 'building pfunc ...'
+    #print 'building pfunc ...'
    train = pfunc([x, y, lr], [loss], mode=mode,
                  updates=[(p, p - g) for p, g in izip(params, gparams)])
@@ -138,9 +138,9 @@ def test_run_nnet():
                    theano.gradient.numeric_grad.abs_rel_err(rval_gpu,
                                                             rval_cpu)
            max_abs_diff = abs_diff.max()
-            print "max abs diff=%e max rel diff=%e n_in=%d n_hid=%d" % (
+            #print "max abs diff=%e max rel diff=%e n_in=%d n_hid=%d" % (
-                max_abs_diff, rel_diff.max(), n_in, n_hid)
+            #    max_abs_diff, rel_diff.max(), n_in, n_hid)
-            print "time cpu: %f, time gpu: %f, speed up %f" % (tc, tg, tc / tg)
+            #print "time cpu: %f, time gpu: %f, speed up %f" % (tc, tg, tc / tg)
            rtol = 1e-4
            if n_in * n_hid >= 2048 * 4096:
                rtol = 7e-4
@@ -192,14 +192,14 @@ def run_conv_nnet1(use_gpu):
    hid_flat = hid.reshape((n_batch, n_hid))
    out = tensor.tanh(tensor.dot(hid_flat, v)+c)
    loss = tensor.sum(0.5 * (out-y)**2 * lr)
-    print 'loss type', loss.type
+    #print 'loss type', loss.type
    params = [w, b, v, c]
    gparams = tensor.grad(loss, params)
    mode = get_mode(use_gpu)
-    print 'building pfunc ...'
+    #print 'building pfunc ...'
    train = pfunc([x,y,lr], [loss], mode=mode, updates=[(p, p-g) for p,g in zip(params, gparams)])
 #    for i, n in enumerate(train.maker.env.toposort()):
@@ -211,7 +211,7 @@ def run_conv_nnet1(use_gpu):
    for i in xrange(n_train):
        rval = train(xval, yval, lr)
-    print 'training done'
+    #print 'training done'
    print_mode(mode)
    return rval
@@ -281,14 +281,14 @@ def run_conv_nnet2(use_gpu): # pretend we are training LeNet for MNIST
    hid_flat = hid1.reshape((n_batch, n_hid))
    out = tensor.tanh(tensor.dot(hid_flat, v)+c)
    loss = tensor.sum(0.5 * (out-y)**2 * lr)
-    print 'loss type', loss.type
+    #print 'loss type', loss.type
    params = [w0, b0, w1, b1, v, c]
    gparams = tensor.grad(loss, params)
    mode = get_mode(use_gpu)
-    print 'building pfunc ...'
+    #print 'building pfunc ...'
    train = pfunc([x,y,lr], [loss], mode=mode, updates=[(p, p-g) for p,g in zip(params, gparams)])
 #    for i, n in enumerate(train.maker.env.toposort()):
@@ -310,7 +310,7 @@ def test_conv_nnet2():
    if True:
        utt.seed_rng()
        rval_cpu = run_conv_nnet2(False)
-        print rval_cpu[0], rval_gpu[0],rval_cpu[0]-rval_gpu[0]
+        #print rval_cpu[0], rval_gpu[0],rval_cpu[0]-rval_gpu[0]
        assert numpy.allclose(rval_cpu, rval_gpu,rtol=1e-4,atol=1e-4)
@@ -350,9 +350,9 @@ def build_conv_nnet2_classif(use_gpu, isize, ksize, n_batch,
    v = shared_fn(0.01*my_randn(n_hid, n_out), 'v')
    c = shared_fn(my_zeros(n_out), 'c')
-    print 'ALLOCATING ARCH: w0 shape', w0.get_value(borrow=True).shape
+    #print 'ALLOCATING ARCH: w0 shape', w0.get_value(borrow=True).shape
-    print 'ALLOCATING ARCH: w1 shape', w1.get_value(borrow=True).shape
+    #print 'ALLOCATING ARCH: w1 shape', w1.get_value(borrow=True).shape
-    print 'ALLOCATING ARCH: v shape', v.get_value(borrow=True).shape
+    #print 'ALLOCATING ARCH: v shape', v.get_value(borrow=True).shape
    x = tensor.Tensor(dtype='float32', broadcastable=(0,1,0,0))('x')
    y = tensor.fmatrix('y')
@@ -375,14 +375,14 @@ def build_conv_nnet2_classif(use_gpu, isize, ksize, n_batch,
    hid_flat = hid1.reshape((n_batch, n_hid))
    out = tensor.nnet.softmax(tensor.dot(hid_flat, v)+c)
    loss = tensor.sum(tensor.nnet.crossentropy_categorical_1hot(out, tensor.argmax(y, axis=1)) * lr)
-    print 'loss type', loss.type
+    #print 'loss type', loss.type
    params = [w0, b0, w1, b1, v, c]
    gparams = tensor.grad(loss, params, warn_type=True)
    mode = get_mode(use_gpu, check_isfinite)
-    print 'building pfunc ...'
+    #print 'building pfunc ...'
    train = pfunc([x,y,lr], [loss], mode=mode, updates=[(p, p-g) for p,g in zip(params, gparams)])
    if verbose:
@@ -437,9 +437,9 @@ def run_conv_nnet2_classif(use_gpu, seed, isize, ksize, bsize,
        print pickle.dumps(mode)
        print "END %s profile mode dump" % device
-    print "%s time: %.3f" % (device, t1-t0)
+    #print "%s time: %.3f" % (device, t1-t0)
-    print "estimated time for one pass through MNIST with %s: %f" % (
+    #print "estimated time for one pass through MNIST with %s: %f" % (
-            device, (t1-t0) * (60000.0 / (n_train*bsize)))
+    #        device, (t1-t0) * (60000.0 / (n_train*bsize)))
 def cmp_run_conv_nnet2_classif(seed, isize, ksize, bsize,
@@ -465,7 +465,7 @@ def cmp_run_conv_nnet2_classif(seed, isize, ksize, bsize,
    orig_float32_atol = theano.tensor.basic.float32_atol
    try:
        if float_atol:
-            print "float_atol", float_atol
+            #print "float_atol", float_atol
            theano.tensor.basic.float32_atol = float_atol
        if gpu_only and cpu_only:
@@ -565,12 +565,12 @@ def cmp_run_conv_nnet2_classif(seed, isize, ksize, bsize,
            print pickle.dumps(gpu_mode)
            print "END GPU profile mode dump"
-    print "CPU time: %.3f, GPU time: %.3f, speed up %f" % (
+    #print "CPU time: %.3f, GPU time: %.3f, speed up %f" % (
-            (time_cpu, time_gpu, time_cpu/time_gpu))
+    #        (time_cpu, time_gpu, time_cpu/time_gpu))
-    print "Estimated time for one pass through MNIST with CPU: %f" % (
+    #print "Estimated time for one pass through MNIST with CPU: %f" % (
-            (time_cpu * (60000.0 / (n_train*bsize))))
+    #        (time_cpu * (60000.0 / (n_train*bsize))))
-    print "Estimated time for one pass through MNIST with GPU: %f" % (
+    #print "Estimated time for one pass through MNIST with GPU: %f" % (
-            (time_gpu * (60000.0 / (n_train*bsize))))
+    #        (time_gpu * (60000.0 / (n_train*bsize))))
 # Default parameters for all subsequent tests

--- a/theano/sparse/sandbox/sp2.py
+++ b/theano/sparse/sandbox/sp2.py
@@ -497,6 +497,13 @@ class StrucutedAddSVCSR(gof.Op):
        return hash(type(self))
    def make_node(self, a_data, a_indices, a_indptr, b):
+        b = tensor.as_tensor_variable(b)
+        a_data = tensor.as_tensor_variable(a_data)
+        a_indices = tensor.as_tensor_variable(a_indices)
+        a_indptr = tensor.as_tensor_variable(a_indptr)
+        assert a_data.type.ndim == 1
+        assert a_indices.type.ndim == 1
+        assert a_indptr.type.ndim == 1
        assert b.type.ndim == 1
        return gof.Apply(self, [a_data, a_indices, a_indptr, b],
                               [tensor.tensor(b.dtype, (False,))])

--- a/theano/tensor/nnet/tests/test_conv3d.py
+++ b/theano/tensor/nnet/tests/test_conv3d.py
@@ -335,7 +335,7 @@ class TestConv3D(unittest.TestCase):
        col_steps  = self.rng.randint(1,4)
        time_steps = self.rng.randint(1,4)
-        print (row_steps,col_steps,time_steps)
+        #print (row_steps,col_steps,time_steps)
        videoDur    = (time_steps-1)*dt+filterDur   + self.rng.randint(0,3)
        videoWidth  = (col_steps-1)*dc+filterWidth  + self.rng.randint(0,3)

--- a/theano/tensor/nnet/tests/test_nnet.py
+++ b/theano/tensor/nnet/tests/test_nnet.py
@@ -112,8 +112,8 @@ class T_SoftmaxWithBias(unittest.TestCase):
        assert softmax_with_bias not in ops
        assert softmax in ops
-        print f([0,1,0])
+        f([0,1,0])
-        print f.maker.env.toposort()
+        #print f.maker.env.toposort()
    def test_infer_shape(self):
        fff=theano.function([],outputs=softmax_with_bias(numpy.random.rand(3,4),numpy.random.rand(4)).shape)
@@ -299,20 +299,20 @@ class T_CrossentropyCategorical1Hot(unittest.TestCase):
                [op(softmax(x+b), one_of_n)])
        assert env.outputs[0].owner.op == op
-        print 'BEFORE'
+        #print 'BEFORE'
-        for node in env.toposort():
+        #for node in env.toposort():
-            print node.op
+        #    print node.op
-        print printing.pprint(node.outputs[0])
+        #print printing.pprint(node.outputs[0])
-        print '----'
+        #print '----'
        theano.compile.mode.optdb.query(
                theano.compile.mode.OPT_FAST_RUN).optimize(env)
-        print 'AFTER'
+        #print 'AFTER'
-        for node in env.toposort():
+        #for node in env.toposort():
-            print node.op
+        #    print node.op
-        print printing.pprint(node.outputs[0])
+        #print printing.pprint(node.outputs[0])
-        print '===='
+        #print '===='
        assert len(env.toposort()) == 2
        assert str(env.outputs[0].owner.op) == 'OutputGuard'
@@ -330,18 +330,18 @@ class T_CrossentropyCategorical1Hot(unittest.TestCase):
                [op(softmax(T.add(x,b,c)), one_of_n)])
        assert env.outputs[0].owner.op == op
-        print 'BEFORE'
+        #print 'BEFORE'
-        for node in env.toposort():
+        #for node in env.toposort():
-            print node.op
+        #    print node.op
-        print '----'
+        #print '----'
        theano.compile.mode.optdb.query(
                theano.compile.mode.OPT_FAST_RUN).optimize(env)
-        print 'AFTER'
+        #print 'AFTER'
-        for node in env.toposort():
+        #for node in env.toposort():
-            print node.op
+        #    print node.op
-        print '===='
+        #print '===='
        assert len(env.toposort()) == 3
        assert str(env.outputs[0].owner.op) == 'OutputGuard'
@@ -356,18 +356,18 @@ class T_CrossentropyCategorical1Hot(unittest.TestCase):
                [x, b, one_of_n],
                [op(softmax(x+b), one_of_n)])
        assert env.outputs[0].owner.op == op
-        print 'BEFORE'
+        #print 'BEFORE'
-        for node in env.toposort():
+        #for node in env.toposort():
-            print node.op
+        #    print node.op
-        print printing.pprint(node.outputs[0])
+        #print printing.pprint(node.outputs[0])
-        print '----'
+        #print '----'
        theano.compile.mode.optdb.query(
                theano.compile.mode.OPT_FAST_RUN).optimize(env)
-        print 'AFTER'
+        #print 'AFTER'
-        for node in env.toposort():
+        #for node in env.toposort():
-            print node.op
+        #    print node.op
-        print '===='
+        #print '===='
        assert len(env.toposort()) == 3
        assert str(env.outputs[0].owner.op) == 'OutputGuard'
        assert env.outputs[0].owner.inputs[0].owner.op == crossentropy_softmax_argmax_1hot_with_bias
@@ -385,16 +385,16 @@ class T_CrossentropyCategorical1Hot(unittest.TestCase):
                [x, one_of_n],
                [g_x])
-        print 'BEFORE'
+        #print 'BEFORE'
-        for node in env.toposort():
+        #for node in env.toposort():
-            print node.op, node.inputs
+        #    print node.op, node.inputs
-        print '----'
+        #print '----'
        theano.compile.mode.optdb.query(
                theano.compile.mode.OPT_FAST_RUN).optimize(env)
-        print 'AFTER'
+        #print 'AFTER'
-        for node in env.toposort():
+        #for node in env.toposort():
-            print node.op, node.inputs
+        #    print node.op, node.inputs
        # the function has 9 ops because the dimshuffle and elemwise{second} aren't getting
        # cleaned up as well as we'd like.
@@ -428,16 +428,16 @@ class T_CrossentropyCategorical1Hot(unittest.TestCase):
                [x, one_of_n],
                [g_x])
-        print 'BEFORE'
+        #print 'BEFORE'
-        for node in env.toposort():
+        #for node in env.toposort():
-            print node.op, node.inputs
+        #    print node.op, node.inputs
-        print '----'
+        #print '----'
        theano.compile.mode.optdb.query(
                theano.compile.mode.OPT_FAST_RUN).optimize(env)
-        print 'AFTER'
+        #print 'AFTER'
-        for node in env.toposort():
+        #for node in env.toposort():
-            print node.op, node.inputs
+        #    print node.op, node.inputs
        # the function has 9 ops because the dimshuffle and elemwise{second} aren't getting
        # cleaned up as well as we'd like.
@@ -1021,9 +1021,9 @@ class Test_softmax_opt:
        # test that function contains softmax and no div.
        f = theano.function([c],p_y, mode=self.mode)
        f_ops = [n.op for n in f.maker.env.toposort()]
-        print '--- f ='
+        #print '--- f ='
-        printing.debugprint(f)
+        #printing.debugprint(f)
-        print '==='
+        #print '==='
        assert len(f_ops) == 1
        assert softmax in f_ops
        f(self.rng.rand(3,4).astype(config.floatX))
@@ -1041,9 +1041,9 @@ class Test_softmax_opt:
        finally:
            config.warn.sum_div_dimshuffle_bug = backup
        g_ops = [n.op for n in g.maker.env.toposort()]
-        print '--- g ='
+        #print '--- g ='
-        printing.debugprint(g)
+        #printing.debugprint(g)
-        print '==='
+        #print '==='
        raise SkipTest('Optimization not enabled for the moment')
        assert len(g_ops) == 2
@@ -1058,7 +1058,7 @@ class Test_softmax_opt:
        # test that function contains softmax and no div.
        f = theano.function([c],p_y)
-        printing.debugprint(f)
+        #printing.debugprint(f)
        # test that function contains softmax and no div.
        backup = config.warn.sum_div_dimshuffle_bug
@@ -1067,7 +1067,7 @@ class Test_softmax_opt:
            g = theano.function([c],T.grad(p_y.sum(), c))
        finally:
            config.warn.sum_div_dimshuffle_bug = backup
-        printing.debugprint(g)
+        #printing.debugprint(g)
        raise SkipTest('Optimization not enabled for the moment')
    def test_1d_basic(self):
@@ -1077,7 +1077,7 @@ class Test_softmax_opt:
        # test that function contains softmax and no div.
        f = theano.function([c], p_y)
-        printing.debugprint(f)
+        #printing.debugprint(f)
        # test that function contains softmax and no div.
        backup = config.warn.sum_div_dimshuffle_bug
@@ -1086,7 +1086,7 @@ class Test_softmax_opt:
            g = theano.function([c], T.grad(p_y.sum(), c))
        finally:
            config.warn.sum_div_dimshuffle_bug = backup
-        printing.debugprint(g)
+        #printing.debugprint(g)
        raise SkipTest('Optimization not enabled for the moment')
    # REPEAT 3 CASES in presence of log(softmax) with the advanced indexing etc.

--- a/theano/tensor/randomstreams.py
+++ b/theano/tensor/randomstreams.py
-"""Define RandomStreams, providing random number variables for Theano graphs."""
+"""Define RandomStreams, providing random number variables for Theano
+graphs.
+"""
 __docformat__ = "restructuredtext en"
 import sys
@@ -8,6 +11,7 @@ from theano.compile import module, In, Component
 from theano.gof import Container
 from theano.tensor import raw_random
 class RandomStreamsInstance(object):
    """RandomStreamsInstance"""
    def __init__(self, random_streams, memo, default_seed):
@@ -18,24 +22,26 @@ class RandomStreamsInstance(object):
    def initialize(self, seed=None):
        """Initialize each random stream
-        :param seed: each random stream will be assigned a unique state that depends
+        :param seed: each random stream will be assigned a unique
-        deterministically on this value.
+        state that depends deterministically on this value.
        :type seed: None or integer in range 0 to 2**30
        :rtype: None
        """
        self.seed(seed)
    def seed(self, seed=None):
        """Re-initialize each random stream
-        :param seed: each random stream will be assigned a unique state that depends
+        :param seed: each random stream will be assigned a unique
-        deterministically on this value.
+        state that depends deterministically on this value.
        :type seed: None or integer in range 0 to 2**30
        :rtype: None
        """
        if seed is None:
            seed = self.default_seed
@@ -43,19 +49,24 @@ class RandomStreamsInstance(object):
        #seed = self.default_seed if seed is None else seed
        seedgen = numpy.random.RandomState(seed)
        for old_r, new_r in self.random_streams.random_state_variables:
-            old_r_seed = seedgen.randint(2**30)
+            old_r_seed = seedgen.randint(2 ** 30)
            old_r_container = self.memo[old_r].value
            if old_r_container.value is None:
-                #the cast to int here makes it work on 32bit machines, not sure why
+                #the cast to int here makes it work on 32bit machines,
-                old_r_container.value = numpy.random.RandomState(int(old_r_seed))
+                #not sure why
+                old_r_container.value = numpy.random.RandomState(
+                    int(old_r_seed))
            else:
-                #the cast to int here makes it work on 32bit machines, not sure why
+                #the cast to int here makes it work on 32bit machines,
+                #not sure why
                old_r_container.value.seed(int(old_r_seed))
    def __getitem__(self, item):
-        """Retrieve the numpy RandomState instance associated with a particular stream
+        """Retrieve the numpy RandomState instance associated with a
+        particular stream
-        :param item: a variable of type RandomStateType, associated with this RandomStream
+        :param item: a variable of type RandomStateType, associated
+        with this RandomStream
        :rtype: numpy RandomState (or None, before initialize)
@@ -67,9 +78,11 @@ class RandomStreamsInstance(object):
        raise KeyError(item)
    def __setitem__(self, item, val):
-        """Set the numpy RandomState instance associated with a particular stream
+        """Set the numpy RandomState instance associated with a
+        particular stream
-        :param item: a variable of type RandomStateType, associated with this RandomStream
+        :param item: a variable of type RandomStateType, associated
+        with this RandomStream
        :param val: the new value
        :type val: numpy RandomState
@@ -78,7 +91,8 @@ class RandomStreamsInstance(object):
        """
        if type(val) is not numpy.random.RandomState:
-            raise TypeError('only values of type RandomState are permitted', val)
+            raise TypeError('only values of type RandomState are permitted',
+                            val)
        for old_r, new_r in self.random_streams.random_state_variables:
            if item is old_r:
                container = self.memo[item].value
@@ -86,24 +100,34 @@ class RandomStreamsInstance(object):
                return
        raise KeyError(item)
 class RandomStreams(Component, raw_random.RandomStreamsBase):
-    """Module component with similar interface to numpy.random (numpy.random.RandomState)"""
+    """Module component with similar interface to numpy.random
+    (numpy.random.RandomState)
+    """
    random_state_variables = []
-    """A list of pairs of the form (input_r, output_r).  This will be over-ridden by the module
+    """A list of pairs of the form (input_r, output_r).  This will be
-    instance to contain stream generators.
+    overridden by the module instance to contain stream
+    generators.
    """
    default_instance_seed = None
-    """Instance variable should take None or integer value.  Used to seed the random number
+    """Instance variable should take None or integer value.  Used to
-    generator that provides seeds for member streams"""
+    seed the random number generator that provides seeds for member
+    streams
-    def __init__(self, seed=None):
    """
-        :type seed: None or int
-        :param seed: a default seed to initialize the RandomState instances after build.  See
+    def __init__(self, seed=None):
-        `RandomStreamsInstance.__init__` for more details.
+        """:type seed: None or int
+        :param seed: a default seed to initialize the RandomState
+        instances after build.  See `RandomStreamsInstance.__init__`
+        for more details.
        """
        super(RandomStreams, self).__init__()
        self.random_state_variables = []
@@ -124,7 +148,8 @@ class RandomStreams(Component, raw_random.RandomStreamsBase):
        """override `Component.build` """
        if self not in memo:
            print 'creating RandomStreamsInstance'
-            memo[self] = RandomStreamsInstance(self, memo, self.default_instance_seed)
+            memo[self] = RandomStreamsInstance(self, memo,
+                                               self.default_instance_seed)
        return memo[self]
    def gen(self, op, *args, **kwargs):
@@ -136,14 +161,15 @@ class RandomStreams(Component, raw_random.RandomStreamsBase):
        :param kwargs: interpreted by `op`
-        :returns: The symbolic random draw part of op()'s return value.  This function stores
+        :returns: The symbolic random draw part of op()'s return
-        the updated RandomStateType Variable for use at `build` time.
+        value.  This function stores the updated RandomStateType
+        Variable for use at `build` time.
        :rtype: TensorVariable
        """
        random_state_variable = raw_random.random_state_type()
        new_r, out = op(random_state_variable, *args, **kwargs)
        out.rng = random_state_variable
        self.random_state_variables.append((random_state_variable, new_r))
        return out
--- a/theano/tensor/raw_random.py
+++ b/theano/tensor/raw_random.py
--- a/theano/tensor/shared_randomstreams.py
+++ b/theano/tensor/shared_randomstreams.py
-"""Define RandomStreams, providing random number variables for Theano graphs."""
+"""Define RandomStreams, providing random number variables for Theano
+graphs.
+"""
 __docformat__ = "restructuredtext en"
-import copy, sys
+import copy
+import sys
 import numpy
 from theano.gof import Container
-from theano.compile.sharedvalue import SharedVariable, shared_constructor, shared
+from theano.compile.sharedvalue import (SharedVariable, shared_constructor,
+                                        shared)
 import raw_random
 class RandomStateSharedVariable(SharedVariable):
    pass
 @shared_constructor
-def randomstate_constructor(value, name=None, strict=False, allow_downcast=None, borrow=False):
+def randomstate_constructor(value, name=None, strict=False,
+                            allow_downcast=None, borrow=False):
    """SharedVariable Constructor for RandomState"""
    if not isinstance(value, numpy.random.RandomState):
        raise TypeError
@@ -25,17 +33,26 @@ def randomstate_constructor(value, name=None, strict=False, allow_downcast=None,
            strict=strict,
            allow_downcast=allow_downcast)
 class RandomStreams(raw_random.RandomStreamsBase):
-    """Module component with similar interface to numpy.random (numpy.random.RandomState)"""
+    """Module component with similar interface to numpy.random
+    (numpy.random.RandomState)
+    """
    state_updates = []
-    """A list of pairs of the form (input_r, output_r).  This will be over-ridden by the module
+    """A list of pairs of the form (input_r, output_r).  This will be
-    instance to contain stream generators.
+    overridden by the module instance to contain stream
+    generators.
    """
    default_instance_seed = None
-    """Instance variable should take None or integer value.  Used to seed the random number
+    """Instance variable should take None or integer value.  Used to
-    generator that provides seeds for member streams"""
+    seed the random number generator that provides seeds for member
+    streams
+    """
    gen_seedgen = None
    """numpy.RandomState instance that gen() uses to seed new streams.
@@ -48,8 +65,10 @@ class RandomStreams(raw_random.RandomStreamsBase):
        """
        :type seed: None or int
-        :param seed: a default seed to initialize the RandomState instances after build.  See
+        :param seed: a default seed to initialize the RandomState
-        `RandomStreamsInstance.__init__` for more details.
+        instances after build.  See `RandomStreamsInstance.__init__`
+        for more details.
        """
        super(RandomStreams, self).__init__()
        self.state_updates = []
@@ -59,47 +78,54 @@ class RandomStreams(raw_random.RandomStreamsBase):
    def seed(self, seed=None):
        """Re-initialize each random stream
-        :param seed: each random stream will be assigned a unique state that depends
+        :param seed: each random stream will be assigned a unique
-        deterministically on this value.
+        state that depends deterministically on this value.
        :type seed: None or integer in range 0 to 2**30
        :rtype: None
        """
        if seed is None:
            seed = self.default_instance_seed
        seedgen = numpy.random.RandomState(seed)
        for old_r, new_r in self.state_updates:
-            old_r_seed = seedgen.randint(2**30)
+            old_r_seed = seedgen.randint(2 ** 30)
            old_r.set_value(numpy.random.RandomState(int(old_r_seed)),
                    borrow=True)
    def __getitem__(self, item):
-        """Retrieve the numpy RandomState instance associated with a particular stream
+        """Retrieve the numpy RandomState instance associated with a
+        particular stream
-        :param item: a variable of type RandomStateType, associated with this RandomStream
+        :param item: a variable of type RandomStateType, associated
+        with this RandomStream
        :rtype: numpy RandomState (or None, before initialize)
-        :note: This is kept for compatibility with `tensor.randomstreams.RandomStreams`.  The
+        :note: This is kept for compatibility with
-        simpler syntax ``item.rng.get_value()`` is also valid.
+        `tensor.randomstreams.RandomStreams`.  The simpler syntax
+        ``item.rng.get_value()`` is also valid.
        """
        return item.get_value(borrow=True)
    def __setitem__(self, item, val):
-        """Set the numpy RandomState instance associated with a particular stream
+        """Set the numpy RandomState instance associated with a
+        particular stream
-        :param item: a variable of type RandomStateType, associated with this RandomStream
+        :param item: a variable of type RandomStateType, associated
+        with this RandomStream
        :param val: the new value
        :type val: numpy RandomState
        :rtype:  None
-        :note: This is kept for compatibility with `tensor.randomstreams.RandomStreams`.  The
+        :note: This is kept for compatibility with
-        simpler syntax ``item.rng.set_value(val)`` is also valid.
+        `tensor.randomstreams.RandomStreams`.  The simpler syntax
+        ``item.rng.set_value(val)`` is also valid.
        """
        item.set_value(val, borrow=True)
@@ -113,12 +139,14 @@ class RandomStreams(raw_random.RandomStreamsBase):
        :param kwargs: interpreted by `op`
-        :returns: The symbolic random draw part of op()'s return value.  This function stores
+        :returns: The symbolic random draw part of op()'s return
-        the updated RandomStateType Variable for use at `build` time.
+        value.  This function stores the updated RandomStateType
+        Variable for use at `build` time.
        :rtype: TensorVariable
        """
-        seed = int(self.gen_seedgen.randint(2**30))
+        seed = int(self.gen_seedgen.randint(2 ** 30))
        random_state_variable = shared(numpy.random.RandomState(seed))
        new_r, out = op(random_state_variable, *args, **kwargs)
        out.rng = random_state_variable

--- a/theano/tensor/signal/tests/test_downsample.py
+++ b/theano/tensor/signal/tests/test_downsample.py
@@ -50,8 +50,8 @@ class TestDownsampleFactorMax(unittest.TestCase):
        for maxpoolshp in maxpoolshps:
            for ignore_border in [True,False]:
-                print 'maxpoolshp =', maxpoolshp
+                #print 'maxpoolshp =', maxpoolshp
-                print 'ignore_border =', ignore_border
+                #print 'ignore_border =', ignore_border
                ## Pure Numpy computation
                numpy_output_val = self.numpy_max_pool_2d(imval, maxpoolshp, ignore_border)
@@ -74,8 +74,8 @@ class TestDownsampleFactorMax(unittest.TestCase):
        for maxpoolshp in maxpoolshps:
            for ignore_border in [True,False]:
-                print 'maxpoolshp =', maxpoolshp
+                #print 'maxpoolshp =', maxpoolshp
-                print 'ignore_border =', ignore_border
+                #print 'ignore_border =', ignore_border
                def mp(input):
                    return DownsampleFactorMax(maxpoolshp, ignore_border=ignore_border)(input)
                utt.verify_grad(mp, [imval], rng=rng)
@@ -89,8 +89,8 @@ class TestDownsampleFactorMax(unittest.TestCase):
        for maxpoolshp in maxpoolshps:
            for ignore_border in [True,False]:
-                print 'maxpoolshp =', maxpoolshp
+                #print 'maxpoolshp =', maxpoolshp
-                print 'ignore_border =', ignore_border
+                #print 'ignore_border =', ignore_border
                numpy_output_val = self.numpy_max_pool_2d(imval, maxpoolshp, ignore_border)
                output = max_pool_2d(images, maxpoolshp, ignore_border)
@@ -110,8 +110,8 @@ class TestDownsampleFactorMax(unittest.TestCase):
        for maxpoolshp in maxpoolshps:
            for ignore_border in [True,False]:
-                print 'maxpoolshp =', maxpoolshp
+                #print 'maxpoolshp =', maxpoolshp
-                print 'ignore_border =', ignore_border
+                #print 'ignore_border =', ignore_border
                numpy_output_val = self.numpy_max_pool_2d(imval, maxpoolshp, ignore_border)
                output = max_pool_2d(images, maxpoolshp, ignore_border)
@@ -144,8 +144,8 @@ class TestDownsampleFactorMax(unittest.TestCase):
        for maxpoolshp in maxpoolshps:
            for ignore_border in [True,False]:
-                print 'maxpoolshp =', maxpoolshp
+                #print 'maxpoolshp =', maxpoolshp
-                print 'ignore_border =', ignore_border
+                #print 'ignore_border =', ignore_border
                numpy_output_val = self.numpy_max_pool_2d(imval, maxpoolshp, ignore_border)
                output = max_pool_2d(images, maxpoolshp, ignore_border)

--- a/theano/tensor/tests/mlp_test.py
+++ b/theano/tensor/tests/mlp_test.py
@@ -264,7 +264,7 @@ def test_mlp():
    ######################
    # BUILD ACTUAL MODEL #
    ######################
-    print '... building the model'
+    #print '... building the model'
    # allocate symbolic variables for the data
    index = T.lscalar()    # index to a [mini]batch
@@ -302,8 +302,8 @@ def test_mlp():
                x:train_set_x[index*batch_size:(index+1)*batch_size],
                y:train_set_y[index*batch_size:(index+1)*batch_size]},
            mode=mode)
-    print 'MODEL 1'
+    #print 'MODEL 1'
-    theano.printing.debugprint(train_model, print_type=True)
+    #theano.printing.debugprint(train_model, print_type=True)
    assert any([isinstance(i.op,T.nnet.CrossentropySoftmax1HotWithBiasDx) for i in train_model.maker.env.toposort()])
    # Even without FeatureShape
@@ -313,9 +313,9 @@ def test_mlp():
            givens={
                x:train_set_x[index*batch_size:(index+1)*batch_size],
                y:train_set_y[index*batch_size:(index+1)*batch_size]})
-    print
+    #print
-    print 'MODEL 2'
+    #print 'MODEL 2'
-    theano.printing.debugprint(train_model, print_type=True)
+    #theano.printing.debugprint(train_model, print_type=True)
    assert any([isinstance(i.op,T.nnet.CrossentropySoftmax1HotWithBiasDx) for i in train_model.maker.env.toposort()])
 if __name__ == '__main__':

--- a/theano/tensor/tests/test_basic.py
+++ b/theano/tensor/tests/test_basic.py
@@ -3049,7 +3049,7 @@ class T_Join_and_Split(unittest.TestCase):
        s = stack(a, b, a, b)
        f = function([a, b], s, mode=self.mode)
        val = f(1, 2)
-        print val
+        #print val
        self.assertTrue(numpy.all(val == [1, 2, 1, 2]))
        topo = f.maker.env.toposort()
        assert len([n for n in topo if isinstance(n.op, opt.MakeVector)]) > 0
@@ -3588,8 +3588,8 @@ class T_add(unittest.TestCase):
                     ("/", lambda x,y: x/y))
            for s, fn in tests:
                f = inplace_func([a,b], fn(a, b))
-                print 'valid output:', fn(a.data, b.data)
+                #print 'valid output:', fn(a.data, b.data)
-                print 'theano output:', f(a.data, b.data)
+                #print 'theano output:', f(a.data, b.data)
                self.assertTrue(a.type.values_eq_approx(fn(a.data, b.data), f(a.data, b.data)))
    def test_grad_scalar_l(self):
@@ -4385,8 +4385,8 @@ class TestARange(unittest.TestCase):
        df = function([dstart, dstop], dout)
        assert dout.dtype == dstart.type.dtype
-        print df(0.2, 5.3)
+        #print df(0.2, 5.3)
-        print numpy.arange(0.2, 5.3)
+        #print numpy.arange(0.2, 5.3)
        assert numpy.all(df(0.2, 5.3) == numpy.arange(0.2, 5.3))
        assert numpy.all(df(0.8, 5.3) == numpy.arange(0.8, 5.3))
        assert numpy.all(df(-0.7, 5.3) == numpy.arange(-0.7, 5.3))
@@ -4957,8 +4957,8 @@ def test_var():
    f = function([a], var(a))
    a_val = numpy.arange(60).reshape(3,4,5)
-    print numpy.var(a_val)
+    #print numpy.var(a_val)
-    print f(a_val)
+    #print f(a_val)
    assert numpy.allclose(numpy.var(a_val), f(a_val))
    f = function([a], var(a, axis=0))
@@ -4994,9 +4994,9 @@ def test_default():
         "It is actually a problem of DEBUG_MODE, see #626."))
 def test_default_state():
    x, y = scalars('xy')
-    print config.floatX
+    #print config.floatX
-    print x.type
+    #print x.type
-    print y.type
+    #print y.type
    z = default(x, 3.8)
    new_x = y + z
    f = function([y, compile.In(x, update = new_x, value = 12.0)], new_x)

--- a/theano/tensor/tests/test_naacl09.py
+++ b/theano/tensor/tests/test_naacl09.py
@@ -185,7 +185,7 @@ class QuadraticDenoisingAA(module.Module):
        #self.validate = theano.Method(self.input, [self.cost, self.output])
    def _instance_initialize(self, obj, input_size, hidden_size, seed, lr, qfilter_relscale):
-        print 'QDAA init'
+        #print 'QDAA init'
        """
        qfilter_relscale is the initial range for any quadratic filters (relative to the linear
        filter's initial range)
@@ -454,11 +454,11 @@ class ConvolutionalMLP(module.FancyModule):
            i.initialize(input_size=self.input_size,
                    hidden_size=self.input_representation_size, noise_level=noise_level,
                    seed=int(R.random_integers(2**30)), lr=lr, qfilter_relscale=qfilter_relscale)
-            print type(i.w1)
+            #print type(i.w1)
            assert isinstance(i.w1, N.ndarray)
        for i in self.input_representations[1:]:
-            print type(i.w1)
+            #print type(i.w1)
            assert isinstance(i.w1, N.ndarray)
            assert (i.w1 == self.input_representations[0].w1).all()
            assert (i.w2 == self.input_representations[0].w2).all()
@@ -528,7 +528,7 @@ def create_realistic(window_size=3,#7,
 def test_naacl_model(iters_per_unsup=3, iters_per_sup=3,
        optimizer=None, realistic=False):
-    print "BUILDING MODEL"
+    #print "BUILDING MODEL"
    import time
    t = time.time()
@@ -545,7 +545,7 @@ def test_naacl_model(iters_per_unsup=3, iters_per_sup=3,
    else:
        m = create(compile_mode=mode)
-    print 'BUILD took %.3fs'%(time.time() - t)
+    #print 'BUILD took %.3fs'%(time.time() - t)
    prog_str = []
    idx_of_node = {}
    for i, node in enumerate(m.pretraining_update.maker.env.toposort()):
@@ -557,7 +557,7 @@ def test_naacl_model(iters_per_unsup=3, iters_per_sup=3,
    #print input_pretraining_gradients[4].owner.inputs[1].owner.inputs
    #sys.exit()
-    print "PROGRAM LEN %i HASH %i"% (len(m.pretraining_update.maker.env.nodes), reduce(lambda a, b: hash(a) ^ hash(b),prog_str))
+    #print "PROGRAM LEN %i HASH %i"% (len(m.pretraining_update.maker.env.nodes), reduce(lambda a, b: hash(a) ^ hash(b),prog_str))
    rng = N.random.RandomState(unittest_tools.fetch_seed(23904))
@@ -565,35 +565,35 @@ def test_naacl_model(iters_per_unsup=3, iters_per_sup=3,
    targets = N.asarray([0,3,4,2,3,4,4,2,1,0])
    #print inputs
-    print 'UNSUPERVISED PHASE'
+    #print 'UNSUPERVISED PHASE'
    t = time.time()
    for i in xrange(3):
        for j in xrange(iters_per_unsup):
            m.pretraining_update(*inputs)
        s0, s1 = [str(j) for j in m.pretraining_update(*inputs)]
-        print 'huh?', i, iters_per_unsup, iters_per_unsup * (i+1), s0, s1
+        #print 'huh?', i, iters_per_unsup, iters_per_unsup * (i+1), s0, s1
    if iters_per_unsup == 3:
        assert s0.startswith('0.927793')#'0.403044')
        assert s1.startswith('0.068035')#'0.074898')
-    print 'UNSUPERVISED took %.3fs'%(time.time() - t)
+    #print 'UNSUPERVISED took %.3fs'%(time.time() - t)
-    print 'FINETUNING GRAPH'
+    #print 'FINETUNING GRAPH'
-    print 'SUPERVISED PHASE COSTS (%s)'%optimizer
+    #print 'SUPERVISED PHASE COSTS (%s)'%optimizer
    t = time.time()
    for i in xrange(3):
        for j in xrange(iters_per_unsup):
            m.finetuning_update(*(inputs + [targets]))
        s0 = str(m.finetuning_update(*(inputs + [targets])))
-        print iters_per_sup * (i+1), s0
+        #print iters_per_sup * (i+1), s0
    if iters_per_sup == 10:
        s0f = float(s0)
        assert 19.7042 < s0f and s0f < 19.7043
-    print 'SUPERVISED took %.3fs'%( time.time() - t)
+    #print 'SUPERVISED took %.3fs'%( time.time() - t)
 def jtest_main():
    from theano import gof
    JTEST = theano.compile.mode.optdb.query(*sys.argv[2:])
-    print 'JTEST', JTEST
+    #print 'JTEST', JTEST
    theano.compile.register_optimizer('JTEST', JTEST)
    optimizer = eval(sys.argv[1])
    test_naacl_model(optimizer, 10, 10, realistic=False)