提交 23a061f4 authored 作者: Frederic Bastien's avatar Frederic Bastien

now when we import theano.sandbox.cuda, we don't register gpu optimization in…

Now, when we import theano.sandbox.cuda, we don't register the GPU optimizations in fast_run and inplace. When we call theano.sandbox.cuda.use(), we add the gpu optimizer tags to fast_run and inplace. This way, they don't get applied when use() is not called.
上级 5ad0642b
...@@ -16,6 +16,7 @@ import theano.compile.sandbox ...@@ -16,6 +16,7 @@ import theano.compile.sandbox
import os import os
import theano.config as config import theano.config as config
from theano.compile import optdb
import logging, copy import logging, copy
_logger_name = 'theano_cuda_ndarray' _logger_name = 'theano_cuda_ndarray'
...@@ -44,6 +45,9 @@ def use(device=config.THEANO_GPU): ...@@ -44,6 +45,9 @@ def use(device=config.THEANO_GPU):
logging.getLogger('theano_cuda_ndarray').warning("WARNING: Won't use the GPU as the initialisation of device %i failed. %s" %(device, e)) logging.getLogger('theano_cuda_ndarray').warning("WARNING: Won't use the GPU as the initialisation of device %i failed. %s" %(device, e))
elif use.device_number != device: elif use.device_number != device:
logging.getLogger('theano_cuda_ndarray').warning("WARNING: ignoring call to use(%s), GPU number %i is already in use." %(str(device), use.device_number)) logging.getLogger('theano_cuda_ndarray').warning("WARNING: ignoring call to use(%s), GPU number %i is already in use." %(str(device), use.device_number))
optdb.add_tags('gpu',
'fast_run',
'inplace')
use.device_number = None use.device_number = None
......
...@@ -20,11 +20,7 @@ gpu_cut_copies = EquilibriumDB() ...@@ -20,11 +20,7 @@ gpu_cut_copies = EquilibriumDB()
gpu_seqopt = SequenceDB() gpu_seqopt = SequenceDB()
gpu_seqopt.register('gpu_local_optimizations', gpu_optimizer, 1, 'fast_run', 'inplace') gpu_seqopt.register('gpu_local_optimizations', gpu_optimizer, 1, 'fast_run', 'inplace')
gpu_seqopt.register('gpu_cut_transfers', gpu_cut_copies, 2, 'fast_run', 'inplace') gpu_seqopt.register('gpu_cut_transfers', gpu_cut_copies, 2, 'fast_run', 'inplace')
optdb.register('gpu', optdb.register('gpu', gpu_seqopt, optdb.__priority__.get('inplace_opt', 75) + 5)
gpu_seqopt,
optdb.__priority__.get('inplace_opt', 75) + 5,
'fast_run',
'inplace')
def register_opt(*tags, **kwargs): def register_opt(*tags, **kwargs):
def f(local_opt): def f(local_opt):
......
...@@ -13,6 +13,9 @@ except ImportError: ...@@ -13,6 +13,9 @@ except ImportError:
raise SkipTest('Optional package cuda_ndarray not available') raise SkipTest('Optional package cuda_ndarray not available')
import theano.sandbox.cuda as tcn import theano.sandbox.cuda as tcn
import theano.compile.mode
mode_with_gpu = theano.compile.mode.get_default_mode().including('gpu')
def tes_use(): def tes_use():
tcn.use() tcn.use()
...@@ -23,7 +26,7 @@ def test_elemwise0(): ...@@ -23,7 +26,7 @@ def test_elemwise0():
b = tensor.fmatrix() b = tensor.fmatrix()
f = pfunc([b], [], updates=[(a, a+b)]) f = pfunc([b], [], updates=[(a, a+b)], mode=mode_with_gpu)
a0 = a.value * 1.0 a0 = a.value * 1.0
print 'BEFORE ADD', a.value print 'BEFORE ADD', a.value
...@@ -43,21 +46,21 @@ def test_elemwise1(): ...@@ -43,21 +46,21 @@ def test_elemwise1():
#let debugmode catch any mistakes #let debugmode catch any mistakes
print >> sys.stderr, "STARTING FUNCTION 1" print >> sys.stderr, "STARTING FUNCTION 1"
f = pfunc([b], [], updates=[(a, b**a)]) f = pfunc([b], [], updates=[(a, b**a)], mode=mode_with_gpu)
for i, node in enumerate(f.maker.env.toposort()): for i, node in enumerate(f.maker.env.toposort()):
print i, node print i, node
f(numpy.random.rand(*shape)+0.3) f(numpy.random.rand(*shape)+0.3)
print >> sys.stderr, "STARTING FUNCTION 2" print >> sys.stderr, "STARTING FUNCTION 2"
#let debugmode catch any mistakes #let debugmode catch any mistakes
f = pfunc([b], [], updates=[(a, tensor.exp(b**a))]) f = pfunc([b], [], updates=[(a, tensor.exp(b**a))], mode=mode_with_gpu)
for i, node in enumerate(f.maker.env.toposort()): for i, node in enumerate(f.maker.env.toposort()):
print i, node print i, node
f(numpy.random.rand(*shape)+0.3) f(numpy.random.rand(*shape)+0.3)
print >> sys.stderr, "STARTING FUNCTION 3" print >> sys.stderr, "STARTING FUNCTION 3"
#let debugmode catch any mistakes #let debugmode catch any mistakes
f = pfunc([b], [], updates=[(a, a+b * tensor.exp(b**a))]) f = pfunc([b], [], updates=[(a, a+b * tensor.exp(b**a))], mode=mode_with_gpu)
f(numpy.random.rand(*shape)+0.3) f(numpy.random.rand(*shape)+0.3)
def test_elemwise2(): def test_elemwise2():
...@@ -68,7 +71,7 @@ def test_elemwise2(): ...@@ -68,7 +71,7 @@ def test_elemwise2():
for pattern in [(0,1), (1,0)]: for pattern in [(0,1), (1,0)]:
a = tcn.shared_constructor(rng.rand(*shape), name=None) a = tcn.shared_constructor(rng.rand(*shape), name=None)
b = tensor.Tensor(dtype='float32', broadcastable=[0]*len(shape))() b = tensor.Tensor(dtype='float32', broadcastable=[0]*len(shape))()
f = pfunc([b], [], updates=[(a, (a+b).dimshuffle(pattern))]) f = pfunc([b], [], updates=[(a, (a+b).dimshuffle(pattern))], mode=mode_with_gpu)
has_elemwise = False has_elemwise = False
for i, node in enumerate(f.maker.env.toposort()): for i, node in enumerate(f.maker.env.toposort()):
print >> sys.stderr, i, node print >> sys.stderr, i, node
...@@ -82,7 +85,7 @@ def test_elemwise2(): ...@@ -82,7 +85,7 @@ def test_elemwise2():
a = tcn.shared_constructor(rng.rand(*shape), 'a') a = tcn.shared_constructor(rng.rand(*shape), 'a')
b = tensor.Tensor(dtype='float32', broadcastable=[0]*len(shape))() b = tensor.Tensor(dtype='float32', broadcastable=[0]*len(shape))()
f = pfunc([b], [], updates=[(a, (a+b).dimshuffle([2,0,3,1]) * f = pfunc([b], [], updates=[(a, (a+b).dimshuffle([2,0,3,1]) *
tensor.exp(b**a).dimshuffle([2,0,3,1]))]) tensor.exp(b**a).dimshuffle([2,0,3,1]))], mode=mode_with_gpu)
has_elemwise = False has_elemwise = False
for i, node in enumerate(f.maker.env.toposort()): for i, node in enumerate(f.maker.env.toposort()):
print i, node print i, node
...@@ -103,7 +106,7 @@ def test_elemwise3(): ...@@ -103,7 +106,7 @@ def test_elemwise3():
print (1 + b**a).type print (1 + b**a).type
print tensor.exp((1 + b**a)).type print tensor.exp((1 + b**a)).type
f = pfunc([b], [], updates=[(a, (a+b).dimshuffle([2,0,3,1]) * tensor.exp(1 + f = pfunc([b], [], updates=[(a, (a+b).dimshuffle([2,0,3,1]) * tensor.exp(1 +
b**a).dimshuffle([2,0,3,1]))]) b**a).dimshuffle([2,0,3,1]))], mode=mode_with_gpu)
has_elemwise = False has_elemwise = False
for i, node in enumerate(f.maker.env.toposort()): for i, node in enumerate(f.maker.env.toposort()):
print >> sys.stderr, i, node print >> sys.stderr, i, node
...@@ -119,7 +122,7 @@ def test_elemwise4(): ...@@ -119,7 +122,7 @@ def test_elemwise4():
a = tcn.shared_constructor(numpy.random.rand(*shape), 'a') a = tcn.shared_constructor(numpy.random.rand(*shape), 'a')
b = tensor.fvector() b = tensor.fvector()
c = tensor.fvector() c = tensor.fvector()
f = pfunc([b,c], [], updates=[(a, (a+b.dimshuffle('x', 0)*c.dimshuffle(0, 'x')))]) f = pfunc([b,c], [], updates=[(a, (a+b.dimshuffle('x', 0)*c.dimshuffle(0, 'x')))], mode=mode_with_gpu)
has_elemwise = False has_elemwise = False
for i, node in enumerate(f.maker.env.toposort()): for i, node in enumerate(f.maker.env.toposort()):
print >> sys.stderr, i, node print >> sys.stderr, i, node
......
...@@ -16,13 +16,17 @@ import theano.sandbox.cuda as tcn ...@@ -16,13 +16,17 @@ import theano.sandbox.cuda as tcn
from theano.sandbox.downsample import DownsampleFactorMax from theano.sandbox.downsample import DownsampleFactorMax
import theano.compile.mode
mode_with_gpu = theano.compile.mode.get_default_mode().including('gpu')
def test_dot(): def test_dot():
a = tcn.shared_constructor(numpy.random.rand(4,4), 'a') a = tcn.shared_constructor(numpy.random.rand(4,4), 'a')
b = tensor.fmatrix() b = tensor.fmatrix()
f = pfunc([b], [], updates=[(a, tensor.dot(a,b))]) f = pfunc([b], [], updates=[(a, tensor.dot(a,b))], mode=mode_with_gpu)
a0 = a.value * 1.0 a0 = a.value * 1.0
print a0 print a0
...@@ -41,7 +45,7 @@ def test_gemm(): ...@@ -41,7 +45,7 @@ def test_gemm():
b = tensor.fmatrix('b') b = tensor.fmatrix('b')
c = tensor.fmatrix('c') c = tensor.fmatrix('c')
f = pfunc([b,c], [], updates=[(a, tensor.dot(a,b) + tensor.exp(c))]) f = pfunc([b,c], [], updates=[(a, tensor.dot(a,b) + tensor.exp(c))], mode=mode_with_gpu)
a0 = a.value * 1.0 a0 = a.value * 1.0
print a0 print a0
...@@ -69,7 +73,7 @@ if 0: ...@@ -69,7 +73,7 @@ if 0:
a = tcn.blas.DownsampleFactorMax((2,2),border) a = tcn.blas.DownsampleFactorMax((2,2),border)
dmatrix4 = tensor.TensorType("float32", (False, False, False, False)) dmatrix4 = tensor.TensorType("float32", (False, False, False, False))
b = dmatrix4() b = dmatrix4()
f = pfunc([b], [a(b)]) f = pfunc([b], [a(b)], mode=mode_with_gpu)
bval = numpy.arange(0,d0*d1).reshape(1,1,d0,d1) bval = numpy.arange(0,d0*d1).reshape(1,1,d0,d1)
r = f(bval)[0] r = f(bval)[0]
...@@ -109,7 +113,7 @@ def test_downsample(): ...@@ -109,7 +113,7 @@ def test_downsample():
ds_op = DownsampleFactorMax(ds, ignore_border=ignore_border) ds_op = DownsampleFactorMax(ds, ignore_border=ignore_border)
a = tcn.shared_constructor(numpy.random.rand(*shp), 'a') a = tcn.shared_constructor(numpy.random.rand(*shp), 'a')
f = pfunc([], ds_op(tensor.as_tensor_variable(a))) f = pfunc([], ds_op(tensor.as_tensor_variable(a)), mode=mode_with_gpu)
worked = False worked = False
for i, node in enumerate(f.maker.env.toposort()): for i, node in enumerate(f.maker.env.toposort()):
print i, node print i, node
......
...@@ -12,6 +12,10 @@ try: ...@@ -12,6 +12,10 @@ try:
except ImportError: except ImportError:
raise SkipTest('Optional package cuda_ndarray not available') raise SkipTest('Optional package cuda_ndarray not available')
import theano.compile.mode
mode_with_gpu = theano.compile.mode.get_default_mode().including('gpu')
import theano.sandbox.cuda as cuda import theano.sandbox.cuda as cuda
...@@ -20,7 +24,7 @@ def test_no_shared_var_graph(): ...@@ -20,7 +24,7 @@ def test_no_shared_var_graph():
""" """
a=tensor.fmatrix() a=tensor.fmatrix()
b=tensor.fmatrix() b=tensor.fmatrix()
f = theano.function([a,b],[a+b]) f = theano.function([a,b],[a+b], mode=mode_with_gpu)
l = f.maker.env.toposort() l = f.maker.env.toposort()
assert len(l)==4 assert len(l)==4
assert any(isinstance(x.op,cuda.GpuElemwise) for x in l) assert any(isinstance(x.op,cuda.GpuElemwise) for x in l)
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论