提交 a300e1e2 authored 作者: Frederic Bastien's avatar Frederic Bastien

added new optimizer InputToGpuOptimizer that transfert the input of a graph on…

added new optimizer InputToGpuOptimizer that transfers the inputs of a graph to the gpu card when needed. This makes the gpu code work when not using shared variables
上级 000b5da8
......@@ -2,9 +2,10 @@ import sys
import theano
import numpy
from theano import tensor, scalar, compile
from theano.gof import local_optimizer, EquilibriumDB, SequenceDB
from theano.gof import local_optimizer, EquilibriumDB, SequenceDB, Optimizer, toolbox, DestroyHandler
from theano.sandbox.cuda.basic_ops import *
from theano.sandbox.cuda.type import CudaNdarrayType
from theano.sandbox.cuda.blas import gpu_dot22, gpu_gemm, GpuConv
from theano.sandbox.cuda.blas import GpuDownsampleFactorMax, GpuDownsampleFactorMaxGrad
from theano.sandbox.cuda.nnet import (
......@@ -32,6 +33,33 @@ def register_opt(*tags, **kwargs):
return local_opt
return f
class InputToGpuOptimizer(Optimizer):
    """Transfer the inputs of a graph to the gpu when needed.

    Inserting the host->gpu->host round trip on each input lets the
    later local optimizers move Ops onto the gpu even when the graph
    uses no shared variables.  Doing it in one dedicated pass over the
    env should also be faster than relying on the other optimizers.
    """
    def __init__(self):
        Optimizer.__init__(self)

    def add_requirements(self, env):
        # replace_validate() below needs the ReplaceValidate feature;
        # DestroyHandler keeps destructive Ops consistent during replacement.
        env.extend(toolbox.ReplaceValidate())
        env.extend(DestroyHandler())

    def apply(self, env):
        for inp in env.inputs:
            # Inputs already on the gpu need no transfer.
            if isinstance(inp.type, CudaNdarrayType):
                continue
            try:
                # host_from_gpu(gpu_from_host(x)) is an identity on the
                # value, but gives the gpu optimizers a transfer to latch onto.
                new_input = host_from_gpu(gpu_from_host(inp))
                env.replace_validate(inp, new_input,
                        "To allow further optimisation to move Ops to gpu")
            except Exception:
                # Only float32 is supported on the gpu, so the transfer
                # can legitimately fail; best-effort, skip that input.
                pass
# Register before all other gpu optimizers so that the graph inputs are
# already on the gpu when those optimizers run.
gpu_seqopt.register('InputToGpuOptimizer', InputToGpuOptimizer(),
0, 'fast_run', 'fast_compile', 'merge')  # TODO: how to make it mandatory for gpu_seqopt?
@local_optimizer([])
def local_cut_gpu_host_gpu(node):
if tensor.opt.opt.check_chain(node, gpu_from_host, host_from_gpu):
......
import sys, time
from theano.compile.sandbox.sharedvalue import shared
from theano.compile.sandbox.pfunc import pfunc
from theano import tensor
import theano
import numpy
# Skip the whole module if the optional cuda_ndarray package is not
# available: every test here needs a working cuda backend.
from nose.plugins.skip import SkipTest
try:
    import cuda_ndarray
except ImportError:
    # nose treats SkipTest raised at import time as "module skipped".
    raise SkipTest('Optional package cuda_ndarray not available')
import theano.sandbox.cuda as cuda
def test_no_shared_var_graph():
    """A graph with no shared variables must still compile for the gpu:
    the InputToGpuOptimizer has to insert the host<->gpu transfers itself.
    """
    x = tensor.fmatrix()
    y = tensor.fmatrix()
    fn = theano.function([x, y], [x + y])
    nodes = fn.maker.env.toposort()
    # Expected: 2 transfers in, 1 gpu elemwise, 1 transfer out.
    assert len(nodes) == 4
    for op_class in (cuda.GpuElemwise, cuda.GpuFromHost, cuda.HostFromGpu):
        assert any(isinstance(node.op, op_class) for node in nodes)
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论