Small fix to, and a new test for, the optimization that moves scan to the GPU.

21f930ce · Frederic Bastien · f7e91849 · 21f930ce · 21f930ce
--- a/theano/scan_module/__init__.py
+++ b/theano/scan_module/__init__.py
@@ -598,7 +598,7 @@ if cuda.cuda_available:
    def safe_to_gpu(x):
        if (isinstance(x.type, TensorType) and
-            x.type.dtype == config.floatX):
+            x.type.dtype == 'float32'):
            return gpu_from_host(x)
        else:
            return x
@@ -611,7 +611,7 @@ if cuda.cuda_available:
    def tensor_to_cuda(x):
        if (isinstance(x.type, TensorType) and
-            x.type.dtype == config.floatX):
+            x.type.dtype == 'float32'):
            y = CudaNdarrayType( broadcastable = x.type.broadcastable)()
            if x.name :
                y.name = x.name +'[cuda]'
@@ -629,11 +629,11 @@ if cuda.cuda_available:
        """
        if node.op == gpu_from_host:
-            # NOT TESTED!!!!
            host_input = node.inputs[0]
            if ( host_input.owner
                and host_input.owner.op == scan_op.Scan
                and not host_input.owner.op.info['gpu']):
+                # NOT TESTED!!!!
                thescan = host_input.owner.op
                inputs = host_input.owner.inputs
                # I need to cast thescan.inputs to gpuhost stuff
@@ -697,10 +697,3 @@ if cuda.cuda_available:
                outputs = [safe_to_cpu(x) for x in _outputs]
                return outputs
        return False
--- a/theano/scan_module/tests/test_scan.py
+++ b/theano/scan_module/tests/test_scan.py
@@ -226,6 +226,58 @@ class T_Scan(unittest.TestCase):
        theano_values = f2(v_u,v_x0, W_in, W)
        assert numpy.allclose(theano_values, v_out)
+    # as test_one_sequence_one_output_weights, but on the gpu
+    def test_one_sequence_one_output_weights_gpu(self):
+        def f_rnn(u_t,x_tm1,W_in, W):
+            return u_t*W_in+x_tm1*W
+        u    = theano.tensor.fvector('u')
+        x0   = theano.tensor.fscalar('x0')
+        W_in = theano.tensor.fscalar('win')
+        W    = theano.tensor.fscalar('w')
+        mode = theano.compile.mode.get_default_mode().including('gpu')
+        output, updates = theano.scan(f_rnn, u,x0,[W_in,W]
+                                      , n_steps           = None
+                                      , truncate_gradient = -1
+                                      , go_backwards      = False
+                                      , mode = mode)
+        f2   = theano.function([u,x0,W_in,W], output, updates = updates,
+                               allow_input_downcast = True,
+                               mode = mode)
+        # get random initial values
+        rng  = numpy.random.RandomState(utt.fetch_seed())
+        v_u  = rng.uniform( size = (4,), low = -5., high = 5.)
+        v_x0 = rng.uniform()
+        W    = rng.uniform()
+        W_in = rng.uniform()
+        # compute the output in numpy
+        v_out = numpy.zeros((4,))
+        v_out[0] = v_u[0]*W_in + v_x0 * W
+        for step in xrange(1,4):
+            v_out[step] = v_u[step]*W_in + v_out[step-1] * W
+        theano_values = f2(v_u,v_x0, W_in, W)
+        assert numpy.allclose(theano_values, v_out)
+        topo = f2.maker.env.toposort()
+        assert sum([isinstance(node.op, theano.sandbox.cuda.HostFromGpu) for node in topo]) == 1
+        assert sum([isinstance(node.op, theano.sandbox.cuda.GpuFromHost) for node in topo]) == 4
+        scan_node = [node for node in topo if isinstance(node.op, theano.scan_module.scan_op.Scan)]
+        assert len(scan_node) == 1
+        scan_node = scan_node[0]
+        scan_node_topo = scan_node.op.fn.maker.env.toposort()
+        theano.printing.pydotprint(f2, outfile='out.png', high_contrast=True)
+        theano.printing.pydotprint(scan_node.op.fn,
+                                   outfile='inner.png', high_contrast=True)
+        #check that there is less gpu transfer
+        assert any([isinstance(node.op, theano.sandbox.cuda.GpuElemwise) for node in scan_node_topo])
+        assert not any([isinstance(node.op, theano.sandbox.cuda.HostFromGpu) for node in scan_node_topo])
+        assert not any([isinstance(node.op, theano.sandbox.cuda.GpuFromHost) for node in scan_node_topo])
    # simple rnn, one input, one state, weights for each; input/state
    # are vectors, weights are scalars; using shared variables