提交 df11db9e authored 作者: Razvan Pascanu's avatar Razvan Pascanu

Optimization + test for specify_shape on GPU

Removes host_from_gpu and gpu_from_host around specify_shape nodes, making them run transparently on the GPU
上级 927ac9a4
......@@ -236,6 +236,28 @@ def local_gpu_dimshuffle_0(node):
return False
@register_opt()
@local_optimizer([])
def local_gpu_specifyShape_0(node):
    """
    Move SpecifyShape across host<->GPU transfer ops so the check can
    run on the GPU:

        specify_shape(host_from_gpu(x)) -> host_from_gpu(specify_shape(x))
        gpu_from_host(specify_shape(x)) -> specify_shape(gpu_from_host(x))
    """
    # Case 1: SpecifyShape applied directly to a host_from_gpu output.
    if isinstance(node.op, tensor.SpecifyShape):
        inp = node.inputs[0]
        shape_args = node.inputs[1:]
        if inp.owner and isinstance(inp.owner.op, HostFromGpu):
            on_gpu = tensor.specify_shape(gpu_from_host(inp), *shape_args)
            return [host_from_gpu(on_gpu)]
    # Case 2: gpu_from_host applied to a SpecifyShape output.
    if node.op == gpu_from_host:
        host_inp = node.inputs[0]
        if host_inp.owner and isinstance(host_inp.owner.op,
                                         tensor.SpecifyShape):
            spec_node = host_inp.owner
            moved = gpu_from_host(spec_node.inputs[0])
            return [tensor.specify_shape(moved, *spec_node.inputs[1:])]
    return False
@register_opt()
@local_optimizer([])
def local_gpu_dot_to_dot22(node):
......
......@@ -68,6 +68,14 @@ def test_gpualloc():
assert numpy.any(ininstance(x.op, cuda.GpuAlloc) for x in l )
def test_gpuspecifyshape():
    """The specify_shape optimization must keep the update graph on the
    GPU: no HostFromGpu transfer may remain after compilation."""
    shared_x = theano.shared(numpy.ones(3, dtype='float32'), 'x')
    constrained = theano.tensor.specify_shape(shared_x + numpy.float32(1),
                                              (3,))
    fn = theano.function([],
                         updates={shared_x: constrained * numpy.float32(2)})
    topo = fn.maker.env.toposort()
    # A lingering HostFromGpu would mean the graph round-trips to the host.
    assert not numpy.any([isinstance(n.op, cuda.HostFromGpu) for n in topo])
def test_softmax():
x = tensor.fmatrix()
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论