提交 b8b1dcf0 authored 作者: Frederic Bastien's avatar Frederic Bastien

modif Shape_i to work with CudaNdarray and Tensor Variable

This makes the GPU version a little faster. More things remain to be fixed.
上级 b096f1ff
...@@ -431,3 +431,35 @@ def test_elemwise_collapse7(atol=1e-6): ...@@ -431,3 +431,35 @@ def test_elemwise_collapse7(atol=1e-6):
ans=(a+2).reshape(shape[0],1,shape[1],shape[2]) ans=(a+2).reshape(shape[0],1,shape[1],shape[2])
assert numpy.allclose(out,ans, atol=atol) assert numpy.allclose(out,ans, atol=atol)
print "Expected collapse to c contiguous" print "Expected collapse to c contiguous"
def test_hostfromgpu_shape_i():
    """
    Test that the shape is lifted over hostfromgpu.

    After optimization, ``(a + b).shape`` should be computed with
    ``Shape_i`` ops feeding a ``MakeVector`` — i.e. without evaluating
    ``a + b`` (and, on the GPU, without transferring data back to the
    host just to read its shape).
    """
    # NOTE(review): the original body contained a stray `pass` and unused
    # variables (c, ca, cb, cc, cv); they have been removed.
    m = mode_with_gpu.including('local_dot_to_dot22',
                                'local_dot22_to_dot22scalar',
                                'specialize')
    a = T.fmatrix('a')
    b = T.fmatrix('b')
    av = numpy.asarray(numpy.random.rand(5, 5), dtype='float32')
    bv = numpy.asarray(numpy.random.rand(5, 5), dtype='float32')
    sa = theano.shared(av, name='sa')
    sb = theano.shared(bv, name='sb')

    # Symbolic inputs: the shape graph must be Shape_i{0}, Shape_i{1},
    # MakeVector — no elemwise addition node should be needed.
    f = theano.function([a, b], [(a + b).shape], mode=m)
    topo = f.maker.env.toposort()
    assert isinstance(topo[0].op, T.opt.Shape_i)
    assert isinstance(topo[1].op, T.opt.Shape_i)
    assert isinstance(topo[2].op, T.opt.MakeVector)

    # Shared variables (which live on the GPU under mode_with_gpu):
    # the same lifting must apply.
    f2 = theano.function([], [(sa + sb).shape], mode=m)
    topo = f2.maker.env.toposort()
    assert isinstance(topo[0].op, T.opt.Shape_i)
    assert isinstance(topo[1].op, T.opt.Shape_i)
    assert isinstance(topo[2].op, T.opt.MakeVector)
...@@ -69,6 +69,14 @@ def __oplist_tag(thing, tag): ...@@ -69,6 +69,14 @@ def __oplist_tag(thing, tag):
thing.__oplist_tags = tags thing.__oplist_tags = tags
def as_cuda_or_tensor_variable(x, name = None, ndim=None):
    """
    Do the same as `as_tensor_variable`, but do not force a GPU value
    onto the CPU: if `x` can convert itself to a CudaNdarrayVariable
    (it has an `_as_CudaNdarrayVariable` method), that conversion is
    used and the value stays on the GPU; otherwise fall back to
    `as_tensor_variable`.
    """
    if hasattr(x, '_as_CudaNdarrayVariable'):
        return x._as_CudaNdarrayVariable() #TODO: pass name and ndim arguments
    return as_tensor_variable(x, name, ndim)
def as_tensor_variable(x, name = None, ndim=None): def as_tensor_variable(x, name = None, ndim=None):
"""Return `x`, transformed into a `TensorType` """Return `x`, transformed into a `TensorType`
......
...@@ -279,7 +279,10 @@ class Shape_i(T.Op): ...@@ -279,7 +279,10 @@ class Shape_i(T.Op):
def __str__(self):
    """Render this op as e.g. ``Shape_i{0}`` in graph printouts."""
    class_name = self.__class__.__name__
    return '%s{%i}' % (class_name, self.i)
def make_node(self, x):
    """Build the Apply node returning dimension `self.i` of `x`'s shape.

    Use as_cuda_or_tensor_variable so this op accepts both
    TensorVariable and CudaNdarrayVariable inputs; as_tensor_variable
    would force a transfer of a GPU variable to the CPU.
    """
    x = T.as_cuda_or_tensor_variable(x)
    if self.i >= x.ndim:
        raise TypeError('x has too few dimensions for Shape_i', (x, self.i))
    return T.Apply(self, [x], [T.lscalar()])
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论