Commit c23e936e authored by Frederic

Add Shape_i c code for the new gpu back-end.

Refactor it to make a registry op c code.
Parent 724d0d32
from theano.compile.ops import ( from theano.compile.ops import (
DeepCopyOp, deep_copy_op, register_deep_copy_op_c_code, DeepCopyOp, deep_copy_op, register_deep_copy_op_c_code,
Shape_i, register_shape_i_c_code,
ViewOp, view_op, register_view_op_c_code) ViewOp, view_op, register_view_op_c_code)
from theano.compile.function_module import * from theano.compile.function_module import *
......
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
import copy import copy
import warnings import warnings
#import theano import theano
from theano import gof from theano import gof
...@@ -155,7 +155,7 @@ class DeepCopyOp(gof.Op): ...@@ -155,7 +155,7 @@ class DeepCopyOp(gof.Op):
# Else, we will return a list of (type name, version) pairs. # Else, we will return a list of (type name, version) pairs.
for t, (c, v) in sorted(self.c_code_and_version.items(), key=lambda pair: str(pair[0])): for t, (c, v) in sorted(self.c_code_and_version.items(), key=lambda pair: str(pair[0])):
if not v: if not v:
warnings.warn("Type %s has C code for OutputGuard, but it has " warnings.warn("Type %s has C code for DeepCopyOp, but it has "
"no version. You should add a 'version' keyword arg " "no version. You should add a 'version' keyword arg "
"when calling register_OutputGuard_c_code." % t, "when calling register_OutputGuard_c_code." % t,
stacklevel=2) stacklevel=2)
...@@ -180,6 +180,99 @@ class DeepCopyOp(gof.Op): ...@@ -180,6 +180,99 @@ class DeepCopyOp(gof.Op):
deep_copy_op = DeepCopyOp() deep_copy_op = DeepCopyOp()
class Shape_i(gof.Op):
    """Return the size of one dimension of a variable.

    L{Op} that outputs ``x.shape[i]`` as a 0-d int64 tensor for a fixed
    dimension index ``i`` given at construction time.

    @note: Non-differentiable.
    """
    # Mapping from Type (the class itself, not an instance) to
    # (C code, version).  In the C code, the name of the input variable
    # is %(iname)s, the output variable is %(oname)s and the dimension
    # index is %(i)s.  Populated via register_shape_i_c_code().
    c_code_and_version = {}

    def __init__(self, i):
        # i: index of the dimension whose size this op returns.
        self.i = i

    def __hash__(self):
        return hash(type(self)) ^ self.i

    def __eq__(self, other):
        return type(self) == type(other) and self.i == other.i

    def __str__(self):
        return '%s{%i}' % (self.__class__.__name__, self.i)

    def make_node(self, x):
        # x could be one of a number of types; the only thing we require
        # is that the variable have a .ndim and that the value have a
        # .shape.
        if not isinstance(x, theano.Variable):
            raise TypeError('x must be Variable with ndim attribute', x)
        if x.ndim <= self.i:
            raise TypeError('x has too few dimensions for Shape_i',
                            (x, self.i))
        return theano.Apply(self, [x], [theano.tensor.lscalar()])

    def perform(self, node, inp, out_):
        # Store x.shape[i] in the 0-d int64 output, reusing the output
        # storage when it already exists.
        x, = inp
        out, = out_
        if out[0] is None:
            out[0] = theano._asarray(x.shape[self.i], dtype='int64')
        else:
            out[0][...] = x.shape[self.i]

    def c_code_cache_version(self):
        version = []
        # If any of the c code is unversioned, we have to return ().
        # Else, we will return a list of (type name, version) pairs.
        for t, (c, v) in sorted(self.c_code_and_version.items(),
                                key=lambda pair: str(pair[0])):
            if not v:
                # Was a copy-paste error: the message used to direct users
                # to register_OutputGuard_c_code.
                warnings.warn("Type %s has C code for Shape_i, but it has "
                              "no version. You should add a 'version' "
                              "keyword arg when calling "
                              "register_shape_i_c_code." % t,
                              stacklevel=2)
                return ()
            version.append((str(t), v))
        return tuple(version)

    def c_code(self, node, name, inames, onames, sub):
        iname, = inames
        oname, = onames
        fail = sub['fail']
        i = self.i
        # Look up the registered C code by the *class* of the input's type.
        itype = node.inputs[0].type.__class__
        if itype in self.c_code_and_version:
            code, version = self.c_code_and_version[itype]
            return code % locals()
        # Else, no C code: fall back to the default (raises NotImplemented).
        return super(Shape_i, self).c_code(node, name, inames, onames, sub)

    def infer_shape(self, node, input_shapes):
        # The output is a 0-d (scalar) tensor.
        return [()]

    def grad(self, inp, grads):
        # Non-differentiable: a shape carries no gradient information.
        return [None]
def register_shape_i_c_code(typ, code, version=()):
    """Tell Shape_i how to generate C code for a Theano Type.

    :param typ: A Theano type. It must be the Theano class itself and not
        an instance of the class.

    :param code: C code that gets the shape of dimension %(i)s for the
        Theano type 'typ'.  Use %(iname)s and %(oname)s for the input and
        output C variable names respectively, and %(i)s for the dimension
        index.

    :param version: A number indicating the version of the code, for cache.
    """
    Shape_i.c_code_and_version[typ] = (code, version)
# List of Theano Types that one can add an extra dimension and for which # List of Theano Types that one can add an extra dimension and for which
# Scan can deal with. # Scan can deal with.
expandable_types = () expandable_types = ()
...@@ -438,6 +438,13 @@ theano.compile.register_view_op_c_code( ...@@ -438,6 +438,13 @@ theano.compile.register_view_op_c_code(
""", """,
version=1) version=1)
theano.compile.register_shape_i_c_code(CudaNdarrayType, """
if(!%(oname)s)
%(oname)s=(PyArrayObject*)PyArray_ZEROS(0, NULL, NPY_INT64, 0);
((npy_int64*)PyArray_DATA(%(oname)s))[0] =
CudaNdarray_HOST_DIMS(%(iname)s)[%(i)s];
""", version=(0,))
# Register CudaNdarrayType to the DeepCopyOp list of types with c code. # Register CudaNdarrayType to the DeepCopyOp list of types with c code.
theano.compile.register_deep_copy_op_c_code( theano.compile.register_deep_copy_op_c_code(
CudaNdarrayType, CudaNdarrayType,
......
...@@ -336,3 +336,39 @@ def test_gpueye(): ...@@ -336,3 +336,39 @@ def test_gpueye():
# M != N, k = 0 # M != N, k = 0
yield check, dtype, 3, 5 yield check, dtype, 3, 5
yield check, dtype, 5, 3 yield check, dtype, 5, 3
def test_hostfromgpu_shape_i():
    """
    Test that the shape is lifted over hostfromgpu
    """
    # Compile mode: gpu mode plus 'specialize' so that .shape is lowered
    # to Shape_i/MakeVector nodes that the lift optimization can act on.
    m = mode_with_gpu.including('local_dot_to_dot22',
                                'local_dot22_to_dot22scalar', 'specialize')
    a = T.fmatrix('a')
    # ca: a symbolic variable directly of the new gpu back-end's array type.
    ca = theano.sandbox.gpuarray.type.GpuArrayType('float32', (False, False))()

    av = numpy.asarray(numpy.random.rand(5, 4), dtype='float32')
    cv = gpuarray.asarray(numpy.random.rand(5, 4),
                          dtype='float32')

    gpu_from_host = theano.sandbox.gpuarray.basic_ops.gpu_from_host
    host_from_gpu = theano.sandbox.gpuarray.basic_ops.host_from_gpu

    # Sanity check: compiling the bare transfer keeps a GpuFromHost node.
    f = theano.function([a], gpu_from_host(a), mode=m)
    assert gpu_from_host in [x.op
                             for x in f.maker.fgraph.toposort()]

    # Taking the shape of gpu_from_host(a): the graph must contain only
    # Shape_i ops on the host input (the transfer is optimized away).
    # NOTE(review): this branch checks T.opt.Shape_i while the branch below
    # checks theano.compile.Shape_i — presumably the same class re-exported
    # after the refactor; confirm and use one spelling.
    f = theano.function([a], gpu_from_host(a).shape, mode=m)
    topo = f.maker.fgraph.toposort()
    assert isinstance(topo[0].op, T.opt.Shape_i)
    assert isinstance(topo[1].op, T.opt.Shape_i)
    assert isinstance(topo[2].op, T.opt.MakeVector)
    assert tuple(f(av)) == (5, 4)

    # Same check in the other direction: host_from_gpu is kept when the
    # value itself is requested...
    f = theano.function([ca], host_from_gpu(ca), mode=m)
    assert host_from_gpu in [x.op
                             for x in f.maker.fgraph.toposort()]

    # ...but only Shape_i/MakeVector remain when just the shape is needed.
    f = theano.function([ca], host_from_gpu(ca).shape, mode=m)
    topo = f.maker.fgraph.toposort()
    assert isinstance(topo[0].op, theano.compile.Shape_i)
    assert isinstance(topo[1].op, theano.compile.Shape_i)
    assert isinstance(topo[2].op, theano.tensor.opt.MakeVector)
    assert tuple(f(cv)) == (5, 4)
...@@ -278,6 +278,13 @@ theano.compile.register_view_op_c_code(GpuArrayType, """ ...@@ -278,6 +278,13 @@ theano.compile.register_view_op_c_code(GpuArrayType, """
Py_XINCREF(%(oname)s); Py_XINCREF(%(oname)s);
""", version=(0,)) """, version=(0,))
theano.compile.register_shape_i_c_code(GpuArrayType, """
if(!%(oname)s)
%(oname)s=(PyArrayObject*)PyArray_ZEROS(0, NULL, NPY_INT64, 0);
((npy_int64*)PyArray_DATA(%(oname)s))[0] =
%(iname)s->ga.dimensions[%(i)s];
""", version=(0,))
theano.compile.register_deep_copy_op_c_code(GpuArrayType, """ theano.compile.register_deep_copy_op_c_code(GpuArrayType, """
Py_XDECREF(%(oname)s); Py_XDECREF(%(oname)s);
%(oname)s = pygpu_copy(%(iname)s, GA_ANY_ORDER); %(oname)s = pygpu_copy(%(iname)s, GA_ANY_ORDER);
......
...@@ -29,6 +29,7 @@ from theano.tensor.subtensor import (get_idx_list, get_canonical_form_slice, ...@@ -29,6 +29,7 @@ from theano.tensor.subtensor import (get_idx_list, get_canonical_form_slice,
from theano import scalar from theano import scalar
from theano.tensor import basic as T from theano.tensor import basic as T
from theano import compile # to register the optimizer built by this file from theano import compile # to register the optimizer built by this file
from theano.compile.ops import Shape_i
from theano.gof.python25 import any, all from theano.gof.python25 import any, all
from theano.gof.opt import (Optimizer, pre_constant_merge, from theano.gof.opt import (Optimizer, pre_constant_merge,
...@@ -637,78 +638,6 @@ T.pprint.assign(lambda pstate, r: r.owner and isinstance( ...@@ -637,78 +638,6 @@ T.pprint.assign(lambda pstate, r: r.owner and isinstance(
r.owner.op, MakeVector), MakeVectorPrinter()) r.owner.op, MakeVector), MakeVectorPrinter())
class Shape_i(T.Op):
    """
    L{Op} to return the shape of a matrix.

    Outputs ``x.shape[i]`` as a 0-d int64 tensor for the fixed dimension
    index ``i`` given at construction time.

    @note: Non-differentiable.
    """
    def __init__(self, i):
        # i: index of the dimension whose size this op returns.
        self.i = i

    def __hash__(self):
        return hash(type(self)) ^ self.i

    def __eq__(self, other):
        return type(self) == type(other) and self.i == other.i

    def __str__(self):
        return '%s{%i}' % (self.__class__.__name__, self.i)

    def make_node(self, x):
        # x could be one of a number of types
        # the only thing we require is that the variable have a .ndim,
        # and that the value have a .shape
        if not isinstance(x, T.Variable):
            raise TypeError('x must be Variable with ndim attribute', x)
        if x.ndim <= self.i:
            raise TypeError('x has too few dimensions for Shape_i',
                            (x, self.i))
        return T.Apply(self, [x], [T.lscalar()])

    def perform(self, node, inp, out_):
        # Store x.shape[i] in the 0-d int64 output, reusing the output
        # storage when it already exists.
        x, = inp
        out, = out_
        if out[0] is None:
            out[0] = theano._asarray(x.shape[self.i], dtype='int64')
        else:
            out[0][...] = x.shape[self.i]

    def c_code_cache_version(self):
        return (0, 1)

    def c_code(self, node, name, inp, out_, sub):
        # Hard-coded C code per input type (no registry in this version).
        x, = inp
        out, = out_
        i = self.i
        if isinstance(node.inputs[0].type, T.TensorType):
            return """
            if(!%(out)s)
                %(out)s=(PyArrayObject*)PyArray_ZEROS(0, NULL, NPY_INT64, 0);
            ((npy_int64*)PyArray_DATA(%(out)s))[0]=PyArray_DIMS(%(x)s)[%(i)s];
            """ % locals()
        elif node.inputs[0].type.__class__.__name__ == "CudaNdarrayType":
            # Compared by class name: don't want to import cuda stuff here.
            return """
            if(!%(out)s)
                %(out)s=(PyArrayObject*)PyArray_ZEROS(0, NULL, NPY_INT64, 0);
            ((npy_int64*)PyArray_DATA(%(out)s))[0]=
                CudaNdarray_HOST_DIMS(%(x)s)[%(i)s];
            """ % locals()
        else:
            #TODO: if your type is not listed here, make a damn registry of
            #      shape_i ops for various types of variables.
            #      Do not continue this madness.
            return super(Shape_i, self).c_code(node, name, (x,), (out,), sub)

    def infer_shape(self, node, input_shapes):
        # The output is a 0-d (scalar) tensor.
        return [()]

    def grad(self, inp, grads):
        # Non-differentiable: a shape carries no gradient information.
        return [None]
class ShapeFeature(object): class ShapeFeature(object):
"""Graph optimizer for removing all calls to shape() """Graph optimizer for removing all calls to shape()
......
...@@ -611,6 +611,16 @@ theano.compile.register_view_op_c_code( ...@@ -611,6 +611,16 @@ theano.compile.register_view_op_c_code(
""", """,
version=1) version=1)
# Register TensorType C code for ViewOp.
theano.compile.register_shape_i_c_code(
TensorType,
"""
if(!%(oname)s)
%(oname)s=(PyArrayObject*)PyArray_ZEROS(0, NULL, NPY_INT64, 0);
((npy_int64*)PyArray_DATA(%(oname)s))[0]=PyArray_DIMS(%(iname)s)[%(i)s];
""",
version=1)
# Register TensorType C code for DeepCopyOp # Register TensorType C code for DeepCopyOp
theano.compile.register_deep_copy_op_c_code( theano.compile.register_deep_copy_op_c_code(
TensorType, TensorType,
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论