提交 50673363 authored 作者: Frederic's avatar Frederic

fix gpu compilation of shape_i.

上级 312da2df
......@@ -571,7 +571,10 @@ the elements of the shape).
.. code-block:: python
theano.compile.ops.register_shape_c_code(YOUR_TYPE_CLASS, THE_C_CODE, version=())
theano.compile.ops.register_shape_i_c_code(YOUR_TYPE_CLASS, THE_C_CODE, version=())
theano.compile.ops.register_shape_i_c_code(YOUR_TYPE_CLASS, THE_C_CODE, CHECK_INPUT, version=())
The C code works in the same way as for the ViewOp. Shape_i has the additional
``i`` parameter that you can use with ``%(i)s``.
In your CHECK_INPUT, you must check that the input has enough dimensions
(``ndim``) for the ``i``-th shape element to be read.
......@@ -349,8 +349,8 @@ class Shape_i(gof.Op):
version = []
# If any of the C code is unversioned, we have to return ()
# Else, we will return a list of (type name, version) pairs.
for t, (c, v) in sorted(self.c_code_and_version.items(),
key=lambda pair: str(pair[0])):
for t, (c, ci, v) in sorted(self.c_code_and_version.items(),
key=lambda pair: str(pair[0])):
if not v:
warnings.warn("Type %s has C code for Shape_i, but it has "
"no version. You should add a 'version' keyword "
......@@ -372,14 +372,8 @@ class Shape_i(gof.Op):
itype = node.inputs[0].type.__class__
if itype in self.c_code_and_version:
sc = """
if (%(i)s>=PyArray_NDIM(%(iname)s)){
PyErr_SetString(PyExc_TypeError, "Number of dimensions lower than expected");
%(fail)s
}
""" % locals()
code, version = self.c_code_and_version[itype]
return sc + code % locals()
code, check_input, version = self.c_code_and_version[itype]
return (check_input + code) % locals()
# Else, no C code
return super(Shape_i, self).c_code(node, name, inames, onames, sub)
......@@ -391,7 +385,7 @@ class Shape_i(gof.Op):
return [None]
def register_shape_i_c_code(typ, code, version=()):
def register_shape_i_c_code(typ, code, check_input, version=()):
""" Tell Shape_i how to generate C code for a Theano Type
:param typ: A Theano type. It must be the Theano class itself and not an
......@@ -401,13 +395,14 @@ def register_shape_i_c_code(typ, code, version=()):
variable names respectively.
:param version: A number indicating the version of the code, for cache.
"""
Shape_i.c_code_and_version[typ] = (code, version)
Shape_i.c_code_and_version[typ] = (code, check_input, version)
# List of Theano Types to which one can add an extra dimension and which
# Scan can deal with.
expandable_types = ()
class FromFunctionOp(gof.Op):
"""
Build a basic Theano Op around a function.
......
......@@ -454,12 +454,22 @@ theano.compile.register_view_op_c_code(
""",
version=1)
theano.compile.register_shape_i_c_code(CudaNdarrayType, """
theano.compile.register_shape_i_c_code(
CudaNdarrayType,
"""
if(!%(oname)s)
%(oname)s=(PyArrayObject*)PyArray_ZEROS(0, NULL, NPY_INT64, 0);
((npy_int64*)PyArray_DATA(%(oname)s))[0] =
CudaNdarray_HOST_DIMS(%(iname)s)[%(i)s];
""", version=(0,))
""",
"""
if (%(i)s>=CudaNdarray_NDIM(%(iname)s)){
PyErr_SetString(PyExc_TypeError,
"Number of dimensions lower than expected");
%(fail)s
}
""",
version=(1,))
# Register CudaNdarrayType to the DeepCopyOp list of types with c code.
theano.compile.register_deep_copy_op_c_code(
......
......@@ -315,12 +315,22 @@ theano.compile.register_shape_c_code(
""",
version=1)
theano.compile.register_shape_i_c_code(GpuArrayType, """
theano.compile.register_shape_i_c_code(
GpuArrayType,
"""
if(!%(oname)s)
%(oname)s=(PyArrayObject*)PyArray_ZEROS(0, NULL, NPY_INT64, 0);
((npy_int64*)PyArray_DATA(%(oname)s))[0] =
%(iname)s->ga.dimensions[%(i)s];
""", version=(0,))
""",
"""
if (%(i)s>=%(iname)s->ga.nd){
PyErr_SetString(PyExc_TypeError,
"Number of dimensions lower than expected");
%(fail)s
}
""",
version=(1,))
theano.compile.register_deep_copy_op_c_code(GpuArrayType, """
Py_XDECREF(%(oname)s);
......
......@@ -646,7 +646,14 @@ theano.compile.register_shape_i_c_code(
%(oname)s=(PyArrayObject*)PyArray_EMPTY(0, NULL, NPY_INT64, 0);
((npy_int64*)PyArray_DATA(%(oname)s))[0]=PyArray_DIMS(%(iname)s)[%(i)s];
""",
version=2)
"""
if (%(i)s>=PyArray_NDIM(%(iname)s)){
PyErr_SetString(PyExc_TypeError,
"Number of dimensions lower than expected");
%(fail)s
}
""",
version=3)
# Register TensorType C code for DeepCopyOp
theano.compile.register_deep_copy_op_c_code(
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论