提交 1392f523 authored 作者: Frederic's avatar Frederic

Make GpuSubtensor and GpuIncSubtensor use the new NumPy C API.

This simplifies the *IncSubtensor code at the same time.
上级 07068846
......@@ -2223,12 +2223,6 @@ class GpuReshape(tensor.Reshape, GpuOp):
out[0] = x.reshape(tuple(shp))
# C Code shared by GpuSubtensor and GpuIncSubtensor
_define_set_data = """
#define CudaNdarray_set_device_data2(obj, ptr, base) \
CudaNdarray_set_device_data(obj, (float *)ptr, base)
"""
class GpuSubtensor(GpuOp, tensor.Subtensor):
"""
Implement subtensor on the gpu.
......@@ -2276,16 +2270,27 @@ class GpuSubtensor(GpuOp, tensor.Subtensor):
view_ndim = node.outputs[0].ndim
fail = sub['fail']
decl = "CudaNdarray* xview = NULL;"
get_xview = self.helper_c_code(node, name, inputs, outputs, sub,
self.idx_list,
view_ndim=view_ndim,
c_prefix='CudaNdarray',
strides_mul=4,
)
build_view = """
//TODO: give this Op a second output so that this view can be cached
//TODO: alternatively, fix the memory leak on failure
CudaNdarray* xview = (CudaNdarray*) CudaNdarray_New(%(view_ndim)s);
xview = (CudaNdarray*) CudaNdarray_New(%(view_ndim)s);
if (!xview)
{
%(fail)s;
}
if (CudaNdarray_set_device_data(xview, CudaNdarray_DEV_DATA(%(x)s),
(PyObject*) NULL))
if (CudaNdarray_set_device_data(
xview,
CudaNdarray_DEV_DATA(%(x)s) + xview_offset/4,
(PyObject*) %(x)s))
{
PyErr_Format(PyExc_RuntimeError,
"GpuSubtensor is not able to set the"
......@@ -2294,43 +2299,24 @@ class GpuSubtensor(GpuOp, tensor.Subtensor):
%(fail)s;
}
cnda_mark_dev_structure_dirty(xview);
""" % locals()
get_xview = _define_set_data + \
self.helper_c_code(node, name, inputs, outputs, sub,
self.idx_list,
c_prefix='CudaNdarray',
set_data='CudaNdarray_set_device_data2',
set_dim='CudaNdarray_set_dim',
set_stride='CudaNdarray_set_stride',
update_flags="", strides_mul=4)
finish_view = ""
#For broadcasted dimensions, set the strides to 0
#We can't do that only for broadcasted dimensions as this can happen for dimensions of size 0,
#That are rebroadcated later.
for idx in range(node.outputs[0].ndim):
finish_view += """
if(CudaNdarray_HOST_DIMS(xview)[%(idx)s]==1)
CudaNdarray_set_stride(xview, %(idx)s, 0);
""" % locals()
finish_view += """
//Set the base only now
if(CudaNdarray_set_device_data(xview, CudaNdarray_DEV_DATA(xview),
%(x)s)){
PyErr_Format(PyExc_RuntimeError,
"GpuSubtensor is not able to set"
" the base of the view array");
Py_XDECREF(xview);
%(fail)s;
for(int idx=0;idx <%(view_ndim)s; idx++){
//For broadcasted dimensions, set the strides to 0
//We can't do that only for broadcasted dimensions as this can happen
//for dimensions of size 0. That are rebroadcated later.
if(xview_dims[idx]==1)
CudaNdarray_set_stride(xview, idx, 0);
else
CudaNdarray_set_stride(xview, idx, xview_strides[idx]);
CudaNdarray_set_dim(xview, idx, xview_dims[idx]);
}
""" % locals()
finish_view = """
Py_XDECREF(%(z)s);
%(z)s = xview;
""" % locals()
return build_view + "{" + get_xview + "}" + finish_view
return decl + get_xview + build_view + finish_view
def c_code_cache_version(self):
hv = self.helper_c_code_cache_version()
......@@ -2719,6 +2705,7 @@ class GpuAdvancedIncSubtensor1_dev20(GpuAdvancedIncSubtensor1):
""" %locals()
class GpuIncSubtensor(tensor.IncSubtensor, GpuOp):
"""
Implement IncSubtensor on the gpu.
......@@ -2756,6 +2743,9 @@ class GpuIncSubtensor(tensor.IncSubtensor, GpuOp):
"""
return """(CudaNdarray*) CudaNdarray_Copy(%(x)s)""" % locals()
def decl_view(self):
return "CudaNdarray* zview = NULL;"
def make_view_array(self, x, view_ndim):
"""
:param x: a string identifying an array to be viewed
......@@ -2765,17 +2755,32 @@ class GpuIncSubtensor(tensor.IncSubtensor, GpuOp):
This doesn't need to actually set up the view with the
right indexing; we'll do that manually later.
"""
return """CudaNdarray* zview = (CudaNdarray*)
CudaNdarray_New(%(view_ndim)s)""" % locals()
ret = """zview = (CudaNdarray*) CudaNdarray_New(%(view_ndim)s);
if (CudaNdarray_set_device_data(
zview,
CudaNdarray_DEV_DATA(%(x)s) + xview_offset/4,
(PyObject*) %(x)s))
{
zview = NULL;
PyErr_Format(PyExc_RuntimeError,
"GpuSubtensor is not able to set the"
" devdata field of the view");
}else{
cnda_mark_dev_structure_dirty(zview);
for(int idx=0;idx <%(view_ndim)s; idx++){
if(xview_dims[idx]==1)
CudaNdarray_set_stride(zview, idx, 0);
else
CudaNdarray_set_stride(zview, idx, xview_strides[idx]);
CudaNdarray_set_dim(zview, idx, xview_dims[idx]);
}
}
""" % locals()
return ret
def get_helper_c_code_args(self):
""" Return a dictionary of arguments to use with helper_c_code"""
return { 'update_flags' : "",
'c_prefix' : 'CudaNdarray',
'set_data' :'CudaNdarray_set_device_data2',
'set_dim' : 'CudaNdarray_set_dim',
'set_stride' : 'CudaNdarray_set_stride',
'update_flags' : "",
return {'c_prefix': 'CudaNdarray',
'strides_mul': 4
}
......@@ -2789,24 +2794,6 @@ class GpuIncSubtensor(tensor.IncSubtensor, GpuOp):
"""
return """CudaNdarray_CopyFromCudaNdarray(%(view)s, %(source)s)""" % locals()
def define_set_data(self):
return _define_set_data
def link_view_array(self, x, fail):
return """
if (CudaNdarray_set_device_data(zview, CudaNdarray_DEV_DATA(%(x)s),
(PyObject*) NULL))
{
PyErr_Format(PyExc_RuntimeError,
"GpuSubtensor is not able to set the"
" devdata field of the view");
Py_XDECREF(zview);
%(fail)s;
}
cnda_mark_dev_structure_dirty(zview);
""" % locals()
def set_view_base(self, x, fail):
return """
//Set the base only now
......@@ -2823,9 +2810,8 @@ class GpuIncSubtensor(tensor.IncSubtensor, GpuOp):
def add_to_zview(self, x, fail):
return """
PyObject * add_result = CudaNdarray_inplace_add((PyObject *) zview,
(PyObject *) py_%(x)s);
PyObject * add_result = CudaNdarray_inplace_add((PyObject *) zview,
(PyObject *) py_%(x)s);
if (! add_result )
{
......@@ -2839,7 +2825,6 @@ class GpuIncSubtensor(tensor.IncSubtensor, GpuOp):
""" % locals()
def c_code_cache_version(self):
parent_version = super(GpuIncSubtensor, self).c_code_cache_version()
if parent_version:
return parent_version + (0,)
......
......@@ -1098,6 +1098,9 @@ class IncSubtensor(Op):
(x, y) + inputs,
[x.type()])
def decl_view(self):
return "PyArrayObject * zview = NULL;"
def perform(self, node, inputs, out_):
out, = out_
x, y = inputs[:2]
......@@ -1171,7 +1174,6 @@ class IncSubtensor(Op):
numpy.sum([not isinstance(idx, slice)
for idx in self.idx_list]))
decl = "PyArrayObject * zview = NULL;"
copy_of_x = self.copy_of_x(x)
copy_input_if_necessary = """
......@@ -1186,15 +1188,11 @@ class IncSubtensor(Op):
}
else
{
if (%(z)s) Py_DECREF(%(z)s);
Py_XDECREF(%(z)s);
%(z)s = %(copy_of_x)s;
}
""" % locals()
alloc_zview = self.make_view_array(z, view_ndim)
# On GPU, it takes two steps to make a view
link_zview = self.link_view_array(z, fail)
# get info needed to make zview: a view of %(z)s
helper_args = self.get_helper_c_code_args()
......@@ -1210,6 +1208,8 @@ class IncSubtensor(Op):
)
#Make a view on the output, as we will write into it.
alloc_zview = self.make_view_array(z, view_ndim)
build_view = """
//TODO: give this Op a second output so that this view can be cached
//TODO: alternatively, fix the memory leak on failure
......@@ -1218,7 +1218,6 @@ class IncSubtensor(Op):
{
%(fail)s;
}
%(link_zview)s;
""" % locals()
copy_into = self.copy_into("zview", y)
......@@ -1239,8 +1238,7 @@ class IncSubtensor(Op):
%(add_to_zview)s
}
""" % locals()
return (decl +
return (self.decl_view() +
copy_input_if_necessary +
get_zview +
build_view +
......@@ -1322,19 +1320,6 @@ class IncSubtensor(Op):
"""
return """PyArray_CopyInto(%(view)s, %(source)s)""" % locals()
def link_view_array(self, x, fail):
""" Returns code to complete making zview a view of x"""
# On CPU there is nothing to do, make_view_array already did this
return ""
def set_view_base(self, x, fail):
""" Returns code to make zview be a correct view of x,
after helper_c_code is done messing with x"""
# On CPU there is nothing to do
return ""
def add_to_zview(self, x, fail):
""" Return C code to add x to zview. Should DECREF zview if the
add fails."""
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论