提交 e6b53352 authored 作者: Frederic's avatar Frederic

GpuSubtensor now has C code, generated by reusing the Subtensor helper C code generator.

上级 529f4421
...@@ -1911,7 +1911,7 @@ class GpuReshape(tensor.Reshape, GpuOp): ...@@ -1911,7 +1911,7 @@ class GpuReshape(tensor.Reshape, GpuOp):
out[0] = x.reshape(tuple(shp)) out[0] = x.reshape(tuple(shp))
class GpuSubtensor(tensor.Subtensor, GpuOp): class GpuSubtensor(GpuOp, tensor.Subtensor):
""" """
Implement subtensor on the gpu. Implement subtensor on the gpu.
""" """
...@@ -1952,6 +1952,59 @@ class GpuSubtensor(tensor.Subtensor, GpuOp): ...@@ -1952,6 +1952,59 @@ class GpuSubtensor(tensor.Subtensor, GpuOp):
cdata = cdata[0] cdata = cdata[0]
out[0] = x.__getitem__(cdata) out[0] = x.__getitem__(cdata)
def c_code(self, node, name, inputs, outputs, sub):
    """Return the C source that builds this op's output as a view of its input.

    The generated code runs in three stages:

    1. Allocate a fresh ``CudaNdarray`` called ``xview`` with as many
       dimensions as ``node.outputs[0]``, point it at the input's device
       data with a NULL base, mark its device structure dirty, and define
       the ``CudaNdarray_set_device_data2`` macro.
    2. Splice in, inside its own ``{...}`` scope, whatever
       ``self.helper_c_code`` returns for ``self.idx_list``.
    3. Re-set the device data with the input as base, then release the
       previous output and store ``xview`` in its place.

    :param node: the apply node; only ``node.outputs[0].ndim`` is read here.
    :param name: forwarded unchanged to ``helper_c_code``.
    :param inputs: sequence whose first entry is interpolated into the C
        code wherever the input array is referenced.
    :param outputs: sequence holding exactly one entry, interpolated into
        the C code as the output variable.
    :param sub: mapping whose ``'fail'`` entry is the C code emitted on
        every error path.
    :returns: the three stages concatenated into a single string.
    """
    x = inputs[0]
    z, = outputs

    # Every name the C templates below interpolate.
    subst = {
        'x': x,
        'z': z,
        'view_ndim': node.outputs[0].ndim,
        'fail': sub['fail'],
    }

    # NOTE: the backslash-newline in the #define is consumed by Python's
    # string-literal line continuation, so the macro is emitted on one line.
    prologue = """
//TODO: give this Op a second output so that this view can be cached
//TODO: alternatively, fix the memory leak on failure
CudaNdarray* xview = (CudaNdarray*) CudaNdarray_New(%(view_ndim)s);
if (!xview)
{
%(fail)s;
}
if (CudaNdarray_set_device_data(xview, CudaNdarray_DEV_DATA(%(x)s),
(PyObject*) NULL))
{
PyErr_Format(PyExc_RuntimeError,
"GpuSubtensor is not able to set the"
" devdata field of the view");
Py_XDECREF(xview);
%(fail)s;
}
cnda_mark_dev_structure_dirty(xview);
#define CudaNdarray_set_device_data2(obj, ptr, base) \
CudaNdarray_set_device_data(obj, (float *)ptr, base)
""" % subst

    # Tell the shared Subtensor helper which CudaNdarray functions to emit.
    # strides_mul=4 is presumably sizeof(float) -- TODO confirm against
    # helper_c_code.
    helper_options = dict(
        c_prefix='CudaNdarray',
        set_data='CudaNdarray_set_device_data2',
        set_dim='CudaNdarray_set_dim',
        set_stride='CudaNdarray_set_stride',
        update_flags="",
        strides_mul=4,
    )
    indexing = self.helper_c_code(node, name, inputs, outputs, sub,
                                  self.idx_list, **helper_options)

    epilogue = """
//Set the base only now
if(CudaNdarray_set_device_data(xview, CudaNdarray_DEV_DATA(xview),
%(x)s)){
PyErr_Format(PyExc_RuntimeError,
"GpuSubtensor is not able to set"
" the base of the view array");
Py_XDECREF(xview);
%(fail)s;
}
Py_XDECREF(%(z)s);
%(z)s = xview;
""" % subst

    # The helper's code gets its own brace scope between the two stages.
    return "".join([prologue, "{", indexing, "}", epilogue])
class GpuAdvancedSubtensor1(tensor.AdvancedSubtensor1, GpuOp): class GpuAdvancedSubtensor1(tensor.AdvancedSubtensor1, GpuOp):
""" """
......
...@@ -161,6 +161,14 @@ DllExport const int *CudaNdarray_DEV_STRIDES(const CudaNdarray * self); ...@@ -161,6 +161,14 @@ DllExport const int *CudaNdarray_DEV_STRIDES(const CudaNdarray * self);
DllExport const int *CudaNdarray_DEV_LOG2DIMS(const CudaNdarray * self); DllExport const int *CudaNdarray_DEV_LOG2DIMS(const CudaNdarray * self);
DllExport float *CudaNdarray_DEV_DATA(const CudaNdarray * self); DllExport float *CudaNdarray_DEV_DATA(const CudaNdarray * self);
// The following 4 macros exist so that a single C code generator can work on
// both PyArray and CudaNdarray. They are used at least by Subtensor and
// GpuSubtensor.
#define CudaNdarray_DIMS CudaNdarray_HOST_DIMS
// Argument and expansion are fully parenthesized so that expressions such as
// `*pp` or `cond ? a : b` expand with the intended precedence.
#define CudaNdarray_NDIM(self) ((self)->nd)
#define CudaNdarray_STRIDES CudaNdarray_HOST_STRIDES
// NOTE: despite the name, this aliases CudaNdarray_DEV_DATA, which is declared
// to return `float *`, not a byte pointer.
#define CudaNdarray_BYTES CudaNdarray_DEV_DATA
/** /**
* Return the number of elements in the ndarray (product of the dimensions) * Return the number of elements in the ndarray (product of the dimensions)
*/ */
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论