提交 f251db8a · 作者：Frederic

Advance GpuIncSubtensor c code

上级 2f91217e
@@ -167,6 +167,9 @@ class GpuIncSubtensor(HideC, IncSubtensor):
 the c_code for this Op.
 """
+def c_headers(self):
+return ['<compyte/numpy_compat.h>']
 def make_node(self, x, y, *inputs):
 x = as_gpuarray_variable(x)
 y = as_gpuarray_variable(y)
@@ -234,7 +237,7 @@ class GpuIncSubtensor(HideC, IncSubtensor):
 return """pygpu_copy(%(x)s, GA_ANY_ORDER)""" % locals()
 def decl_view(self):
-return "PyGpuArray* zview = NULL;"
+return "PyGpuArrayObject* zview = NULL;"
 def make_view_array(self, x, view_ndim):
 """//TODO
@@ -245,26 +248,20 @@ class GpuIncSubtensor(HideC, IncSubtensor):
 This doesn't need to actually set up the view with the
 right indexing; we'll do that manually later.
 """
-ret = """zview = (CudaNdarray*) CudaNdarray_New(%(view_ndim)s);
-if (CudaNdarray_set_device_data(
-zview,
-CudaNdarray_DEV_DATA(%(x)s) + xview_offset/4,
-(PyObject*) %(x)s))
-{
-zview = NULL;
-PyErr_Format(PyExc_RuntimeError,
-"GpuSubtensor is not able to set the"
-" devdata field of the view");
-}else{
-cnda_mark_dev_structure_dirty(zview);
-for(int idx=0;idx <%(view_ndim)s; idx++){
-if(xview_dims[idx]==1)
-CudaNdarray_set_stride(zview, idx, 0);
-else
-CudaNdarray_set_stride(zview, idx, xview_strides[idx]);
-CudaNdarray_set_dim(zview, idx, xview_dims[idx]);
-}
-}
+ret = """
+size_t dims[%(view_ndim)s];
+for(int i=0; i<%(view_ndim)s; i++)
+dims[i] = xview_dims[i];
+zview = pygpu_fromgpudata(%(x)s->ga.data,
+xview_offset,
+%(x)s->ga.typecode,
+%(view_ndim)s,
+dims,
+xview_strides,
+pygpu_default_context(),
+1,
+(PyObject *)%(x)s,
+(PyObject *)&PyGpuArrayType);
 """ % locals()
 return ret
@@ -282,7 +279,7 @@ class GpuIncSubtensor(HideC, IncSubtensor):
 returns a C code expression to copy source into view, and
 return 0 on success
 """
-return """GpuArray_move(%(view)s, %(source)s)""" % locals()
+return """GpuArray_move(&%(view)s->ga, &%(source)s->ga)""" % locals()
 def add_to_zview(self, x, fail):
 #TODO
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论