提交 76d1bba8 authored 作者: Frederic's avatar Frederic

Use size_t for shape

上级 544849a2
...@@ -3342,13 +3342,15 @@ class GpuAlloc(GpuOp): ...@@ -3342,13 +3342,15 @@ class GpuAlloc(GpuOp):
} }
if (%(memset_0)s && CudaNdarray_is_c_contiguous(%(out)s)) if (%(memset_0)s && CudaNdarray_is_c_contiguous(%(out)s))
{ {
if (cudaSuccess != cudaMemset(%(out)s->devdata, 0, cudaError_t err = cudaMemset(%(out)s->devdata, 0,
CudaNdarray_SIZE(%(out)s) * 4)) CudaNdarray_SIZEt(%(out)s) * 4);
if (cudaSuccess != err)
{ {
PyErr_Format(PyExc_MemoryError, PyErr_Format(PyExc_MemoryError,
"GpuAlloc: Error memsetting %%d" "GpuAlloc: Error memsetting %%ld"
" bytes of device memory.", " bytes of device memory. %%s",
CudaNdarray_SIZE(%(out)s) * 4); (long)(CudaNdarray_SIZEt(%(out)s) * 4),
cudaGetErrorString(err));
Py_XDECREF(%(out)s); Py_XDECREF(%(out)s);
%(out)s = NULL; %(out)s = NULL;
%(fail)s; %(fail)s;
...@@ -3372,7 +3374,7 @@ class GpuAlloc(GpuOp): ...@@ -3372,7 +3374,7 @@ class GpuAlloc(GpuOp):
return [None for i in inputs] return [None for i in inputs]
def c_code_cache_version(self): def c_code_cache_version(self):
return (7,) return (9,)
def do_constant_folding(self, node): def do_constant_folding(self, node):
for client in node.outputs[0].clients: for client in node.outputs[0].clients:
......
...@@ -5117,6 +5117,17 @@ CudaNdarray_SIZE(const CudaNdarray *self) ...@@ -5117,6 +5117,17 @@ CudaNdarray_SIZE(const CudaNdarray *self)
} }
return size; return size;
} }
// Return the number of elements of `self` (the product of its host-side
// dimensions) as a size_t rather than an int.
// An array whose nd is -1 is reported as holding 0 elements.
size_t
CudaNdarray_SIZEt(const CudaNdarray *self)
{
    if (self->nd == -1)
    {
        return 0;
    }

    // Accumulate the product of the dimensions in a size_t variable.
    size_t n_elements = 1;
    int axis = 0;
    while (axis < self->nd)
    {
        n_elements = n_elements * CudaNdarray_HOST_DIMS(self)[axis];
        ++axis;
    }
    return n_elements;
}
PyObject * PyObject *
CudaNdarray_SIZE_Object(const CudaNdarray *self, void *closure) CudaNdarray_SIZE_Object(const CudaNdarray *self, void *closure)
......
...@@ -286,6 +286,9 @@ DllExport float *CudaNdarray_DEV_DATA(const CudaNdarray * self); ...@@ -286,6 +286,9 @@ DllExport float *CudaNdarray_DEV_DATA(const CudaNdarray * self);
* Return the number of elements in the ndarray (product of the dimensions) * Return the number of elements in the ndarray (product of the dimensions)
*/ */
DllExport int CudaNdarray_SIZE(const CudaNdarray *self); DllExport int CudaNdarray_SIZE(const CudaNdarray *self);
// Useful because many CUDA functions take a size_t as input. This makes sure
// we use the full precision of size_t rather than int.
DllExport size_t CudaNdarray_SIZEt(const CudaNdarray *self);
static PyObject *CudaNdarray_SIZE_Object(const CudaNdarray *self, void *closure); static PyObject *CudaNdarray_SIZE_Object(const CudaNdarray *self, void *closure);
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论