提交 70ed05cc 作者: Vivek Kulkarni

Moving an assert out of for loop (perf optimization) and adding back a…

Moving an assert out of the for loop (perf optimization) and adding back the declaration of CudaNdarray_inplace_add
上级 4b831a4a
...@@ -2471,6 +2471,7 @@ class GpuAdvancedIncSubtensor1(tensor.AdvancedIncSubtensor1, GpuOp): ...@@ -2471,6 +2471,7 @@ class GpuAdvancedIncSubtensor1(tensor.AdvancedIncSubtensor1, GpuOp):
x_obj = (PyObject*)CudaNdarray_View(%(x)s); x_obj = (PyObject*)CudaNdarray_View(%(x)s);
y_obj = (PyObject*)CudaNdarray_View(%(y)s); y_obj = (PyObject*)CudaNdarray_View(%(y)s);
num_indices = PyArray_SIZE(%(ind)s); num_indices = PyArray_SIZE(%(ind)s);
assert((num_indices - 1) <= LONG_MAX);
for (j = 0;j < num_indices; j++) { for (j = 0;j < num_indices; j++) {
...@@ -2478,7 +2479,6 @@ class GpuAdvancedIncSubtensor1(tensor.AdvancedIncSubtensor1, GpuOp): ...@@ -2478,7 +2479,6 @@ class GpuAdvancedIncSubtensor1(tensor.AdvancedIncSubtensor1, GpuOp):
x_rowind_obj = PyInt_FromLong(*p_index); x_rowind_obj = PyInt_FromLong(*p_index);
assert(PyInt_AsLong(x_rowind_obj) == (*p_index)); assert(PyInt_AsLong(x_rowind_obj) == (*p_index));
y_rowind_obj = PyInt_FromLong(j); y_rowind_obj = PyInt_FromLong(j);
assert(PyInt_AsLong(y_rowind_obj) == j);
row_x = CudaNdarray_Subscript(x_obj, x_rowind_obj); row_x = CudaNdarray_Subscript(x_obj, x_rowind_obj);
row_y = CudaNdarray_Subscript(y_obj, y_rowind_obj); row_y = CudaNdarray_Subscript(y_obj, y_rowind_obj);
......
...@@ -487,6 +487,7 @@ int fprint_CudaNdarray(FILE * fd, const CudaNdarray *self); ...@@ -487,6 +487,7 @@ int fprint_CudaNdarray(FILE * fd, const CudaNdarray *self);
PyObject * CudaNdarray_View(const CudaNdarray * self); PyObject * CudaNdarray_View(const CudaNdarray * self);
PyObject * CudaNdarray_inplace_add(PyObject* py_self, PyObject * py_other);
PyObject * CudaNdarray_Subscript(PyObject * py_self, PyObject * key); PyObject * CudaNdarray_Subscript(PyObject * py_self, PyObject * key);
int CudaNdarray_inplace_elemwise(PyObject* py_self, PyObject * py_other, operator_t fct_nb); int CudaNdarray_inplace_elemwise(PyObject* py_self, PyObject * py_other, operator_t fct_nb);
......
Markdown 格式
0%
您即将添加 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论