提交 a91c0116 authored 作者: Pierre Luc Carrier's avatar Pierre Luc Carrier

Convert CudaNdarray_HOST_DIMS(x) to PyGpuArray_DIMS(x) in op GpuImages2Neibs

上级 6c806974
...@@ -243,54 +243,54 @@ class GpuImages2Neibs(Images2Neibs, Op): ...@@ -243,54 +243,54 @@ class GpuImages2Neibs(Images2Neibs, Op):
"Images2Neibs: in mode wrap_centered need patch with odd shapes"); "Images2Neibs: in mode wrap_centered need patch with odd shapes");
%(fail)s; %(fail)s;
} }
if ( CudaNdarray_HOST_DIMS(%(ten4)s)[2] < c || if ( PyGpuArray_DIMS(%(ten4)s)[2] < c ||
CudaNdarray_HOST_DIMS(%(ten4)s)[3] < d) PyGpuArray_DIMS(%(ten4)s)[3] < d)
{ {
PyErr_Format(PyExc_TypeError, PyErr_Format(PyExc_TypeError,
"Images2Neibs: in wrap_centered mode, don't" "Images2Neibs: in wrap_centered mode, don't"
" support image shapes smaller then the patch" " support image shapes smaller then the patch"
" shapes: neib_shape=(%%d,%%d)," " shapes: neib_shape=(%%d,%%d),"
" ten4[2:]=[%%d,%%d]", " ten4[2:]=[%%d,%%d]",
c, d, CudaNdarray_HOST_DIMS(%(ten4)s)[2], c, d, PyGpuArray_DIMS(%(ten4)s)[2],
CudaNdarray_HOST_DIMS(%(ten4)s)[3]); PyGpuArray_DIMS(%(ten4)s)[3]);
%(fail)s; %(fail)s;
} }
grid_c = CEIL_INTDIV(((CudaNdarray_HOST_DIMS(%(ten4)s))[2]), grid_c = CEIL_INTDIV(((PyGpuArray_DIMS(%(ten4)s))[2]),
step_x); step_x);
grid_d = CEIL_INTDIV(((CudaNdarray_HOST_DIMS(%(ten4)s))[3]), grid_d = CEIL_INTDIV(((PyGpuArray_DIMS(%(ten4)s))[3]),
step_y); step_y);
}else if ( "%(mode)s" == "valid") { }else if ( "%(mode)s" == "valid") {
if ( ((CudaNdarray_HOST_DIMS(%(ten4)s))[2] < c) || if ( ((PyGpuArray_DIMS(%(ten4)s))[2] < c) ||
((((CudaNdarray_HOST_DIMS(%(ten4)s))[2]-c) %% step_x)!=0)) ((((PyGpuArray_DIMS(%(ten4)s))[2]-c) %% step_x)!=0))
{ {
PyErr_Format(PyExc_TypeError, PyErr_Format(PyExc_TypeError,
"neib_shape[0]=%%d, neib_step[0]=%%d and" "neib_shape[0]=%%d, neib_step[0]=%%d and"
" ten4.shape[2]=%%d not consistent", " ten4.shape[2]=%%d not consistent",
c, step_x, c, step_x,
CudaNdarray_HOST_DIMS(%(ten4)s)[2]); PyGpuArray_DIMS(%(ten4)s)[2]);
%(fail)s; %(fail)s;
} }
if ( ((CudaNdarray_HOST_DIMS(%(ten4)s))[3] < d) || if ( ((PyGpuArray_DIMS(%(ten4)s))[3] < d) ||
((((CudaNdarray_HOST_DIMS(%(ten4)s))[3]-d) %% step_y)!=0)) ((((PyGpuArray_DIMS(%(ten4)s))[3]-d) %% step_y)!=0))
{ {
PyErr_Format(PyExc_TypeError, PyErr_Format(PyExc_TypeError,
"neib_shape[1]=%%d, neib_step[1]=%%d and" "neib_shape[1]=%%d, neib_step[1]=%%d and"
" ten4.shape[3]=%%d not consistent", " ten4.shape[3]=%%d not consistent",
d, step_y, d, step_y,
CudaNdarray_HOST_DIMS(%(ten4)s)[3]); PyGpuArray_DIMS(%(ten4)s)[3]);
%(fail)s; %(fail)s;
} }
//number of patch in height //number of patch in height
grid_c = 1+(((CudaNdarray_HOST_DIMS(%(ten4)s))[2]-c)/step_x); grid_c = 1+(((PyGpuArray_DIMS(%(ten4)s))[2]-c)/step_x);
//number of patch in width //number of patch in width
grid_d = 1+(((CudaNdarray_HOST_DIMS(%(ten4)s))[3]-d)/step_y); grid_d = 1+(((PyGpuArray_DIMS(%(ten4)s))[3]-d)/step_y);
}else if ( "%(mode)s" == "ignore_borders") { }else if ( "%(mode)s" == "ignore_borders") {
//number of patch in height //number of patch in height
grid_c = 1+(((CudaNdarray_HOST_DIMS(%(ten4)s))[2]-c)/step_x); grid_c = 1+(((PyGpuArray_DIMS(%(ten4)s))[2]-c)/step_x);
//number of patch in width //number of patch in width
grid_d = 1+(((CudaNdarray_HOST_DIMS(%(ten4)s))[3]-d)/step_y); grid_d = 1+(((PyGpuArray_DIMS(%(ten4)s))[3]-d)/step_y);
}else{ }else{
PyErr_Format(PyExc_TypeError, PyErr_Format(PyExc_TypeError,
"Images2Neibs: unknow mode '%(mode)s'"); "Images2Neibs: unknow mode '%(mode)s'");
...@@ -301,12 +301,12 @@ class GpuImages2Neibs(Images2Neibs, Op): ...@@ -301,12 +301,12 @@ class GpuImages2Neibs(Images2Neibs, Op):
const int z_dim1 = c * d; const int z_dim1 = c * d;
const int z_dim0 = grid_c const int z_dim0 = grid_c
* grid_d * grid_d
* CudaNdarray_HOST_DIMS(%(ten4)s)[1] * PyGpuArray_DIMS(%(ten4)s)[1]
* CudaNdarray_HOST_DIMS(%(ten4)s)[0]; * PyGpuArray_DIMS(%(ten4)s)[0];
if ((NULL == %(z)s) if ((NULL == %(z)s)
|| (CudaNdarray_HOST_DIMS(%(z)s)[0] != z_dim0) || (PyGpuArray_DIMS(%(z)s)[0] != z_dim0)
|| (CudaNdarray_HOST_DIMS(%(z)s)[1] != z_dim1)) || (PyGpuArray_DIMS(%(z)s)[1] != z_dim1))
{ {
Py_XDECREF(%(z)s); Py_XDECREF(%(z)s);
npy_intp dims[2]; npy_intp dims[2];
...@@ -325,10 +325,10 @@ class GpuImages2Neibs(Images2Neibs, Op): ...@@ -325,10 +325,10 @@ class GpuImages2Neibs(Images2Neibs, Op):
{ // NESTED SCOPE { // NESTED SCOPE
const int nb_batch = CudaNdarray_HOST_DIMS(%(ten4)s)[0]; const int nb_batch = PyGpuArray_DIMS(%(ten4)s)[0];
const int nb_stack = CudaNdarray_HOST_DIMS(%(ten4)s)[1]; const int nb_stack = PyGpuArray_DIMS(%(ten4)s)[1];
const int height = CudaNdarray_HOST_DIMS(%(ten4)s)[2]; const int height = PyGpuArray_DIMS(%(ten4)s)[2];
const int width = CudaNdarray_HOST_DIMS(%(ten4)s)[3]; const int width = PyGpuArray_DIMS(%(ten4)s)[3];
const int c = *(dtype_%(neib_shape)s*) PyArray_GETPTR1( const int c = *(dtype_%(neib_shape)s*) PyArray_GETPTR1(
%(neib_shape)s, 0); %(neib_shape)s, 0);
...@@ -348,14 +348,14 @@ class GpuImages2Neibs(Images2Neibs, Op): ...@@ -348,14 +348,14 @@ class GpuImages2Neibs(Images2Neibs, Op):
//a higher core utilisation. for smaller patch size //a higher core utilisation. for smaller patch size
while(c*d*(n_threads.z+1) < 128 && n_threads.z<64 && while(c*d*(n_threads.z+1) < 128 && n_threads.z<64 &&
n_threads.z<CudaNdarray_HOST_DIMS(%(z)s)[0]){ n_threads.z<PyGpuArray_DIMS(%(z)s)[0]){
n_threads.z++; n_threads.z++;
} }
int nb_block; int nb_block;
if (CudaNdarray_HOST_DIMS(%(z)s)[0] %% n_threads.z == 0) if (PyGpuArray_DIMS(%(z)s)[0] %% n_threads.z == 0)
nb_block = CudaNdarray_HOST_DIMS(%(z)s)[0] / n_threads.z; nb_block = PyGpuArray_DIMS(%(z)s)[0] / n_threads.z;
else else
nb_block = (CudaNdarray_HOST_DIMS(%(z)s)[0] / n_threads.z) + 1; nb_block = (PyGpuArray_DIMS(%(z)s)[0] / n_threads.z) + 1;
dim3 n_blocks(std::min(32*1024,nb_block)); dim3 n_blocks(std::min(32*1024,nb_block));
int n_shared = 0; int n_shared = 0;
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论