提交 c1abbdd5 authored 作者: abergeron's avatar abergeron

Merge pull request #1991 from nouiz/old_numpy

[ENH] Make the gpu back-end use the new numpy c interface
...@@ -50,33 +50,33 @@ class GpuConv3D(GpuOp): ...@@ -50,33 +50,33 @@ class GpuConv3D(GpuOp):
//printf("\t\t\t\tConv3DGPU c code\\n"); //printf("\t\t\t\tConv3DGPU c code\\n");
//Check dimensionality of inputs //Check dimensionality of inputs
if (%(W)s->nd != 5) if (CudaNdarray_NDIM(%(W)s) != 5)
{ {
PyErr_Format(PyExc_ValueError, "GpuConv3D: W must be a 5 dimensional CudaNdarray"); PyErr_Format(PyExc_ValueError, "GpuConv3D: W must be a 5 dimensional CudaNdarray");
%(fail)s %(fail)s
} }
if (%(V)s->nd != 5) if (CudaNdarray_NDIM(%(V)s) != 5)
{ {
PyErr_Format(PyExc_ValueError, "GpuConv3D: V must be a 5 dimensional CudaNdarray"); PyErr_Format(PyExc_ValueError, "GpuConv3D: V must be a 5 dimensional CudaNdarray");
%(fail)s %(fail)s
} }
if (%(b)s->nd != 1) if (CudaNdarray_NDIM(%(b)s) != 1)
{ {
PyErr_Format(PyExc_ValueError, "GpuConv3D: b must be a vector CudaNdarray"); PyErr_Format(PyExc_ValueError, "GpuConv3D: b must be a vector CudaNdarray");
%(fail)s %(fail)s
} }
if (%(d)s->nd != 1) if (CudaNdarray_NDIM(%(d)s) != 1)
{ {
PyErr_Format(PyExc_ValueError, "GpuConv3D: d must be a vector CudaNdarray"); PyErr_Format(PyExc_ValueError, "GpuConv3D: d must be a vector CudaNdarray");
%(fail)s %(fail)s
} }
if (%(d)s->dimensions[0] != 3) if (PyArray_DIMS(%(d)s)[0] != 3)
{ {
PyErr_Format(PyExc_ValueError, "GpuConv3D: 3 stride length arguments expected (row, col, time) but %%li were given", %(d)s->dimensions[0]); PyErr_Format(PyExc_ValueError, "GpuConv3D: 3 stride length arguments expected (row, col, time) but %%li were given", PyArray_DIMS(%(d)s)[0]);
%(fail)s %(fail)s
} }
......
...@@ -79,33 +79,33 @@ class GpuConvGrad3D(GpuOp): ...@@ -79,33 +79,33 @@ class GpuConvGrad3D(GpuOp):
//printf("\t\t\t\tGpuConvGrad3DW c code\\n"); //printf("\t\t\t\tGpuConvGrad3DW c code\\n");
//Check dimensionality of inputs //Check dimensionality of inputs
if (%(dCdH)s->nd != 5) if (CudaNdarray_NDIM(%(dCdH)s) != 5)
{ {
PyErr_Format(PyExc_ValueError, "GpuConvGrad3D: dCdH must be a 5-d CudaNdArray"); PyErr_Format(PyExc_ValueError, "GpuConvGrad3D: dCdH must be a 5-d CudaNdArray");
%(fail)s %(fail)s
} }
if (%(V)s->nd != 5) if (CudaNdarray_NDIM(%(V)s) != 5)
{ {
PyErr_Format(PyExc_ValueError, "GpuConvGrad3D: V must be a 5-d CudaNdArray"); PyErr_Format(PyExc_ValueError, "GpuConvGrad3D: V must be a 5-d CudaNdArray");
%(fail)s %(fail)s
} }
if (%(WShape)s->nd != 1) if (CudaNdarray_NDIM(%(WShape)s) != 1)
{ {
PyErr_Format(PyExc_ValueError, "GpuConvGrad3D: WShape must be a 1-d CudaNdArray"); PyErr_Format(PyExc_ValueError, "GpuConvGrad3D: WShape must be a 1-d CudaNdArray");
%(fail)s %(fail)s
} }
if (%(d)s->nd != 1) if (PyArray_NDIM(%(d)s) != 1)
{ {
PyErr_Format(PyExc_ValueError, "GpuConvGrad3D: d must be a 1-d CudaNdArray"); PyErr_Format(PyExc_ValueError, "GpuConvGrad3D: d must be a 1-d CudaNdArray");
%(fail)s %(fail)s
} }
if (%(d)s->dimensions[0] != 3) if (PyArray_DIMS(%(d)s)[0] != 3)
{ {
PyErr_Format(PyExc_ValueError, "GpuConvGrad3D: 3 stride lengths arguments expected(for row, col, and time) but %%li were given", %(d)s->dimensions[0]); PyErr_Format(PyExc_ValueError, "GpuConvGrad3D: 3 stride lengths arguments expected(for row, col, and time) but %%li were given", PyArray_DIMS(%(d)s)[0]);
%(fail)s %(fail)s
} }
...@@ -113,7 +113,7 @@ class GpuConvGrad3D(GpuOp): ...@@ -113,7 +113,7 @@ class GpuConvGrad3D(GpuOp):
//Read and check sizes of inputs //Read and check sizes of inputs
const int batchSize = CudaNdarray_HOST_DIMS(%(V)s)[0]; const int batchSize = CudaNdarray_HOST_DIMS(%(V)s)[0];
if (%(WShape)s->dimensions[0] != 5) if (PyArray_DIMS(%(WShape)s)[0] != 5)
{ {
PyErr_Format(PyExc_ValueError, "GpuConvGrad3D: WShape must specify a 5-d shape"); PyErr_Format(PyExc_ValueError, "GpuConvGrad3D: WShape must specify a 5-d shape");
%(fail)s %(fail)s
...@@ -125,7 +125,7 @@ class GpuConvGrad3D(GpuOp): ...@@ -125,7 +125,7 @@ class GpuConvGrad3D(GpuOp):
} }
{ //for fail { //for fail
dtype_%(WShape)s * WShape = (dtype_%(WShape)s *) %(WShape)s->data; dtype_%(WShape)s * WShape = (dtype_%(WShape)s *) PyArray_DATA(%(WShape)s);
const int outputChannels = WShape[0]; const int outputChannels = WShape[0];
const int inputChannels = CudaNdarray_HOST_DIMS(%(V)s)[4]; const int inputChannels = CudaNdarray_HOST_DIMS(%(V)s)[4];
if (WShape[4] != inputChannels) if (WShape[4] != inputChannels)
......
...@@ -61,34 +61,34 @@ class GpuConvTransp3D(GpuOp): ...@@ -61,34 +61,34 @@ class GpuConvTransp3D(GpuOp):
//printf("\t\t\t\tGpuConvTransp c code\\n"); //printf("\t\t\t\tGpuConvTransp c code\\n");
//Check dimensionality of inputs //Check dimensionality of inputs
if (%(H)s->nd != 5) if (CudaNdarray_NDIM(%(H)s) != 5)
{ {
PyErr_Format(PyExc_ValueError, "GpuConvTransp3D: H must be a 5-D tensor but it is %%i-D",%(H)s->nd); PyErr_Format(PyExc_ValueError, "GpuConvTransp3D: H must be a 5-D tensor but it is %%i-D", CudaNdarray_NDIM(%(H)s));
%(fail)s %(fail)s
} }
if (%(W)s->nd != 5) if (CudaNdarray_NDIM(%(W)s) != 5)
{ {
PyErr_Format(PyExc_ValueError, "GpuConvTransp3D: W must be a 5-D tensor"); PyErr_Format(PyExc_ValueError, "GpuConvTransp3D: W must be a 5-D tensor");
%(fail)s %(fail)s
} }
if (%(b)s->nd != 1) if (CudaNdarray_NDIM(%(b)s) != 1)
{ {
PyErr_Format(PyExc_ValueError, "GpuConvTransp3D: b must be a vector"); PyErr_Format(PyExc_ValueError, "GpuConvTransp3D: b must be a vector");
%(fail)s %(fail)s
} }
if (%(d)s->nd != 1) if (PyArray_NDIM(%(d)s) != 1)
{ {
PyErr_Format(PyExc_ValueError, "GpuConvTransp3D: d must be a vector"); PyErr_Format(PyExc_ValueError, "GpuConvTransp3D: d must be a vector");
%(fail)s %(fail)s
} }
//Read and check stride arguments //Read and check stride arguments
if (%(d)s->dimensions[0] != 3) if (PyArray_DIMS(%(d)s)[0] != 3)
{ {
PyErr_Format(PyExc_ValueError,"GpuConvTransp3D: 3 stride length arguments expected (for row, col, and time) but %%li were given", %(d)s->dimensions[0]); PyErr_Format(PyExc_ValueError,"GpuConvTransp3D: 3 stride length arguments expected (for row, col, and time) but %%li were given", PyArray_DIMS(%(d)s)[0]);
%(fail)s %(fail)s
} }
{ // for fail { // for fail
...@@ -138,13 +138,13 @@ class GpuConvTransp3D(GpuOp): ...@@ -138,13 +138,13 @@ class GpuConvTransp3D(GpuOp):
if (%(RShape)s) if (%(RShape)s)
{ {
if (%(RShape)s->nd != 1) if (PyArray_NDIM(%(RShape)s) != 1)
{ {
PyErr_Format(PyExc_ValueError, "RShape must be a vector"); PyErr_Format(PyExc_ValueError, "RShape must be a vector");
%(fail)s %(fail)s
} }
if (%(RShape)s->dimensions[0] != 3) if (PyArray_DIMS(%(RShape)s)[0] != 3)
{ {
PyErr_Format(PyExc_ValueError, "RShape must specify a 3D shape ( [height,width,duration] )"); PyErr_Format(PyExc_ValueError, "RShape must specify a 3D shape ( [height,width,duration] )");
%(fail)s %(fail)s
...@@ -189,7 +189,7 @@ class GpuConvTransp3D(GpuOp): ...@@ -189,7 +189,7 @@ class GpuConvTransp3D(GpuOp):
%(fail)s; %(fail)s;
} }
} }
cudaMemset(%(R)s->devdata, 0, 4 * batchSize * inputChannels * videoHeight * videoWidth * videoDur); cudaMemset(CudaNdarray_DEV_DATA(%(R)s), 0, 4 * batchSize * inputChannels * videoHeight * videoWidth * videoDur);
{ // for fail { // for fail
......
...@@ -124,8 +124,8 @@ class GpuDot22Scalar(GpuOp): ...@@ -124,8 +124,8 @@ class GpuDot22Scalar(GpuOp):
return """ return """
#define REAL float #define REAL float
float %(name)s_a = (PyArray_TYPE(%(a)s) == NPY_FLOAT) float %(name)s_a = (PyArray_TYPE(%(a)s) == NPY_FLOAT)
? (REAL)(((float*)%(a)s->data)[0]) ? (REAL)(((float*)PyArray_DATA(%(a)s))[0])
: (REAL)(((double*)%(a)s->data)[0]); : (REAL)(((double*)PyArray_DATA(%(a)s))[0]);
#undef REAL #undef REAL
if (%(x)s->nd != 2) if (%(x)s->nd != 2)
{ {
...@@ -232,12 +232,12 @@ class GpuGemm(GpuOp): ...@@ -232,12 +232,12 @@ class GpuGemm(GpuOp):
#define REAL float #define REAL float
float %(name)s_a = (PyArray_TYPE(%(a)s) == NPY_FLOAT) float %(name)s_a = (PyArray_TYPE(%(a)s) == NPY_FLOAT)
? (REAL)(((float*)%(a)s->data)[0]) ? (REAL)(((float*)PyArray_DATA(%(a)s))[0])
: (REAL)(((double*)%(a)s->data)[0]); : (REAL)(((double*)PyArray_DATA(%(a)s))[0]);
float %(name)s_b = (PyArray_TYPE(%(b)s) == NPY_FLOAT) ? float %(name)s_b = (PyArray_TYPE(%(b)s) == NPY_FLOAT) ?
(REAL)(((float*)%(b)s->data)[0]) (REAL)(((float*)PyArray_DATA(%(b)s))[0])
: (REAL)(((double*)%(b)s->data)[0]); : (REAL)(((double*)PyArray_DATA(%(b)s))[0]);
#undef REAL #undef REAL
if (%(inplace)s if (%(inplace)s
...@@ -344,8 +344,8 @@ class GpuGemv(GpuOp): ...@@ -344,8 +344,8 @@ class GpuGemv(GpuOp):
sio = StringIO() sio = StringIO()
print >> sio, """ print >> sio, """
float %(name)s_alpha = ((dtype_%(a)s*)(%(a)s->data))[0]; float %(name)s_alpha = ((dtype_%(a)s*)(PyArray_DATA(%(a)s)))[0];
float %(name)s_beta = ((dtype_%(b)s*)(%(b)s->data))[0]; float %(name)s_beta = ((dtype_%(b)s*)(PyArray_DATA(%(b)s)))[0];
if (%(inplace)s if (%(inplace)s
&& ((CudaNdarray_HOST_STRIDES(%(z_in)s)[0] > 0) && ((CudaNdarray_HOST_STRIDES(%(z_in)s)[0] > 0)
...@@ -441,7 +441,7 @@ class GpuGer(GpuOp): ...@@ -441,7 +441,7 @@ class GpuGer(GpuOp):
sio = StringIO() sio = StringIO()
print >> sio, """ print >> sio, """
float %(name)s_alpha = ((dtype_%(a)s*)(%(a)s->data))[0]; float %(name)s_alpha = ((dtype_%(a)s*)(PyArray_DATA(%(a)s)))[0];
if (%(inplace)s if (%(inplace)s
&& (CudaNdarray_HOST_STRIDES(%(z_in)s)[0] >= 0) && (CudaNdarray_HOST_STRIDES(%(z_in)s)[0] >= 0)
......
...@@ -498,7 +498,7 @@ PyObject * CudaNdarray_CreateArrayObj(CudaNdarray * self, PyObject *args) ...@@ -498,7 +498,7 @@ PyObject * CudaNdarray_CreateArrayObj(CudaNdarray * self, PyObject *args)
if (!rval){ if (!rval){
return NULL; return NULL;
} }
assert (PyArray_ITEMSIZE(rval) == sizeof(real)); assert (PyArray_ITEMSIZE((PyArrayObject *)rval) == sizeof(real));
return rval; return rval;
} }
if ((self->nd < 0) || (self->devdata == 0)) if ((self->nd < 0) || (self->devdata == 0))
...@@ -527,7 +527,9 @@ PyObject * CudaNdarray_CreateArrayObj(CudaNdarray * self, PyObject *args) ...@@ -527,7 +527,9 @@ PyObject * CudaNdarray_CreateArrayObj(CudaNdarray * self, PyObject *args)
assert (npydims); assert (npydims);
for (int i = 0; i < self->nd; ++i) for (int i = 0; i < self->nd; ++i)
npydims[i] = (npy_intp)(CudaNdarray_HOST_DIMS(self)[i]); npydims[i] = (npy_intp)(CudaNdarray_HOST_DIMS(self)[i]);
PyObject * rval = PyArray_SimpleNew(self->nd, npydims, REAL_TYPENUM); PyArrayObject * rval = (PyArrayObject *) PyArray_SimpleNew(self->nd,
npydims,
REAL_TYPENUM);
free(npydims); free(npydims);
if (!rval) if (!rval)
{ {
...@@ -555,7 +557,7 @@ PyObject * CudaNdarray_CreateArrayObj(CudaNdarray * self, PyObject *args) ...@@ -555,7 +557,7 @@ PyObject * CudaNdarray_CreateArrayObj(CudaNdarray * self, PyObject *args)
} }
Py_DECREF(contiguous_self); Py_DECREF(contiguous_self);
return rval; return (PyObject *)rval;
} }
// TODO-- we have two functions here, ZEROS and Zeros. // TODO-- we have two functions here, ZEROS and Zeros.
...@@ -978,7 +980,7 @@ CudaNdarray_TakeFrom(CudaNdarray * self, PyObject *args){ ...@@ -978,7 +980,7 @@ CudaNdarray_TakeFrom(CudaNdarray * self, PyObject *args){
return NULL; return NULL;
if (verbose) printf("ndarray indices\n"); if (verbose) printf("ndarray indices\n");
if (PyArray_TYPE(indices_obj) != NPY_INT32) { if (PyArray_TYPE((PyArrayObject *)indices_obj) != NPY_INT32) {
PyErr_SetString(PyExc_TypeError, "CudaNdarray_TakeFrom: need a ndarray for indices with dtype int32"); PyErr_SetString(PyExc_TypeError, "CudaNdarray_TakeFrom: need a ndarray for indices with dtype int32");
return NULL; return NULL;
} }
...@@ -3357,7 +3359,7 @@ filter(PyObject* __unsed_self, PyObject *args) // args = (data, broadcastable, s ...@@ -3357,7 +3359,7 @@ filter(PyObject* __unsed_self, PyObject *args) // args = (data, broadcastable, s
} }
for (int i = 0; i < PyArray_NDIM(data); ++i) for (int i = 0; i < PyArray_NDIM(data); ++i)
{ {
if ((data->dimensions[i] > 1) && PyInt_AsLong(PyTuple_GetItem(broadcastable, Py_ssize_t(i)))) if ((PyArray_DIMS(data)[i] > 1) && PyInt_AsLong(PyTuple_GetItem(broadcastable, Py_ssize_t(i))))
{ {
PyErr_Format(PyExc_TypeError, "Non-unit size in broadcastable dimension %i", i); PyErr_Format(PyExc_TypeError, "Non-unit size in broadcastable dimension %i", i);
Py_DECREF(data); Py_DECREF(data);
...@@ -3603,7 +3605,8 @@ cublas_shutdown() ...@@ -3603,7 +3605,8 @@ cublas_shutdown()
int int
CudaNdarray_CopyFromArray(CudaNdarray * self, PyArrayObject*obj) CudaNdarray_CopyFromArray(CudaNdarray * self, PyArrayObject*obj)
{ {
int err = CudaNdarray_alloc_contiguous(self, PyArray_NDIM(obj), obj->dimensions); int err = CudaNdarray_alloc_contiguous(self, PyArray_NDIM(obj),
PyArray_DIMS(obj));
if (err) { if (err) {
return err; return err;
} }
...@@ -3615,7 +3618,8 @@ CudaNdarray_CopyFromArray(CudaNdarray * self, PyArrayObject*obj) ...@@ -3615,7 +3618,8 @@ CudaNdarray_CopyFromArray(CudaNdarray * self, PyArrayObject*obj)
return -1; return -1;
} }
assert( 4 == PyArray_ITEMSIZE(obj)); assert( 4 == PyArray_ITEMSIZE(obj));
PyObject * py_src = PyArray_ContiguousFromAny((PyObject*)obj, typenum, self->nd, self->nd); PyArrayObject * py_src = (PyArrayObject *)PyArray_ContiguousFromAny(
(PyObject*)obj, typenum, self->nd, self->nd);
if (!py_src) { if (!py_src) {
return -1; return -1;
} }
...@@ -4122,10 +4126,12 @@ int CudaNdarray_gemm(float alpha, const CudaNdarray * A, const CudaNdarray * B, ...@@ -4122,10 +4126,12 @@ int CudaNdarray_gemm(float alpha, const CudaNdarray * A, const CudaNdarray * B,
{ {
PyErr_Format(PyExc_RuntimeError, PyErr_Format(PyExc_RuntimeError,
"cublasSgemm failed (%i) %s\n" "cublasSgemm failed (%i) %s\n"
" unit=%h N=%d, c.dims=[%d %d], a.dim=[%d %d], alpha=%f, beta=%f, a=%p, b=%p, c=%p" " unit=%x N=%d, c.dims=[%d %d], a.dim=[%d %d], alpha=%f, beta=%f, a=%p, b=%p, c=%p"
" sa_0=%d, sa_1=%d, sb_0=%d, sb_1=%d, sc_0=%d, sc_1=%d", " sa_0=%d, sa_1=%d, sb_0=%d, sb_1=%d, sc_0=%d, sc_1=%d",
err, cublasGetErrorString(err), err, cublasGetErrorString(err),
unit, N, CudaNdarray_HOST_DIMS(C)[0], CudaNdarray_HOST_DIMS(C)[1], unit, N,
CudaNdarray_HOST_DIMS(C)[0],
CudaNdarray_HOST_DIMS(C)[1],
CudaNdarray_HOST_DIMS(A)[0], CudaNdarray_HOST_DIMS(A)[1], CudaNdarray_HOST_DIMS(A)[0], CudaNdarray_HOST_DIMS(A)[1],
alpha, beta, a, b, c, sa_0, sa_1, sb_0, sb_1, sc_0, sc_1); alpha, beta, a, b, c, sa_0, sa_1, sb_0, sb_1, sc_0, sc_1);
......
...@@ -206,12 +206,12 @@ class GpuImages2Neibs(Images2Neibs, GpuOp): ...@@ -206,12 +206,12 @@ class GpuImages2Neibs(Images2Neibs, GpuOp):
int grid_d = -1; int grid_d = -1;
{ {
if (%(ten4)s->nd != 4) if (CudaNdarray_NDIM(%(ten4)s) != 4)
{ {
PyErr_Format(PyExc_TypeError, "pvals wrong rank"); PyErr_Format(PyExc_TypeError, "pvals wrong rank");
%(fail)s; %(fail)s;
} }
if (%(neib_shape)s->nd != 1) if (PyArray_NDIM(%(neib_shape)s) != 1)
{ {
PyErr_Format(PyExc_TypeError, "unis wrong rank"); PyErr_Format(PyExc_TypeError, "unis wrong rank");
%(fail)s; %(fail)s;
......
...@@ -95,17 +95,17 @@ class GpuCrossentropySoftmaxArgmax1HotWithBias(GpuOp): ...@@ -95,17 +95,17 @@ class GpuCrossentropySoftmaxArgmax1HotWithBias(GpuOp):
fail = sub['fail'] fail = sub['fail']
sio = StringIO() sio = StringIO()
print >> sio, """ print >> sio, """
if (%(y_idx)s->nd != 1) if (CudaNdarray_NDIM(%(y_idx)s) != 1)
{ {
PyErr_SetString(PyExc_ValueError, "y_idx not 1d tensor"); PyErr_SetString(PyExc_ValueError, "y_idx not 1d tensor");
%(fail)s; %(fail)s;
} }
if (%(x)s->nd != 2) if (CudaNdarray_NDIM(%(x)s) != 2)
{ {
PyErr_SetString(PyExc_ValueError, "x not 2d tensor"); PyErr_SetString(PyExc_ValueError, "x not 2d tensor");
%(fail)s; %(fail)s;
} }
if (%(b)s->nd != 1) if (CudaNdarray_NDIM(%(b)s) != 1)
{ {
PyErr_SetString(PyExc_ValueError, "b not 1d tensor"); PyErr_SetString(PyExc_ValueError, "b not 1d tensor");
%(fail)s; %(fail)s;
...@@ -247,9 +247,9 @@ class GpuCrossentropySoftmax1HotWithBiasDx(GpuOp): ...@@ -247,9 +247,9 @@ class GpuCrossentropySoftmax1HotWithBiasDx(GpuOp):
dx, = out dx, = out
fail = sub['fail'] fail = sub['fail']
return """ return """
if ((%(dnll)s->nd != 1) if ((CudaNdarray_NDIM(%(dnll)s) != 1)
|| (%(sm)s->nd != 2) || (CudaNdarray_NDIM(%(sm)s) != 2)
|| (%(y_idx)s->nd != 1)) || (CudaNdarray_NDIM(%(y_idx)s) != 1))
{ {
PyErr_SetString(PyExc_ValueError, "rank error"); PyErr_SetString(PyExc_ValueError, "rank error");
%(fail)s; %(fail)s;
...@@ -391,7 +391,7 @@ class GpuSoftmax(GpuOp): ...@@ -391,7 +391,7 @@ class GpuSoftmax(GpuOp):
z, = out z, = out
fail = sub['fail'] fail = sub['fail']
return """ return """
if (%(x)s->nd != 2) if (CudaNdarray_NDIM(%(x)s) != 2)
{ {
PyErr_SetString(PyExc_ValueError, "rank error"); PyErr_SetString(PyExc_ValueError, "rank error");
%(fail)s; %(fail)s;
...@@ -556,12 +556,12 @@ class GpuSoftmaxWithBias(GpuOp): ...@@ -556,12 +556,12 @@ class GpuSoftmaxWithBias(GpuOp):
z, = out z, = out
fail = sub['fail'] fail = sub['fail']
return """ return """
if (%(x)s->nd != 2) if (CudaNdarray_NDIM(%(x)s) != 2)
{ {
PyErr_SetString(PyExc_ValueError, "rank error input"); PyErr_SetString(PyExc_ValueError, "rank error input");
%(fail)s; %(fail)s;
} }
if (%(b)s->nd != 1) if (CudaNdarray_NDIM(%(b)s) != 1)
{ {
PyErr_SetString(PyExc_ValueError, "rank error for the bias"); PyErr_SetString(PyExc_ValueError, "rank error for the bias");
%(fail)s; %(fail)s;
......
...@@ -156,6 +156,12 @@ class NVCC_compiler(object): ...@@ -156,6 +156,12 @@ class NVCC_compiler(object):
os.path.join(os.path.split(__file__)[0], 'cuda_ndarray.cuh')) os.path.join(os.path.split(__file__)[0], 'cuda_ndarray.cuh'))
flags.append('-DCUDA_NDARRAY_CUH=' + cuda_ndarray_cuh_hash) flags.append('-DCUDA_NDARRAY_CUH=' + cuda_ndarray_cuh_hash)
# NumPy 1.7 deprecated the old C API. Most places have been updated
# to use the new API, but not all of them. Once the migration is
# finished, enable the following macro to assert that no new code
# uses the old API.
flags.append("-D NPY_NO_DEPRECATED_API=NPY_1_7_API_VERSION")
# numpy 1.7 deprecated the following macros but they didn't # numpy 1.7 deprecated the following macros but they didn't
# exist in the past # exist in the past
numpy_ver = [int(n) for n in numpy.__version__.split('.')[:2]] numpy_ver = [int(n) for n in numpy.__version__.split('.')[:2]]
......
...@@ -148,27 +148,28 @@ class CURAND_Base(GpuOp): ...@@ -148,27 +148,28 @@ class CURAND_Base(GpuOp):
int n_elements = 1; int n_elements = 1;
int must_alloc_sample = ((NULL == %(o_sample)s) int must_alloc_sample = ((NULL == %(o_sample)s)
|| !CudaNdarray_Check(py_%(o_sample)s) || !CudaNdarray_Check(py_%(o_sample)s)
|| (%(o_sample)s->nd != %(ndim)s)); || (CudaNdarray_NDIM(%(o_sample)s) != %(ndim)s));
if (%(size)s->nd != 1) if (PyArray_NDIM(%(size)s) != 1)
{ {
PyErr_SetString(PyExc_ValueError, "size must be vector"); PyErr_SetString(PyExc_ValueError, "size must be vector");
%(fail)s %(fail)s
} }
if (%(size)s->dimensions[0] != %(ndim)s) if (PyArray_DIMS(%(size)s)[0] != %(ndim)s)
{ {
PyErr_Format(PyExc_ValueError, "size must have length %%i (not %%i)", PyErr_Format(PyExc_ValueError, "size must have length %%i (not %%i)",
%(ndim)s, %(size)s->dimensions[0]); %(ndim)s, PyArray_DIMS(%(size)s)[0]);
%(fail)s %(fail)s
} }
if (PyArray_DESCR(%(size)s)->type_num != NPY_INT32) if (PyArray_TYPE(%(size)s) != NPY_INT32)
{ {
PyErr_SetString(PyExc_ValueError, "size must be int32"); PyErr_SetString(PyExc_ValueError, "size must be int32");
%(fail)s %(fail)s
} }
for (int i = 0; i < %(ndim)s; ++i) for (int i = 0; i < %(ndim)s; ++i)
{ {
odims[i] = ((npy_int32*)(%(size)s->data + %(size)s->strides[0] * i))[0]; odims[i] = ((npy_int32*)(PyArray_DATA(%(size)s) +
PyArray_STRIDES(%(size)s)[0] * i))[0];
n_elements *= odims[i]; n_elements *= odims[i];
must_alloc_sample = (must_alloc_sample must_alloc_sample = (must_alloc_sample
|| CudaNdarray_HOST_DIMS(%(o_sample)s)[i] != odims[i]); || CudaNdarray_HOST_DIMS(%(o_sample)s)[i] != odims[i]);
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论