提交 c1abbdd5 authored 作者: abergeron's avatar abergeron

Merge pull request #1991 from nouiz/old_numpy

[ENH] Make the gpu back-end use the new numpy c interface
......@@ -50,33 +50,33 @@ class GpuConv3D(GpuOp):
//printf("\t\t\t\tConv3DGPU c code\\n");
//Check dimensionality of inputs
if (%(W)s->nd != 5)
if (CudaNdarray_NDIM(%(W)s) != 5)
{
PyErr_Format(PyExc_ValueError, "GpuConv3D: W must be a 5 dimensional CudaNdarray");
%(fail)s
}
if (%(V)s->nd != 5)
if (CudaNdarray_NDIM(%(V)s) != 5)
{
PyErr_Format(PyExc_ValueError, "GpuConv3D: V must be a 5 dimensional CudaNdarray");
%(fail)s
}
if (%(b)s->nd != 1)
if (CudaNdarray_NDIM(%(b)s) != 1)
{
PyErr_Format(PyExc_ValueError, "GpuConv3D: b must be a vector CudaNdarray");
%(fail)s
}
if (%(d)s->nd != 1)
if (CudaNdarray_NDIM(%(d)s) != 1)
{
PyErr_Format(PyExc_ValueError, "GpuConv3D: d must be a vector CudaNdarray");
%(fail)s
}
if (%(d)s->dimensions[0] != 3)
if (PyArray_DIMS(%(d)s)[0] != 3)
{
PyErr_Format(PyExc_ValueError, "GpuConv3D: 3 stride length arguments expected (row, col, time) but %%li were given", %(d)s->dimensions[0]);
PyErr_Format(PyExc_ValueError, "GpuConv3D: 3 stride length arguments expected (row, col, time) but %%li were given", PyArray_DIMS(%(d)s)[0]);
%(fail)s
}
......
......@@ -79,33 +79,33 @@ class GpuConvGrad3D(GpuOp):
//printf("\t\t\t\tGpuConvGrad3DW c code\\n");
//Check dimensionality of inputs
if (%(dCdH)s->nd != 5)
if (CudaNdarray_NDIM(%(dCdH)s) != 5)
{
PyErr_Format(PyExc_ValueError, "GpuConvGrad3D: dCdH must be a 5-d CudaNdArray");
%(fail)s
}
if (%(V)s->nd != 5)
if (CudaNdarray_NDIM(%(V)s) != 5)
{
PyErr_Format(PyExc_ValueError, "GpuConvGrad3D: V must be a 5-d CudaNdArray");
%(fail)s
}
if (%(WShape)s->nd != 1)
if (CudaNdarray_NDIM(%(WShape)s) != 1)
{
PyErr_Format(PyExc_ValueError, "GpuConvGrad3D: WShape must be a 1-d CudaNdArray");
%(fail)s
}
if (%(d)s->nd != 1)
if (PyArray_NDIM(%(d)s) != 1)
{
PyErr_Format(PyExc_ValueError, "GpuConvGrad3D: d must be a 1-d CudaNdArray");
%(fail)s
}
if (%(d)s->dimensions[0] != 3)
if (PyArray_DIMS(%(d)s)[0] != 3)
{
PyErr_Format(PyExc_ValueError, "GpuConvGrad3D: 3 stride lengths arguments expected(for row, col, and time) but %%li were given", %(d)s->dimensions[0]);
PyErr_Format(PyExc_ValueError, "GpuConvGrad3D: 3 stride lengths arguments expected(for row, col, and time) but %%li were given", PyArray_DIMS(%(d)s)[0]);
%(fail)s
}
......@@ -113,7 +113,7 @@ class GpuConvGrad3D(GpuOp):
//Read and check sizes of inputs
const int batchSize = CudaNdarray_HOST_DIMS(%(V)s)[0];
if (%(WShape)s->dimensions[0] != 5)
if (PyArray_DIMS(%(WShape)s)[0] != 5)
{
PyErr_Format(PyExc_ValueError, "GpuConvGrad3D: WShape must specify a 5-d shape");
%(fail)s
......@@ -125,7 +125,7 @@ class GpuConvGrad3D(GpuOp):
}
{ //for fail
dtype_%(WShape)s * WShape = (dtype_%(WShape)s *) %(WShape)s->data;
dtype_%(WShape)s * WShape = (dtype_%(WShape)s *) PyArray_DATA(%(WShape)s);
const int outputChannels = WShape[0];
const int inputChannels = CudaNdarray_HOST_DIMS(%(V)s)[4];
if (WShape[4] != inputChannels)
......
......@@ -61,34 +61,34 @@ class GpuConvTransp3D(GpuOp):
//printf("\t\t\t\tGpuConvTransp c code\\n");
//Check dimensionality of inputs
if (%(H)s->nd != 5)
if (CudaNdarray_NDIM(%(H)s) != 5)
{
PyErr_Format(PyExc_ValueError, "GpuConvTransp3D: H must be a 5-D tensor but it is %%i-D",%(H)s->nd);
PyErr_Format(PyExc_ValueError, "GpuConvTransp3D: H must be a 5-D tensor but it is %%i-D", CudaNdarray_NDIM(%(H)s));
%(fail)s
}
if (%(W)s->nd != 5)
if (CudaNdarray_NDIM(%(W)s) != 5)
{
PyErr_Format(PyExc_ValueError, "GpuConvTransp3D: W must be a 5-D tensor");
%(fail)s
}
if (%(b)s->nd != 1)
if (CudaNdarray_NDIM(%(b)s) != 1)
{
PyErr_Format(PyExc_ValueError, "GpuConvTransp3D: b must be a vector");
%(fail)s
}
if (%(d)s->nd != 1)
if (PyArray_NDIM(%(d)s) != 1)
{
PyErr_Format(PyExc_ValueError, "GpuConvTransp3D: d must be a vector");
%(fail)s
}
//Read and check stride arguments
if (%(d)s->dimensions[0] != 3)
if (PyArray_DIMS(%(d)s)[0] != 3)
{
PyErr_Format(PyExc_ValueError,"GpuConvTransp3D: 3 stride length arguments expected (for row, col, and time) but %%li were given", %(d)s->dimensions[0]);
PyErr_Format(PyExc_ValueError,"GpuConvTransp3D: 3 stride length arguments expected (for row, col, and time) but %%li were given", PyArray_DIMS(%(d)s)[0]);
%(fail)s
}
{ // for fail
......@@ -138,13 +138,13 @@ class GpuConvTransp3D(GpuOp):
if (%(RShape)s)
{
if (%(RShape)s->nd != 1)
if (PyArray_NDIM(%(RShape)s) != 1)
{
PyErr_Format(PyExc_ValueError, "RShape must be a vector");
%(fail)s
}
if (%(RShape)s->dimensions[0] != 3)
if (PyArray_DIMS(%(RShape)s)[0] != 3)
{
PyErr_Format(PyExc_ValueError, "RShape must specify a 3D shape ( [height,width,duration] )");
%(fail)s
......@@ -189,7 +189,7 @@ class GpuConvTransp3D(GpuOp):
%(fail)s;
}
}
cudaMemset(%(R)s->devdata, 0, 4 * batchSize * inputChannels * videoHeight * videoWidth * videoDur);
cudaMemset(CudaNdarray_DEV_DATA(%(R)s), 0, 4 * batchSize * inputChannels * videoHeight * videoWidth * videoDur);
{ // for fail
......
......@@ -124,8 +124,8 @@ class GpuDot22Scalar(GpuOp):
return """
#define REAL float
float %(name)s_a = (PyArray_TYPE(%(a)s) == NPY_FLOAT)
? (REAL)(((float*)%(a)s->data)[0])
: (REAL)(((double*)%(a)s->data)[0]);
? (REAL)(((float*)PyArray_DATA(%(a)s))[0])
: (REAL)(((double*)PyArray_DATA(%(a)s))[0]);
#undef REAL
if (%(x)s->nd != 2)
{
......@@ -232,12 +232,12 @@ class GpuGemm(GpuOp):
#define REAL float
float %(name)s_a = (PyArray_TYPE(%(a)s) == NPY_FLOAT)
? (REAL)(((float*)%(a)s->data)[0])
: (REAL)(((double*)%(a)s->data)[0]);
? (REAL)(((float*)PyArray_DATA(%(a)s))[0])
: (REAL)(((double*)PyArray_DATA(%(a)s))[0]);
float %(name)s_b = (PyArray_TYPE(%(b)s) == NPY_FLOAT) ?
(REAL)(((float*)%(b)s->data)[0])
: (REAL)(((double*)%(b)s->data)[0]);
(REAL)(((float*)PyArray_DATA(%(b)s))[0])
: (REAL)(((double*)PyArray_DATA(%(b)s))[0]);
#undef REAL
if (%(inplace)s
......@@ -344,8 +344,8 @@ class GpuGemv(GpuOp):
sio = StringIO()
print >> sio, """
float %(name)s_alpha = ((dtype_%(a)s*)(%(a)s->data))[0];
float %(name)s_beta = ((dtype_%(b)s*)(%(b)s->data))[0];
float %(name)s_alpha = ((dtype_%(a)s*)(PyArray_DATA(%(a)s)))[0];
float %(name)s_beta = ((dtype_%(b)s*)(PyArray_DATA(%(b)s)))[0];
if (%(inplace)s
&& ((CudaNdarray_HOST_STRIDES(%(z_in)s)[0] > 0)
......@@ -441,7 +441,7 @@ class GpuGer(GpuOp):
sio = StringIO()
print >> sio, """
float %(name)s_alpha = ((dtype_%(a)s*)(%(a)s->data))[0];
float %(name)s_alpha = ((dtype_%(a)s*)(PyArray_DATA(%(a)s)))[0];
if (%(inplace)s
&& (CudaNdarray_HOST_STRIDES(%(z_in)s)[0] >= 0)
......
......@@ -498,7 +498,7 @@ PyObject * CudaNdarray_CreateArrayObj(CudaNdarray * self, PyObject *args)
if (!rval){
return NULL;
}
assert (PyArray_ITEMSIZE(rval) == sizeof(real));
assert (PyArray_ITEMSIZE((PyArrayObject *)rval) == sizeof(real));
return rval;
}
if ((self->nd < 0) || (self->devdata == 0))
......@@ -527,7 +527,9 @@ PyObject * CudaNdarray_CreateArrayObj(CudaNdarray * self, PyObject *args)
assert (npydims);
for (int i = 0; i < self->nd; ++i)
npydims[i] = (npy_intp)(CudaNdarray_HOST_DIMS(self)[i]);
PyObject * rval = PyArray_SimpleNew(self->nd, npydims, REAL_TYPENUM);
PyArrayObject * rval = (PyArrayObject *) PyArray_SimpleNew(self->nd,
npydims,
REAL_TYPENUM);
free(npydims);
if (!rval)
{
......@@ -555,7 +557,7 @@ PyObject * CudaNdarray_CreateArrayObj(CudaNdarray * self, PyObject *args)
}
Py_DECREF(contiguous_self);
return rval;
return (PyObject *)rval;
}
// TODO-- we have two functions here, ZEROS and Zeros.
......@@ -978,7 +980,7 @@ CudaNdarray_TakeFrom(CudaNdarray * self, PyObject *args){
return NULL;
if (verbose) printf("ndarray indices\n");
if (PyArray_TYPE(indices_obj) != NPY_INT32) {
if (PyArray_TYPE((PyArrayObject *)indices_obj) != NPY_INT32) {
PyErr_SetString(PyExc_TypeError, "CudaNdarray_TakeFrom: need a ndarray for indices with dtype int32");
return NULL;
}
......@@ -3357,7 +3359,7 @@ filter(PyObject* __unsed_self, PyObject *args) // args = (data, broadcastable, s
}
for (int i = 0; i < PyArray_NDIM(data); ++i)
{
if ((data->dimensions[i] > 1) && PyInt_AsLong(PyTuple_GetItem(broadcastable, Py_ssize_t(i))))
if ((PyArray_DIMS(data)[i] > 1) && PyInt_AsLong(PyTuple_GetItem(broadcastable, Py_ssize_t(i))))
{
PyErr_Format(PyExc_TypeError, "Non-unit size in broadcastable dimension %i", i);
Py_DECREF(data);
......@@ -3603,7 +3605,8 @@ cublas_shutdown()
int
CudaNdarray_CopyFromArray(CudaNdarray * self, PyArrayObject*obj)
{
int err = CudaNdarray_alloc_contiguous(self, PyArray_NDIM(obj), obj->dimensions);
int err = CudaNdarray_alloc_contiguous(self, PyArray_NDIM(obj),
PyArray_DIMS(obj));
if (err) {
return err;
}
......@@ -3615,7 +3618,8 @@ CudaNdarray_CopyFromArray(CudaNdarray * self, PyArrayObject*obj)
return -1;
}
assert( 4 == PyArray_ITEMSIZE(obj));
PyObject * py_src = PyArray_ContiguousFromAny((PyObject*)obj, typenum, self->nd, self->nd);
PyArrayObject * py_src = (PyArrayObject *)PyArray_ContiguousFromAny(
(PyObject*)obj, typenum, self->nd, self->nd);
if (!py_src) {
return -1;
}
......@@ -4122,10 +4126,12 @@ int CudaNdarray_gemm(float alpha, const CudaNdarray * A, const CudaNdarray * B,
{
PyErr_Format(PyExc_RuntimeError,
"cublasSgemm failed (%i) %s\n"
" unit=%h N=%d, c.dims=[%d %d], a.dim=[%d %d], alpha=%f, beta=%f, a=%p, b=%p, c=%p"
" unit=%x N=%d, c.dims=[%d %d], a.dim=[%d %d], alpha=%f, beta=%f, a=%p, b=%p, c=%p"
" sa_0=%d, sa_1=%d, sb_0=%d, sb_1=%d, sc_0=%d, sc_1=%d",
err, cublasGetErrorString(err),
unit, N, CudaNdarray_HOST_DIMS(C)[0], CudaNdarray_HOST_DIMS(C)[1],
unit, N,
CudaNdarray_HOST_DIMS(C)[0],
CudaNdarray_HOST_DIMS(C)[1],
CudaNdarray_HOST_DIMS(A)[0], CudaNdarray_HOST_DIMS(A)[1],
alpha, beta, a, b, c, sa_0, sa_1, sb_0, sb_1, sc_0, sc_1);
......
......@@ -206,12 +206,12 @@ class GpuImages2Neibs(Images2Neibs, GpuOp):
int grid_d = -1;
{
if (%(ten4)s->nd != 4)
if (CudaNdarray_NDIM(%(ten4)s) != 4)
{
PyErr_Format(PyExc_TypeError, "pvals wrong rank");
%(fail)s;
}
if (%(neib_shape)s->nd != 1)
if (PyArray_NDIM(%(neib_shape)s) != 1)
{
PyErr_Format(PyExc_TypeError, "unis wrong rank");
%(fail)s;
......
......@@ -95,17 +95,17 @@ class GpuCrossentropySoftmaxArgmax1HotWithBias(GpuOp):
fail = sub['fail']
sio = StringIO()
print >> sio, """
if (%(y_idx)s->nd != 1)
if (CudaNdarray_NDIM(%(y_idx)s) != 1)
{
PyErr_SetString(PyExc_ValueError, "y_idx not 1d tensor");
%(fail)s;
}
if (%(x)s->nd != 2)
if (CudaNdarray_NDIM(%(x)s) != 2)
{
PyErr_SetString(PyExc_ValueError, "x not 2d tensor");
%(fail)s;
}
if (%(b)s->nd != 1)
if (CudaNdarray_NDIM(%(b)s) != 1)
{
PyErr_SetString(PyExc_ValueError, "b not 1d tensor");
%(fail)s;
......@@ -247,9 +247,9 @@ class GpuCrossentropySoftmax1HotWithBiasDx(GpuOp):
dx, = out
fail = sub['fail']
return """
if ((%(dnll)s->nd != 1)
|| (%(sm)s->nd != 2)
|| (%(y_idx)s->nd != 1))
if ((CudaNdarray_NDIM(%(dnll)s) != 1)
|| (CudaNdarray_NDIM(%(sm)s) != 2)
|| (CudaNdarray_NDIM(%(y_idx)s) != 1))
{
PyErr_SetString(PyExc_ValueError, "rank error");
%(fail)s;
......@@ -391,7 +391,7 @@ class GpuSoftmax(GpuOp):
z, = out
fail = sub['fail']
return """
if (%(x)s->nd != 2)
if (CudaNdarray_NDIM(%(x)s) != 2)
{
PyErr_SetString(PyExc_ValueError, "rank error");
%(fail)s;
......@@ -556,12 +556,12 @@ class GpuSoftmaxWithBias(GpuOp):
z, = out
fail = sub['fail']
return """
if (%(x)s->nd != 2)
if (CudaNdarray_NDIM(%(x)s) != 2)
{
PyErr_SetString(PyExc_ValueError, "rank error input");
%(fail)s;
}
if (%(b)s->nd != 1)
if (CudaNdarray_NDIM(%(b)s) != 1)
{
PyErr_SetString(PyExc_ValueError, "rank error for the bias");
%(fail)s;
......
......@@ -156,6 +156,12 @@ class NVCC_compiler(object):
os.path.join(os.path.split(__file__)[0], 'cuda_ndarray.cuh'))
flags.append('-DCUDA_NDARRAY_CUH=' + cuda_ndarray_cuh_hash)
# NumPy 1.7 deprecated the old API. I updated most of the places
# to use the new API, but not everywhere. When finished, enable
# the following macro to assert that we don't introduce new code
# that uses the old API.
flags.append("-D NPY_NO_DEPRECATED_API=NPY_1_7_API_VERSION")
# NumPy 1.7 deprecated the following macro, but it did not
# exist in older NumPy versions.
numpy_ver = [int(n) for n in numpy.__version__.split('.')[:2]]
......
......@@ -148,27 +148,28 @@ class CURAND_Base(GpuOp):
int n_elements = 1;
int must_alloc_sample = ((NULL == %(o_sample)s)
|| !CudaNdarray_Check(py_%(o_sample)s)
|| (%(o_sample)s->nd != %(ndim)s));
|| (CudaNdarray_NDIM(%(o_sample)s) != %(ndim)s));
if (%(size)s->nd != 1)
if (PyArray_NDIM(%(size)s) != 1)
{
PyErr_SetString(PyExc_ValueError, "size must be vector");
%(fail)s
}
if (%(size)s->dimensions[0] != %(ndim)s)
if (PyArray_DIMS(%(size)s)[0] != %(ndim)s)
{
PyErr_Format(PyExc_ValueError, "size must have length %%i (not %%i)",
%(ndim)s, %(size)s->dimensions[0]);
%(ndim)s, PyArray_DIMS(%(size)s)[0]);
%(fail)s
}
if (PyArray_DESCR(%(size)s)->type_num != NPY_INT32)
if (PyArray_TYPE(%(size)s) != NPY_INT32)
{
PyErr_SetString(PyExc_ValueError, "size must be int32");
%(fail)s
}
for (int i = 0; i < %(ndim)s; ++i)
{
odims[i] = ((npy_int32*)(%(size)s->data + %(size)s->strides[0] * i))[0];
odims[i] = ((npy_int32*)(PyArray_DATA(%(size)s) +
PyArray_STRIDES(%(size)s)[0] * i))[0];
n_elements *= odims[i];
must_alloc_sample = (must_alloc_sample
|| CudaNdarray_HOST_DIMS(%(o_sample)s)[i] != odims[i]);
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论