提交 4d8e60e7 authored 作者: Arnaud Bergeron's avatar Arnaud Bergeron

Move over to the pygpu API.

上级 0f7d5930
...@@ -783,6 +783,10 @@ if (theano_prep_output(&%(zz)s, %(ndim)s, shape, %(type)s, GA_C_ORDER, ...@@ -783,6 +783,10 @@ if (theano_prep_output(&%(zz)s, %(ndim)s, shape, %(type)s, GA_C_ORDER,
raise NotImplementedError("grad disabled") raise NotImplementedError("grad disabled")
def empty_like(var):
return GpuAllocEmpty(var.type.dtype)(*var.shape)
class GpuContiguous(Op): class GpuContiguous(Op):
""" """
Always return a c contiguous output. Copy the input only if it is Always return a c contiguous output. Copy the input only if it is
......
#section support_code_struct #section support_code_struct
int int
APPLY_SPECIFIC(conv_fwd)(CudaNdarray *input, CudaNdarray *kerns, APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns,
CudaNdarray *om, cudnnConvolutionDescriptor_t desc, PyGpuArrayObject *om,
float alpha, float beta, CudaNdarray **output) { cudnnConvolutionDescriptor_t desc,
double alpha, double beta,
PyGpuArrayObject **output) {
cudnnStatus_t err = CUDNN_STATUS_SUCCESS; cudnnStatus_t err = CUDNN_STATUS_SUCCESS;
if (CudaNdarray_HOST_DIMS(input)[1] != CudaNdarray_HOST_DIMS(kerns)[1]) { float af = alpha, bf = beta;
void *alpha_p;
void *beta_p;
if (PyGpuArray_DIMS(input)[1] != PyGpuArray_DIMS(kerns)[1]) {
PyErr_SetString(PyExc_ValueError, PyErr_SetString(PyExc_ValueError,
"GpuDnnConv images and kernel must have the same stack size\n"); "GpuDnnConv images and kernel must have the same stack size");
return 1; return 1;
} }
...@@ -16,14 +22,29 @@ APPLY_SPECIFIC(conv_fwd)(CudaNdarray *input, CudaNdarray *kerns, ...@@ -16,14 +22,29 @@ APPLY_SPECIFIC(conv_fwd)(CudaNdarray *input, CudaNdarray *kerns,
if (c_set_filter(kerns, APPLY_SPECIFIC(kerns)) == -1) if (c_set_filter(kerns, APPLY_SPECIFIC(kerns)) == -1)
return 1; return 1;
switch (input->ga.typecode) {
case GA_DOUBLE:
alpha_p = (void *)α
beta_p = (void *)β
break;
case GA_FLOAT:
alpha_p = (void *)⁡
beta_p = (void *)&bf;
break;
default:
PyErr_SetString(PyExc_TypeError, "Unsupported type in convolution");
return 1;
}
#ifdef CONV_INPLACE #ifdef CONV_INPLACE
Py_XDECREF(*output); Py_XDECREF(*output);
*output = om; *output = om;
Py_INCREF(*output); Py_INCREF(*output);
#else #else
if (CudaNdarray_prep_output(output, 4, CudaNdarray_HOST_DIMS(om)) != 0) if (theano_prep_output(output, PyGpuArray_NDIM(om), PyGpuArray_DIMS(om),
om->ga.typecode, GA_C_ORDER) != 0)
return 1; return 1;
if (beta != 0.0 && CudaNdarray_CopyFromCudaNdarray(*output, om)) if (beta != 0.0 && pygpu_move(*output, om))
return 1; return 1;
#endif #endif
...@@ -32,7 +53,7 @@ APPLY_SPECIFIC(conv_fwd)(CudaNdarray *input, CudaNdarray *kerns, ...@@ -32,7 +53,7 @@ APPLY_SPECIFIC(conv_fwd)(CudaNdarray *input, CudaNdarray *kerns,
{ {
size_t worksize; size_t worksize;
void *workspace; gpudata *workspace;
err = cudnnGetConvolutionForwardWorkspaceSize(_handle, err = cudnnGetConvolutionForwardWorkspaceSize(_handle,
APPLY_SPECIFIC(input), APPLY_SPECIFIC(input),
...@@ -48,21 +69,34 @@ APPLY_SPECIFIC(conv_fwd)(CudaNdarray *input, CudaNdarray *kerns, ...@@ -48,21 +69,34 @@ APPLY_SPECIFIC(conv_fwd)(CudaNdarray *input, CudaNdarray *kerns,
return 1; return 1;
} }
workspace = get_work_mem(worksize); /*
if (workspace == NULL && worksize != 0) * This is less than ideal since we need to free it after (which
return 1; * introduces a synchronization point. But we don't have a module
* to place a nice get_work_mem() function in.
*/
if (worksize != 0) {
workspace = pygpu_default_context->ops->buffer_alloc(
pygpu_default_context->ctx, worksize, NULL, 0, NULL);
if (workspace == NULL) {
PyErr_SetString(PyExc_RuntimeError,
"Could not allocate working memory");
return 1;
}
}
err = cudnnConvolutionForward( err = cudnnConvolutionForward(
_handle, _handle,
(void *)&alpha, alpha_p,
APPLY_SPECIFIC(input), CudaNdarray_DEV_DATA(input), APPLY_SPECIFIC(input), PyGpuArray_DEV_DATA(input),
APPLY_SPECIFIC(kerns), CudaNdarray_DEV_DATA(kerns), APPLY_SPECIFIC(kerns), PyGpuArray_DEV_DATA(kerns),
desc, desc, CONV_ALGO,
CONV_ALGO, worksize == 0 ? NULL : *(void **)workspace, worksize,
workspace, worksize, beta_p,
(void *)&beta, APPLY_SPECIFIC(output), PyGpuArray_DEV_DATA(*output));
APPLY_SPECIFIC(output), CudaNdarray_DEV_DATA(*output));
} }
pygpu_default_context->ops->buffer_release(workspace);
if (err != CUDNN_STATUS_SUCCESS) { if (err != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError, "GpuDnnConv: error doing operation: %s", PyErr_Format(PyExc_RuntimeError, "GpuDnnConv: error doing operation: %s",
cudnnGetErrorString(err)); cudnnGetErrorString(err));
......
#section support_code_struct #section support_code_struct
int int
APPLY_SPECIFIC(conv_gi)(CudaNdarray *kerns, CudaNdarray *output, APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output,
CudaNdarray *im, cudnnConvolutionDescriptor_t desc, PyGpuArrayObject *im,
float alpha, float beta, CudaNdarray **input) { cudnnConvolutionDescriptor_t desc,
double alpha, double beta, PyGpuArrayObject **input) {
cudnnStatus_t err = CUDNN_STATUS_SUCCESS; cudnnStatus_t err = CUDNN_STATUS_SUCCESS;
float af = alpha, bf = beta;
void *alpha_p;
void *beta_p;
if (CudaNdarray_HOST_DIMS(im)[1] != CudaNdarray_HOST_DIMS(kerns)[1]) { if (PyGpuArray_DIMS(im)[1] != PyGpuArray_DIMS(kerns)[1]) {
PyErr_SetString(PyExc_ValueError, PyErr_SetString(PyExc_ValueError,
"GpuDnnConv images and kernel must have the same stack size\n"); "GpuDnnConv images and kernel must have the same stack size");
return 1; return 1;
} }
...@@ -17,14 +21,29 @@ APPLY_SPECIFIC(conv_gi)(CudaNdarray *kerns, CudaNdarray *output, ...@@ -17,14 +21,29 @@ APPLY_SPECIFIC(conv_gi)(CudaNdarray *kerns, CudaNdarray *output,
if (c_set_filter(kerns, APPLY_SPECIFIC(kerns)) == -1) if (c_set_filter(kerns, APPLY_SPECIFIC(kerns)) == -1)
return 1; return 1;
switch (input->ga.typecode) {
case GA_DOUBLE:
alpha_p = (void *)α
beta_p = (void *)β
break;
case GA_FLOAT:
alpha_p = (void *)⁡
beta_p = (void *)&bf;
break;
default:
PyErr_SetString(PyExc_TypeError, "Unsupported type in convolution");
return 1;
}
#ifdef CONV_INPLACE #ifdef CONV_INPLACE
Py_XDECREF(*input); Py_XDECREF(*input);
*input = im; *input = im;
Py_INCREF(*input); Py_INCREF(*input);
#else #else
if (CudaNdarray_prep_output(input, 4, CudaNdarray_HOST_DIMS(im)) != 0) if (theano_prep_output(input, PyGpuArray_NDIM(im), PyGpuArray_DIMS(im),
im->ga.typecode, GA_C_ORDER) != 0)
return 1; return 1;
if (beta != 0.0 && CudaNdarray_CopyFromCudaNdarray(*input, im)) if (beta != 0.0 && pygpu_move(*input, im))
return 1; return 1;
#endif #endif
...@@ -33,12 +52,12 @@ APPLY_SPECIFIC(conv_gi)(CudaNdarray *kerns, CudaNdarray *output, ...@@ -33,12 +52,12 @@ APPLY_SPECIFIC(conv_gi)(CudaNdarray *kerns, CudaNdarray *output,
err = cudnnConvolutionBackwardData( err = cudnnConvolutionBackwardData(
_handle, _handle,
(void *)&alpha, alpha_p,
APPLY_SPECIFIC(kerns), CudaNdarray_DEV_DATA(kerns), APPLY_SPECIFIC(kerns), PyGpuArray_DEV_DATA(kerns),
APPLY_SPECIFIC(output), CudaNdarray_DEV_DATA(output), APPLY_SPECIFIC(output), PyGpuArray_DEV_DATA(output),
desc, desc,
(void *)&beta, beta_p,
APPLY_SPECIFIC(input), CudaNdarray_DEV_DATA(*input)); APPLY_SPECIFIC(input), PyGpuArray_DEV_DATA(*input));
if (err != CUDNN_STATUS_SUCCESS) { if (err != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError, "GpuDnnConvGradI: error doing operation: %s", PyErr_Format(PyExc_RuntimeError, "GpuDnnConvGradI: error doing operation: %s",
cudnnGetErrorString(err)); cudnnGetErrorString(err));
......
#section support_code_struct #section support_code_struct
int int
APPLY_SPECIFIC(conv_gw)(CudaNdarray *input, CudaNdarray *output, APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output,
CudaNdarray *km, cudnnConvolutionDescriptor_t desc, PyGpuArrayObject *km,
float alpha, float beta, CudaNdarray **kerns) { cudnnConvolutionDescriptor_t desc,
double alpha, double beta, PyGpuArrayObject **kerns) {
cudnnStatus_t err = CUDNN_STATUS_SUCCESS; cudnnStatus_t err = CUDNN_STATUS_SUCCESS;
float af = alpha, bf = beta;
void *alpha_p;
void *beta_p;
if (CudaNdarray_HOST_DIMS(input)[1] != CudaNdarray_HOST_DIMS(km)[1]) { if (PyGpuArray_DIMS(input)[1] != PyGpuArray_DIMS(km)[1]) {
PyErr_SetString(PyExc_ValueError, PyErr_SetString(PyExc_ValueError,
"GpuDnnConv images and kernel must have the same stack size\n"); "GpuDnnConv images and kernel must have the same stack size");
return 1; return 1;
} }
...@@ -17,14 +21,29 @@ APPLY_SPECIFIC(conv_gw)(CudaNdarray *input, CudaNdarray *output, ...@@ -17,14 +21,29 @@ APPLY_SPECIFIC(conv_gw)(CudaNdarray *input, CudaNdarray *output,
if (c_set_tensor4d(output, APPLY_SPECIFIC(output)) == -1) if (c_set_tensor4d(output, APPLY_SPECIFIC(output)) == -1)
return 1; return 1;
switch (input->ga.typecode) {
case GA_DOUBLE:
alpha_p = (void *)α
beta_p = (void *)β
break;
case GA_FLOAT:
alpha_p = (void *)⁡
beta_p = (void *)&bf;
break;
default:
PyErr_SetString(PyExc_TypeError, "Unsupported type in convolution");
return 1;
}
#ifdef CONV_INPLACE #ifdef CONV_INPLACE
Py_XDECREF(*kerns); Py_XDECREF(*kerns);
*kerns = km; *kerns = km;
Py_INCREF(*kerns); Py_INCREF(*kerns);
#else #else
if (CudaNdarray_prep_output(kerns, 4, CudaNdarray_HOST_DIMS(km)) != 0) if (theano_prep_output(kerns, PyGpuArray_NDIM(km), PyGpuArray_DIMS(km),
km->ga.typecode, GA_C_ORDER) != 0)
return 1; return 1;
if (beta != 0.0 && CudaNdarray_CopyFromCudaNdarray(*kerns, km)) if (beta != 0.0 && pygpu_move(*kerns, km))
return 1; return 1;
#endif #endif
...@@ -33,12 +52,12 @@ APPLY_SPECIFIC(conv_gw)(CudaNdarray *input, CudaNdarray *output, ...@@ -33,12 +52,12 @@ APPLY_SPECIFIC(conv_gw)(CudaNdarray *input, CudaNdarray *output,
err = cudnnConvolutionBackwardFilter( err = cudnnConvolutionBackwardFilter(
_handle, _handle,
(void *)&alpha, alpha_p,
APPLY_SPECIFIC(input), CudaNdarray_DEV_DATA(input), APPLY_SPECIFIC(input), PyGpuArray_DEV_DATA(input),
APPLY_SPECIFIC(output), CudaNdarray_DEV_DATA(output), APPLY_SPECIFIC(output), PyGpuArray_DEV_DATA(output),
desc, desc,
(void *)&beta, beta_p,
APPLY_SPECIFIC(kerns), CudaNdarray_DEV_DATA(*kerns)); APPLY_SPECIFIC(kerns), PyGpuArray_DEV_DATA(*kerns));
if (err != CUDNN_STATUS_SUCCESS) { if (err != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError, "GpuDnnConvGradW: error doing operation: %s", PyErr_Format(PyExc_RuntimeError, "GpuDnnConvGradW: error doing operation: %s",
cudnnGetErrorString(err)); cudnnGetErrorString(err));
......
...@@ -42,4 +42,9 @@ static PyGpuArrayObject *theano_try_copy(PyGpuArrayObject *out, ...@@ -42,4 +42,9 @@ static PyGpuArrayObject *theano_try_copy(PyGpuArrayObject *out,
return out; return out;
} }
/* This is guaranteed to work and return the raw CUDA/OpenCL object on
* all recent (as of June 2015) version of libgpuarray. This is also
* promised to keep working in future versions. */
#define PyGpuArray_DEV_DATA(ary) (*(void **)((ary)->ga.data))
#endif #endif
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论