提交 08957330 作者: Arnaud Bergeron

Make GpuDnnConv support inplace operation.

上级 065e0f5e
......@@ -103,11 +103,11 @@ cudnnConvolutionForward_v2(
const cudnnTensorDescriptor_t destDesc,
void *destData) {
assert(*(float *)alpha == 1.0);
assert(*(float *)beta == 0.0);
assert(*(float *)beta == 1.0);
return cudnnConvolutionForward(handle, srcDesc, srcData,
filterDesc, filterData,
convDesc, destDesc, destData,
CUDNN_RESULT_NO_ACCUMULATE);
CUDNN_RESULT_ACCUMULATE);
}
#define cudnnConvolutionForward cudnnConvolutionForward_v2
......@@ -124,11 +124,11 @@ cudnnConvolutionBackwardFilter_v2(
const cudnnFilterDescriptor_t gradDesc,
void *gradData) {
assert(*(float *)alpha == 1.0);
assert(*(float *)beta == 0.0);
assert(*(float *)beta == 1.0);
return cudnnConvolutionBackwardFilter(handle, srcDesc, srcData,
diffDesc, diffData,
convDesc, gradDesc, gradData,
CUDNN_RESULT_NO_ACCUMULATE);
CUDNN_RESULT_ACCUMULATE);
}
#define cudnnConvolutionBackwardFilter cudnnConvolutionBackwardFilter_v2
......@@ -146,7 +146,7 @@ cudnnConvolutionBackwardData_v2(
const cudnnTensorDescriptor_t gradDesc,
void *gradData) {
assert(*(float *)alpha == 1.0);
assert(*(float *)beta == 0.0);
assert(*(float *)beta == 1.0);
return cudnnConvolutionBackwardData(handle,
(cudnnFilterDescriptor_t)filterDesc,
filterData,
......@@ -155,7 +155,7 @@ cudnnConvolutionBackwardData_v2(
(cudnnConvolutionDescriptor_t)convDesc,
(cudnnTensorDescriptor_t)gradDesc,
gradData,
CUDNN_RESULT_NO_ACCUMULATE);
CUDNN_RESULT_ACCUMULATE);
}
#define cudnnConvolutionBackwardData cudnnConvolutionBackwardData_v2
......
差异被折叠。
......@@ -2,9 +2,8 @@
int
APPLY_SPECIFIC(conv_fwd)(CudaNdarray *input, CudaNdarray *kerns,
cudnnConvolutionDescriptor_t desc,
float alpha, float beta,
CudaNdarray **output) {
CudaNdarray *om, cudnnConvolutionDescriptor_t desc,
float alpha, CudaNdarray **output) {
cudnnStatus_t err = CUDNN_STATUS_SUCCESS;
if (c_set_tensor4d(input, APPLY_SPECIFIC(input)) == -1)
......@@ -12,23 +11,16 @@ APPLY_SPECIFIC(conv_fwd)(CudaNdarray *input, CudaNdarray *kerns,
if (c_set_filter(kerns, APPLY_SPECIFIC(kerns)) == -1)
return 1;
{
int out_dims[4];
err = cudnnGetConvolution2dForwardOutputDim(
desc,
APPLY_SPECIFIC(input),
APPLY_SPECIFIC(kerns),
&out_dims[0], &out_dims[1], &out_dims[2], &out_dims[3]);
if (err != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError,
"GpuDnnConv: error while computing the output shape: %s",
cudnnGetErrorString(err));
return 1;
}
if (CudaNdarray_prep_output(output, 4, out_dims) != 0) {
return 1;
}
}
#ifdef CONV_INPLACE
Py_XDECREF(*output);
*output = om;
Py_INCREF(*output);
#else
if (CudaNdarray_prep_output(output, 4, CudaNdarray_HOST_DIMS(om)) != 0)
return 1;
if (CudaNdarray_CopyFromCudaNdarray(*output, om))
return 1;
#endif
if (c_set_tensor4d(*output, APPLY_SPECIFIC(output)) == -1)
return 1;
......@@ -55,6 +47,8 @@ APPLY_SPECIFIC(conv_fwd)(CudaNdarray *input, CudaNdarray *kerns,
if (workspace == NULL && worksize != 0)
return 1;
const float beta = 1;
err = cudnnConvolutionForward(
_handle,
(void *)&alpha,
......
......@@ -2,9 +2,8 @@
int
APPLY_SPECIFIC(conv_gi)(CudaNdarray *kerns, CudaNdarray *output,
cudnnConvolutionDescriptor_t desc,
int h, int w, float alpha, float beta,
CudaNdarray **input) {
CudaNdarray *im, cudnnConvolutionDescriptor_t desc,
float alpha, CudaNdarray **input) {
cudnnStatus_t err = CUDNN_STATUS_SUCCESS;
if (c_set_tensor4d(output, APPLY_SPECIFIC(output)) == -1)
......@@ -12,33 +11,33 @@ APPLY_SPECIFIC(conv_gi)(CudaNdarray *kerns, CudaNdarray *output,
if (c_set_filter(kerns, APPLY_SPECIFIC(kerns)) == -1)
return 1;
{
int out_dims[4];
out_dims[0] = CudaNdarray_HOST_DIMS(output)[0];
out_dims[1] = CudaNdarray_HOST_DIMS(kerns)[1];
out_dims[2] = h;
out_dims[3] = w;
if (CudaNdarray_prep_output(input, 4, out_dims) != 0) {
return 1;
}
}
#ifdef CONV_INPLACE
Py_XDECREF(*input);
*input = im;
Py_INCREF(*input);
#else
if (CudaNdarray_prep_output(input, 4, CudaNdarray_HOST_DIMS(im)) != 0)
return 1;
if (CudaNdarray_CopyFromCudaNdarray(*input, im))
return 1;
#endif
if (c_set_tensor4d(*input, APPLY_SPECIFIC(input)) == -1)
return 1;
{
err = cudnnConvolutionBackwardData(
_handle,
(void *)&alpha,
APPLY_SPECIFIC(kerns), CudaNdarray_DEV_DATA(kerns),
APPLY_SPECIFIC(output), CudaNdarray_DEV_DATA(output),
desc,
(void *)&beta,
APPLY_SPECIFIC(input), CudaNdarray_DEV_DATA(*input));
}
const float beta = 1;
err = cudnnConvolutionBackwardData(
_handle,
(void *)&alpha,
APPLY_SPECIFIC(kerns), CudaNdarray_DEV_DATA(kerns),
APPLY_SPECIFIC(output), CudaNdarray_DEV_DATA(output),
desc,
(void *)&beta,
APPLY_SPECIFIC(input), CudaNdarray_DEV_DATA(*input));
if (err != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError, "GpuDnnConvGradI: error doing operation: %s",
cudnnGetErrorString(err));
cudnnGetErrorString(err));
return 1;
}
return 0;
......
......@@ -2,9 +2,8 @@
int
APPLY_SPECIFIC(conv_gw)(CudaNdarray *input, CudaNdarray *output,
cudnnConvolutionDescriptor_t desc,
int h, int w, float alpha, float beta,
CudaNdarray **kerns) {
CudaNdarray *km, cudnnConvolutionDescriptor_t desc,
float alpha, CudaNdarray **kerns) {
cudnnStatus_t err = CUDNN_STATUS_SUCCESS;
if (c_set_tensor4d(input, APPLY_SPECIFIC(input)) == -1)
......@@ -12,33 +11,33 @@ APPLY_SPECIFIC(conv_gw)(CudaNdarray *input, CudaNdarray *output,
if (c_set_tensor4d(output, APPLY_SPECIFIC(output)) == -1)
return 1;
{
int out_dims[4];
out_dims[0] = CudaNdarray_HOST_DIMS(output)[1];
out_dims[1] = CudaNdarray_HOST_DIMS(input)[1];
out_dims[2] = h;
out_dims[3] = w;
if (CudaNdarray_prep_output(kerns, 4, out_dims) != 0) {
return 1;
}
}
#ifdef CONV_INPLACE
Py_XDECREF(*kerns);
*kerns = km;
Py_INCREF(*kerns);
#else
if (CudaNdarray_prep_output(kerns, 4, CudaNdarray_HOST_DIMS(km)) != 0)
return 1;
if (CudaNdarray_CopyFromCudaNdarray(*kerns, km))
return 1;
#endif
if (c_set_filter(*kerns, APPLY_SPECIFIC(kerns)) == -1)
return 1;
{
err = cudnnConvolutionBackwardFilter(
_handle,
(void *)&alpha,
APPLY_SPECIFIC(input), CudaNdarray_DEV_DATA(input),
APPLY_SPECIFIC(output), CudaNdarray_DEV_DATA(output),
desc,
(void *)&beta,
APPLY_SPECIFIC(kerns), CudaNdarray_DEV_DATA(*kerns));
}
const float beta = 1;
err = cudnnConvolutionBackwardFilter(
_handle,
(void *)&alpha,
APPLY_SPECIFIC(input), CudaNdarray_DEV_DATA(input),
APPLY_SPECIFIC(output), CudaNdarray_DEV_DATA(output),
desc,
(void *)&beta,
APPLY_SPECIFIC(kerns), CudaNdarray_DEV_DATA(*kerns));
if (err != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError, "GpuDnnConvGradW: error doing operation: %s",
cudnnGetErrorString(err));
cudnnGetErrorString(err));
return 1;
}
return 0;
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
请注册或者登录后发表评论