提交 08957330 作者: Arnaud Bergeron

Make GpuDnnConv support inplace operation.

上级 065e0f5e
...@@ -103,11 +103,11 @@ cudnnConvolutionForward_v2( ...@@ -103,11 +103,11 @@ cudnnConvolutionForward_v2(
const cudnnTensorDescriptor_t destDesc, const cudnnTensorDescriptor_t destDesc,
void *destData) { void *destData) {
assert(*(float *)alpha == 1.0); assert(*(float *)alpha == 1.0);
assert(*(float *)beta == 0.0); assert(*(float *)beta == 1.0);
return cudnnConvolutionForward(handle, srcDesc, srcData, return cudnnConvolutionForward(handle, srcDesc, srcData,
filterDesc, filterData, filterDesc, filterData,
convDesc, destDesc, destData, convDesc, destDesc, destData,
CUDNN_RESULT_NO_ACCUMULATE); CUDNN_RESULT_ACCUMULATE);
} }
#define cudnnConvolutionForward cudnnConvolutionForward_v2 #define cudnnConvolutionForward cudnnConvolutionForward_v2
...@@ -124,11 +124,11 @@ cudnnConvolutionBackwardFilter_v2( ...@@ -124,11 +124,11 @@ cudnnConvolutionBackwardFilter_v2(
const cudnnFilterDescriptor_t gradDesc, const cudnnFilterDescriptor_t gradDesc,
void *gradData) { void *gradData) {
assert(*(float *)alpha == 1.0); assert(*(float *)alpha == 1.0);
assert(*(float *)beta == 0.0); assert(*(float *)beta == 1.0);
return cudnnConvolutionBackwardFilter(handle, srcDesc, srcData, return cudnnConvolutionBackwardFilter(handle, srcDesc, srcData,
diffDesc, diffData, diffDesc, diffData,
convDesc, gradDesc, gradData, convDesc, gradDesc, gradData,
CUDNN_RESULT_NO_ACCUMULATE); CUDNN_RESULT_ACCUMULATE);
} }
#define cudnnConvolutionBackwardFilter cudnnConvolutionBackwardFilter_v2 #define cudnnConvolutionBackwardFilter cudnnConvolutionBackwardFilter_v2
...@@ -146,7 +146,7 @@ cudnnConvolutionBackwardData_v2( ...@@ -146,7 +146,7 @@ cudnnConvolutionBackwardData_v2(
const cudnnTensorDescriptor_t gradDesc, const cudnnTensorDescriptor_t gradDesc,
void *gradData) { void *gradData) {
assert(*(float *)alpha == 1.0); assert(*(float *)alpha == 1.0);
assert(*(float *)beta == 0.0); assert(*(float *)beta == 1.0);
return cudnnConvolutionBackwardData(handle, return cudnnConvolutionBackwardData(handle,
(cudnnFilterDescriptor_t)filterDesc, (cudnnFilterDescriptor_t)filterDesc,
filterData, filterData,
...@@ -155,7 +155,7 @@ cudnnConvolutionBackwardData_v2( ...@@ -155,7 +155,7 @@ cudnnConvolutionBackwardData_v2(
(cudnnConvolutionDescriptor_t)convDesc, (cudnnConvolutionDescriptor_t)convDesc,
(cudnnTensorDescriptor_t)gradDesc, (cudnnTensorDescriptor_t)gradDesc,
gradData, gradData,
CUDNN_RESULT_NO_ACCUMULATE); CUDNN_RESULT_ACCUMULATE);
} }
#define cudnnConvolutionBackwardData cudnnConvolutionBackwardData_v2 #define cudnnConvolutionBackwardData cudnnConvolutionBackwardData_v2
......
差异被折叠。
...@@ -2,9 +2,8 @@ ...@@ -2,9 +2,8 @@
int int
APPLY_SPECIFIC(conv_fwd)(CudaNdarray *input, CudaNdarray *kerns, APPLY_SPECIFIC(conv_fwd)(CudaNdarray *input, CudaNdarray *kerns,
cudnnConvolutionDescriptor_t desc, CudaNdarray *om, cudnnConvolutionDescriptor_t desc,
float alpha, float beta, float alpha, CudaNdarray **output) {
CudaNdarray **output) {
cudnnStatus_t err = CUDNN_STATUS_SUCCESS; cudnnStatus_t err = CUDNN_STATUS_SUCCESS;
if (c_set_tensor4d(input, APPLY_SPECIFIC(input)) == -1) if (c_set_tensor4d(input, APPLY_SPECIFIC(input)) == -1)
...@@ -12,23 +11,16 @@ APPLY_SPECIFIC(conv_fwd)(CudaNdarray *input, CudaNdarray *kerns, ...@@ -12,23 +11,16 @@ APPLY_SPECIFIC(conv_fwd)(CudaNdarray *input, CudaNdarray *kerns,
if (c_set_filter(kerns, APPLY_SPECIFIC(kerns)) == -1) if (c_set_filter(kerns, APPLY_SPECIFIC(kerns)) == -1)
return 1; return 1;
{ #ifdef CONV_INPLACE
int out_dims[4]; Py_XDECREF(*output);
err = cudnnGetConvolution2dForwardOutputDim( *output = om;
desc, Py_INCREF(*output);
APPLY_SPECIFIC(input), #else
APPLY_SPECIFIC(kerns), if (CudaNdarray_prep_output(output, 4, CudaNdarray_HOST_DIMS(om)) != 0)
&out_dims[0], &out_dims[1], &out_dims[2], &out_dims[3]);
if (err != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError,
"GpuDnnConv: error while computing the output shape: %s",
cudnnGetErrorString(err));
return 1; return 1;
} if (CudaNdarray_CopyFromCudaNdarray(*output, om))
if (CudaNdarray_prep_output(output, 4, out_dims) != 0) {
return 1; return 1;
} #endif
}
if (c_set_tensor4d(*output, APPLY_SPECIFIC(output)) == -1) if (c_set_tensor4d(*output, APPLY_SPECIFIC(output)) == -1)
return 1; return 1;
...@@ -55,6 +47,8 @@ APPLY_SPECIFIC(conv_fwd)(CudaNdarray *input, CudaNdarray *kerns, ...@@ -55,6 +47,8 @@ APPLY_SPECIFIC(conv_fwd)(CudaNdarray *input, CudaNdarray *kerns,
if (workspace == NULL && worksize != 0) if (workspace == NULL && worksize != 0)
return 1; return 1;
const float beta = 1;
err = cudnnConvolutionForward( err = cudnnConvolutionForward(
_handle, _handle,
(void *)&alpha, (void *)&alpha,
......
...@@ -2,9 +2,8 @@ ...@@ -2,9 +2,8 @@
int int
APPLY_SPECIFIC(conv_gi)(CudaNdarray *kerns, CudaNdarray *output, APPLY_SPECIFIC(conv_gi)(CudaNdarray *kerns, CudaNdarray *output,
cudnnConvolutionDescriptor_t desc, CudaNdarray *im, cudnnConvolutionDescriptor_t desc,
int h, int w, float alpha, float beta, float alpha, CudaNdarray **input) {
CudaNdarray **input) {
cudnnStatus_t err = CUDNN_STATUS_SUCCESS; cudnnStatus_t err = CUDNN_STATUS_SUCCESS;
if (c_set_tensor4d(output, APPLY_SPECIFIC(output)) == -1) if (c_set_tensor4d(output, APPLY_SPECIFIC(output)) == -1)
...@@ -12,21 +11,22 @@ APPLY_SPECIFIC(conv_gi)(CudaNdarray *kerns, CudaNdarray *output, ...@@ -12,21 +11,22 @@ APPLY_SPECIFIC(conv_gi)(CudaNdarray *kerns, CudaNdarray *output,
if (c_set_filter(kerns, APPLY_SPECIFIC(kerns)) == -1) if (c_set_filter(kerns, APPLY_SPECIFIC(kerns)) == -1)
return 1; return 1;
{ #ifdef CONV_INPLACE
int out_dims[4]; Py_XDECREF(*input);
out_dims[0] = CudaNdarray_HOST_DIMS(output)[0]; *input = im;
out_dims[1] = CudaNdarray_HOST_DIMS(kerns)[1]; Py_INCREF(*input);
out_dims[2] = h; #else
out_dims[3] = w; if (CudaNdarray_prep_output(input, 4, CudaNdarray_HOST_DIMS(im)) != 0)
if (CudaNdarray_prep_output(input, 4, out_dims) != 0) {
return 1; return 1;
} if (CudaNdarray_CopyFromCudaNdarray(*input, im))
} return 1;
#endif
if (c_set_tensor4d(*input, APPLY_SPECIFIC(input)) == -1) if (c_set_tensor4d(*input, APPLY_SPECIFIC(input)) == -1)
return 1; return 1;
{ const float beta = 1;
err = cudnnConvolutionBackwardData( err = cudnnConvolutionBackwardData(
_handle, _handle,
(void *)&alpha, (void *)&alpha,
...@@ -35,7 +35,6 @@ APPLY_SPECIFIC(conv_gi)(CudaNdarray *kerns, CudaNdarray *output, ...@@ -35,7 +35,6 @@ APPLY_SPECIFIC(conv_gi)(CudaNdarray *kerns, CudaNdarray *output,
desc, desc,
(void *)&beta, (void *)&beta,
APPLY_SPECIFIC(input), CudaNdarray_DEV_DATA(*input)); APPLY_SPECIFIC(input), CudaNdarray_DEV_DATA(*input));
}
if (err != CUDNN_STATUS_SUCCESS) { if (err != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError, "GpuDnnConvGradI: error doing operation: %s", PyErr_Format(PyExc_RuntimeError, "GpuDnnConvGradI: error doing operation: %s",
cudnnGetErrorString(err)); cudnnGetErrorString(err));
......
...@@ -2,9 +2,8 @@ ...@@ -2,9 +2,8 @@
int int
APPLY_SPECIFIC(conv_gw)(CudaNdarray *input, CudaNdarray *output, APPLY_SPECIFIC(conv_gw)(CudaNdarray *input, CudaNdarray *output,
cudnnConvolutionDescriptor_t desc, CudaNdarray *km, cudnnConvolutionDescriptor_t desc,
int h, int w, float alpha, float beta, float alpha, CudaNdarray **kerns) {
CudaNdarray **kerns) {
cudnnStatus_t err = CUDNN_STATUS_SUCCESS; cudnnStatus_t err = CUDNN_STATUS_SUCCESS;
if (c_set_tensor4d(input, APPLY_SPECIFIC(input)) == -1) if (c_set_tensor4d(input, APPLY_SPECIFIC(input)) == -1)
...@@ -12,21 +11,22 @@ APPLY_SPECIFIC(conv_gw)(CudaNdarray *input, CudaNdarray *output, ...@@ -12,21 +11,22 @@ APPLY_SPECIFIC(conv_gw)(CudaNdarray *input, CudaNdarray *output,
if (c_set_tensor4d(output, APPLY_SPECIFIC(output)) == -1) if (c_set_tensor4d(output, APPLY_SPECIFIC(output)) == -1)
return 1; return 1;
{ #ifdef CONV_INPLACE
int out_dims[4]; Py_XDECREF(*kerns);
out_dims[0] = CudaNdarray_HOST_DIMS(output)[1]; *kerns = km;
out_dims[1] = CudaNdarray_HOST_DIMS(input)[1]; Py_INCREF(*kerns);
out_dims[2] = h; #else
out_dims[3] = w; if (CudaNdarray_prep_output(kerns, 4, CudaNdarray_HOST_DIMS(km)) != 0)
if (CudaNdarray_prep_output(kerns, 4, out_dims) != 0) {
return 1; return 1;
} if (CudaNdarray_CopyFromCudaNdarray(*kerns, km))
} return 1;
#endif
if (c_set_filter(*kerns, APPLY_SPECIFIC(kerns)) == -1) if (c_set_filter(*kerns, APPLY_SPECIFIC(kerns)) == -1)
return 1; return 1;
{ const float beta = 1;
err = cudnnConvolutionBackwardFilter( err = cudnnConvolutionBackwardFilter(
_handle, _handle,
(void *)&alpha, (void *)&alpha,
...@@ -35,7 +35,6 @@ APPLY_SPECIFIC(conv_gw)(CudaNdarray *input, CudaNdarray *output, ...@@ -35,7 +35,6 @@ APPLY_SPECIFIC(conv_gw)(CudaNdarray *input, CudaNdarray *output,
desc, desc,
(void *)&beta, (void *)&beta,
APPLY_SPECIFIC(kerns), CudaNdarray_DEV_DATA(*kerns)); APPLY_SPECIFIC(kerns), CudaNdarray_DEV_DATA(*kerns));
}
if (err != CUDNN_STATUS_SUCCESS) { if (err != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError, "GpuDnnConvGradW: error doing operation: %s", PyErr_Format(PyExc_RuntimeError, "GpuDnnConvGradW: error doing operation: %s",
cudnnGetErrorString(err)); cudnnGetErrorString(err));
......
Markdown 格式
0%
您将添加 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论