提交 3a422ba3，作者：Arnaud Bergeron

Make the cuDNN `_handle` apply-specific by wrapping it in APPLY_SPECIFIC().

上级 7bbbe727
...@@ -102,17 +102,19 @@ setup_ext_cuda(); ...@@ -102,17 +102,19 @@ setup_ext_cuda();
#section support_code_struct #section support_code_struct
cudnnHandle_t _handle; cudnnHandle_t APPLY_SPECIFIC(_handle);
#section init_code_struct #section init_code_struct
cuda_enter(pygpu_default_context()->ctx); {
cudnnStatus_t err; cuda_enter(pygpu_default_context()->ctx);
_handle = NULL; cudnnStatus_t err;
if ((err = cudnnCreate(&_handle)) != CUDNN_STATUS_SUCCESS) { APPLY_SPECIFIC(_handle) = NULL;
if ((err = cudnnCreate(&APPLY_SPECIFIC(_handle))) != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError, "could not create cuDNN handle: %s", PyErr_Format(PyExc_RuntimeError, "could not create cuDNN handle: %s",
cudnnGetErrorString(err)); cudnnGetErrorString(err));
cuda_exit(pygpu_default_context()->ctx); cuda_exit(pygpu_default_context()->ctx);
FAIL; FAIL;
}
cuda_exit(pygpu_default_context()->ctx);
} }
cuda_exit(pygpu_default_context()->ctx);
...@@ -80,7 +80,7 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns, ...@@ -80,7 +80,7 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns,
int count; int count;
cudnnConvolutionFwdAlgoPerf_t choice; cudnnConvolutionFwdAlgoPerf_t choice;
err = cudnnFindConvolutionForwardAlgorithm( err = cudnnFindConvolutionForwardAlgorithm(
_handle, APPLY_SPECIFIC(input), APPLY_SPECIFIC(kerns), APPLY_SPECIFIC(_handle), APPLY_SPECIFIC(input), APPLY_SPECIFIC(kerns),
desc, APPLY_SPECIFIC(output), 1, &count, &choice); desc, APPLY_SPECIFIC(output), 1, &count, &choice);
if (err != CUDNN_STATUS_SUCCESS) { if (err != CUDNN_STATUS_SUCCESS) {
...@@ -103,7 +103,7 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns, ...@@ -103,7 +103,7 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns,
} }
err = cudnnGetConvolutionForwardAlgorithm( err = cudnnGetConvolutionForwardAlgorithm(
_handle, APPLY_SPECIFIC(input), APPLY_SPECIFIC(kerns), APPLY_SPECIFIC(_handle), APPLY_SPECIFIC(input), APPLY_SPECIFIC(kerns),
desc, APPLY_SPECIFIC(output), desc, APPLY_SPECIFIC(output),
CUDNN_CONVOLUTION_FWD_SPECIFY_WORKSPACE_LIMIT, free, &algo); CUDNN_CONVOLUTION_FWD_SPECIFY_WORKSPACE_LIMIT, free, &algo);
if (err != CUDNN_STATUS_SUCCESS) { if (err != CUDNN_STATUS_SUCCESS) {
...@@ -181,7 +181,7 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns, ...@@ -181,7 +181,7 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns,
{ {
size_t worksize; size_t worksize;
gpudata *workspace; gpudata *workspace;
err = cudnnGetConvolutionForwardWorkspaceSize(_handle, err = cudnnGetConvolutionForwardWorkspaceSize(APPLY_SPECIFIC(_handle),
APPLY_SPECIFIC(input), APPLY_SPECIFIC(input),
APPLY_SPECIFIC(kerns), APPLY_SPECIFIC(kerns),
desc, desc,
...@@ -212,7 +212,7 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns, ...@@ -212,7 +212,7 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns,
} }
err = cudnnConvolutionForward( err = cudnnConvolutionForward(
_handle, APPLY_SPECIFIC(_handle),
alpha_p, alpha_p,
APPLY_SPECIFIC(input), PyGpuArray_DEV_DATA(input), APPLY_SPECIFIC(input), PyGpuArray_DEV_DATA(input),
APPLY_SPECIFIC(kerns), PyGpuArray_DEV_DATA(kerns), APPLY_SPECIFIC(kerns), PyGpuArray_DEV_DATA(kerns),
......
...@@ -79,7 +79,7 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output, ...@@ -79,7 +79,7 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output,
cudnnConvolutionBwdDataAlgoPerf_t choice; cudnnConvolutionBwdDataAlgoPerf_t choice;
err = cudnnFindConvolutionBackwardDataAlgorithm( err = cudnnFindConvolutionBackwardDataAlgorithm(
_handle, APPLY_SPECIFIC(input), APPLY_SPECIFIC(output), desc, APPLY_SPECIFIC(_handle), APPLY_SPECIFIC(input), APPLY_SPECIFIC(output), desc,
APPLY_SPECIFIC(kerns), 1, &count, &choice); APPLY_SPECIFIC(kerns), 1, &count, &choice);
if (err != CUDNN_STATUS_SUCCESS) { if (err != CUDNN_STATUS_SUCCESS) {
...@@ -102,7 +102,7 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output, ...@@ -102,7 +102,7 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output,
} }
err = cudnnGetConvolutionBackwardDataAlgorithm( err = cudnnGetConvolutionBackwardDataAlgorithm(
_handle, APPLY_SPECIFIC(input), APPLY_SPECIFIC(output), APPLY_SPECIFIC(_handle), APPLY_SPECIFIC(input), APPLY_SPECIFIC(output),
desc, APPLY_SPECIFIC(kerns), desc, APPLY_SPECIFIC(kerns),
CUDNN_CONVOLUTION_BWD_DATA_SPECIFY_WORKSPACE_LIMIT, free, &algo); CUDNN_CONVOLUTION_BWD_DATA_SPECIFY_WORKSPACE_LIMIT, free, &algo);
if (err != CUDNN_STATUS_SUCCESS) { if (err != CUDNN_STATUS_SUCCESS) {
...@@ -157,7 +157,7 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output, ...@@ -157,7 +157,7 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output,
gpudata *workspace; gpudata *workspace;
err = cudnnGetConvolutionBackwardDataWorkspaceSize( err = cudnnGetConvolutionBackwardDataWorkspaceSize(
_handle, APPLY_SPECIFIC(kerns), APPLY_SPECIFIC(output), desc, APPLY_SPECIFIC(_handle), APPLY_SPECIFIC(kerns), APPLY_SPECIFIC(output), desc,
APPLY_SPECIFIC(input), algo, &worksize); APPLY_SPECIFIC(input), algo, &worksize);
if (err != CUDNN_STATUS_SUCCESS) { if (err != CUDNN_STATUS_SUCCESS) {
...@@ -178,7 +178,7 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output, ...@@ -178,7 +178,7 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output,
} }
err = cudnnConvolutionBackwardData_v3( err = cudnnConvolutionBackwardData_v3(
_handle, APPLY_SPECIFIC(_handle),
alpha_p, alpha_p,
APPLY_SPECIFIC(kerns), PyGpuArray_DEV_DATA(kerns), APPLY_SPECIFIC(kerns), PyGpuArray_DEV_DATA(kerns),
APPLY_SPECIFIC(output), PyGpuArray_DEV_DATA(output), APPLY_SPECIFIC(output), PyGpuArray_DEV_DATA(output),
......
...@@ -79,7 +79,7 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output, ...@@ -79,7 +79,7 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output,
cudnnConvolutionBwdFilterAlgoPerf_t choice; cudnnConvolutionBwdFilterAlgoPerf_t choice;
err = cudnnFindConvolutionBackwardFilterAlgorithm( err = cudnnFindConvolutionBackwardFilterAlgorithm(
_handle, APPLY_SPECIFIC(input), APPLY_SPECIFIC(output), desc, APPLY_SPECIFIC(_handle), APPLY_SPECIFIC(input), APPLY_SPECIFIC(output), desc,
APPLY_SPECIFIC(kerns), 1, &count, &choice); APPLY_SPECIFIC(kerns), 1, &count, &choice);
if (err != CUDNN_STATUS_SUCCESS) { if (err != CUDNN_STATUS_SUCCESS) {
...@@ -103,7 +103,7 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output, ...@@ -103,7 +103,7 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output,
} }
err = cudnnGetConvolutionBackwardFilterAlgorithm( err = cudnnGetConvolutionBackwardFilterAlgorithm(
_handle, APPLY_SPECIFIC(input), APPLY_SPECIFIC(output), APPLY_SPECIFIC(_handle), APPLY_SPECIFIC(input), APPLY_SPECIFIC(output),
desc, APPLY_SPECIFIC(kerns), desc, APPLY_SPECIFIC(kerns),
CUDNN_CONVOLUTION_BWD_FILTER_SPECIFY_WORKSPACE_LIMIT, free, &algo); CUDNN_CONVOLUTION_BWD_FILTER_SPECIFY_WORKSPACE_LIMIT, free, &algo);
if (err != CUDNN_STATUS_SUCCESS) { if (err != CUDNN_STATUS_SUCCESS) {
...@@ -159,7 +159,7 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output, ...@@ -159,7 +159,7 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output,
gpudata *workspace; gpudata *workspace;
err = cudnnGetConvolutionBackwardFilterWorkspaceSize( err = cudnnGetConvolutionBackwardFilterWorkspaceSize(
_handle, APPLY_SPECIFIC(input), APPLY_SPECIFIC(output), desc, APPLY_SPECIFIC(_handle), APPLY_SPECIFIC(input), APPLY_SPECIFIC(output), desc,
APPLY_SPECIFIC(kerns), algo, &worksize); APPLY_SPECIFIC(kerns), algo, &worksize);
if (err != CUDNN_STATUS_SUCCESS) { if (err != CUDNN_STATUS_SUCCESS) {
...@@ -179,7 +179,7 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output, ...@@ -179,7 +179,7 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output,
} }
err = cudnnConvolutionBackwardFilter_v3( err = cudnnConvolutionBackwardFilter_v3(
_handle, APPLY_SPECIFIC(_handle),
alpha_p, alpha_p,
APPLY_SPECIFIC(input), PyGpuArray_DEV_DATA(input), APPLY_SPECIFIC(input), PyGpuArray_DEV_DATA(input),
APPLY_SPECIFIC(output), PyGpuArray_DEV_DATA(output), APPLY_SPECIFIC(output), PyGpuArray_DEV_DATA(output),
......
...@@ -76,7 +76,7 @@ int APPLY_SPECIFIC(dnn_pool)(PyGpuArrayObject *img, ...@@ -76,7 +76,7 @@ int APPLY_SPECIFIC(dnn_pool)(PyGpuArrayObject *img,
cuda_enter(c->ctx); cuda_enter(c->ctx);
err = cudnnPoolingForward( err = cudnnPoolingForward(
_handle, desc, APPLY_SPECIFIC(_handle), desc,
&alpha, &alpha,
APPLY_SPECIFIC(input), PyGpuArray_DEV_DATA(img), APPLY_SPECIFIC(input), PyGpuArray_DEV_DATA(img),
&beta, &beta,
......
...@@ -94,7 +94,7 @@ int APPLY_SPECIFIC(dnn_pool_grad)(PyGpuArrayObject *inp, ...@@ -94,7 +94,7 @@ int APPLY_SPECIFIC(dnn_pool_grad)(PyGpuArrayObject *inp,
cuda_enter(c->ctx); cuda_enter(c->ctx);
err = cudnnPoolingBackward( err = cudnnPoolingBackward(
_handle, desc, APPLY_SPECIFIC(_handle), desc,
&alpha, &alpha,
APPLY_SPECIFIC(output), PyGpuArray_DEV_DATA(out), APPLY_SPECIFIC(output), PyGpuArray_DEV_DATA(out),
APPLY_SPECIFIC(output_grad), PyGpuArray_DEV_DATA(out_grad), APPLY_SPECIFIC(output_grad), PyGpuArray_DEV_DATA(out_grad),
......
...@@ -55,7 +55,7 @@ int APPLY_SPECIFIC(softmax)(PyGpuArrayObject *x, ...@@ -55,7 +55,7 @@ int APPLY_SPECIFIC(softmax)(PyGpuArrayObject *x,
cuda_enter(c->ctx); cuda_enter(c->ctx);
err = cudnnSoftmaxForward( err = cudnnSoftmaxForward(
_handle, APPLY_SPECIFIC(_handle),
SOFTMAX_ALGO, SOFTMAX_ALGO,
SOFTMAX_MODE, SOFTMAX_MODE,
(void *)&alpha, (void *)&alpha,
......
...@@ -68,7 +68,7 @@ int APPLY_SPECIFIC(softmax_grad)(PyGpuArrayObject *dy, ...@@ -68,7 +68,7 @@ int APPLY_SPECIFIC(softmax_grad)(PyGpuArrayObject *dy,
cuda_enter(c->ctx); cuda_enter(c->ctx);
err = cudnnSoftmaxBackward( err = cudnnSoftmaxBackward(
_handle, APPLY_SPECIFIC(_handle),
SOFTMAX_ALGO, SOFTMAX_ALGO,
SOFTMAX_MODE, SOFTMAX_MODE,
(void *)&alpha, (void *)&alpha,
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
请 注册 或者 登录 后发表评论