提交 1b72aa54 authored 作者: Arnaud Bergeron's avatar Arnaud Bergeron

Add proper wait/record pair for cudnn wrapper.

上级 f4268206
......@@ -211,6 +211,10 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns,
}
}
cuda_wait(&input->ga, GPUARRAY_CUDA_WAIT_READ);
cuda_wait(&kerns->ga, GPUARRAY_CUDA_WAIT_READ);
cuda_wait(&(*output)->ga, GPUARRAY_CUDA_WAIT_WRITE);
err = cudnnConvolutionForward(
APPLY_SPECIFIC(_handle),
alpha_p,
......@@ -223,6 +227,10 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns,
if (worksize != 0)
c->ops->buffer_release(workspace);
cuda_record(&input->ga, GPUARRAY_CUDA_WAIT_READ);
cuda_record(&kerns->ga, GPUARRAY_CUDA_WAIT_READ);
cuda_record(&(*output)->ga, GPUARRAY_CUDA_WAIT_WRITE);
}
cuda_exit(c->ctx);
......
......@@ -177,6 +177,10 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output,
}
}
cuda_wait(&kerns->ga, GPUARRAY_CUDA_WAIT_READ);
cuda_wait(&output->ga, GPUARRAY_CUDA_WAIT_READ);
cuda_wait(&(*input)->ga, GPUARRAY_CUDA_WAIT_WRITE);
err = cudnnConvolutionBackwardData_v3(
APPLY_SPECIFIC(_handle),
alpha_p,
......@@ -189,6 +193,10 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output,
if (worksize != 0)
c->ops->buffer_release(workspace);
cuda_record(&kerns->ga, GPUARRAY_CUDA_WAIT_READ);
cuda_record(&output->ga, GPUARRAY_CUDA_WAIT_READ);
cuda_record(&(*input)->ga, GPUARRAY_CUDA_WAIT_WRITE);
cuda_exit(c->ctx);
if (err != CUDNN_STATUS_SUCCESS) {
......
......@@ -178,6 +178,10 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output,
}
}
cuda_wait(&input->ga, GPUARRAY_CUDA_WAIT_READ);
cuda_wait(&output->ga, GPUARRAY_CUDA_WAIT_READ);
cuda_wait(&(*kerns)->ga, GPUARRAY_CUDA_WAIT_WRITE);
err = cudnnConvolutionBackwardFilter_v3(
APPLY_SPECIFIC(_handle),
alpha_p,
......@@ -190,6 +194,10 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output,
if (worksize != 0)
c->ops->buffer_release(workspace);
cuda_record(&input->ga, GPUARRAY_CUDA_WAIT_READ);
cuda_record(&output->ga, GPUARRAY_CUDA_WAIT_READ);
cuda_record(&(*kerns)->ga, GPUARRAY_CUDA_WAIT_WRITE);
cuda_exit(c->ctx);
if (err != CUDNN_STATUS_SUCCESS) {
......
......@@ -93,12 +93,20 @@ int APPLY_SPECIFIC(dnn_pool)(PyGpuArrayObject *img,
}
cuda_enter(c->ctx);
cuda_wait(&img->ga, GPUARRAY_CUDA_WAIT_READ);
cuda_wait(&(*out)->ga, GPUARRAY_CUDA_WAIT_WRITE);
err = cudnnPoolingForward(
APPLY_SPECIFIC(_handle), desc,
alpha,
APPLY_SPECIFIC(input), PyGpuArray_DEV_DATA(img),
beta,
APPLY_SPECIFIC(output), PyGpuArray_DEV_DATA(*out));
cuda_record(&img->ga, GPUARRAY_CUDA_WAIT_READ);
cuda_record(&(*out)->ga, GPUARRAY_CUDA_WAIT_WRITE);
cuda_exit(c->ctx);
}
if (err != CUDNN_STATUS_SUCCESS) {
......
......@@ -111,6 +111,12 @@ int APPLY_SPECIFIC(dnn_pool_grad)(PyGpuArrayObject *inp,
}
cuda_enter(c->ctx);
cuda_wait(&out->ga, GPUARRAY_CUDA_WAIT_READ);
cuda_wait(&out_grad->ga, GPUARRAY_CUDA_WAIT_READ);
cuda_wait(&inp->ga, GPUARRAY_CUDA_WAIT_READ);
cuda_wait(&(*inp_grad)->ga, GPUARRAY_CUDA_WAIT_WRITE);
err = cudnnPoolingBackward(
APPLY_SPECIFIC(_handle), desc,
alpha,
......@@ -120,6 +126,12 @@ int APPLY_SPECIFIC(dnn_pool_grad)(PyGpuArrayObject *inp,
beta,
APPLY_SPECIFIC(input_grad), PyGpuArray_DEV_DATA(*inp_grad)
);
cuda_record(&out->ga, GPUARRAY_CUDA_WAIT_READ);
cuda_record(&out_grad->ga, GPUARRAY_CUDA_WAIT_READ);
cuda_record(&inp->ga, GPUARRAY_CUDA_WAIT_READ);
cuda_record(&(*inp_grad)->ga, GPUARRAY_CUDA_WAIT_WRITE);
cuda_exit(c->ctx);
}
......
......@@ -72,6 +72,10 @@ int APPLY_SPECIFIC(softmax)(PyGpuArrayObject *x,
}
cuda_enter(c->ctx);
cuda_wait(&x->ga, GPUARRAY_CUDA_WAIT_READ);
cuda_wait(&(*out)->ga, GPUARRAY_CUDA_WAIT_WRITE);
err = cudnnSoftmaxForward(
APPLY_SPECIFIC(_handle),
SOFTMAX_ALGO,
......@@ -83,6 +87,10 @@ int APPLY_SPECIFIC(softmax)(PyGpuArrayObject *x,
APPLY_SPECIFIC(output),
PyGpuArray_DEV_DATA(*out)
);
cuda_record(&x->ga, GPUARRAY_CUDA_WAIT_READ);
cuda_record(&(*out)->ga, GPUARRAY_CUDA_WAIT_WRITE);
cuda_exit(c->ctx);
}
......
......@@ -85,6 +85,11 @@ int APPLY_SPECIFIC(softmax_grad)(PyGpuArrayObject *dy,
}
cuda_enter(c->ctx);
cuda_wait(&sm->ga, GPUARRAY_CUDA_WAIT_READ);
cuda_wait(&dy->ga, GPUARRAY_CUDA_WAIT_READ);
cuda_wait(&(*dx)->ga, GPUARRAY_CUDA_WAIT_WRITE);
err = cudnnSoftmaxBackward(
APPLY_SPECIFIC(_handle),
SOFTMAX_ALGO,
......@@ -98,6 +103,11 @@ int APPLY_SPECIFIC(softmax_grad)(PyGpuArrayObject *dy,
APPLY_SPECIFIC(dx),
PyGpuArray_DEV_DATA(*dx)
);
cuda_record(&sm->ga, GPUARRAY_CUDA_WAIT_READ);
cuda_record(&dy->ga, GPUARRAY_CUDA_WAIT_READ);
cuda_record(&(*dx)->ga, GPUARRAY_CUDA_WAIT_WRITE);
cuda_exit(c->ctx);
}
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论