提交 20586f0a，作者：notoraptor

Apply cuda_record/wait to workspace for all convs.

上级 07ba2115
......@@ -434,6 +434,8 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns,
}
}
if (worksize != 0)
cuda_wait(workspace, GPUARRAY_CUDA_WAIT_WRITE);
cuda_wait(input->ga.data, GPUARRAY_CUDA_WAIT_READ);
cuda_wait(kerns->ga.data, GPUARRAY_CUDA_WAIT_READ);
cuda_wait((*output)->ga.data, GPUARRAY_CUDA_WAIT_WRITE);
......@@ -454,8 +456,10 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns,
APPLY_SPECIFIC(output), ((char *)PyGpuArray_DEV_DATA(*output)) + output_offset * g);
}
if (worksize != 0)
if (worksize != 0) {
cuda_record(workspace, GPUARRAY_CUDA_WAIT_WRITE);
gpudata_release(workspace);
}
cuda_record(input->ga.data, GPUARRAY_CUDA_WAIT_READ);
cuda_record(kerns->ga.data, GPUARRAY_CUDA_WAIT_READ);
......
......@@ -342,6 +342,8 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output,
}
}
if (worksize != 0)
cuda_wait(workspace, GPUARRAY_CUDA_WAIT_WRITE);
cuda_wait(kerns->ga.data, GPUARRAY_CUDA_WAIT_READ);
cuda_wait(output->ga.data, GPUARRAY_CUDA_WAIT_READ);
cuda_wait((*input)->ga.data, GPUARRAY_CUDA_WAIT_WRITE);
......@@ -357,8 +359,10 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output,
APPLY_SPECIFIC(input), ((char *)PyGpuArray_DEV_DATA(*input)) + input_offset * g);
}
if (worksize != 0)
if (worksize != 0) {
cuda_record(workspace, GPUARRAY_CUDA_WAIT_WRITE);
gpudata_release(workspace);
}
cuda_record(kerns->ga.data, GPUARRAY_CUDA_WAIT_READ);
cuda_record(output->ga.data, GPUARRAY_CUDA_WAIT_READ);
......
......@@ -333,6 +333,8 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output,
}
}
if (worksize != 0)
cuda_wait(workspace, GPUARRAY_CUDA_WAIT_WRITE);
cuda_wait(input->ga.data, GPUARRAY_CUDA_WAIT_READ);
cuda_wait(output->ga.data, GPUARRAY_CUDA_WAIT_READ);
cuda_wait((*kerns)->ga.data, GPUARRAY_CUDA_WAIT_WRITE);
......@@ -348,8 +350,10 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output,
APPLY_SPECIFIC(kerns), ((char *)PyGpuArray_DEV_DATA(*kerns)) + kern_offset * g);
}
if (worksize != 0)
if (worksize != 0) {
cuda_record(workspace, GPUARRAY_CUDA_WAIT_WRITE);
gpudata_release(workspace);
}
cuda_record(input->ga.data, GPUARRAY_CUDA_WAIT_READ);
cuda_record(output->ga.data, GPUARRAY_CUDA_WAIT_READ);
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论