提交 20586f0a authored 作者: notoraptor's avatar notoraptor

Apply cuda_record/wait to workspace for all convs.

上级 07ba2115
...@@ -434,6 +434,8 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns, ...@@ -434,6 +434,8 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns,
} }
} }
if (worksize != 0)
cuda_wait(workspace, GPUARRAY_CUDA_WAIT_WRITE);
cuda_wait(input->ga.data, GPUARRAY_CUDA_WAIT_READ); cuda_wait(input->ga.data, GPUARRAY_CUDA_WAIT_READ);
cuda_wait(kerns->ga.data, GPUARRAY_CUDA_WAIT_READ); cuda_wait(kerns->ga.data, GPUARRAY_CUDA_WAIT_READ);
cuda_wait((*output)->ga.data, GPUARRAY_CUDA_WAIT_WRITE); cuda_wait((*output)->ga.data, GPUARRAY_CUDA_WAIT_WRITE);
...@@ -454,8 +456,10 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns, ...@@ -454,8 +456,10 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns,
APPLY_SPECIFIC(output), ((char *)PyGpuArray_DEV_DATA(*output)) + output_offset * g); APPLY_SPECIFIC(output), ((char *)PyGpuArray_DEV_DATA(*output)) + output_offset * g);
} }
if (worksize != 0) if (worksize != 0) {
cuda_record(workspace, GPUARRAY_CUDA_WAIT_WRITE);
gpudata_release(workspace); gpudata_release(workspace);
}
cuda_record(input->ga.data, GPUARRAY_CUDA_WAIT_READ); cuda_record(input->ga.data, GPUARRAY_CUDA_WAIT_READ);
cuda_record(kerns->ga.data, GPUARRAY_CUDA_WAIT_READ); cuda_record(kerns->ga.data, GPUARRAY_CUDA_WAIT_READ);
......
...@@ -342,6 +342,8 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output, ...@@ -342,6 +342,8 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output,
} }
} }
if (worksize != 0)
cuda_wait(workspace, GPUARRAY_CUDA_WAIT_WRITE);
cuda_wait(kerns->ga.data, GPUARRAY_CUDA_WAIT_READ); cuda_wait(kerns->ga.data, GPUARRAY_CUDA_WAIT_READ);
cuda_wait(output->ga.data, GPUARRAY_CUDA_WAIT_READ); cuda_wait(output->ga.data, GPUARRAY_CUDA_WAIT_READ);
cuda_wait((*input)->ga.data, GPUARRAY_CUDA_WAIT_WRITE); cuda_wait((*input)->ga.data, GPUARRAY_CUDA_WAIT_WRITE);
...@@ -357,8 +359,10 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output, ...@@ -357,8 +359,10 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output,
APPLY_SPECIFIC(input), ((char *)PyGpuArray_DEV_DATA(*input)) + input_offset * g); APPLY_SPECIFIC(input), ((char *)PyGpuArray_DEV_DATA(*input)) + input_offset * g);
} }
if (worksize != 0) if (worksize != 0) {
cuda_record(workspace, GPUARRAY_CUDA_WAIT_WRITE);
gpudata_release(workspace); gpudata_release(workspace);
}
cuda_record(kerns->ga.data, GPUARRAY_CUDA_WAIT_READ); cuda_record(kerns->ga.data, GPUARRAY_CUDA_WAIT_READ);
cuda_record(output->ga.data, GPUARRAY_CUDA_WAIT_READ); cuda_record(output->ga.data, GPUARRAY_CUDA_WAIT_READ);
......
...@@ -333,6 +333,8 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output, ...@@ -333,6 +333,8 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output,
} }
} }
if (worksize != 0)
cuda_wait(workspace, GPUARRAY_CUDA_WAIT_WRITE);
cuda_wait(input->ga.data, GPUARRAY_CUDA_WAIT_READ); cuda_wait(input->ga.data, GPUARRAY_CUDA_WAIT_READ);
cuda_wait(output->ga.data, GPUARRAY_CUDA_WAIT_READ); cuda_wait(output->ga.data, GPUARRAY_CUDA_WAIT_READ);
cuda_wait((*kerns)->ga.data, GPUARRAY_CUDA_WAIT_WRITE); cuda_wait((*kerns)->ga.data, GPUARRAY_CUDA_WAIT_WRITE);
...@@ -348,8 +350,10 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output, ...@@ -348,8 +350,10 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output,
APPLY_SPECIFIC(kerns), ((char *)PyGpuArray_DEV_DATA(*kerns)) + kern_offset * g); APPLY_SPECIFIC(kerns), ((char *)PyGpuArray_DEV_DATA(*kerns)) + kern_offset * g);
} }
if (worksize != 0) if (worksize != 0) {
cuda_record(workspace, GPUARRAY_CUDA_WAIT_WRITE);
gpudata_release(workspace); gpudata_release(workspace);
}
cuda_record(input->ga.data, GPUARRAY_CUDA_WAIT_READ); cuda_record(input->ga.data, GPUARRAY_CUDA_WAIT_READ);
cuda_record(output->ga.data, GPUARRAY_CUDA_WAIT_READ); cuda_record(output->ga.data, GPUARRAY_CUDA_WAIT_READ);
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论