提交 0e44e828 作者: Frédéric Bastien 提交者: GitHub

Merge pull request #5165 from gvtulder/f-fix-gpuarray-corr-threads

Correct blocks/threads for gpuarray CorrMM and Corr3DMM
......@@ -490,7 +490,7 @@ class BaseGpuCorrMM(CGpuKernelBase, BlasOp):
def c_code_cache_version(self):
# raise this whenever modifying any of the support_code_files
return (0, 1)
return (0, 2)
def c_code_helper(self, bottom, weights, top, direction, sub, height=None, width=None):
"""
......@@ -1029,7 +1029,7 @@ class BaseGpuCorr3dMM(CGpuKernelBase, BlasOp):
def c_code_cache_version(self):
# raise this whenever modifying any of the support_code_files
return (0, 1)
return (0, 2)
def c_code_helper(self, bottom, weights, top, direction, sub,
height=None, width=None, depth=None):
......
......@@ -263,7 +263,7 @@ int im3d2col(const size_t max_threads_dim,
GpuKernel *kernel;
if(dilation_h != 1 || dilation_w != 1 || dilation_d != 1){
err = dilated_im3d2col_kernel_call(
1, &threads_per_block, &n_blocks, 0,
1, &n_blocks, &threads_per_block, 0,
num_kernels, data_im, data_im_offset, height, width, depth,
kernel_h, kernel_w, kernel_d, dilation_h, dilation_w, dilation_d,
pad_h, pad_w, pad_d, stride_h, stride_w, stride_d, height_col,
......@@ -276,7 +276,7 @@ int im3d2col(const size_t max_threads_dim,
}
else{
err = im3d2col_kernel_call(
1, &threads_per_block, &n_blocks, 0,
1, &n_blocks, &threads_per_block, 0,
num_kernels, data_im, data_im_offset, height, width, depth,
kernel_h, kernel_w, kernel_d, pad_h, pad_w, pad_d,
stride_h, stride_w, stride_d, height_col, width_col, depth_col,
......@@ -311,7 +311,7 @@ int col2im3d(const size_t max_threads_dim, gpudata * data_col, const size_t chan
int err;
if(dilation_h != 1 || dilation_w != 1 || dilation_d != 1){
err = dilated_col2im3d_kernel_call(
1, &threads_per_block, &n_blocks, 0,
1, &n_blocks, &threads_per_block, 0,
num_kernels, data_col, height, width, depth, channels, patch_h, patch_w,
patch_d, dilation_h, dilation_w, dilation_d, pad_h, pad_w, pad_d,
stride_h, stride_w, stride_d, height_col, width_col, depth_col,
......@@ -324,7 +324,7 @@ int col2im3d(const size_t max_threads_dim, gpudata * data_col, const size_t chan
}
else{
err = col2im3d_kernel_call(
1, &threads_per_block, &n_blocks, 0,
1, &n_blocks, &threads_per_block, 0,
num_kernels, data_col, height, width, depth, channels, patch_h, patch_w,
patch_d, pad_h, pad_w, pad_d, stride_h, stride_w, stride_d,
height_col, width_col, depth_col, data_im, data_im_offset);
......
......@@ -215,7 +215,7 @@ int im2col(const size_t max_threads_dim,
GpuKernel *kernel;
if(dilation_h != 1 || dilation_w != 1){
err = dilated_im2col_kernel_call(
1, &threads_per_block, &n_blocks, 0,
1, &n_blocks, &threads_per_block, 0,
num_kernels, data_im, data_im_offset, height, width, kernel_h, kernel_w,
dilation_h, dilation_w, pad_h, pad_w, stride_h, stride_w, height_col,
width_col, data_col);
......@@ -227,7 +227,7 @@ int im2col(const size_t max_threads_dim,
}
else{
err = im2col_kernel_call(
1, &threads_per_block, &n_blocks, 0,
1, &n_blocks, &threads_per_block, 0,
num_kernels, data_im, data_im_offset, height, width, kernel_h, kernel_w,
pad_h, pad_w, stride_h, stride_w, height_col,
width_col, data_col);
......@@ -257,7 +257,7 @@ int col2im(const size_t max_threads_dim, gpudata * data_col, const size_t channe
int err;
if(dilation_h != 1 || dilation_w != 1){
err = dilated_col2im_kernel_call(
1, &threads_per_block, &n_blocks, 0,
1, &n_blocks, &threads_per_block, 0,
num_kernels, data_col, height, width, channels, patch_h, patch_w,
dilation_h, dilation_w, pad_h, pad_w, stride_h, stride_w,
height_col, width_col, data_im, data_im_offset);
......@@ -269,7 +269,7 @@ int col2im(const size_t max_threads_dim, gpudata * data_col, const size_t channe
}
else{
err = col2im_kernel_call(
1, &threads_per_block, &n_blocks, 0,
1, &n_blocks, &threads_per_block, 0,
num_kernels, data_col, height, width, channels, patch_h, patch_w,
pad_h, pad_w, stride_h, stride_w,
height_col, width_col, data_im, data_im_offset);
......
......@@ -206,3 +206,13 @@ class TestCorrMM(unittest.TestCase):
self.run_gradinput(inputs_shape=(16, 15, 12, 10),
filters_shape=(10, 6, 12, 1),
subsample=(3, 1))
def test_large_input(self):
    # Exercises the number-of-threads computation: the shapes are picked
    # so that (channels * height) > (max_threads_dim ** 2).
    # (See also pull request #5165.)
    filters_shape = (1, 1, 1, 1024)

    # Both helper calls below share the same filter shape; gradient
    # verification is switched off for the first one.
    self.run_conv_valid(inputs_shape=(1, 1024, 3, 1024),
                        filters_shape=filters_shape,
                        verify_grad=False)
    self.run_gradinput(inputs_shape=(1, 1024, 3, 1),
                       filters_shape=filters_shape)
......@@ -207,3 +207,13 @@ class TestCorr3dMM(unittest.TestCase):
self.run_gradinput(inputs_shape=(16, 15, 12, 12, 10),
filters_shape=(10, 6, 12, 4, 1),
subsample=(3, 1, 2))
def test_large_input(self):
    # Exercises the number-of-threads computation: the shapes are picked
    # so that (channels * height) > (max_threads_dim ** 2).
    # (See also pull request #5165.)
    filters_shape = (1, 1, 1, 1, 1024)

    # Both helper calls below share the same filter shape; gradient
    # verification is switched off for the first one.
    self.run_conv_valid(inputs_shape=(1, 1024, 3, 3, 1024),
                        filters_shape=filters_shape,
                        verify_grad=False)
    self.run_gradinput(inputs_shape=(1, 1024, 3, 3, 1),
                       filters_shape=filters_shape)
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论