提交 09b1f19c 作者: Nicolas Ballas

Small fixes

上级 9e2d8a33
...@@ -1018,7 +1018,7 @@ class BaseGpuCorr3dMM(GpuOp): ...@@ -1018,7 +1018,7 @@ class BaseGpuCorr3dMM(GpuOp):
def c_code_cache_version(self): def c_code_cache_version(self):
# raise this whenever modifying any of the support_code_files # raise this whenever modifying any of the support_code_files
return (0, 1) return None #(0, 1)
def c_support_code_apply(self, node, nodename): def c_support_code_apply(self, node, nodename):
# REMEMBER TO RAISE c_code_cache_version when changing any of # REMEMBER TO RAISE c_code_cache_version when changing any of
...@@ -1065,6 +1065,12 @@ class BaseGpuCorr3dMM(GpuOp): ...@@ -1065,6 +1065,12 @@ class BaseGpuCorr3dMM(GpuOp):
If self.pad == 'half', a variable giving the width of the filters If self.pad == 'half', a variable giving the width of the filters
for direction="backprop weights". for direction="backprop weights".
Ignored otherwise. Ignored otherwise.
:param depth: If self.subsample[2] != 1, a variable giving the depth
of the filters for direction="backprop weights" or the depth of the
input images for direction="backprop inputs".
If self.pad == 'half', a variable giving the depth of the filters
for direction="backprop weights".
Ignored otherwise.
""" """
if self.border_mode != "valid": if self.border_mode != "valid":
raise ValueError("mode must be 'valid'") raise ValueError("mode must be 'valid'")
...@@ -1212,11 +1218,11 @@ class BaseGpuCorr3dMM(GpuOp): ...@@ -1212,11 +1218,11 @@ class BaseGpuCorr3dMM(GpuOp):
} }
else if (padD == -2) else if (padD == -2)
{ // horizontal full padding { // horizontal full padding
padW = kW - 1; padD = kD - 1;
} }
else if (padW < 0) else if (padD < 0)
{ {
PyErr_SetString(PyExc_ValueError, "BaseGpuCorr3dMM: padW must be >= -2"); PyErr_SetString(PyExc_ValueError, "BaseGpuCorr3dMM: padD must be >= -2");
%(fail)s %(fail)s
} }
...@@ -1255,6 +1261,8 @@ class BaseGpuCorr3dMM(GpuOp): ...@@ -1255,6 +1261,8 @@ class BaseGpuCorr3dMM(GpuOp):
%(fail)s %(fail)s
} }
// Prepare output array // Prepare output array
if (!(%(out)s if (!(%(out)s
&& %(out)s->nd == 5 && %(out)s->nd == 5
...@@ -1380,7 +1388,6 @@ class GpuCorr3dMM_gradWeights(BaseGpuCorr3dMM): ...@@ -1380,7 +1388,6 @@ class GpuCorr3dMM_gradWeights(BaseGpuCorr3dMM):
broadcastable = [topgrad.type.broadcastable[1], img.type.broadcastable[1], broadcastable = [topgrad.type.broadcastable[1], img.type.broadcastable[1],
False, False, False] False, False, False]
print [img, topgrad] + height_width_depth
return Apply(self, [img, topgrad] + height_width_depth, [CudaNdarrayType(broadcastable)()]) return Apply(self, [img, topgrad] + height_width_depth, [CudaNdarrayType(broadcastable)()])
def c_code(self, node, nodename, inp, out_, sub): def c_code(self, node, nodename, inp, out_, sub):
......
...@@ -53,16 +53,11 @@ inline int GET_BLOCKS(const int N) { ...@@ -53,16 +53,11 @@ inline int GET_BLOCKS(const int N) {
// (Adapted from Caffe: https://github.com/BVLC/caffe/blob/master/src/caffe/util/im2col.cu) // (Adapted from Caffe: https://github.com/BVLC/caffe/blob/master/src/caffe/util/im2col.cu)
// Kernels for fast unfold + copy // Kernels for fast unfold + copy
__global__ void im3d2col_kernel(const int n, const float* data_im, __global__ void im3d2col_kernel(const int n, const float* data_im,
const int height, const int width, const int height, const int width, const int depth,
const int depth, const int kernel_h, const int kernel_w, const int kernel_d,
const int kernel_h, const int kernel_w, const int pad_h, const int pad_w, const int pad_d,
const int kernel_d, const int stride_h, const int stride_w, const int stride_d,
const int pad_h, const int pad_w, const int height_col, const int width_col, const int depth_col,
const int pad_d,
const int stride_h, const int stride_w,
const int stride_d,
const int height_col, const int width_col,
const int depth_col,
float* data_col) float* data_col)
{ {
CUDA_KERNEL_LOOP(index, n) CUDA_KERNEL_LOOP(index, n)
...@@ -134,14 +129,12 @@ void im3d2col(const float* data_im, const int channels, ...@@ -134,14 +129,12 @@ void im3d2col(const float* data_im, const int channels,
__global__ void col2im3d_kernel(const int n, const float* data_col, __global__ void col2im3d_kernel(const int n, const float* data_col,
const int height, const int width, const int height, const int width, const int depth,
const int depth, const int channels, const int channels,
const int patch_h, const int patch_w, const int patch_h, const int patch_w, const int patch_d,
const int patch_d, const int pad_h, const int pad_h, const int pad_w, const int pad_d,
const int pad_w, const int pad_d, const int stride_h, const int stride_w, const int stride_d,
const int stride_h, const int stride_w, const int height_col, const int width_col, const int depth_col,
const int stride_d, const int height_col,
const int width_col, const int depth_col,
float* data_im) float* data_im)
{ {
CUDA_KERNEL_LOOP(index, n) CUDA_KERNEL_LOOP(index, n)
...@@ -219,7 +212,7 @@ CudaNdarray* corr3dMM(CudaNdarray *const bottom, ...@@ -219,7 +212,7 @@ CudaNdarray* corr3dMM(CudaNdarray *const bottom,
const int dD = 1, const int dD = 1,
const int padH = 0, const int padH = 0,
const int padW = 0, const int padW = 0,
const int padD = 1) const int padD = 0)
{ {
if (bottom->nd != 5) if (bottom->nd != 5)
{ {
......
...@@ -22,7 +22,7 @@ else: ...@@ -22,7 +22,7 @@ else:
class TestCorr3DMM(unittest.TestCase): class TestCorr3DMM(unittest.TestCase):
def run_conv_valid(self, inputs_shape, filters_shape, def run_conv_valid(self, inputs_shape, filters_shape,
subsample = ( 1, 1, 1)): subsample=(1, 1, 1)):
inputs_val = numpy.random.random(inputs_shape).astype('float32') inputs_val = numpy.random.random(inputs_shape).astype('float32')
filters_val = numpy.random.random(filters_shape).astype('float32') filters_val = numpy.random.random(filters_shape).astype('float32')
...@@ -66,7 +66,7 @@ class TestCorr3DMM(unittest.TestCase): ...@@ -66,7 +66,7 @@ class TestCorr3DMM(unittest.TestCase):
subsample=(1, 2, 3)) subsample=(1, 2, 3))
def run_gradweight(self, inputs_shape, filters_shape, dCdH_shape, def run_gradweight(self, inputs_shape, filters_shape, dCdH_shape,
subsample = (1, 1, 1)): subsample=(1, 1, 1)):
inputs_val = numpy.random.random(inputs_shape).astype('float32') inputs_val = numpy.random.random(inputs_shape).astype('float32')
dCdH_val = numpy.random.random(dCdH_shape).astype('float32') dCdH_val = numpy.random.random(dCdH_shape).astype('float32')
...@@ -96,25 +96,25 @@ class TestCorr3DMM(unittest.TestCase): ...@@ -96,25 +96,25 @@ class TestCorr3DMM(unittest.TestCase):
utt.assert_allclose(res_ref, res, rtol=1e-04, atol=1e-04) utt.assert_allclose(res_ref, res, rtol=1e-04, atol=1e-04)
def test_gradweight(self): def test_gradweight(self):
self.run_gradweight(inputs_shape = (16, 20, 32, 16, 1), self.run_gradweight(inputs_shape=(16, 20, 32, 16, 1),
filters_shape = (10, 6, 12, 4, 1), filters_shape=(10, 6, 12, 4, 1),
dCdH_shape = (16, 15, 21, 13, 10), dCdH_shape=(16, 15, 21, 13, 10),
subsample = (1, 1, 1)) subsample=(1, 1, 1))
self.run_gradweight(inputs_shape = (16, 20, 32, 16, 1), self.run_gradweight(inputs_shape=(16, 20, 32, 16, 1),
filters_shape = (10, 6, 12, 4, 1), filters_shape=(10, 6, 12, 4, 1),
dCdH_shape = (16, 8, 11, 7, 10), dCdH_shape=(16, 8, 11, 7, 10),
subsample = (2, 2, 2)) subsample=(2, 2, 2))
self.run_gradweight(inputs_shape = (16, 20, 32, 16, 1), self.run_gradweight(inputs_shape=(16, 20, 32, 16, 1),
filters_shape = (10, 6, 12, 4, 1), filters_shape=(10, 6, 12, 4, 1),
dCdH_shape = (16, 5, 7, 5, 10), dCdH_shape=(16, 5, 7, 5, 10),
subsample = (3, 3, 3)) subsample=(3, 3, 3))
self.run_gradweight(inputs_shape = (16, 20, 32, 16, 1), self.run_gradweight(inputs_shape=(16, 20, 32, 16, 1),
filters_shape = (10, 6, 12, 4, 1), filters_shape=(10, 6, 12, 4, 1),
dCdH_shape = (16, 8, 21, 5, 10), dCdH_shape=(16, 8, 21, 5, 10),
subsample = (2, 1, 3)) subsample=(2, 1, 3))
def run_gradinput(self, inputs_shape, filters_shape, def run_gradinput(self, inputs_shape, filters_shape,
subsample = (1, 1, 1)): subsample=(1, 1, 1)):
inputs_val = numpy.random.random(inputs_shape).astype('float32') inputs_val = numpy.random.random(inputs_shape).astype('float32')
filters_val = numpy.random.random(filters_shape).astype('float32') filters_val = numpy.random.random(filters_shape).astype('float32')
...@@ -138,7 +138,7 @@ class TestCorr3DMM(unittest.TestCase): ...@@ -138,7 +138,7 @@ class TestCorr3DMM(unittest.TestCase):
conv_gemm = GpuCorr3dMM_gradInputs(subsample=subsample)(kern=weight, topgrad=top) conv_gemm = GpuCorr3dMM_gradInputs(subsample=subsample)(kern=weight, topgrad=top)
else: else:
conv_gemm = GpuCorr3dMM_gradInputs(subsample=subsample)(kern=weight, topgrad=top, conv_gemm = GpuCorr3dMM_gradInputs(subsample=subsample)(kern=weight, topgrad=top,
shape = bottom.shape[1:4]) shape=bottom.shape[1:4])
conv_gemm = conv_gemm.dimshuffle(0, 2, 3, 4, 1) conv_gemm = conv_gemm.dimshuffle(0, 2, 3, 4, 1)
f = theano.function([], conv_gemm) f = theano.function([], conv_gemm)
...@@ -146,15 +146,15 @@ class TestCorr3DMM(unittest.TestCase): ...@@ -146,15 +146,15 @@ class TestCorr3DMM(unittest.TestCase):
utt.assert_allclose(res_ref, res, rtol=1e-04, atol=1e-04) utt.assert_allclose(res_ref, res, rtol=1e-04, atol=1e-04)
def test_gradinput(self): def test_gradinput(self):
self.run_gradinput(inputs_shape = (16, 15, 21, 12, 10), self.run_gradinput(inputs_shape=(16, 15, 21, 12, 10),
filters_shape = (10, 6, 12, 4, 1)) filters_shape=(10, 6, 12, 4, 1))
self.run_gradinput(inputs_shape = (16, 15, 21, 12, 10), self.run_gradinput(inputs_shape=(16, 15, 21, 12, 10),
filters_shape = (10, 6, 12, 4, 1), filters_shape=(10, 6, 12, 4, 1),
subsample=(2,2,2)) subsample=(2,2,2))
self.run_gradinput(inputs_shape = (16, 15, 21, 12, 10), self.run_gradinput(inputs_shape=(16, 15, 21, 12, 10),
filters_shape = (10, 6, 12, 4, 1), filters_shape=(10, 6, 12, 4, 1),
subsample=(3,3,3)) subsample=(3,3,3))
self.run_gradinput(inputs_shape = (16, 15, 21, 12, 10), self.run_gradinput(inputs_shape=(16, 15, 21, 12, 10),
filters_shape = (10, 6, 12, 4, 1), filters_shape=(10, 6, 12, 4, 1),
subsample=(3,1,2)) subsample=(3,1,2))
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论