提交 bae4eb91 authored 作者: Aleksandar Botev's avatar Aleksandar Botev

Added mode 'half' to Images2Neibs. Tests pass. #5938

上级 22eaec56
......@@ -23,9 +23,9 @@ class GpuImages2Neibs(GpuKernelBase, Images2Neibs, Op):
"""
def __init__(self, mode='valid'):
if mode not in ['valid', 'ignore_borders', 'wrap_centered']:
if mode not in ['valid', 'ignore_borders', 'wrap_centered', 'half']:
raise NotImplementedError("Only the mode valid, ignore_borders"
" and wrap_centered"
", wrap_centered and half"
" have been implemented for the op"
" GpuImages2Neibs")
self.mode = mode
......@@ -85,8 +85,8 @@ class GpuImages2Neibs(GpuKernelBase, Images2Neibs, Op):
GLOBAL_MEM %(type_z)s * global_out, const ga_size offset_out
)
{
const ga_int wrap_centered_idx_shift_x = c/2;
const ga_int wrap_centered_idx_shift_y = d/2;
const ga_int wrap_centered_half_idx_shift_x = c/2;
const ga_int wrap_centered_half_idx_shift_y = d/2;
global_ten4 = (GLOBAL_MEM const %(type_ten4)s *)(((GLOBAL_MEM char *)global_ten4)+offset_ten4);
global_out = (GLOBAL_MEM %(type_z)s *)(((GLOBAL_MEM char *)global_out)+offset_out);
......@@ -111,31 +111,38 @@ class GpuImages2Neibs(GpuKernelBase, Images2Neibs, Op):
{
ga_int ten4_2 = i + a * step_x;
if("%(mode)s"=="wrap_centered"){
ten4_2 -= wrap_centered_idx_shift_x;
ten4_2 -= wrap_centered_half_idx_shift_x;
if ( ten4_2 < 0 )
ten4_2 += height;
else if (ten4_2 >= height)
ten4_2 -= height;
} else if ("%(mode)s"=="half"){
ten4_2 -= wrap_centered_half_idx_shift_x;
}
ga_int j = LID_0; // loop over d
{
ga_int ten4_3 = j + b * step_y;
if("%(mode)s"=="wrap_centered"){
ten4_3 -= wrap_centered_idx_shift_y;
ten4_3 -= wrap_centered_half_idx_shift_y;
if ( ten4_3 < 0 )
ten4_3 += width;
else if (ten4_3 >= width)
ten4_3 -= width;
} else if ("%(mode)s"=="half"){
ten4_3 -= wrap_centered_half_idx_shift_y;
}
ga_int ten4_idx = stride3*ten4_3 +
stride2*ten4_2 +
stride1*s + stride0*n;
ga_int z_col = j + d * i;
ga_int z_idx = z_col * out_s1 +
z_row * out_s0;
global_out[z_idx] = global_ten4[ten4_idx];
if(ten4_2 < 0 || ten4_2 >= height || ten4_3 < 0 || ten4_3 >= width){
// The source index falls outside the image (possible e.g. in half mode,
// where the window is shifted without wrapping): write zero padding
// instead of reading out of bounds.
global_out[z_idx] = 0;
} else {
ga_int ten4_idx = stride3*ten4_3 +
stride2*ten4_2 +
stride1*s + stride0*n;
global_out[z_idx] = global_ten4[ten4_idx];
}
}
}
}
......@@ -172,8 +179,8 @@ class GpuImages2Neibs(GpuKernelBase, Images2Neibs, Op):
GLOBAL_MEM %(type_z)s * global_out, const ga_size offset_out
)
{
const ga_int wrap_centered_idx_shift_x = c/2;
const ga_int wrap_centered_idx_shift_y = d/2;
const ga_int wrap_centered_half_idx_shift_x = c/2;
const ga_int wrap_centered_half_idx_shift_y = d/2;
global_ten4 = (GLOBAL_MEM const %(type_ten4)s *)(((GLOBAL_MEM char *)global_ten4)+offset_ten4);
global_out = (GLOBAL_MEM %(type_z)s *)(((GLOBAL_MEM char *)global_out)+offset_out);
......@@ -199,32 +206,39 @@ class GpuImages2Neibs(GpuKernelBase, Images2Neibs, Op):
{
ga_int ten4_2 = i + a * step_x;
if("%(mode)s"=="wrap_centered"){
ten4_2 -= wrap_centered_idx_shift_x;
ten4_2 -= wrap_centered_half_idx_shift_x;
if ( ten4_2 < 0 )
ten4_2 += height;
else if (ten4_2 >= height)
ten4_2 -= height;
} else if ("%(mode)s"=="half"){
ten4_2 -= wrap_centered_half_idx_shift_x;
}
// loop over d
for (ga_int j = LID_0; j < d; j+=LDIM_0)
{
ga_int ten4_3 = j + b * step_y;
if("%(mode)s"=="wrap_centered"){
ten4_3 -= wrap_centered_idx_shift_y;
ten4_3 -= wrap_centered_half_idx_shift_y;
if ( ten4_3 < 0 )
ten4_3 += width;
else if (ten4_3 >= width)
ten4_3 -= width;
} else if ("%(mode)s"=="half"){
ten4_3 -= wrap_centered_half_idx_shift_y;
}
ga_int ten4_idx = stride3*ten4_3 +
stride2*ten4_2 +
stride1*s + stride0*n;
ga_int z_col = j + d * i;
ga_int z_idx = z_col * out_s1 +
z_row * out_s0;
global_out[z_idx] = global_ten4[ten4_idx];
if(ten4_2 < 0 || ten4_2 >= height || ten4_3 < 0 || ten4_3 >= width){
// The source index falls outside the image (possible e.g. in half mode,
// where the window is shifted without wrapping): write zero padding
// instead of reading out of bounds.
global_out[z_idx] = 0;
} else {
ga_int ten4_idx = stride3*ten4_3 +
stride2*ten4_2 +
stride1*s + stride0*n;
global_out[z_idx] = global_ten4[ten4_idx];
}
}
}
}
......@@ -367,6 +381,31 @@ class GpuImages2Neibs(GpuKernelBase, Images2Neibs, Op):
grid_c = 1+(((PyGpuArray_DIMS(%(ten4)s))[2]-c)/step_x);
//number of patch in width
grid_d = 1+(((PyGpuArray_DIMS(%(ten4)s))[3]-d)/step_y);
}else if ( "%(mode)s" == "half") {
if ( ((PyGpuArray_DIMS(%(ten4)s))[2] < c) ||
((((PyGpuArray_DIMS(%(ten4)s))[2]-(c%%2)) %% step_x)!=0))
{
PyErr_Format(PyExc_TypeError, "GpuImages2Neibs:"
" neib_shape[0]=%%d, neib_step[0]=%%d and"
" ten4.shape[2]=%%d not consistent",
c, step_x,
PyGpuArray_DIMS(%(ten4)s)[2]);
%(fail)s;
}
if ( ((PyGpuArray_DIMS(%(ten4)s))[3] < d) ||
((((PyGpuArray_DIMS(%(ten4)s))[3]-(d%%2)) %% step_y)!=0))
{
PyErr_Format(PyExc_TypeError, "GpuImages2Neibs:"
" neib_shape[1]=%%d, neib_step[1]=%%d and"
" ten4.shape[3]=%%d not consistent",
d, step_y,
PyGpuArray_DIMS(%(ten4)s)[3]);
%(fail)s;
}
//number of patch in height
grid_c = 1+(((PyGpuArray_DIMS(%(ten4)s))[2]-(c%%2))/step_x);
//number of patch in width
grid_d = 1+(((PyGpuArray_DIMS(%(ten4)s))[3]-(d%%2))/step_y);
}else{
PyErr_Format(PyExc_TypeError,
"GpuImages2Neibs:: unknown mode '%(mode)s'");
......@@ -485,5 +524,5 @@ class GpuImages2Neibs(GpuKernelBase, Images2Neibs, Op):
@op_lifter([Images2Neibs])
@register_opt2([Images2Neibs], 'fast_compile')
def local_gpua_images2neibs(op, context_name, inputs, outputs):
if op.mode in ['valid', 'ignore_borders', 'wrap_centered']:
if op.mode in ['valid', 'ignore_borders', 'wrap_centered', 'half']:
return GpuImages2Neibs(op.mode)
......@@ -29,15 +29,18 @@ class Images2Neibs(Op):
of the input is not a multiple of the pooling factor(s).
- 'wrap_centered' :
?? TODO comment
- 'half' :
Equivalent to 'valid' if we pre-pad the input on each side by
(neib_shape[0]//2, neib_shape[1]//2)
"""
__props__ = ("mode",)
def __init__(self, mode='valid'):
if mode not in ['valid', 'wrap_centered', 'ignore_borders']:
if mode not in ['valid', 'wrap_centered', 'ignore_borders', 'half']:
raise NotImplementedError("Only the mode valid, ignore_borders"
" and wrap_centered have been"
",wrap_centered and half have been"
" implemented for the op Images2Neibs")
self.mode = mode
......@@ -198,7 +201,6 @@ class Images2Neibs(Op):
(c, d, ten4.shape[2], ten4.shape[3]))
grid_c = CEIL_INTDIV(ten4.shape[2], step_x)
grid_d = CEIL_INTDIV(ten4.shape[3], step_y)
elif mode == "valid":
if (ten4.shape[2] < c) or (((ten4.shape[2] - c) % step_x) != 0):
raise TypeError(
......@@ -219,6 +221,26 @@ class Images2Neibs(Op):
grid_c = 1 + ((ten4.shape[2] - c) // step_x)
# number of patch in width
grid_d = 1 + ((ten4.shape[3] - d) // step_y)
elif mode == "half":
# This is equivalent to 'valid' with padding (c // 2, d // 2) on both sides
# Thus the expanded image will have size (h + 2 * (c // 2), w + 2 * (d // 2))
# Plugging these in the equation for 'valid' we get
# h + 2 * (c // 2) - c = h - (c % 2)
# w + 2 * (d // 2) - d = w - (d % 2)
if (ten4.shape[2] < c) or (((ten4.shape[2] - (c % 2)) % step_x) != 0):
raise TypeError(
"neib_shape[0]=%d, neib_step[0]=%d and"
" ten4.shape[2]=%d not consistent" %
(c, step_x, ten4.shape[2]))
if (ten4.shape[3] < d) or (((ten4.shape[3] - (d % 2)) % step_y) != 0):
raise TypeError(
"neib_shape[1]=%d, neib_step[1]=%d and"
" ten4.shape[3]=%d not consistent" %
(d, step_y, ten4.shape[3]))
# number of patch in height
grid_c = 1 + ((ten4.shape[2] - (c % 2)) // step_x)
# number of patch in width
grid_d = 1 + ((ten4.shape[3] - (d % 2)) // step_y)
else:
raise TypeError("Images2Neibs: unknow mode '%s'" % mode)
......@@ -231,8 +253,8 @@ class Images2Neibs(Op):
height = ten4.shape[2]
width = ten4.shape[3]
wrap_centered_idx_shift_x = c // 2
wrap_centered_idx_shift_y = d // 2
wrap_centered_half_idx_shift_x = c // 2
wrap_centered_half_idx_shift_y = d // 2
for n in range(nb_batch):
for s in range(nb_stack):
# loop over the number of patch in height
......@@ -243,22 +265,31 @@ class Images2Neibs(Op):
for i in range(c):
ten4_2 = i + a * step_x
if mode == "wrap_centered":
ten4_2 -= wrap_centered_idx_shift_x
ten4_2 -= wrap_centered_half_idx_shift_x
if ten4_2 < 0:
ten4_2 += height
elif ten4_2 >= height:
ten4_2 -= height
for j in range(d):
ten4_3 = j + b * step_y
if mode == "wrap_centered":
ten4_3 -= wrap_centered_idx_shift_y
if ten4_3 < 0:
ten4_3 += width
elif ten4_3 >= width:
ten4_3 -= width
z_col = j + d * i
z[0][z_row, z_col] = ten4[n, s, ten4_2, ten4_3]
elif mode == "half":
ten4_2 -= wrap_centered_half_idx_shift_x
if ten4_2 < 0 or ten4_2 >= height:
z[0][z_row, d * i: d * i + d] = 0
else:
for j in range(d):
ten4_3 = j + b * step_y
if mode == "wrap_centered":
ten4_3 -= wrap_centered_half_idx_shift_y
if ten4_3 < 0:
ten4_3 += width
elif ten4_3 >= width:
ten4_3 -= width
elif mode == "half":
ten4_3 -= wrap_centered_half_idx_shift_y
z_col = j + d * i
if ten4_3 < 0 or ten4_3 >= width:
z[0][z_row, z_col] = 0
else:
z[0][z_row, z_col] = ten4[n, s, ten4_2, ten4_3]
def infer_shape(self, node, input_shape):
in_shape = input_shape[0]
......@@ -273,6 +304,9 @@ class Images2Neibs(Op):
elif self.mode == 'ignore_borders':
grid_c = 1 + ((in_shape[2] - c) // step_x)
grid_d = 1 + ((in_shape[3] - d) // step_y)
elif self.mode == 'half':
grid_c = 1 + ((in_shape[2] - (c % 2)) // step_x)
grid_d = 1 + ((in_shape[3] - (d % 2)) // step_y)
z_dim0 = grid_c * grid_d * in_shape[1] * in_shape[0]
z_dim1 = c * d
return [(z_dim0, z_dim1)]
......@@ -394,6 +428,31 @@ class Images2Neibs(Op):
grid_c = 1+(((PyArray_DIMS(%(ten4)s))[2]-c)/step_x);
//number of patch in width
grid_d = 1+(((PyArray_DIMS(%(ten4)s))[3]-d)/step_y);
}else if ( "%(mode)s" == "half") {
if ( ((PyArray_DIMS(%(ten4)s))[2] < c) ||
( (((PyArray_DIMS(%(ten4)s))[2]-(c%%2)) %% step_x)!=0))
{
PyErr_Format(PyExc_TypeError,
"neib_shape[0]=%%ld, neib_step[0]=%%ld and"
" ten4.shape[2]=%%ld not consistent",
(long int)c, (long int)step_x,
(long int)(PyArray_DIMS(%(ten4)s)[2]));
%(fail)s;
}
if ( ((PyArray_DIMS(%(ten4)s))[3] < d) ||
( (((PyArray_DIMS(%(ten4)s))[3]-(d%%2)) %% step_y)!=0))
{
PyErr_Format(PyExc_TypeError,
"neib_shape[1]=%%ld, neib_step[1]=%%ld and"
" ten4.shape[3]=%%ld not consistent",
(long int)d, (long int)step_y,
(long int)(PyArray_DIMS(%(ten4)s)[3]));
%(fail)s;
}
//number of patch in height
grid_c = 1+(((PyArray_DIMS(%(ten4)s))[2]-(c%%2))/step_x);
//number of patch in width
grid_d = 1+(((PyArray_DIMS(%(ten4)s))[3]-(d%%2))/step_y);
}else{
PyErr_Format(PyExc_TypeError,
"Images2Neibs: unknow mode '%(mode)s'");
......@@ -444,8 +503,8 @@ class Images2Neibs(Op):
const npy_intp step_x = (npy_intp) *(dtype_%(neib_step)s*) PyArray_GETPTR1(%(neib_step)s, 0);
const npy_intp step_y = (npy_intp) *(dtype_%(neib_step)s*) PyArray_GETPTR1(%(neib_step)s, 1);
const int wrap_centered_idx_shift_x = c/2;
const int wrap_centered_idx_shift_y = d/2;
const int wrap_centered_half_idx_shift_x = c/2;
const int wrap_centered_half_idx_shift_y = d/2;
// Oh this is messed up...
for (int n = 0; n < nb_batch; n++) // loop over batches
for (int s = 0; s < nb_stack; s++) // loop over stacks
......@@ -457,27 +516,43 @@ class Images2Neibs(Op):
{
int ten4_2 = i + a * step_x;
if ( "%(mode)s" == "wrap_centered" ){
ten4_2 -= wrap_centered_idx_shift_x;
ten4_2 -= wrap_centered_half_idx_shift_x;
if ( ten4_2 < 0 ) ten4_2 += height;
else if (ten4_2 >= height) ten4_2 -= height;
} else if ( "%(mode)s" == "half" ){
ten4_2 -= wrap_centered_half_idx_shift_x;
}
for (int j = 0; j < d; j++) // loop over d
{
int ten4_3 = j + b * step_y;
if ( "%(mode)s" == "wrap_centered" ){
ten4_3 -= wrap_centered_idx_shift_y;
if ( ten4_3 < 0 ) ten4_3 += width;
else if (ten4_3 >= width) ten4_3 -= width;
// Row index outside the image: the whole row of this patch is zero padding.
if (ten4_2 < 0 || ten4_2 >= height) {
dtype_%(z)s* curr_z = (dtype_%(z)s*) PyArray_GETPTR2(%(z)s, z_row, d * i);
memset(curr_z, 0, d*sizeof(*curr_z));
// for (int j = 0; j < d; j++) // loop over d
// {
// int z_col = j + d * i;
// dtype_%(z)s* curr_z = (dtype_%(z)s*) PyArray_GETPTR2(%(z)s, z_row, z_col);
// *curr_z = 0;
// }
} else {
for (int j = 0; j < d; j++) // loop over d
{
int ten4_3 = j + b * step_y;
if ( "%(mode)s" == "wrap_centered" ){
ten4_3 -= wrap_centered_half_idx_shift_y;
if ( ten4_3 < 0 ) ten4_3 += width;
else if (ten4_3 >= width) ten4_3 -= width;
} else if ( "%(mode)s" == "half" ){
ten4_3 -= wrap_centered_half_idx_shift_y;
}
int z_col = j + d * i;
dtype_%(z)s* curr_z = (dtype_%(z)s*) PyArray_GETPTR2(%(z)s, z_row, z_col);
// Column index outside the image: this element is zero padding.
if (ten4_3 < 0 || ten4_3 >= width) {
*curr_z = 0;
} else {
*curr_z = *( (dtype_%(ten4)s*) PyArray_GETPTR4(%(ten4)s, n, s, ten4_2, ten4_3));
}
//printf("\\n(%%i,%%i,%%i,%%i) --> (%%i,%%i)",
// n, s, ten4_2, ten4_3, z_row, z_col);
//printf("%%f ", *curr_z);
}
int z_col = j + d * i;
dtype_%(z)s* curr_z = (dtype_%(z)s*) PyArray_GETPTR2(%(z)s, z_row, z_col);
*curr_z = *( (dtype_%(ten4)s*) PyArray_GETPTR4(%(ten4)s, n, s, ten4_2, ten4_3));
//printf("\\n(%%i,%%i,%%i,%%i) --> (%%i,%%i)",
// n, s, ten4_2, ten4_3, z_row, z_col);
//printf("%%f ", *curr_z);
}
}
}
......@@ -513,7 +588,7 @@ def images2neibs(ten4, neib_shape, neib_step=None, mode='valid'):
By default it is equal to `neib_shape` in other words, the patches are
disjoint. When the step is greater than `neib_shape`, some elements are
omitted. When None, this is the same as neib_shape (patch are disjoint).
mode : {'valid', 'ignore_borders', 'wrap_centered'}
mode : {'valid', 'ignore_borders', 'wrap_centered', 'half'}
``valid``
Requires an input that is a multiple of the
pooling factor (in each direction).
......@@ -522,6 +597,9 @@ def images2neibs(ten4, neib_shape, neib_step=None, mode='valid'):
the input is not a multiple of the pooling factor(s).
``wrap_centered``
?? TODO comment
``half``
Equivalent to 'valid' if we pre-pad the input on each side by
(neib_shape[0]//2, neib_shape[1]//2)
Returns
-------
......
......@@ -236,6 +236,29 @@ class T_Images2Neibs(unittest_tools.InferShapeTester):
# TODO: why this is commented?
# assert numpy.allclose(images.get_value(borrow=True), g())
def test_neibs_half_step_by_valid(self):
for shp_idx, (shape, neib_shape, neib_step) in enumerate([
[(7, 8, 5, 5), (3, 3), (1, 1)],
[(7, 8, 5, 5), (3, 3), (2, 2)],
[(7, 8, 5, 5), (3, 3), (4, 4)],
[(7, 8, 5, 5), (3, 3), (1, 4)],
[(7, 8, 5, 5), (3, 3), (4, 1)],
[(80, 90, 5, 5), (3, 3), (1, 2)],
[(1025, 9, 5, 5), (3, 3), (2, 1)],
[(1, 1, 5, 1037), (3, 3), (2, 4)],
[(1, 1, 1045, 5), (3, 3), (4, 2)]]
):
for dtype in self.dtypes:
x = theano.shared(np.random.randn(*shape).astype(dtype))
extra = (neib_shape[0] // 2, neib_shape[1] // 2)
padded_shape = (x.shape[0], x.shape[1], x.shape[2] + 2 * extra[0], x.shape[3] + 2 * extra[1])
padded_x = T.zeros(padded_shape)
padded_x = T.set_subtensor(padded_x[:, :, extra[0]:-extra[0], extra[1]:-extra[1]], x)
x_using_valid = images2neibs(padded_x, neib_shape, neib_step, mode="valid")
x_using_half = images2neibs(x, neib_shape, neib_step, mode="half")
close = T.allclose(x_using_valid, x_using_half)
assert close.eval()
def test_neibs_bad_shape_wrap_centered(self):
shape = (2, 3, 10, 10)
......@@ -281,6 +304,17 @@ class T_Images2Neibs(unittest_tools.InferShapeTester):
self.assertRaises(TypeError, unittest_tools.verify_grad,
fn, [images_val], mode=self.mode)
def test_grad_half(self):
# It is not implemented for now. So test that we raise an error.
shape = (2, 3, 6, 6)
images_val = np.random.rand(*shape).astype('float32')
def fn(images):
return images2neibs(images, (3, 3), mode='half')
self.assertRaises(TypeError, unittest_tools.verify_grad,
fn, [images_val], mode=self.mode)
def test_grad_valid(self):
shape = (2, 3, 6, 6)
images_val = np.random.rand(*shape).astype('float32')
......@@ -330,15 +364,22 @@ class T_Images2Neibs(unittest_tools.InferShapeTester):
images_val = np.arange(np.prod(shape),
dtype='float32').reshape(shape)
def fn(images):
return T.sum(T.sqr(images2neibs(images, (2, 2), mode='valid')),
axis=[0, 1])
f = theano.function([images],
T.sqr(images2neibs(images, (2, 2), mode='valid')),
mode=self.mode)
self.assertRaises(TypeError, f, images_val)
def test_neibs_half_with_inconsistent_borders(self):
shape = (2, 3, 5, 5)
images = T.dtensor4()
images_val = np.arange(np.prod(shape),
dtype='float32').reshape(shape)
f = theano.function([images],
T.sqr(images2neibs(images, (2, 2), mode='half')),
mode=self.mode)
self.assertRaises(TypeError, f, images_val)
def test_can_not_infer_nb_dim(self):
# Was reported in gh-5613. Test that we do not crash
# or that we crash in a few other case found while
......@@ -392,6 +433,19 @@ class T_Images2Neibs(unittest_tools.InferShapeTester):
for i in range(1000):
f()
def speed_neibs_half(self):
shape = (100, 40, 18, 18)
images = shared(np.arange(np.prod(shape),
dtype='float32').reshape(shape))
neib_shape = T.as_tensor_variable((3, 3))
f = function([],
images2neibs(images, neib_shape, mode="half"),
mode=self.mode)
for i in range(1000):
f()
def test_infer_shape(self):
shape = (100, 40, 6, 3)
images = np.ones(shape).astype('float32')
......@@ -431,6 +485,15 @@ class T_Images2Neibs(unittest_tools.InferShapeTester):
[x], [images2neibs(
x, neib_shape=(3, 3), mode='wrap_centered')],
[images], Images2Neibs)
shape = (100, 40, 6, 4)
images = np.ones(shape).astype('float32')
x = T.ftensor4()
self._compile_and_check(
[x], [images2neibs(x, neib_shape=(2, 1), mode='half')],
[images], Images2Neibs)
self._compile_and_check(
[x], [images2neibs(x, neib_shape=(2, 3), mode='half')],
[images], Images2Neibs)
if __name__ == '__main__':
unittest.main()
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论