提交 b8bee3c6 authored 作者: Aleksandar Botev's avatar Aleksandar Botev

Added mode 'half' to Images2Neibs. Tests pass. #5938

上级 22eaec56
...@@ -23,17 +23,20 @@ class GpuImages2Neibs(GpuKernelBase, Images2Neibs, Op): ...@@ -23,17 +23,20 @@ class GpuImages2Neibs(GpuKernelBase, Images2Neibs, Op):
""" """
def __init__(self, mode='valid'): def __init__(self, mode='valid'):
if mode not in ['valid', 'ignore_borders', 'wrap_centered']: if mode not in ['valid', 'ignore_borders', 'wrap_centered', 'half']:
raise NotImplementedError("Only the mode valid, ignore_borders" raise NotImplementedError("Only the mode valid, ignore_borders"
" and wrap_centered" ", wrap_centered and half"
" have been implemented for the op" " have been implemented for the op"
" GpuImages2Neibs") " GpuImages2Neibs")
self.mode = mode self.mode = mode
def make_node(self, ten4, neib_shape, neib_step): def make_node(self, ten4, neib_shape, neib_step=None):
ten4 = as_gpuarray_variable(ten4, infer_context_name(ten4)) ten4 = as_gpuarray_variable(ten4, infer_context_name(ten4))
neib_shape = T.as_tensor_variable(neib_shape) neib_shape = T.as_tensor_variable(neib_shape)
neib_step = T.as_tensor_variable(neib_step) if neib_step is None:
neib_step = neib_shape
else:
neib_step = T.as_tensor_variable(neib_step)
assert ten4.ndim == 4 assert ten4.ndim == 4
assert neib_shape.ndim == 1 assert neib_shape.ndim == 1
...@@ -50,7 +53,7 @@ class GpuImages2Neibs(GpuKernelBase, Images2Neibs, Op): ...@@ -50,7 +53,7 @@ class GpuImages2Neibs(GpuKernelBase, Images2Neibs, Op):
return node.inputs[0].type.context return node.inputs[0].type.context
def c_code_cache_version(self): def c_code_cache_version(self):
return (11,) return (12,)
def c_headers(self): def c_headers(self):
return ['<numpy_compat.h>', '<gpuarray/types.h>'] return ['<numpy_compat.h>', '<gpuarray/types.h>']
...@@ -85,8 +88,8 @@ class GpuImages2Neibs(GpuKernelBase, Images2Neibs, Op): ...@@ -85,8 +88,8 @@ class GpuImages2Neibs(GpuKernelBase, Images2Neibs, Op):
GLOBAL_MEM %(type_z)s * global_out, const ga_size offset_out GLOBAL_MEM %(type_z)s * global_out, const ga_size offset_out
) )
{ {
const ga_int wrap_centered_idx_shift_x = c/2; const ga_int wrap_centered_half_idx_shift_x = c/2;
const ga_int wrap_centered_idx_shift_y = d/2; const ga_int wrap_centered_half_idx_shift_y = d/2;
global_ten4 = (GLOBAL_MEM const %(type_ten4)s *)(((GLOBAL_MEM char *)global_ten4)+offset_ten4); global_ten4 = (GLOBAL_MEM const %(type_ten4)s *)(((GLOBAL_MEM char *)global_ten4)+offset_ten4);
global_out = (GLOBAL_MEM %(type_z)s *)(((GLOBAL_MEM char *)global_out)+offset_out); global_out = (GLOBAL_MEM %(type_z)s *)(((GLOBAL_MEM char *)global_out)+offset_out);
...@@ -111,31 +114,38 @@ class GpuImages2Neibs(GpuKernelBase, Images2Neibs, Op): ...@@ -111,31 +114,38 @@ class GpuImages2Neibs(GpuKernelBase, Images2Neibs, Op):
{ {
ga_int ten4_2 = i + a * step_x; ga_int ten4_2 = i + a * step_x;
if("%(mode)s"=="wrap_centered"){ if("%(mode)s"=="wrap_centered"){
ten4_2 -= wrap_centered_idx_shift_x; ten4_2 -= wrap_centered_half_idx_shift_x;
if ( ten4_2 < 0 ) if ( ten4_2 < 0 )
ten4_2 += height; ten4_2 += height;
else if (ten4_2 >= height) else if (ten4_2 >= height)
ten4_2 -= height; ten4_2 -= height;
} else if ("%(mode)s"=="half"){
ten4_2 -= wrap_centered_half_idx_shift_x;
} }
ga_int j = LID_0; // loop over d ga_int j = LID_0; // loop over d
{ {
ga_int ten4_3 = j + b * step_y; ga_int ten4_3 = j + b * step_y;
if("%(mode)s"=="wrap_centered"){ if("%(mode)s"=="wrap_centered"){
ten4_3 -= wrap_centered_idx_shift_y; ten4_3 -= wrap_centered_half_idx_shift_y;
if ( ten4_3 < 0 ) if ( ten4_3 < 0 )
ten4_3 += width; ten4_3 += width;
else if (ten4_3 >= width) else if (ten4_3 >= width)
ten4_3 -= width; ten4_3 -= width;
} else if ("%(mode)s"=="half"){
ten4_3 -= wrap_centered_half_idx_shift_y;
} }
ga_int ten4_idx = stride3*ten4_3 +
stride2*ten4_2 +
stride1*s + stride0*n;
ga_int z_col = j + d * i; ga_int z_col = j + d * i;
ga_int z_idx = z_col * out_s1 + ga_int z_idx = z_col * out_s1 +
z_row * out_s0; z_row * out_s0;
global_out[z_idx] = global_ten4[ten4_idx]; if(ten4_2 < 0 || ten4_2 >= height || ten4_3 < 0 || ten4_3 >= width){
global_out[z_idx] = 0;
} else {
ga_int ten4_idx = stride3*ten4_3 +
stride2*ten4_2 +
stride1*s + stride0*n;
global_out[z_idx] = global_ten4[ten4_idx];
}
} }
} }
} }
...@@ -172,8 +182,8 @@ class GpuImages2Neibs(GpuKernelBase, Images2Neibs, Op): ...@@ -172,8 +182,8 @@ class GpuImages2Neibs(GpuKernelBase, Images2Neibs, Op):
GLOBAL_MEM %(type_z)s * global_out, const ga_size offset_out GLOBAL_MEM %(type_z)s * global_out, const ga_size offset_out
) )
{ {
const ga_int wrap_centered_idx_shift_x = c/2; const ga_int wrap_centered_half_idx_shift_x = c/2;
const ga_int wrap_centered_idx_shift_y = d/2; const ga_int wrap_centered_half_idx_shift_y = d/2;
global_ten4 = (GLOBAL_MEM const %(type_ten4)s *)(((GLOBAL_MEM char *)global_ten4)+offset_ten4); global_ten4 = (GLOBAL_MEM const %(type_ten4)s *)(((GLOBAL_MEM char *)global_ten4)+offset_ten4);
global_out = (GLOBAL_MEM %(type_z)s *)(((GLOBAL_MEM char *)global_out)+offset_out); global_out = (GLOBAL_MEM %(type_z)s *)(((GLOBAL_MEM char *)global_out)+offset_out);
...@@ -199,32 +209,39 @@ class GpuImages2Neibs(GpuKernelBase, Images2Neibs, Op): ...@@ -199,32 +209,39 @@ class GpuImages2Neibs(GpuKernelBase, Images2Neibs, Op):
{ {
ga_int ten4_2 = i + a * step_x; ga_int ten4_2 = i + a * step_x;
if("%(mode)s"=="wrap_centered"){ if("%(mode)s"=="wrap_centered"){
ten4_2 -= wrap_centered_idx_shift_x; ten4_2 -= wrap_centered_half_idx_shift_x;
if ( ten4_2 < 0 ) if ( ten4_2 < 0 )
ten4_2 += height; ten4_2 += height;
else if (ten4_2 >= height) else if (ten4_2 >= height)
ten4_2 -= height; ten4_2 -= height;
} else if ("%(mode)s"=="half"){
ten4_2 -= wrap_centered_half_idx_shift_x;
} }
// loop over d // loop over d
for (ga_int j = LID_0; j < d; j+=LDIM_0) for (ga_int j = LID_0; j < d; j+=LDIM_0)
{ {
ga_int ten4_3 = j + b * step_y; ga_int ten4_3 = j + b * step_y;
if("%(mode)s"=="wrap_centered"){ if("%(mode)s"=="wrap_centered"){
ten4_3 -= wrap_centered_idx_shift_y; ten4_3 -= wrap_centered_half_idx_shift_y;
if ( ten4_3 < 0 ) if ( ten4_3 < 0 )
ten4_3 += width; ten4_3 += width;
else if (ten4_3 >= width) else if (ten4_3 >= width)
ten4_3 -= width; ten4_3 -= width;
} else if ("%(mode)s"=="half"){
ten4_3 -= wrap_centered_half_idx_shift_y;
} }
ga_int ten4_idx = stride3*ten4_3 +
stride2*ten4_2 +
stride1*s + stride0*n;
ga_int z_col = j + d * i; ga_int z_col = j + d * i;
ga_int z_idx = z_col * out_s1 + ga_int z_idx = z_col * out_s1 +
z_row * out_s0; z_row * out_s0;
global_out[z_idx] = global_ten4[ten4_idx]; if(ten4_2 < 0 || ten4_2 >= height || ten4_3 < 0 || ten4_3 >= width){
global_out[z_idx] = 0;
} else {
ga_int ten4_idx = stride3*ten4_3 +
stride2*ten4_2 +
stride1*s + stride0*n;
global_out[z_idx] = global_ten4[ten4_idx];
}
} }
} }
} }
...@@ -367,6 +384,31 @@ class GpuImages2Neibs(GpuKernelBase, Images2Neibs, Op): ...@@ -367,6 +384,31 @@ class GpuImages2Neibs(GpuKernelBase, Images2Neibs, Op):
grid_c = 1+(((PyGpuArray_DIMS(%(ten4)s))[2]-c)/step_x); grid_c = 1+(((PyGpuArray_DIMS(%(ten4)s))[2]-c)/step_x);
//number of patch in width //number of patch in width
grid_d = 1+(((PyGpuArray_DIMS(%(ten4)s))[3]-d)/step_y); grid_d = 1+(((PyGpuArray_DIMS(%(ten4)s))[3]-d)/step_y);
}else if ( "%(mode)s" == "half") {
if ( ((PyGpuArray_DIMS(%(ten4)s))[2] < c) ||
((((PyGpuArray_DIMS(%(ten4)s))[2]-(c%%2)) %% step_x)!=0))
{
PyErr_Format(PyExc_TypeError, "GpuImages2Neibs:"
" neib_shape[0]=%%d, neib_step[0]=%%d and"
" ten4.shape[2]=%%d not consistent",
c, step_x,
PyGpuArray_DIMS(%(ten4)s)[2]);
%(fail)s;
}
if ( ((PyGpuArray_DIMS(%(ten4)s))[3] < d) ||
((((PyGpuArray_DIMS(%(ten4)s))[3]-(d%%2)) %% step_y)!=0))
{
PyErr_Format(PyExc_TypeError, "GpuImages2Neibs:"
" neib_shape[1]=%%d, neib_step[1]=%%d and"
" ten4.shape[3]=%%d not consistent",
d, step_y,
PyGpuArray_DIMS(%(ten4)s)[3]);
%(fail)s;
}
//number of patch in height
grid_c = 1+(((PyGpuArray_DIMS(%(ten4)s))[2]-(c%%2))/step_x);
//number of patch in width
grid_d = 1+(((PyGpuArray_DIMS(%(ten4)s))[3]-(d%%2))/step_y);
}else{ }else{
PyErr_Format(PyExc_TypeError, PyErr_Format(PyExc_TypeError,
"GpuImages2Neibs:: unknown mode '%(mode)s'"); "GpuImages2Neibs:: unknown mode '%(mode)s'");
...@@ -485,5 +527,5 @@ class GpuImages2Neibs(GpuKernelBase, Images2Neibs, Op): ...@@ -485,5 +527,5 @@ class GpuImages2Neibs(GpuKernelBase, Images2Neibs, Op):
@op_lifter([Images2Neibs]) @op_lifter([Images2Neibs])
@register_opt2([Images2Neibs], 'fast_compile') @register_opt2([Images2Neibs], 'fast_compile')
def local_gpua_images2neibs(op, context_name, inputs, outputs): def local_gpua_images2neibs(op, context_name, inputs, outputs):
if op.mode in ['valid', 'ignore_borders', 'wrap_centered']: if op.mode in ['valid', 'ignore_borders', 'wrap_centered', 'half']:
return GpuImages2Neibs(op.mode) return GpuImages2Neibs(op.mode)
...@@ -29,15 +29,18 @@ class Images2Neibs(Op): ...@@ -29,15 +29,18 @@ class Images2Neibs(Op):
of the input is not a multiple of the pooling factor(s). of the input is not a multiple of the pooling factor(s).
- 'wrap_centered' : - 'wrap_centered' :
?? TODO comment ?? TODO comment
- 'half' :
Equivalent to 'valid' applied to the input after zero-padding it
on each side by (neib_shape[0]//2, neib_shape[1]//2).
""" """
__props__ = ("mode",) __props__ = ("mode",)
def __init__(self, mode='valid'): def __init__(self, mode='valid'):
if mode not in ['valid', 'wrap_centered', 'ignore_borders']: if mode not in ['valid', 'wrap_centered', 'ignore_borders', 'half']:
raise NotImplementedError("Only the mode valid, ignore_borders" raise NotImplementedError("Only the mode valid, ignore_borders"
" and wrap_centered have been" ",wrap_centered and half have been"
" implemented for the op Images2Neibs") " implemented for the op Images2Neibs")
self.mode = mode self.mode = mode
...@@ -198,7 +201,6 @@ class Images2Neibs(Op): ...@@ -198,7 +201,6 @@ class Images2Neibs(Op):
(c, d, ten4.shape[2], ten4.shape[3])) (c, d, ten4.shape[2], ten4.shape[3]))
grid_c = CEIL_INTDIV(ten4.shape[2], step_x) grid_c = CEIL_INTDIV(ten4.shape[2], step_x)
grid_d = CEIL_INTDIV(ten4.shape[3], step_y) grid_d = CEIL_INTDIV(ten4.shape[3], step_y)
elif mode == "valid": elif mode == "valid":
if (ten4.shape[2] < c) or (((ten4.shape[2] - c) % step_x) != 0): if (ten4.shape[2] < c) or (((ten4.shape[2] - c) % step_x) != 0):
raise TypeError( raise TypeError(
...@@ -219,6 +221,26 @@ class Images2Neibs(Op): ...@@ -219,6 +221,26 @@ class Images2Neibs(Op):
grid_c = 1 + ((ten4.shape[2] - c) // step_x) grid_c = 1 + ((ten4.shape[2] - c) // step_x)
# number of patch in width # number of patch in width
grid_d = 1 + ((ten4.shape[3] - d) // step_y) grid_d = 1 + ((ten4.shape[3] - d) // step_y)
elif mode == "half":
# This is equivalent to 'valid' with padding (c // 2, d // 2) on both sides
# Thus the expanded image will have size (h + 2 * (c // 2), w + 2 * (d // 2))
# Plugging these in the equation for 'valid' we get
# h + 2 * (c // 2) - c = h - (c % 2)
# w + 2 * (d // 2) - d = w - (d % 2)
if (ten4.shape[2] < c) or (((ten4.shape[2] - (c % 2)) % step_x) != 0):
raise TypeError(
"neib_shape[0]=%d, neib_step[0]=%d and"
" ten4.shape[2]=%d not consistent" %
(c, step_x, ten4.shape[2]))
if (ten4.shape[3] < d) or (((ten4.shape[3] - (d % 2)) % step_y) != 0):
raise TypeError(
"neib_shape[1]=%d, neib_step[1]=%d and"
" ten4.shape[3]=%d not consistent" %
(d, step_y, ten4.shape[3]))
# number of patch in height
grid_c = 1 + ((ten4.shape[2] - (c % 2)) // step_x)
# number of patch in width
grid_d = 1 + ((ten4.shape[3] - (d % 2)) // step_y)
else: else:
raise TypeError("Images2Neibs: unknow mode '%s'" % mode) raise TypeError("Images2Neibs: unknow mode '%s'" % mode)
...@@ -231,8 +253,8 @@ class Images2Neibs(Op): ...@@ -231,8 +253,8 @@ class Images2Neibs(Op):
height = ten4.shape[2] height = ten4.shape[2]
width = ten4.shape[3] width = ten4.shape[3]
wrap_centered_idx_shift_x = c // 2 wrap_centered_half_idx_shift_x = c // 2
wrap_centered_idx_shift_y = d // 2 wrap_centered_half_idx_shift_y = d // 2
for n in range(nb_batch): for n in range(nb_batch):
for s in range(nb_stack): for s in range(nb_stack):
# loop over the number of patch in height # loop over the number of patch in height
...@@ -243,22 +265,31 @@ class Images2Neibs(Op): ...@@ -243,22 +265,31 @@ class Images2Neibs(Op):
for i in range(c): for i in range(c):
ten4_2 = i + a * step_x ten4_2 = i + a * step_x
if mode == "wrap_centered": if mode == "wrap_centered":
ten4_2 -= wrap_centered_idx_shift_x ten4_2 -= wrap_centered_half_idx_shift_x
if ten4_2 < 0: if ten4_2 < 0:
ten4_2 += height ten4_2 += height
elif ten4_2 >= height: elif ten4_2 >= height:
ten4_2 -= height ten4_2 -= height
for j in range(d): elif mode == "half":
ten4_3 = j + b * step_y ten4_2 -= wrap_centered_half_idx_shift_x
if mode == "wrap_centered": if ten4_2 < 0 or ten4_2 >= height:
ten4_3 -= wrap_centered_idx_shift_y z[0][z_row, d * i: d * i + d] = 0
if ten4_3 < 0: else:
ten4_3 += width for j in range(d):
elif ten4_3 >= width: ten4_3 = j + b * step_y
ten4_3 -= width if mode == "wrap_centered":
z_col = j + d * i ten4_3 -= wrap_centered_half_idx_shift_y
if ten4_3 < 0:
z[0][z_row, z_col] = ten4[n, s, ten4_2, ten4_3] ten4_3 += width
elif ten4_3 >= width:
ten4_3 -= width
elif mode == "half":
ten4_3 -= wrap_centered_half_idx_shift_y
z_col = j + d * i
if ten4_3 < 0 or ten4_3 >= width:
z[0][z_row, z_col] = 0
else:
z[0][z_row, z_col] = ten4[n, s, ten4_2, ten4_3]
def infer_shape(self, node, input_shape): def infer_shape(self, node, input_shape):
in_shape = input_shape[0] in_shape = input_shape[0]
...@@ -273,6 +304,9 @@ class Images2Neibs(Op): ...@@ -273,6 +304,9 @@ class Images2Neibs(Op):
elif self.mode == 'ignore_borders': elif self.mode == 'ignore_borders':
grid_c = 1 + ((in_shape[2] - c) // step_x) grid_c = 1 + ((in_shape[2] - c) // step_x)
grid_d = 1 + ((in_shape[3] - d) // step_y) grid_d = 1 + ((in_shape[3] - d) // step_y)
elif self.mode == 'half':
grid_c = 1 + ((in_shape[2] - (c % 2)) // step_x)
grid_d = 1 + ((in_shape[3] - (d % 2)) // step_y)
z_dim0 = grid_c * grid_d * in_shape[1] * in_shape[0] z_dim0 = grid_c * grid_d * in_shape[1] * in_shape[0]
z_dim1 = c * d z_dim1 = c * d
return [(z_dim0, z_dim1)] return [(z_dim0, z_dim1)]
...@@ -394,6 +428,31 @@ class Images2Neibs(Op): ...@@ -394,6 +428,31 @@ class Images2Neibs(Op):
grid_c = 1+(((PyArray_DIMS(%(ten4)s))[2]-c)/step_x); grid_c = 1+(((PyArray_DIMS(%(ten4)s))[2]-c)/step_x);
//number of patch in width //number of patch in width
grid_d = 1+(((PyArray_DIMS(%(ten4)s))[3]-d)/step_y); grid_d = 1+(((PyArray_DIMS(%(ten4)s))[3]-d)/step_y);
}else if ( "%(mode)s" == "half") {
if ( ((PyArray_DIMS(%(ten4)s))[2] < c) ||
( (((PyArray_DIMS(%(ten4)s))[2]-(c%%2)) %% step_x)!=0))
{
PyErr_Format(PyExc_TypeError,
"neib_shape[0]=%%ld, neib_step[0]=%%ld and"
" ten4.shape[2]=%%ld not consistent",
(long int)c, (long int)step_x,
(long int)(PyArray_DIMS(%(ten4)s)[2]));
%(fail)s;
}
if ( ((PyArray_DIMS(%(ten4)s))[3] < d) ||
( (((PyArray_DIMS(%(ten4)s))[3]-(d%%2)) %% step_y)!=0))
{
PyErr_Format(PyExc_TypeError,
"neib_shape[1]=%%ld, neib_step[1]=%%ld and"
" ten4.shape[3]=%%ld not consistent",
(long int)d, (long int)step_y,
(long int)(PyArray_DIMS(%(ten4)s)[3]));
%(fail)s;
}
//number of patch in height
grid_c = 1+(((PyArray_DIMS(%(ten4)s))[2]-(c%%2))/step_x);
//number of patch in width
grid_d = 1+(((PyArray_DIMS(%(ten4)s))[3]-(d%%2))/step_y);
}else{ }else{
PyErr_Format(PyExc_TypeError, PyErr_Format(PyExc_TypeError,
"Images2Neibs: unknow mode '%(mode)s'"); "Images2Neibs: unknow mode '%(mode)s'");
...@@ -444,8 +503,8 @@ class Images2Neibs(Op): ...@@ -444,8 +503,8 @@ class Images2Neibs(Op):
const npy_intp step_x = (npy_intp) *(dtype_%(neib_step)s*) PyArray_GETPTR1(%(neib_step)s, 0); const npy_intp step_x = (npy_intp) *(dtype_%(neib_step)s*) PyArray_GETPTR1(%(neib_step)s, 0);
const npy_intp step_y = (npy_intp) *(dtype_%(neib_step)s*) PyArray_GETPTR1(%(neib_step)s, 1); const npy_intp step_y = (npy_intp) *(dtype_%(neib_step)s*) PyArray_GETPTR1(%(neib_step)s, 1);
const int wrap_centered_idx_shift_x = c/2; const int wrap_centered_half_idx_shift_x = c/2;
const int wrap_centered_idx_shift_y = d/2; const int wrap_centered_half_idx_shift_y = d/2;
// Oh this is messed up... // Oh this is messed up...
for (int n = 0; n < nb_batch; n++) // loop over batches for (int n = 0; n < nb_batch; n++) // loop over batches
for (int s = 0; s < nb_stack; s++) // loop over stacks for (int s = 0; s < nb_stack; s++) // loop over stacks
...@@ -457,27 +516,34 @@ class Images2Neibs(Op): ...@@ -457,27 +516,34 @@ class Images2Neibs(Op):
{ {
int ten4_2 = i + a * step_x; int ten4_2 = i + a * step_x;
if ( "%(mode)s" == "wrap_centered" ){ if ( "%(mode)s" == "wrap_centered" ){
ten4_2 -= wrap_centered_idx_shift_x; ten4_2 -= wrap_centered_half_idx_shift_x;
if ( ten4_2 < 0 ) ten4_2 += height; if ( ten4_2 < 0 ) ten4_2 += height;
else if (ten4_2 >= height) ten4_2 -= height; else if (ten4_2 >= height) ten4_2 -= height;
} else if ( "%(mode)s" == "half" ){
ten4_2 -= wrap_centered_half_idx_shift_x;
} }
for (int j = 0; j < d; j++) // loop over d if (ten4_2 < 0 | ten4_2 >= height) {
{ dtype_%(z)s* curr_z = (dtype_%(z)s*) PyArray_GETPTR2(%(z)s, z_row, d * i);
memset(curr_z, 0, d*sizeof(*curr_z));
int ten4_3 = j + b * step_y; } else {
if ( "%(mode)s" == "wrap_centered" ){ for (int j = 0; j < d; j++) // loop over d
ten4_3 -= wrap_centered_idx_shift_y; {
if ( ten4_3 < 0 ) ten4_3 += width; int ten4_3 = j + b * step_y;
else if (ten4_3 >= width) ten4_3 -= width; if ( "%(mode)s" == "wrap_centered" ){
ten4_3 -= wrap_centered_half_idx_shift_y;
if ( ten4_3 < 0 ) ten4_3 += width;
else if (ten4_3 >= width) ten4_3 -= width;
} else if ( "%(mode)s" == "half" ){
ten4_3 -= wrap_centered_half_idx_shift_y;
}
int z_col = j + d * i;
dtype_%(z)s* curr_z = (dtype_%(z)s*) PyArray_GETPTR2(%(z)s, z_row, z_col);
if (ten4_3 < 0 | ten4_3 >= width) {
*curr_z = 0;
} else {
*curr_z = *( (dtype_%(ten4)s*) PyArray_GETPTR4(%(ten4)s, n, s, ten4_2, ten4_3));
}
} }
int z_col = j + d * i;
dtype_%(z)s* curr_z = (dtype_%(z)s*) PyArray_GETPTR2(%(z)s, z_row, z_col);
*curr_z = *( (dtype_%(ten4)s*) PyArray_GETPTR4(%(ten4)s, n, s, ten4_2, ten4_3));
//printf("\\n(%%i,%%i,%%i,%%i) --> (%%i,%%i)",
// n, s, ten4_2, ten4_3, z_row, z_col);
//printf("%%f ", *curr_z);
} }
} }
} }
...@@ -513,7 +579,7 @@ def images2neibs(ten4, neib_shape, neib_step=None, mode='valid'): ...@@ -513,7 +579,7 @@ def images2neibs(ten4, neib_shape, neib_step=None, mode='valid'):
By default it is equal to `neib_shape` in other words, the patches are By default it is equal to `neib_shape` in other words, the patches are
disjoint. When the step is greater than `neib_shape`, some elements are disjoint. When the step is greater than `neib_shape`, some elements are
omitted. When None, this is the same as neib_shape (patch are disjoint). omitted. When None, this is the same as neib_shape (patch are disjoint).
mode : {'valid', 'ignore_borders', 'wrap_centered'} mode : {'valid', 'ignore_borders', 'wrap_centered', 'half'}
``valid`` ``valid``
Requires an input that is a multiple of the Requires an input that is a multiple of the
pooling factor (in each direction). pooling factor (in each direction).
...@@ -522,6 +588,9 @@ def images2neibs(ten4, neib_shape, neib_step=None, mode='valid'): ...@@ -522,6 +588,9 @@ def images2neibs(ten4, neib_shape, neib_step=None, mode='valid'):
the input is not a multiple of the pooling factor(s). the input is not a multiple of the pooling factor(s).
``wrap_centered`` ``wrap_centered``
?? TODO comment ?? TODO comment
``half``
Equivalent to 'valid' applied to the input after zero-padding it
on each side by (neib_shape[0]//2, neib_shape[1]//2).
Returns Returns
------- -------
......
...@@ -236,6 +236,31 @@ class T_Images2Neibs(unittest_tools.InferShapeTester): ...@@ -236,6 +236,31 @@ class T_Images2Neibs(unittest_tools.InferShapeTester):
# TODO: why this is commented? # TODO: why this is commented?
# assert numpy.allclose(images.get_value(borrow=True), g()) # assert numpy.allclose(images.get_value(borrow=True), g())
def test_neibs_half_step_by_valid(self):
    """Check mode='half' against mode='valid' on an explicitly padded input.

    For every (input shape, step) pair, neighbourhood shape and dtype in
    ``self.dtypes``, the input is zero-padded by ``neib_shape // 2`` on
    each side of its last two axes. Running 'valid' on the padded tensor
    must give the same result as running 'half' on the original tensor.
    """
    cases = [
        ((7, 8, 5, 5), (1, 1)),
        ((7, 8, 5, 5), (2, 2)),
        ((7, 8, 5, 5), (4, 4)),
        ((7, 8, 5, 5), (1, 4)),
        ((7, 8, 5, 5), (4, 1)),
        ((80, 90, 5, 5), (1, 2)),
        ((1025, 9, 5, 5), (2, 1)),
        ((1, 1, 5, 1037), (2, 4)),
        ((1, 1, 1045, 5), (4, 2)),
    ]
    for shape, neib_step in cases:
        for neib_shape in [(3, 3), (3, 5), (5, 3)]:
            for dtype in self.dtypes:
                x = theano.shared(np.random.randn(*shape).astype(dtype))
                extra = (neib_shape[0] // 2, neib_shape[1] // 2)
                padded_shape = (x.shape[0], x.shape[1],
                                x.shape[2] + 2 * extra[0],
                                x.shape[3] + 2 * extra[1])
                padded_x = T.zeros(padded_shape)
                # Explicit stop indices instead of negative ones:
                # ``extra:-extra`` would select nothing if a padding were 0.
                padded_x = T.set_subtensor(
                    padded_x[:, :,
                             extra[0]:extra[0] + shape[2],
                             extra[1]:extra[1] + shape[3]],
                    x)
                x_using_valid = images2neibs(padded_x, neib_shape,
                                             neib_step, mode="valid")
                x_using_half = images2neibs(x, neib_shape,
                                            neib_step, mode="half")
                close = T.allclose(x_using_valid, x_using_half)
                f = theano.function([], close, mode=self.mode)
                # Report the failing configuration instead of a bare assert.
                assert f(), (shape, neib_step, neib_shape, dtype)
def test_neibs_bad_shape_wrap_centered(self): def test_neibs_bad_shape_wrap_centered(self):
shape = (2, 3, 10, 10) shape = (2, 3, 10, 10)
...@@ -281,6 +306,17 @@ class T_Images2Neibs(unittest_tools.InferShapeTester): ...@@ -281,6 +306,17 @@ class T_Images2Neibs(unittest_tools.InferShapeTester):
self.assertRaises(TypeError, unittest_tools.verify_grad, self.assertRaises(TypeError, unittest_tools.verify_grad,
fn, [images_val], mode=self.mode) fn, [images_val], mode=self.mode)
def test_grad_half(self):
    """The gradient for mode='half' is not implemented yet.

    ``verify_grad`` on a (2, 3, 6, 6) float32 input is therefore
    expected to raise ``TypeError``.
    """
    images_val = np.random.rand(2, 3, 6, 6).astype('float32')

    def half_neibs(images):
        return images2neibs(images, (3, 3), mode='half')

    with self.assertRaises(TypeError):
        unittest_tools.verify_grad(half_neibs, [images_val],
                                   mode=self.mode)
def test_grad_valid(self): def test_grad_valid(self):
shape = (2, 3, 6, 6) shape = (2, 3, 6, 6)
images_val = np.random.rand(*shape).astype('float32') images_val = np.random.rand(*shape).astype('float32')
...@@ -330,15 +366,22 @@ class T_Images2Neibs(unittest_tools.InferShapeTester): ...@@ -330,15 +366,22 @@ class T_Images2Neibs(unittest_tools.InferShapeTester):
images_val = np.arange(np.prod(shape), images_val = np.arange(np.prod(shape),
dtype='float32').reshape(shape) dtype='float32').reshape(shape)
def fn(images):
return T.sum(T.sqr(images2neibs(images, (2, 2), mode='valid')),
axis=[0, 1])
f = theano.function([images], f = theano.function([images],
T.sqr(images2neibs(images, (2, 2), mode='valid')), T.sqr(images2neibs(images, (2, 2), mode='valid')),
mode=self.mode) mode=self.mode)
self.assertRaises(TypeError, f, images_val) self.assertRaises(TypeError, f, images_val)
def test_neibs_half_with_inconsistent_borders(self):
    """mode='half' must reject a 5x5 image with a (2, 2) neighbourhood.

    The compiled function is expected to raise ``TypeError`` when it is
    called on the (2, 3, 5, 5) input.
    """
    shape = (2, 3, 5, 5)
    images = T.dtensor4()
    n_elements = np.prod(shape)
    images_val = np.arange(n_elements, dtype='float32').reshape(shape)

    squared_neibs = T.sqr(images2neibs(images, (2, 2), mode='half'))
    f = theano.function([images], squared_neibs, mode=self.mode)

    with self.assertRaises(TypeError):
        f(images_val)
def test_can_not_infer_nb_dim(self): def test_can_not_infer_nb_dim(self):
# Was reported in gh-5613. Test that we do not crash # Was reported in gh-5613. Test that we do not crash
# or that we crash in a few other case found while # or that we crash in a few other case found while
...@@ -346,7 +389,7 @@ class T_Images2Neibs(unittest_tools.InferShapeTester): ...@@ -346,7 +389,7 @@ class T_Images2Neibs(unittest_tools.InferShapeTester):
img = T.tensor4('img') img = T.tensor4('img')
patches = T.nnet.neighbours.images2neibs(img, [16, 16]) patches = T.nnet.neighbours.images2neibs(img, [16, 16])
extractPatches = theano.function([img], patches) extractPatches = theano.function([img], patches, mode=self.mode)
patsRecovery = T.matrix('patsRecovery') patsRecovery = T.matrix('patsRecovery')
original_size = T.ivector('original_size') original_size = T.ivector('original_size')
...@@ -354,7 +397,8 @@ class T_Images2Neibs(unittest_tools.InferShapeTester): ...@@ -354,7 +397,8 @@ class T_Images2Neibs(unittest_tools.InferShapeTester):
for mode in ['valid', 'ignore_borders']: for mode in ['valid', 'ignore_borders']:
out = neibs2images(patsRecovery, (16, 16), out = neibs2images(patsRecovery, (16, 16),
original_size, mode=mode) original_size, mode=mode)
f = theano.function([patsRecovery, original_size], out) f = theano.function([patsRecovery, original_size], out,
mode=self.mode)
im_val = np.ones((1, 3, 320, 320), dtype=np.float32) im_val = np.ones((1, 3, 320, 320), dtype=np.float32)
neibs = extractPatches(im_val) neibs = extractPatches(im_val)
...@@ -364,8 +408,13 @@ class T_Images2Neibs(unittest_tools.InferShapeTester): ...@@ -364,8 +408,13 @@ class T_Images2Neibs(unittest_tools.InferShapeTester):
(1, 1, 3, 320, 320)) (1, 1, 3, 320, 320))
# End up with a step of 0 # End up with a step of 0
# This can lead to division by zero in DebugMode # This can lead to division by zero in DebugMode
self.assertRaises((ValueError, ZeroDivisionError), f, neibs, # This cannot be run on the GPU: the C code does not raise
(3, 320, 320, 1)) # ZeroDivisionError; instead the whole process crashes
# with a floating point exception.
if "gpu" not in self.mode.provided_optimizer.include and \
"gpuarray" not in self.mode.provided_optimizer.include:
self.assertRaises((ValueError, ZeroDivisionError), f, neibs,
(3, 320, 320, 1))
def speed_neibs(self): def speed_neibs(self):
shape = (100, 40, 18, 18) shape = (100, 40, 18, 18)
...@@ -392,6 +441,19 @@ class T_Images2Neibs(unittest_tools.InferShapeTester): ...@@ -392,6 +441,19 @@ class T_Images2Neibs(unittest_tools.InferShapeTester):
for i in range(1000): for i in range(1000):
f() f()
def speed_neibs_half(self):
    """Call images2neibs with mode="half" 1000 times on a fixed input.

    The input is a shared float32 tensor of shape (100, 40, 18, 18)
    and the neighbourhood shape is (3, 3).
    """
    shape = (100, 40, 18, 18)
    n_elements = np.prod(shape)
    images = shared(np.arange(n_elements, dtype='float32').reshape(shape))
    neib_shape = T.as_tensor_variable((3, 3))

    half_neibs = images2neibs(images, neib_shape, mode="half")
    f = function([], half_neibs, mode=self.mode)

    for _ in range(1000):
        f()
def test_infer_shape(self): def test_infer_shape(self):
shape = (100, 40, 6, 3) shape = (100, 40, 6, 3)
images = np.ones(shape).astype('float32') images = np.ones(shape).astype('float32')
...@@ -431,6 +493,15 @@ class T_Images2Neibs(unittest_tools.InferShapeTester): ...@@ -431,6 +493,15 @@ class T_Images2Neibs(unittest_tools.InferShapeTester):
[x], [images2neibs( [x], [images2neibs(
x, neib_shape=(3, 3), mode='wrap_centered')], x, neib_shape=(3, 3), mode='wrap_centered')],
[images], Images2Neibs) [images], Images2Neibs)
shape = (100, 40, 6, 4)
images = np.ones(shape).astype('float32')
x = T.ftensor4()
self._compile_and_check(
[x], [images2neibs(x, neib_shape=(2, 1), mode='half')],
[images], Images2Neibs)
self._compile_and_check(
[x], [images2neibs(x, neib_shape=(2, 3), mode='half')],
[images], Images2Neibs)
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论