提交 98d2a0c3 authored 作者: Frederic Bastien's avatar Frederic Bastien

In the Images2Neibs (images2neibs) op, added the neib_step parameter and updated the…

In the Images2Neibs (images2neibs) op, added the neib_step parameter and updated the GPU op to raise an error if neib_step is not the default, as the GPU op doesn't implement the new option.
上级 bb466e6d
...@@ -17,18 +17,32 @@ class Images2Neibs(Op): ...@@ -17,18 +17,32 @@ class Images2Neibs(Op):
return hash(type(self)) return hash(type(self))
def __str__(self): def __str__(self):
return self.__class__.__name__ return self.__class__.__name__
def make_node(self, ten4, neib_shape, neib_step=None):
    """
    Build the Apply node that extracts neighbourhoods (patches) from ten4.

    :param ten4: the images; converted to a tensor variable that must
                 have 4 dimensions.
    :param neib_shape: shape of one patch; converted to a tensor variable
                       that must have 1 dimension (the C code further
                       requires it to contain 2 elements).
    :param neib_step: (dx,dy) where dx is the number of rows to skip
                      between patches and dy is the number of columns.
                      When None, this is the same as neib_shape
                      (patches are disjoint).
    :return: an Apply node whose inputs are [ten4, neib_shape, neib_step]
             and whose single output is a matrix with ten4's dtype.
    """
    ten4 = T.as_tensor_variable(ten4)
    neib_shape = T.as_tensor_variable(neib_shape)
    if neib_step is None:
        # Reuse the very same variable as neib_shape: the default step is
        # one whole patch, so consecutive patches do not overlap.
        neib_step = neib_shape
    else:
        neib_step = T.as_tensor_variable(neib_step)

    assert ten4.ndim == 4
    assert neib_shape.ndim == 1
    assert neib_step.ndim == 1

    return Apply(self, [ten4, neib_shape, neib_step],
                 [T.matrix(dtype=ten4.type.dtype)])
def grad(self, inputs, output_grads):
    """
    Gradient of the op: not implemented, so None is returned per input.

    :param inputs: the inputs of the node (ten4, neib_shape and neib_step
                   for nodes built by make_node).
    :param output_grads: gradients with respect to the outputs; unused.
    :return: a list containing one None for each element of inputs.
    """
    # The previous version unpacked exactly two inputs and returned two
    # entries, which no longer matches the three inputs make_node creates.
    return [None for _ in inputs]
def c_code_cache_version(self):
    """
    Return the version tag of the C code generated by c_code.

    The tag moves from (2,) to (3,) in this change, in which c_code
    starts taking the additional neib_step input.
    """
    return (3,)
def c_code(self, node, name, (ten4, neib_shape), (z,), sub): def c_code(self, node, name, (ten4, neib_shape, neib_step), (z,), sub):
fail = sub['fail'] fail = sub['fail']
return """ return """
...@@ -48,25 +62,44 @@ class Images2Neibs(Op): ...@@ -48,25 +62,44 @@ class Images2Neibs(Op):
PyErr_Format(PyExc_TypeError, "neib_shape wrong shape ; has to contain 2 elements"); PyErr_Format(PyExc_TypeError, "neib_shape wrong shape ; has to contain 2 elements");
%(fail)s; %(fail)s;
} }
if (%(neib_step)s->nd != 1)
{
PyErr_Format(PyExc_TypeError, "neib_step wrong rank");
%(fail)s;
}
if ( (%(neib_step)s->dimensions)[0] != 2)
{
PyErr_Format(PyExc_TypeError, "neib_step wrong step ; has to contain 2 elements");
%(fail)s;
}
// (c,d) = neib_shape
const npy_intp c = (npy_intp) *(dtype_%(neib_shape)s*) PyArray_GETPTR1(%(neib_shape)s, 0); const npy_intp c = (npy_intp) *(dtype_%(neib_shape)s*) PyArray_GETPTR1(%(neib_shape)s, 0);
const npy_intp d = (npy_intp) *(dtype_%(neib_shape)s*) PyArray_GETPTR1(%(neib_shape)s, 1); const npy_intp d = (npy_intp) *(dtype_%(neib_shape)s*) PyArray_GETPTR1(%(neib_shape)s, 1);
// (step_x,step_y) = neib_step
const npy_intp step_x = (npy_intp) *(dtype_%(neib_step)s*) PyArray_GETPTR1(%(neib_step)s, 0);
const npy_intp step_y = (npy_intp) *(dtype_%(neib_step)s*) PyArray_GETPTR1(%(neib_step)s, 1);
if ( (%(ten4)s->dimensions)[2] %% c != 0) if ( ((%(ten4)s->dimensions)[2] < c) ||( (((%(ten4)s->dimensions)[2]-c) %% step_x)!=0))
{ {
PyErr_Format(PyExc_TypeError, "neib_shape[0] must divide ten4.shape[2]"); PyErr_Format(PyExc_TypeError, "neib_shape[0]=%%d, neib_step[0]=%%d and ten4.shape[2]=%%d not consistent",
c, step_x, %(ten4)s->dimensions[2]);
%(fail)s; %(fail)s;
} }
if ( (%(ten4)s->dimensions)[3] %% d != 0) if ( ((%(ten4)s->dimensions)[3] < d) ||( (((%(ten4)s->dimensions)[3]-d) %% step_y)!=0))
{ {
PyErr_Format(PyExc_TypeError, "neib_shape[1] must divide ten4.shape[3]"); PyErr_Format(PyExc_TypeError, "neib_shape[1]=%%d, neib_step[1]=%%d and ten4.shape[3]=%%d not consistent",
d, step_y, %(ten4)s->dimensions[3]);
%(fail)s; %(fail)s;
} }
const int grid_c = 1+(((%(ten4)s->dimensions)[2]-c)/step_x); //number of patch in height
const int grid_d = 1+(((%(ten4)s->dimensions)[3]-d)/step_y); //number of patch in width
// new dimensions for z // new dimensions for z
const npy_intp z_dim1 = c * d; const npy_intp z_dim1 = c * d;
const npy_intp z_dim0 = (%(ten4)s->dimensions)[2] / c const npy_intp z_dim0 = grid_c
* (%(ten4)s->dimensions)[3] / d * grid_d
* (%(ten4)s->dimensions)[1] * (%(ten4)s->dimensions)[1]
* (%(ten4)s->dimensions)[0]; * (%(ten4)s->dimensions)[0];
...@@ -101,26 +134,29 @@ class Images2Neibs(Op): ...@@ -101,26 +134,29 @@ class Images2Neibs(Op):
const int width = (%(ten4)s->dimensions)[3]; const int width = (%(ten4)s->dimensions)[3];
// (c,d) = neib_shape // (c,d) = neib_shape
const int c = (int) *(dtype_%(neib_shape)s*) PyArray_GETPTR1(%(neib_shape)s, 0); const npy_intp c = (npy_intp) *(dtype_%(neib_shape)s*) PyArray_GETPTR1(%(neib_shape)s, 0);
const int d = (int) *(dtype_%(neib_shape)s*) PyArray_GETPTR1(%(neib_shape)s, 1); const npy_intp d = (npy_intp) *(dtype_%(neib_shape)s*) PyArray_GETPTR1(%(neib_shape)s, 1);
// (step_x,step_y) = neib_step
const npy_intp step_x = (npy_intp) *(dtype_%(neib_step)s*) PyArray_GETPTR1(%(neib_step)s, 0);
const npy_intp step_y = (npy_intp) *(dtype_%(neib_step)s*) PyArray_GETPTR1(%(neib_step)s, 1);
const int grid_c = 1+(((%(ten4)s->dimensions)[2]-c)/step_x); //number of patch in height
const int grid_d = 1+(((%(ten4)s->dimensions)[3]-d)/step_y); //number of patch in width
const int grid_c = height/c;
const int grid_d = width/d;
// Oh this is messed up... // Oh this is messed up...
for (int n = 0; n < nb_batch; n++) // loop over batches for (int n = 0; n < nb_batch; n++) // loop over batches
for (int s = 0; s < nb_stack; s++) // loop over stacks for (int s = 0; s < nb_stack; s++) // loop over stacks
for (int a = 0; a < grid_c; a++) // loop over height/c for (int a = 0; a < grid_c; a++) // loop over the number of patch in height
for (int b = 0; b < grid_d; b++) // loop over width/d for (int b = 0; b < grid_d; b++) // loop over the number of patch in width
{ {
int z_row = b + grid_d*(a + grid_c*(s + nb_stack*n)); int z_row = b + grid_d*(a + grid_c*(s + nb_stack*n));
for (int i = 0; i < c; i++) // loop over c for (int i = 0; i < c; i++) // loop over c
{ {
int ten4_2 = i + a * c; int ten4_2 = i + a * step_x;
for (int j = 0; j < d; j++) // loop over d for (int j = 0; j < d; j++) // loop over d
{ {
int ten4_3 = j + b * d; int ten4_3 = j + b * step_y;
int z_col = j + d * i; int z_col = j + d * i;
dtype_%(z)s* curr_z = (dtype_%(z)s*) PyArray_GETPTR2(%(z)s, z_row, z_col); dtype_%(z)s* curr_z = (dtype_%(z)s*) PyArray_GETPTR2(%(z)s, z_row, z_col);
...@@ -156,7 +192,9 @@ def neibs2images(neibs, neib_shape, original_shape): ...@@ -156,7 +192,9 @@ def neibs2images(neibs, neib_shape, original_shape):
# This is work in progress # This is work in progress
class GpuImages2Neibs(Images2Neibs): class GpuImages2Neibs(Images2Neibs):
def make_node(self, ten4, neib_shape): def make_node(self, ten4, neib_shape, neib_step):
if neib_shape!=neib_step:
raise NotImplementedError("neib_step not implemented now on the gpu")
assert ten4.dtype == 'float32' assert ten4.dtype == 'float32'
#assert neib_shape.dtype == 'float32' #assert neib_shape.dtype == 'float32'
if not isinstance(ten4.type, CudaNdarrayType): if not isinstance(ten4.type, CudaNdarrayType):
...@@ -169,6 +207,7 @@ class GpuImages2Neibs(Images2Neibs): ...@@ -169,6 +207,7 @@ class GpuImages2Neibs(Images2Neibs):
dtype=ten4.type.dtype)()]) dtype=ten4.type.dtype)()])
def c_code_cache_version(self):
    """
    Return the version tag of the generated GPU code: an empty tuple.

    NOTE(review): an empty tuple presumably tells Theano not to cache the
    compiled module while this op is work in progress -- confirm against
    the Op documentation. The tag used before this change was (2,).
    """
    # The former `return (2,)` that followed this statement could never
    # execute, so it has been removed.
    return ()
def c_support_code_apply(self, node, nodename): def c_support_code_apply(self, node, nodename):
...@@ -368,7 +407,7 @@ gpu_images2neibs = GpuImages2Neibs() ...@@ -368,7 +407,7 @@ gpu_images2neibs = GpuImages2Neibs()
@local_optimizer()
def use_gpu_images2neibs(node):
    """
    Local optimizer replacing a host images2neibs node by its GPU version.

    :param node: the Apply node under consideration.
    :return: a one-element list holding the replacement output (computed
             by gpu_images2neibs and transferred back with host_from_gpu)
             when node.op is images2neibs; None otherwise.
    """
    if node.op == images2neibs:
        # Only the images are moved to the GPU; neib_shape and neib_step
        # are handed to the GPU op unchanged.
        gpu_images = gpu_from_host(node.inputs[0])
        gpu_neibs = gpu_images2neibs(gpu_images,
                                     node.inputs[1],
                                     node.inputs[2])
        return [host_from_gpu(gpu_neibs)]


# Register the optimization only when CUDA is available.
if cuda_available:
    register_gpu_opt()(use_gpu_images2neibs)
......
...@@ -29,6 +29,116 @@ def test_neibs(): ...@@ -29,6 +29,116 @@ def test_neibs():
#print g() #print g()
assert numpy.allclose(images.value,g()) assert numpy.allclose(images.value,g())
def test_neibs_bad_shape():
    """
    images2neibs must raise TypeError when neib_shape does not fit the image.

    Both patch shapes tried here contain a 3, which is not consistent with
    the 10x10 images when the step defaults to the patch shape.
    """
    shape = (2, 3, 10, 10)
    for bad_neib_shape in ((3, 2), (2, 3)):
        images = shared(numpy.arange(numpy.prod(shape)).reshape(shape))
        neib_shape = T.as_tensor_variable(bad_neib_shape)
        try:
            f = function([], images2neibs(images, neib_shape),
                         mode=mode_without_gpu)
            f()
            # Reaching this line means no error was raised; AssertionError
            # is not a TypeError, so it propagates and fails the test.
            assert False, "An error was expected"
        except TypeError:
            pass
def test_neibs_manual():
    """
    Compare images2neibs with a hand-written result for disjoint 2x2
    patches of 4x4 images, then check that neibs2images rebuilds the images.
    """
    shape = (2, 3, 4, 4)
    images = shared(numpy.arange(numpy.prod(shape)).reshape(shape))
    neib_shape = T.as_tensor_variable((2, 2))

    # One row per patch, each row listing the 4 values of that patch.
    expected = [
        [0, 1, 4, 5],
        [2, 3, 6, 7],
        [8, 9, 12, 13],
        [10, 11, 14, 15],
        [16, 17, 20, 21],
        [18, 19, 22, 23],
        [24, 25, 28, 29],
        [26, 27, 30, 31],
        [32, 33, 36, 37],
        [34, 35, 38, 39],
        [40, 41, 44, 45],
        [42, 43, 46, 47],
        [48, 49, 52, 53],
        [50, 51, 54, 55],
        [56, 57, 60, 61],
        [58, 59, 62, 63],
        [64, 65, 68, 69],
        [66, 67, 70, 71],
        [72, 73, 76, 77],
        [74, 75, 78, 79],
        [80, 81, 84, 85],
        [82, 83, 86, 87],
        [88, 89, 92, 93],
        [90, 91, 94, 95],
    ]

    f = function([], images2neibs(images, neib_shape), mode=mode_without_gpu)
    neibs = f()
    print(neibs)
    assert numpy.allclose(neibs, expected)

    # Round trip: the extracted patches must give back the original images.
    g = function([], neibs2images(neibs, neib_shape, images.shape),
                 mode=mode_without_gpu)
    print(g())
    assert numpy.allclose(images.value, g())
def test_neibs_manual_step():
    """
    Compare images2neibs with a hand-written result for overlapping
    patches: 3x3 patches taken every 2 rows and 2 columns of 5x5 images.

    The check runs without the GPU and, when CUDA is available, with the
    GPU mode as well.
    """
    shape = (2, 3, 5, 5)
    images = shared(numpy.asarray(
        numpy.arange(numpy.prod(shape)).reshape(shape), dtype='float32'))
    neib_shape = T.as_tensor_variable((3, 3))
    neib_step = T.as_tensor_variable((2, 2))

    # One row per patch, each row listing the 9 values of that patch.
    expected = [
        [0, 1, 2, 5, 6, 7, 10, 11, 12],
        [2, 3, 4, 7, 8, 9, 12, 13, 14],
        [10, 11, 12, 15, 16, 17, 20, 21, 22],
        [12, 13, 14, 17, 18, 19, 22, 23, 24],
        [25, 26, 27, 30, 31, 32, 35, 36, 37],
        [27, 28, 29, 32, 33, 34, 37, 38, 39],
        [35, 36, 37, 40, 41, 42, 45, 46, 47],
        [37, 38, 39, 42, 43, 44, 47, 48, 49],
        [50, 51, 52, 55, 56, 57, 60, 61, 62],
        [52, 53, 54, 57, 58, 59, 62, 63, 64],
        [60, 61, 62, 65, 66, 67, 70, 71, 72],
        [62, 63, 64, 67, 68, 69, 72, 73, 74],
        [75, 76, 77, 80, 81, 82, 85, 86, 87],
        [77, 78, 79, 82, 83, 84, 87, 88, 89],
        [85, 86, 87, 90, 91, 92, 95, 96, 97],
        [87, 88, 89, 92, 93, 94, 97, 98, 99],
        [100, 101, 102, 105, 106, 107, 110, 111, 112],
        [102, 103, 104, 107, 108, 109, 112, 113, 114],
        [110, 111, 112, 115, 116, 117, 120, 121, 122],
        [112, 113, 114, 117, 118, 119, 122, 123, 124],
        [125, 126, 127, 130, 131, 132, 135, 136, 137],
        [127, 128, 129, 132, 133, 134, 137, 138, 139],
        [135, 136, 137, 140, 141, 142, 145, 146, 147],
        [137, 138, 139, 142, 143, 144, 147, 148, 149],
    ]

    modes = [mode_without_gpu]
    if cuda.cuda_available:
        modes.append(mode_with_gpu)

    for mode in modes:
        f = function([], images2neibs(images, neib_shape, neib_step),
                     mode=mode)
        neibs = f()
        print(neibs)
        assert numpy.allclose(neibs, expected)

    # NOTE: the neibs2images round trip is deliberately not checked here;
    # the corresponding lines were left disabled in this test:
    #g = function([], neibs2images(neibs, neib_shape, images.shape), mode=mode_without_gpu)
    #print g()
    #assert numpy.allclose(images.value,g())
def test_neibs_gpu(): def test_neibs_gpu():
if cuda.cuda_available == False: if cuda.cuda_available == False:
raise SkipTest('Optional package cuda disabled') raise SkipTest('Optional package cuda disabled')
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论