提交 5b989fa1 作者: Frederic Bastien

implement neib_step for GpuImages2Neibs

上级 44734e53
...@@ -235,13 +235,15 @@ class GpuImages2Neibs(Images2Neibs): ...@@ -235,13 +235,15 @@ class GpuImages2Neibs(Images2Neibs):
self.mode = mode self.mode = mode
def make_node(self, ten4, neib_shape, neib_step): def make_node(self, ten4, neib_shape, neib_step):
if neib_shape!=neib_step:
raise NotImplementedError("neib_step not implemented now on the gpu")
assert ten4.dtype == 'float32' assert ten4.dtype == 'float32'
if not isinstance(ten4.type, CudaNdarrayType): if not isinstance(ten4.type, CudaNdarrayType):
raise TypeError('ten4 must be cudandarray', ten4) raise TypeError('ten4 must be cudandarray', ten4)
return Apply(self, [ten4, neib_shape], [CudaNdarrayType(broadcastable=(False,False), assert ten4.ndim==4
assert neib_shape.ndim==1
assert neib_step.ndim==1
return Apply(self, [ten4, neib_shape, neib_step], [CudaNdarrayType(broadcastable=(False,False),
dtype=ten4.type.dtype)()]) dtype=ten4.type.dtype)()])
def c_code_cache_version(self): def c_code_cache_version(self):
...@@ -258,6 +260,8 @@ class GpuImages2Neibs(Images2Neibs): ...@@ -258,6 +260,8 @@ class GpuImages2Neibs(Images2Neibs):
const int width, const int width,
const int c, const int c,
const int d, const int d,
const int step_x,
const int step_y,
const int grid_c, const int grid_c,
const int grid_d, const int grid_d,
const int stride0, const int stride1, const int stride2, const int stride3, const int stride0, const int stride1, const int stride2, const int stride3,
...@@ -282,10 +286,10 @@ class GpuImages2Neibs(Images2Neibs): ...@@ -282,10 +286,10 @@ class GpuImages2Neibs(Images2Neibs):
int z_row = b + grid_d*(a + grid_c*(s + nb_stack*n)); int z_row = b + grid_d*(a + grid_c*(s + nb_stack*n));
for (int i = 0; i < c; i++) // loop over c for (int i = 0; i < c; i++) // loop over c
{ {
int ten4_2 = i + a * c; int ten4_2 = i + a * step_x;
for (int j = threadIdx.x; j < d; j+=blockDim.x) // loop over d for (int j = threadIdx.x; j < d; j+=blockDim.x) // loop over d
{ {
int ten4_3 = j + b * d; int ten4_3 = j + b * step_y;
//int ten4_idx = ten4_3 + width*(ten4_2 + height*(s +nb_stack*n)); //int ten4_idx = ten4_3 + width*(ten4_2 + height*(s +nb_stack*n));
//int ten4_idx = stride3*ten4_3 + stride2*(ten4_2 + stride1*(s + stride0*n)); //int ten4_idx = stride3*ten4_3 + stride2*(ten4_2 + stride1*(s + stride0*n));
int ten4_idx = stride3*ten4_3 + stride2*ten4_2 + stride1*s + stride0*n; int ten4_idx = stride3*ten4_3 + stride2*ten4_2 + stride1*s + stride0*n;
...@@ -308,6 +312,8 @@ class GpuImages2Neibs(Images2Neibs): ...@@ -308,6 +312,8 @@ class GpuImages2Neibs(Images2Neibs):
const int width, const int width,
const int c, const int c,
const int d, const int d,
const int step_x,
const int step_y,
const int grid_c, const int grid_c,
const int grid_d, const int grid_d,
const int stride0, const int stride1, const int stride2, const int stride3, const int stride0, const int stride1, const int stride2, const int stride3,
...@@ -334,14 +340,14 @@ class GpuImages2Neibs(Images2Neibs): ...@@ -334,14 +340,14 @@ class GpuImages2Neibs(Images2Neibs):
int z_row = b + grid_d*(a + grid_c*(s + nb_stack*n)); int z_row = b + grid_d*(a + grid_c*(s + nb_stack*n));
for (int i = 0; i < c; i++) // loop over c for (int i = 0; i < c; i++) // loop over c
{ {
int ten4_2 = i + a * c; int ten4_2 = i + a * step_x;
ten4_2 -= wrap_centered_idx_shift_x; ten4_2 -= wrap_centered_idx_shift_x;
if ( ten4_2 < 0 ) ten4_2 += height; if ( ten4_2 < 0 ) ten4_2 += height;
else if (ten4_2 >= height) ten4_2 -= height; else if (ten4_2 >= height) ten4_2 -= height;
for (int j = threadIdx.x; j < d; j+=blockDim.x) // loop over d for (int j = threadIdx.x; j < d; j+=blockDim.x) // loop over d
{ {
int ten4_3 = j + b * d; int ten4_3 = j + b * step_y;
ten4_3 -= wrap_centered_idx_shift_y; ten4_3 -= wrap_centered_idx_shift_y;
if ( ten4_3 < 0 ) ten4_3 += width; if ( ten4_3 < 0 ) ten4_3 += width;
else if (ten4_3 >= width) ten4_3 -= width; else if (ten4_3 >= width) ten4_3 -= width;
...@@ -360,7 +366,7 @@ class GpuImages2Neibs(Images2Neibs): ...@@ -360,7 +366,7 @@ class GpuImages2Neibs(Images2Neibs):
""" % locals() """ % locals()
def c_code(self, node, name, (ten4, neib_shape), (z,), sub): def c_code(self, node, name, (ten4, neib_shape, neib_step), (z,), sub):
fail = sub['fail'] fail = sub['fail']
mode = self.mode mode = self.mode
return """ return """
...@@ -387,8 +393,8 @@ class GpuImages2Neibs(Images2Neibs): ...@@ -387,8 +393,8 @@ class GpuImages2Neibs(Images2Neibs):
const int c = *(dtype_%(neib_shape)s*) PyArray_GETPTR1(%(neib_shape)s, 0); const int c = *(dtype_%(neib_shape)s*) PyArray_GETPTR1(%(neib_shape)s, 0);
const int d = *(dtype_%(neib_shape)s*) PyArray_GETPTR1(%(neib_shape)s, 1); const int d = *(dtype_%(neib_shape)s*) PyArray_GETPTR1(%(neib_shape)s, 1);
const int step_x = c;//will change when we implement neib_step const npy_intp step_x = (npy_intp) *(dtype_%(neib_step)s*) PyArray_GETPTR1(%(neib_step)s, 0);
const int step_y = d;//will change when we implement neib_step const npy_intp step_y = (npy_intp) *(dtype_%(neib_step)s*) PyArray_GETPTR1(%(neib_step)s, 1);
if ( "%(mode)s" == "wrap_centered") { if ( "%(mode)s" == "wrap_centered") {
if (c%%2!=1 || d%%2!=1){ if (c%%2!=1 || d%%2!=1){
...@@ -406,14 +412,16 @@ class GpuImages2Neibs(Images2Neibs): ...@@ -406,14 +412,16 @@ class GpuImages2Neibs(Images2Neibs):
grid_c = ((CudaNdarray_HOST_DIMS(%(ten4)s))[2])/step_x + ((((CudaNdarray_HOST_DIMS(%(ten4)s))[2])%%step_x)? 1:0); grid_c = ((CudaNdarray_HOST_DIMS(%(ten4)s))[2])/step_x + ((((CudaNdarray_HOST_DIMS(%(ten4)s))[2])%%step_x)? 1:0);
grid_d = ((CudaNdarray_HOST_DIMS(%(ten4)s))[3])/step_y + ((((CudaNdarray_HOST_DIMS(%(ten4)s))[3])%%step_y)? 1:0); grid_d = ((CudaNdarray_HOST_DIMS(%(ten4)s))[3])/step_y + ((((CudaNdarray_HOST_DIMS(%(ten4)s))[3])%%step_y)? 1:0);
}else if ( "%(mode)s" == "valid") { }else if ( "%(mode)s" == "valid") {
if ( CudaNdarray_HOST_DIMS(%(ten4)s)[2] %% c != 0) if ( ((CudaNdarray_HOST_DIMS(%(ten4)s))[2] < c) ||( (((CudaNdarray_HOST_DIMS(%(ten4)s))[2]-c) %% step_x)!=0))
{ {
PyErr_Format(PyExc_TypeError, "neib_shape[0] must divide ten4.shape[2]"); PyErr_Format(PyExc_TypeError, "neib_shape[0]=%%d, neib_step[0]=%%d and ten4.shape[2]=%%d not consistent",
c, step_x, CudaNdarray_HOST_DIMS(%(ten4)s)[2]);
%(fail)s; %(fail)s;
} }
if ( CudaNdarray_HOST_DIMS(%(ten4)s)[3] %% d != 0) if ( ((CudaNdarray_HOST_DIMS(%(ten4)s))[3] < d) ||( (((CudaNdarray_HOST_DIMS(%(ten4)s))[3]-d) %% step_y)!=0))
{ {
PyErr_Format(PyExc_TypeError, "neib_shape[1] must divide ten4.shape[3]"); PyErr_Format(PyExc_TypeError, "neib_shape[1]=%%d, neib_step[1]=%%d and ten4.shape[3]=%%d not consistent",
d, step_y, CudaNdarray_HOST_DIMS(%(ten4)s)[3]);
%(fail)s; %(fail)s;
} }
grid_c = 1+(((CudaNdarray_HOST_DIMS(%(ten4)s))[2]-c)/step_x); //number of patch in height grid_c = 1+(((CudaNdarray_HOST_DIMS(%(ten4)s))[2]-c)/step_x); //number of patch in height
...@@ -457,6 +465,8 @@ class GpuImages2Neibs(Images2Neibs): ...@@ -457,6 +465,8 @@ class GpuImages2Neibs(Images2Neibs):
const int c = *(dtype_%(neib_shape)s*) PyArray_GETPTR1(%(neib_shape)s, 0); const int c = *(dtype_%(neib_shape)s*) PyArray_GETPTR1(%(neib_shape)s, 0);
const int d = *(dtype_%(neib_shape)s*) PyArray_GETPTR1(%(neib_shape)s, 1); const int d = *(dtype_%(neib_shape)s*) PyArray_GETPTR1(%(neib_shape)s, 1);
const npy_intp step_x = (npy_intp) *(dtype_%(neib_step)s*) PyArray_GETPTR1(%(neib_step)s, 0);
const npy_intp step_y = (npy_intp) *(dtype_%(neib_step)s*) PyArray_GETPTR1(%(neib_step)s, 1);
int nb_block; int nb_block;
if (nb_batch %% 32 == 0) if (nb_batch %% 32 == 0)
...@@ -472,7 +482,7 @@ class GpuImages2Neibs(Images2Neibs): ...@@ -472,7 +482,7 @@ class GpuImages2Neibs(Images2Neibs):
nb_batch, nb_batch,
nb_stack, nb_stack,
height, width, height, width,
c, d, c, d, step_x, step_y,
grid_c, grid_d, grid_c, grid_d,
CudaNdarray_HOST_STRIDES(%(ten4)s)[0], CudaNdarray_HOST_STRIDES(%(ten4)s)[0],
CudaNdarray_HOST_STRIDES(%(ten4)s)[1], CudaNdarray_HOST_STRIDES(%(ten4)s)[1],
......
...@@ -2,7 +2,7 @@ import numpy ...@@ -2,7 +2,7 @@ import numpy
import theano import theano
from theano import shared, function from theano import shared, function
import theano.tensor as T import theano.tensor as T
from neighbours import images2neibs, neibs2images, GpuImages2Neibs from neighbours import images2neibs, neibs2images, Images2Neibs, GpuImages2Neibs
# Skip test if cuda_ndarray is not available. # Skip test if cuda_ndarray is not available.
from nose.plugins.skip import SkipTest from nose.plugins.skip import SkipTest
import theano.sandbox.cuda as cuda import theano.sandbox.cuda as cuda
...@@ -147,7 +147,7 @@ def test_neibs_manual(): ...@@ -147,7 +147,7 @@ def test_neibs_manual():
[90, 91, 94, 95]]) [90, 91, 94, 95]])
g = function([], neibs2images(neibs, neib_shape, images.shape), mode=mode_without_gpu) g = function([], neibs2images(neibs, neib_shape, images.shape), mode=mode_without_gpu)
print g() #print g()
assert numpy.allclose(images.value,g()) assert numpy.allclose(images.value,g())
...@@ -159,12 +159,16 @@ def test_neibs_step_manual(): ...@@ -159,12 +159,16 @@ def test_neibs_step_manual():
modes = [mode_without_gpu] modes = [mode_without_gpu]
if cuda.cuda_available: if cuda.cuda_available:
modes.append(mode_with_gpu) modes.append(mode_with_gpu)
for mode in modes: for mode_idx,mode in enumerate(modes):
f = function([], images2neibs(images, neib_shape, neib_step), mode=mode) f = function([], images2neibs(images, neib_shape, neib_step), mode=mode)
#print images.value #print images.value
neibs = f() neibs = f()
print neibs if mode_idx==0:
assert Images2Neibs in [type(node.op) for node in f.maker.env.toposort()]
elif mode_idx==1:
assert GpuImages2Neibs in [type(node.op) for node in f.maker.env.toposort()]
assert numpy.allclose(neibs, assert numpy.allclose(neibs,
[[ 0, 1, 2, 5, 6, 7, 10, 11, 12], [[ 0, 1, 2, 5, 6, 7, 10, 11, 12],
[ 2, 3, 4, 7, 8, 9, 12, 13, 14], [ 2, 3, 4, 7, 8, 9, 12, 13, 14],
...@@ -196,23 +200,12 @@ def test_neibs_step_manual(): ...@@ -196,23 +200,12 @@ def test_neibs_step_manual():
#assert numpy.allclose(images.value,g()) #assert numpy.allclose(images.value,g())
def test_neibs_wrap_centered_step_manual(): def test_neibs_wrap_centered_step_manual():
shape = (2,3,5,5)
images = shared(numpy.asarray(numpy.arange(numpy.prod(shape)).reshape(shape),dtype='float32'))
neib_shape = T.as_tensor_variable((3,3))
neib_step = T.as_tensor_variable((2,2))
neib_step = neib_shape
modes = [mode_without_gpu] modes = [mode_without_gpu]
if cuda.cuda_available: if cuda.cuda_available:
modes.append(mode_with_gpu) modes.append(mode_with_gpu)
for mode_idx,mode in enumerate(modes): expected1 = [[24, 20, 21, 4, 0, 1, 9, 5, 6],
f = function([], images2neibs(images, neib_shape, neib_step, mode="wrap_centered"), mode=mode)
neibs = f()
print repr(neibs)
print neibs.shape
print images.value
expected1 = numpy.asarray([[24, 20, 21, 4, 0, 1, 9, 5, 6],
[21, 22, 23, 1, 2, 3, 6, 7, 8], [21, 22, 23, 1, 2, 3, 6, 7, 8],
[23, 24, 20, 3, 4, 0, 8, 9, 5], [23, 24, 20, 3, 4, 0, 8, 9, 5],
[ 9, 5, 6, 14, 10, 11, 19, 15, 16], [ 9, 5, 6, 14, 10, 11, 19, 15, 16],
...@@ -220,19 +213,65 @@ def test_neibs_wrap_centered_step_manual(): ...@@ -220,19 +213,65 @@ def test_neibs_wrap_centered_step_manual():
[ 8, 9, 5, 13, 14, 10, 18, 19, 15], [ 8, 9, 5, 13, 14, 10, 18, 19, 15],
[19, 15, 16, 24, 20, 21, 4, 0, 1], [19, 15, 16, 24, 20, 21, 4, 0, 1],
[16, 17, 18, 21, 22, 23, 1, 2, 3], [16, 17, 18, 21, 22, 23, 1, 2, 3],
[18, 19, 15, 23, 24, 20, 3, 4, 0]]) [18, 19, 15, 23, 24, 20, 3, 4, 0]]
expected2 = numpy.asarray([[ 24., 20., 21., 4., 0., 1., 9., 5., 6.], expected2 = [[ 24, 20, 21, 4, 0, 1, 9, 5, 6],
[ 22., 23., 24., 2., 3., 4., 7., 8., 9.], [ 22, 23, 24, 2, 3, 4, 7, 8, 9],
[ 14., 10., 11., 19., 15., 16., 24., 20., 21.], [ 14, 10, 11, 19, 15, 16, 24, 20, 21],
[ 12., 13., 14., 17., 18., 19., 22., 23., 24.]]) [ 12, 13, 14, 17, 18, 19, 22, 23, 24]]
expected3 = [[ 19, 15, 16, 24, 20, 21, 4, 0, 1, 9, 5, 6, 14, 10, 11],
[ 17, 18, 19, 22, 23, 24, 2, 3, 4, 7, 8, 9, 12, 13, 14],
[ 9, 5, 6, 14, 10, 11, 19, 15, 16, 24, 20, 21, 4, 0, 1],
[ 7, 8, 9, 12, 13, 14, 17, 18, 19, 22, 23, 24, 2, 3, 4]]
expected4 = [[ 23, 24, 20, 21, 22, 3, 4, 0, 1, 2, 8, 9, 5, 6, 7],
[ 21, 22, 23, 24, 20, 1, 2, 3, 4, 0, 6, 7, 8, 9, 5],
[ 13, 14, 10, 11, 12, 18, 19, 15, 16, 17, 23, 24, 20, 21, 22],
[ 11, 12, 13, 14, 10, 16, 17, 18, 19, 15, 21, 22, 23, 24, 20]]
expected5 = [[ 24, 20, 21, 4, 0, 1, 9, 5, 6],
[ 22, 23, 24, 2, 3, 4, 7, 8, 9],
[ 9, 5, 6, 14, 10, 11, 19, 15, 16],
[ 7, 8, 9, 12, 13, 14, 17, 18, 19],
[ 19, 15, 16, 24, 20, 21, 4, 0, 1],
[ 17, 18, 19, 22, 23, 24, 2, 3, 4]]
expected6 = [[ 24, 20, 21, 4, 0, 1, 9, 5, 6],
[ 21, 22, 23, 1, 2, 3, 6, 7, 8],
[ 23, 24, 20, 3, 4, 0, 8, 9, 5],
[ 14, 10, 11, 19, 15, 16, 24, 20, 21],
[ 11, 12, 13, 16, 17, 18, 21, 22, 23],
[ 13, 14, 10, 18, 19, 15, 23, 24, 20]]
#TODO test discontinuous image
for shp_idx,(shape,neib_shape,neib_step,expected) in enumerate([
[(7,8,5,5),(3,3),(2,2),expected1],
[(7,8,5,5),(3,3),(3,3),expected2],
[(7,8,5,5),(5,3),(3,3),expected3],
[(7,8,5,5),(3,5),(3,3),expected4],
[(80,90,5,5),(3,3),(2,3),expected5],
[(1025,9,5,5),(3,3),(3,2),expected6],
[(1,1,5,1035),(3,3),(3,3),None],
[(1,1,1045,5),(3,3),(3,3),None],
]):
images = shared(numpy.asarray(numpy.arange(numpy.prod(shape)).reshape(shape),dtype='float32'))
neib_shape = T.as_tensor_variable(neib_shape)
neib_step = T.as_tensor_variable(neib_step)
expected = numpy.asarray(expected)
for mode_idx,mode in enumerate(modes):
f = function([], images2neibs(images, neib_shape, neib_step, mode="wrap_centered"), mode=mode)
neibs = f()
expected = expected2 if expected.size>1:
for i in range(shape[0]*shape[1]): for i in range(shape[0]*shape[1]):
assert numpy.allclose(neibs[i*expected.shape[0]:(i+1)*expected.shape[0],:],expected+25*i), mode_idx assert numpy.allclose(neibs[i*expected.shape[0]:(i+1)*expected.shape[0],:],expected+25*i), mode_idx
if mode_idx==0:
assert Images2Neibs in [type(node.op) for node in f.maker.env.toposort()]
elif mode_idx==1:
assert GpuImages2Neibs in [type(node.op) for node in f.maker.env.toposort()]
#g = function([], neibs2images(neibs, neib_shape, images.shape), mode=mode_without_gpu) #g = function([], neibs2images(neibs, neib_shape, images.shape), mode=mode_without_gpu)
#print g()
#assert numpy.allclose(images.value,g()) #assert numpy.allclose(images.value,g())
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论