提交 5b989fa1 作者: Frederic Bastien

implement neib_step for GpuImages2Neibs

上级 44734e53
......@@ -235,13 +235,15 @@ class GpuImages2Neibs(Images2Neibs):
self.mode = mode
def make_node(self, ten4, neib_shape, neib_step):
if neib_shape!=neib_step:
raise NotImplementedError("neib_step not implemented now on the gpu")
assert ten4.dtype == 'float32'
if not isinstance(ten4.type, CudaNdarrayType):
raise TypeError('ten4 must be cudandarray', ten4)
return Apply(self, [ten4, neib_shape], [CudaNdarrayType(broadcastable=(False,False),
assert ten4.ndim==4
assert neib_shape.ndim==1
assert neib_step.ndim==1
return Apply(self, [ten4, neib_shape, neib_step], [CudaNdarrayType(broadcastable=(False,False),
dtype=ten4.type.dtype)()])
def c_code_cache_version(self):
......@@ -258,6 +260,8 @@ class GpuImages2Neibs(Images2Neibs):
const int width,
const int c,
const int d,
const int step_x,
const int step_y,
const int grid_c,
const int grid_d,
const int stride0, const int stride1, const int stride2, const int stride3,
......@@ -282,10 +286,10 @@ class GpuImages2Neibs(Images2Neibs):
int z_row = b + grid_d*(a + grid_c*(s + nb_stack*n));
for (int i = 0; i < c; i++) // loop over c
{
int ten4_2 = i + a * c;
int ten4_2 = i + a * step_x;
for (int j = threadIdx.x; j < d; j+=blockDim.x) // loop over d
{
int ten4_3 = j + b * d;
int ten4_3 = j + b * step_y;
//int ten4_idx = ten4_3 + width*(ten4_2 + height*(s +nb_stack*n));
//int ten4_idx = stride3*ten4_3 + stride2*(ten4_2 + stride1*(s + stride0*n));
int ten4_idx = stride3*ten4_3 + stride2*ten4_2 + stride1*s + stride0*n;
......@@ -308,6 +312,8 @@ class GpuImages2Neibs(Images2Neibs):
const int width,
const int c,
const int d,
const int step_x,
const int step_y,
const int grid_c,
const int grid_d,
const int stride0, const int stride1, const int stride2, const int stride3,
......@@ -334,14 +340,14 @@ class GpuImages2Neibs(Images2Neibs):
int z_row = b + grid_d*(a + grid_c*(s + nb_stack*n));
for (int i = 0; i < c; i++) // loop over c
{
int ten4_2 = i + a * c;
int ten4_2 = i + a * step_x;
ten4_2 -= wrap_centered_idx_shift_x;
if ( ten4_2 < 0 ) ten4_2 += height;
else if (ten4_2 >= height) ten4_2 -= height;
for (int j = threadIdx.x; j < d; j+=blockDim.x) // loop over d
{
int ten4_3 = j + b * d;
int ten4_3 = j + b * step_y;
ten4_3 -= wrap_centered_idx_shift_y;
if ( ten4_3 < 0 ) ten4_3 += width;
else if (ten4_3 >= width) ten4_3 -= width;
......@@ -360,7 +366,7 @@ class GpuImages2Neibs(Images2Neibs):
""" % locals()
def c_code(self, node, name, (ten4, neib_shape), (z,), sub):
def c_code(self, node, name, (ten4, neib_shape, neib_step), (z,), sub):
fail = sub['fail']
mode = self.mode
return """
......@@ -387,8 +393,8 @@ class GpuImages2Neibs(Images2Neibs):
const int c = *(dtype_%(neib_shape)s*) PyArray_GETPTR1(%(neib_shape)s, 0);
const int d = *(dtype_%(neib_shape)s*) PyArray_GETPTR1(%(neib_shape)s, 1);
const int step_x = c;//will change when we implement neib_step
const int step_y = d;//will change when we implement neib_step
const npy_intp step_x = (npy_intp) *(dtype_%(neib_step)s*) PyArray_GETPTR1(%(neib_step)s, 0);
const npy_intp step_y = (npy_intp) *(dtype_%(neib_step)s*) PyArray_GETPTR1(%(neib_step)s, 1);
if ( "%(mode)s" == "wrap_centered") {
if (c%%2!=1 || d%%2!=1){
......@@ -405,15 +411,17 @@ class GpuImages2Neibs(Images2Neibs):
//grid_d = CEIL_INTDIV(((CudaNdarray_HOST_DIMS(%(ten4)s))[3]),step_y)
grid_c = ((CudaNdarray_HOST_DIMS(%(ten4)s))[2])/step_x + ((((CudaNdarray_HOST_DIMS(%(ten4)s))[2])%%step_x)? 1:0);
grid_d = ((CudaNdarray_HOST_DIMS(%(ten4)s))[3])/step_y + ((((CudaNdarray_HOST_DIMS(%(ten4)s))[3])%%step_y)? 1:0);
}else if ( "%(mode)s" == "valid") {
if ( CudaNdarray_HOST_DIMS(%(ten4)s)[2] %% c != 0)
}else if ( "%(mode)s" == "valid") {
if ( ((CudaNdarray_HOST_DIMS(%(ten4)s))[2] < c) ||( (((CudaNdarray_HOST_DIMS(%(ten4)s))[2]-c) %% step_x)!=0))
{
PyErr_Format(PyExc_TypeError, "neib_shape[0] must divide ten4.shape[2]");
PyErr_Format(PyExc_TypeError, "neib_shape[0]=%%d, neib_step[0]=%%d and ten4.shape[2]=%%d not consistent",
c, step_x, CudaNdarray_HOST_DIMS(%(ten4)s)[2]);
%(fail)s;
}
if ( CudaNdarray_HOST_DIMS(%(ten4)s)[3] %% d != 0)
if ( ((CudaNdarray_HOST_DIMS(%(ten4)s))[3] < d) ||( (((CudaNdarray_HOST_DIMS(%(ten4)s))[3]-d) %% step_y)!=0))
{
PyErr_Format(PyExc_TypeError, "neib_shape[1] must divide ten4.shape[3]");
PyErr_Format(PyExc_TypeError, "neib_shape[1]=%%d, neib_step[1]=%%d and ten4.shape[3]=%%d not consistent",
d, step_y, CudaNdarray_HOST_DIMS(%(ten4)s)[3]);
%(fail)s;
}
grid_c = 1+(((CudaNdarray_HOST_DIMS(%(ten4)s))[2]-c)/step_x); //number of patch in height
......@@ -457,6 +465,8 @@ class GpuImages2Neibs(Images2Neibs):
const int c = *(dtype_%(neib_shape)s*) PyArray_GETPTR1(%(neib_shape)s, 0);
const int d = *(dtype_%(neib_shape)s*) PyArray_GETPTR1(%(neib_shape)s, 1);
const npy_intp step_x = (npy_intp) *(dtype_%(neib_step)s*) PyArray_GETPTR1(%(neib_step)s, 0);
const npy_intp step_y = (npy_intp) *(dtype_%(neib_step)s*) PyArray_GETPTR1(%(neib_step)s, 1);
int nb_block;
if (nb_batch %% 32 == 0)
......@@ -472,7 +482,7 @@ class GpuImages2Neibs(Images2Neibs):
nb_batch,
nb_stack,
height, width,
c, d,
c, d, step_x, step_y,
grid_c, grid_d,
CudaNdarray_HOST_STRIDES(%(ten4)s)[0],
CudaNdarray_HOST_STRIDES(%(ten4)s)[1],
......
......@@ -2,7 +2,7 @@ import numpy
import theano
from theano import shared, function
import theano.tensor as T
from neighbours import images2neibs, neibs2images, GpuImages2Neibs
from neighbours import images2neibs, neibs2images, Images2Neibs, GpuImages2Neibs
# Skip test if cuda_ndarray is not available.
from nose.plugins.skip import SkipTest
import theano.sandbox.cuda as cuda
......@@ -147,7 +147,7 @@ def test_neibs_manual():
[90, 91, 94, 95]])
g = function([], neibs2images(neibs, neib_shape, images.shape), mode=mode_without_gpu)
print g()
#print g()
assert numpy.allclose(images.value,g())
......@@ -159,12 +159,16 @@ def test_neibs_step_manual():
modes = [mode_without_gpu]
if cuda.cuda_available:
modes.append(mode_with_gpu)
for mode in modes:
for mode_idx,mode in enumerate(modes):
f = function([], images2neibs(images, neib_shape, neib_step), mode=mode)
#print images.value
neibs = f()
print neibs
if mode_idx==0:
assert Images2Neibs in [type(node.op) for node in f.maker.env.toposort()]
elif mode_idx==1:
assert GpuImages2Neibs in [type(node.op) for node in f.maker.env.toposort()]
assert numpy.allclose(neibs,
[[ 0, 1, 2, 5, 6, 7, 10, 11, 12],
[ 2, 3, 4, 7, 8, 9, 12, 13, 14],
......@@ -196,44 +200,79 @@ def test_neibs_step_manual():
#assert numpy.allclose(images.value,g())
def test_neibs_wrap_centered_step_manual():
shape = (2,3,5,5)
images = shared(numpy.asarray(numpy.arange(numpy.prod(shape)).reshape(shape),dtype='float32'))
neib_shape = T.as_tensor_variable((3,3))
neib_step = T.as_tensor_variable((2,2))
neib_step = neib_shape
modes = [mode_without_gpu]
if cuda.cuda_available:
modes.append(mode_with_gpu)
for mode_idx,mode in enumerate(modes):
f = function([], images2neibs(images, neib_shape, neib_step, mode="wrap_centered"), mode=mode)
neibs = f()
print repr(neibs)
print neibs.shape
print images.value
expected1 = numpy.asarray([[24, 20, 21, 4, 0, 1, 9, 5, 6],
[21, 22, 23, 1, 2, 3, 6, 7, 8],
[23, 24, 20, 3, 4, 0, 8, 9, 5],
[ 9, 5, 6, 14, 10, 11, 19, 15, 16],
[ 6, 7, 8, 11, 12, 13, 16, 17, 18],
[ 8, 9, 5, 13, 14, 10, 18, 19, 15],
[19, 15, 16, 24, 20, 21, 4, 0, 1],
[16, 17, 18, 21, 22, 23, 1, 2, 3],
[18, 19, 15, 23, 24, 20, 3, 4, 0]])
expected2 = numpy.asarray([[ 24., 20., 21., 4., 0., 1., 9., 5., 6.],
[ 22., 23., 24., 2., 3., 4., 7., 8., 9.],
[ 14., 10., 11., 19., 15., 16., 24., 20., 21.],
[ 12., 13., 14., 17., 18., 19., 22., 23., 24.]])
expected = expected2
for i in range(shape[0]*shape[1]):
assert numpy.allclose(neibs[i*expected.shape[0]:(i+1)*expected.shape[0],:],expected+25*i), mode_idx
#g = function([], neibs2images(neibs, neib_shape, images.shape), mode=mode_without_gpu)
expected1 = [[24, 20, 21, 4, 0, 1, 9, 5, 6],
[21, 22, 23, 1, 2, 3, 6, 7, 8],
[23, 24, 20, 3, 4, 0, 8, 9, 5],
[ 9, 5, 6, 14, 10, 11, 19, 15, 16],
[ 6, 7, 8, 11, 12, 13, 16, 17, 18],
[ 8, 9, 5, 13, 14, 10, 18, 19, 15],
[19, 15, 16, 24, 20, 21, 4, 0, 1],
[16, 17, 18, 21, 22, 23, 1, 2, 3],
[18, 19, 15, 23, 24, 20, 3, 4, 0]]
expected2 = [[ 24, 20, 21, 4, 0, 1, 9, 5, 6],
[ 22, 23, 24, 2, 3, 4, 7, 8, 9],
[ 14, 10, 11, 19, 15, 16, 24, 20, 21],
[ 12, 13, 14, 17, 18, 19, 22, 23, 24]]
expected3 = [[ 19, 15, 16, 24, 20, 21, 4, 0, 1, 9, 5, 6, 14, 10, 11],
[ 17, 18, 19, 22, 23, 24, 2, 3, 4, 7, 8, 9, 12, 13, 14],
[ 9, 5, 6, 14, 10, 11, 19, 15, 16, 24, 20, 21, 4, 0, 1],
[ 7, 8, 9, 12, 13, 14, 17, 18, 19, 22, 23, 24, 2, 3, 4]]
expected4 = [[ 23, 24, 20, 21, 22, 3, 4, 0, 1, 2, 8, 9, 5, 6, 7],
[ 21, 22, 23, 24, 20, 1, 2, 3, 4, 0, 6, 7, 8, 9, 5],
[ 13, 14, 10, 11, 12, 18, 19, 15, 16, 17, 23, 24, 20, 21, 22],
[ 11, 12, 13, 14, 10, 16, 17, 18, 19, 15, 21, 22, 23, 24, 20]]
expected5 = [[ 24, 20, 21, 4, 0, 1, 9, 5, 6],
[ 22, 23, 24, 2, 3, 4, 7, 8, 9],
[ 9, 5, 6, 14, 10, 11, 19, 15, 16],
[ 7, 8, 9, 12, 13, 14, 17, 18, 19],
[ 19, 15, 16, 24, 20, 21, 4, 0, 1],
[ 17, 18, 19, 22, 23, 24, 2, 3, 4]]
expected6 = [[ 24, 20, 21, 4, 0, 1, 9, 5, 6],
[ 21, 22, 23, 1, 2, 3, 6, 7, 8],
[ 23, 24, 20, 3, 4, 0, 8, 9, 5],
[ 14, 10, 11, 19, 15, 16, 24, 20, 21],
[ 11, 12, 13, 16, 17, 18, 21, 22, 23],
[ 13, 14, 10, 18, 19, 15, 23, 24, 20]]
#TODO test discontinuous image
for shp_idx,(shape,neib_shape,neib_step,expected) in enumerate([
[(7,8,5,5),(3,3),(2,2),expected1],
[(7,8,5,5),(3,3),(3,3),expected2],
[(7,8,5,5),(5,3),(3,3),expected3],
[(7,8,5,5),(3,5),(3,3),expected4],
[(80,90,5,5),(3,3),(2,3),expected5],
[(1025,9,5,5),(3,3),(3,2),expected6],
[(1,1,5,1035),(3,3),(3,3),None],
[(1,1,1045,5),(3,3),(3,3),None],
]):
images = shared(numpy.asarray(numpy.arange(numpy.prod(shape)).reshape(shape),dtype='float32'))
neib_shape = T.as_tensor_variable(neib_shape)
neib_step = T.as_tensor_variable(neib_step)
expected = numpy.asarray(expected)
for mode_idx,mode in enumerate(modes):
f = function([], images2neibs(images, neib_shape, neib_step, mode="wrap_centered"), mode=mode)
neibs = f()
if expected.size>1:
for i in range(shape[0]*shape[1]):
assert numpy.allclose(neibs[i*expected.shape[0]:(i+1)*expected.shape[0],:],expected+25*i), mode_idx
if mode_idx==0:
assert Images2Neibs in [type(node.op) for node in f.maker.env.toposort()]
elif mode_idx==1:
assert GpuImages2Neibs in [type(node.op) for node in f.maker.env.toposort()]
#g = function([], neibs2images(neibs, neib_shape, images.shape), mode=mode_without_gpu)
#print g()
#assert numpy.allclose(images.value,g())
#assert numpy.allclose(images.value,g())
def test_neibs_gpu():
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论