提交 be3fee10 作者: Olivier Delalleau

Merge pull request #225 from nouiz/fix_neibs

Fix neibs
import theano import theano
from theano import Op, Apply from theano import Op, Apply
import theano.tensor as T import theano.tensor as T
from theano.tensor.opt import register_specialize
from theano.gof import local_optimizer from theano.gof import local_optimizer
from theano.sandbox.cuda import cuda_available from theano.sandbox.cuda import cuda_available
...@@ -10,6 +9,13 @@ if cuda_available: ...@@ -10,6 +9,13 @@ if cuda_available:
from theano.sandbox.cuda.basic_ops import host_from_gpu, gpu_from_host from theano.sandbox.cuda.basic_ops import host_from_gpu, gpu_from_host
from theano.sandbox.cuda.opt import register_opt as register_gpu_opt from theano.sandbox.cuda.opt import register_opt as register_gpu_opt
class BadOldCode(Exception):
    """Signal that known-incorrect legacy code has been reached.

    A dedicated exception type is used so that it does not get caught
    by mistake by a handler written for another exception type.
    """
    pass
class Images2Neibs(Op): class Images2Neibs(Op):
def __init__(self, mode='valid'): def __init__(self, mode='valid'):
""" """
...@@ -20,26 +26,32 @@ class Images2Neibs(Op): ...@@ -20,26 +26,32 @@ class Images2Neibs(Op):
is not a multiple of the pooling factor(s) is not a multiple of the pooling factor(s)
wrap_centered : ?? TODO comment wrap_centered : ?? TODO comment
""" """
if mode not in ['valid','wrap_centered','ignore_borders']: if mode not in ['valid', 'wrap_centered', 'ignore_borders']:
raise NotImplementedError("Only the mode valid, ignore_borders and wrap_centered have been implemented for the op Images2Neibs") raise NotImplementedError("Only the mode valid, ignore_borders"
" and wrap_centered have been"
" implemented for the op Images2Neibs")
self.mode = mode self.mode = mode
def __eq__(self, other): def __eq__(self, other):
return type(self) == type(other) and self.mode==other.mode return type(self) == type(other) and self.mode == other.mode
def __hash__(self): def __hash__(self):
return hash(type(self))^hash(self.mode) return hash(type(self)) ^ hash(self.mode)
def __str__(self): def __str__(self):
return self.__class__.__name__+"{%s}"%self.mode return self.__class__.__name__ + "{%s}" % self.mode
def __setstate__(self, d): def __setstate__(self, d):
self.__dict__.update(d) self.__dict__.update(d)
if not hasattr(self,"mode"): if not hasattr(self, "mode"):
self.mode = 'valid' self.mode = 'valid'
def make_node(self, ten4, neib_shape, neib_step=None): def make_node(self, ten4, neib_shape, neib_step=None):
""" """
:param neib_step: (dx,dy) where dx is the number of rows to skip between patch :param neib_step: (dx,dy) where dx is the number of rows to
and dy is the number of columns. When None, this is the same skip between patch and dy is the number of
as neib_shape(patch are disjoint) columns. When None, this is the same as
neib_shape(patch are disjoint)
""" """
ten4 = T.as_tensor_variable(ten4) ten4 = T.as_tensor_variable(ten4)
neib_shape = T.as_tensor_variable(neib_shape) neib_shape = T.as_tensor_variable(neib_shape)
...@@ -48,17 +60,23 @@ class Images2Neibs(Op): ...@@ -48,17 +60,23 @@ class Images2Neibs(Op):
else: else:
neib_step = T.as_tensor_variable(neib_step) neib_step = T.as_tensor_variable(neib_step)
assert ten4.ndim==4 assert ten4.ndim == 4
assert neib_shape.ndim==1 assert neib_shape.ndim == 1
assert neib_step.ndim==1 assert neib_step.ndim == 1
return Apply(self, [ten4, neib_shape,neib_step], [T.matrix(dtype=ten4.type.dtype)]) return Apply(self, [ten4, neib_shape, neib_step],
[T.matrix(dtype=ten4.type.dtype)])
def grad(self, inp, grads): def grad(self, inp, grads):
x, neib_shape, neib_step = inp x, neib_shape, neib_step = inp
gz, = grads gz, = grads
if self.mode in ['valid','ignore_borders']: if self.mode in ['valid', 'ignore_borders']:
return [neibs2images(gz, neib_shape, x.shape, mode=self.mode), None, None] raise BadOldCode("The Images2Neibs grad is not implemented."
" It was in the past, but returned the wrong"
" answer!")
# This is the reverse of the op, not the grad!
return [neibs2images(gz, neib_shape, x.shape, mode=self.mode),
None, None]
else: else:
raise NotImplementedError() raise NotImplementedError()
...@@ -70,7 +88,7 @@ class Images2Neibs(Op): ...@@ -70,7 +88,7 @@ class Images2Neibs(Op):
z, = out z, = out
fail = sub['fail'] fail = sub['fail']
mode=self.mode mode = self.mode
return """ return """
int grid_c = -1; //number of patch in height int grid_c = -1; //number of patch in height
int grid_d = -1; //number of patch in width int grid_d = -1; //number of patch in width
...@@ -87,7 +105,8 @@ class Images2Neibs(Op): ...@@ -87,7 +105,8 @@ class Images2Neibs(Op):
} }
if ( (%(neib_shape)s->dimensions)[0] != 2) if ( (%(neib_shape)s->dimensions)[0] != 2)
{ {
PyErr_Format(PyExc_TypeError, "neib_shape wrong shape ; has to contain 2 elements"); PyErr_Format(PyExc_TypeError, "neib_shape wrong shape ; has to"
" contain 2 elements");
%(fail)s; %(fail)s;
} }
if (%(neib_step)s->nd != 1) if (%(neib_step)s->nd != 1)
...@@ -97,7 +116,8 @@ class Images2Neibs(Op): ...@@ -97,7 +116,8 @@ class Images2Neibs(Op):
} }
if ( (%(neib_step)s->dimensions)[0] != 2) if ( (%(neib_step)s->dimensions)[0] != 2)
{ {
PyErr_Format(PyExc_TypeError, "neib_step wrong step ; has to contain 2 elements"); PyErr_Format(PyExc_TypeError,
"neib_step wrong step ; has to contain 2 elements");
%(fail)s; %(fail)s;
} }
...@@ -229,9 +249,11 @@ class Images2Neibs(Op): ...@@ -229,9 +249,11 @@ class Images2Neibs(Op):
} // END NESTED SCOPE } // END NESTED SCOPE
""" % locals() """ % locals()
def images2neibs(ten4, neib_shape, neib_step=None, mode='valid'): def images2neibs(ten4, neib_shape, neib_step=None, mode='valid'):
return Images2Neibs(mode)(ten4, neib_shape, neib_step) return Images2Neibs(mode)(ten4, neib_shape, neib_step)
def neibs2images(neibs, neib_shape, original_shape, mode='valid'): def neibs2images(neibs, neib_shape, original_shape, mode='valid'):
""" """
Inverse of images2neib. Inverse of images2neib.
...@@ -246,8 +268,10 @@ def neibs2images(neibs, neib_shape, original_shape, mode='valid'): ...@@ -246,8 +268,10 @@ def neibs2images(neibs, neib_shape, original_shape, mode='valid'):
neib_shape = T.as_tensor_variable(neib_shape) neib_shape = T.as_tensor_variable(neib_shape)
original_shape = T.as_tensor_variable(original_shape) original_shape = T.as_tensor_variable(original_shape)
new_neib_shape = T.stack(original_shape[-1] // neib_shape[1], neib_shape[1]) new_neib_shape = T.stack(original_shape[-1] // neib_shape[1],
output_2d = images2neibs(neibs.dimshuffle('x','x',0,1), new_neib_shape, mode=mode) neib_shape[1])
output_2d = images2neibs(neibs.dimshuffle('x', 'x', 0, 1),
new_neib_shape, mode=mode)
if mode == 'ignore_borders': if mode == 'ignore_borders':
valid_shape = list(original_shape) valid_shape = list(original_shape)
...@@ -255,10 +279,10 @@ def neibs2images(neibs, neib_shape, original_shape, mode='valid'): ...@@ -255,10 +279,10 @@ def neibs2images(neibs, neib_shape, original_shape, mode='valid'):
valid_shape[3] = (valid_shape[3] // neib_shape[1]) * neib_shape[1] valid_shape[3] = (valid_shape[3] // neib_shape[1]) * neib_shape[1]
output_4d = output_2d.reshape(valid_shape) output_4d = output_2d.reshape(valid_shape)
#padding the borders with zeros #padding the borders with zeros
for d in [2,3]: for d in [2, 3]:
pad_shape = list(output_4d.shape) pad_shape = list(output_4d.shape)
pad_shape[d] = original_shape[d] - valid_shape[d] pad_shape[d] = original_shape[d] - valid_shape[d]
output_4d = T.concatenate([output_4d,T.zeros(pad_shape)],axis=d) output_4d = T.concatenate([output_4d, T.zeros(pad_shape)], axis=d)
else: else:
output_4d = output_2d.reshape(original_shape) output_4d = output_2d.reshape(original_shape)
...@@ -269,7 +293,9 @@ def neibs2images(neibs, neib_shape, original_shape, mode='valid'): ...@@ -269,7 +293,9 @@ def neibs2images(neibs, neib_shape, original_shape, mode='valid'):
class GpuImages2Neibs(Images2Neibs): class GpuImages2Neibs(Images2Neibs):
def __init__(self, mode='valid'): def __init__(self, mode='valid'):
if mode not in ['valid', 'wrap_centered']: if mode not in ['valid', 'wrap_centered']:
raise NotImplementedError("Only the mode valid and wrap_centered have been implemented for the op GpuImages2Neibs") raise NotImplementedError("Only the mode valid and wrap_centered"
" have been implemented for the op"
" GpuImages2Neibs")
self.mode = mode self.mode = mode
def make_node(self, ten4, neib_shape, neib_step): def make_node(self, ten4, neib_shape, neib_step):
...@@ -277,11 +303,12 @@ class GpuImages2Neibs(Images2Neibs): ...@@ -277,11 +303,12 @@ class GpuImages2Neibs(Images2Neibs):
if not isinstance(ten4.type, CudaNdarrayType): if not isinstance(ten4.type, CudaNdarrayType):
raise TypeError('ten4 must be cudandarray', ten4) raise TypeError('ten4 must be cudandarray', ten4)
assert ten4.ndim==4 assert ten4.ndim == 4
assert neib_shape.ndim==1 assert neib_shape.ndim == 1
assert neib_step.ndim==1 assert neib_step.ndim == 1
return Apply(self, [ten4, neib_shape, neib_step], [CudaNdarrayType(broadcastable=(False,False), return Apply(self, [ten4, neib_shape, neib_step],
[CudaNdarrayType(broadcastable=(False, False),
dtype=ten4.type.dtype)()]) dtype=ten4.type.dtype)()])
def c_code_cache_version(self): def c_code_cache_version(self):
...@@ -502,7 +529,8 @@ class GpuImages2Neibs(Images2Neibs): ...@@ -502,7 +529,8 @@ class GpuImages2Neibs(Images2Neibs):
%(z)s = (CudaNdarray*)CudaNdarray_NewDims(2, dims); %(z)s = (CudaNdarray*)CudaNdarray_NewDims(2, dims);
if (!%(z)s) if (!%(z)s)
{ {
PyErr_SetString(PyExc_MemoryError, "failed to alloc z output"); PyErr_SetString(PyExc_MemoryError,
"failed to alloc z output");
%(fail)s; %(fail)s;
} }
} }
...@@ -567,7 +595,9 @@ class GpuImages2Neibs(Images2Neibs): ...@@ -567,7 +595,9 @@ class GpuImages2Neibs(Images2Neibs):
cudaError_t sts = cudaGetLastError(); cudaError_t sts = cudaGetLastError();
if (cudaSuccess != sts) if (cudaSuccess != sts)
{ {
PyErr_Format(PyExc_RuntimeError, "Cuda error: %%s: %%s. (grid: %%i x %%i; block: %%i x %%i x %%i; shared: %%i)\\n", PyErr_Format(PyExc_RuntimeError,
"Cuda error: %%s: %%s. (grid: %%i x %%i;"
" block: %%i x %%i x %%i; shared: %%i)\\n",
"k_multi_warp_%(name)s", "k_multi_warp_%(name)s",
cudaGetErrorString(sts), cudaGetErrorString(sts),
n_blocks.x, n_blocks.x,
...@@ -581,13 +611,18 @@ class GpuImages2Neibs(Images2Neibs): ...@@ -581,13 +611,18 @@ class GpuImages2Neibs(Images2Neibs):
} // END NESTED SCOPE } // END NESTED SCOPE
""" % locals() """ % locals()
def gpu_images2neibs(ten4, neib_shape, neib_step=None, mode='valid'): def gpu_images2neibs(ten4, neib_shape, neib_step=None, mode='valid'):
return GpuImages2Neibs(mode)(ten4, neib_shape, neib_step) return GpuImages2Neibs(mode)(ten4, neib_shape, neib_step)
@local_optimizer() @local_optimizer()
def use_gpu_images2neibs(node): def use_gpu_images2neibs(node):
if type(node.op) is Images2Neibs: if type(node.op) is Images2Neibs:
return [host_from_gpu(gpu_images2neibs(gpu_from_host(node.inputs[0]),node.inputs[1],node.inputs[2],mode=node.op.mode))] return [host_from_gpu(gpu_images2neibs(gpu_from_host(node.inputs[0]),
node.inputs[1], node.inputs[2],
mode=node.op.mode))]
if cuda_available: if cuda_available:
register_gpu_opt()(use_gpu_images2neibs) register_gpu_opt()(use_gpu_images2neibs)
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论