提交 6bbc69e0 authored 作者: lamblin's avatar lamblin

Merge pull request #1466 from nouiz/gh1461

Gh1461
...@@ -13,8 +13,9 @@ if cuda_available: ...@@ -13,8 +13,9 @@ if cuda_available:
class GpuImages2Neibs(Images2Neibs, GpuOp): class GpuImages2Neibs(Images2Neibs, GpuOp):
def __init__(self, mode='valid'): def __init__(self, mode='valid'):
if mode not in ['valid', 'wrap_centered']: if mode not in ['valid', 'ignore_borders', 'wrap_centered']:
raise NotImplementedError("Only the mode valid and wrap_centered" raise NotImplementedError("Only the mode valid, ignore_borders"
" and wrap_centered"
" have been implemented for the op" " have been implemented for the op"
" GpuImages2Neibs") " GpuImages2Neibs")
self.mode = mode self.mode = mode
...@@ -277,6 +278,11 @@ class GpuImages2Neibs(Images2Neibs, GpuOp): ...@@ -277,6 +278,11 @@ class GpuImages2Neibs(Images2Neibs, GpuOp):
grid_c = 1+(((CudaNdarray_HOST_DIMS(%(ten4)s))[2]-c)/step_x); grid_c = 1+(((CudaNdarray_HOST_DIMS(%(ten4)s))[2]-c)/step_x);
//number of patch in width //number of patch in width
grid_d = 1+(((CudaNdarray_HOST_DIMS(%(ten4)s))[3]-d)/step_y); grid_d = 1+(((CudaNdarray_HOST_DIMS(%(ten4)s))[3]-d)/step_y);
}else if ( "%(mode)s" == "ignore_borders") {
//number of patch in height
grid_c = 1+(((CudaNdarray_HOST_DIMS(%(ten4)s))[2]-c)/step_x);
//number of patch in width
grid_d = 1+(((CudaNdarray_HOST_DIMS(%(ten4)s))[3]-d)/step_y);
}else{ }else{
PyErr_Format(PyExc_TypeError, PyErr_Format(PyExc_TypeError,
"Images2Neibs: unknow mode '%(mode)s'"); "Images2Neibs: unknow mode '%(mode)s'");
...@@ -403,7 +409,8 @@ def gpu_images2neibs(ten4, neib_shape, neib_step=None, mode='valid'): ...@@ -403,7 +409,8 @@ def gpu_images2neibs(ten4, neib_shape, neib_step=None, mode='valid'):
def use_gpu_images2neibs(node): def use_gpu_images2neibs(node):
if (type(node.op) is Images2Neibs and if (type(node.op) is Images2Neibs and
node.inputs[0].dtype == 'float32' and node.inputs[0].dtype == 'float32' and
node.op.mode in ['valid', 'wrap_centered']): node.op.mode in ['valid', 'ignore_borders',
'wrap_centered']):
return [host_from_gpu(gpu_images2neibs(gpu_from_host(node.inputs[0]), return [host_from_gpu(gpu_images2neibs(gpu_from_host(node.inputs[0]),
node.inputs[1], node.inputs[2], node.inputs[1], node.inputs[2],
mode=node.op.mode))] mode=node.op.mode))]
......
...@@ -1337,6 +1337,8 @@ def local_gpualloc(node): ...@@ -1337,6 +1337,8 @@ def local_gpualloc(node):
for c, idx in node.outputs[0].clients]): for c, idx in node.outputs[0].clients]):
# if the client is a subtensor with input on gpu or alloc # if the client is a subtensor with input on gpu or alloc
replace = True replace = True
if replace and node.inputs[0].dtype != 'float32':
replace = False
if replace: if replace:
val = node.inputs[0] val = node.inputs[0]
shp = node.inputs[1:] shp = node.inputs[1:]
......
#!/usr/bin/python #!/usr/bin/python
"""WARNING: This code is not recommanded. It is not finished, it is
slower than the version in sandbox/neighbours.py, and it does not work
on the GPU.
We only keep this version here as it is a little bit more generic, so
it covers more cases. But those cases aren't needed frequently, so you
probably don't want to use this version, go see neighbours.py!!!!!!!
"""
import theano import theano
from theano import gof, Op, tensor, Variable, Apply from theano import gof, Op, tensor, Variable, Apply
...@@ -150,6 +158,7 @@ class NeighbourhoodsFromImages(Op): ...@@ -150,6 +158,7 @@ class NeighbourhoodsFromImages(Op):
return out_dims, num_strides return out_dims, num_strides
def make_node(self, x): def make_node(self, x):
x = theano.tensor.as_tensor_variable(x)
if self.inverse: if self.inverse:
# +1 in the inverse case # +1 in the inverse case
if x.type.ndim != (self.n_dims_before + \ if x.type.ndim != (self.n_dims_before + \
......
...@@ -41,7 +41,6 @@ class T_Images2Neibs(unittest_tools.InferShapeTester): ...@@ -41,7 +41,6 @@ class T_Images2Neibs(unittest_tools.InferShapeTester):
g = function([], g = function([],
neibs2images(neibs, neib_shape, images.shape), neibs2images(neibs, neib_shape, images.shape),
mode=self.mode) mode=self.mode)
if border in ['valid']:
assert any([isinstance(node.op, self.op) assert any([isinstance(node.op, self.op)
for node in f.maker.fgraph.toposort()]) for node in f.maker.fgraph.toposort()])
...@@ -59,6 +58,8 @@ class T_Images2Neibs(unittest_tools.InferShapeTester): ...@@ -59,6 +58,8 @@ class T_Images2Neibs(unittest_tools.InferShapeTester):
for border in ['valid', 'ignore_borders']: for border in ['valid', 'ignore_borders']:
f = function([], images2neibs(images, neib_shape, mode=border), f = function([], images2neibs(images, neib_shape, mode=border),
mode=self.mode) mode=self.mode)
assert any([isinstance(node.op, self.op)
for node in f.maker.fgraph.toposort()])
#print images.get_value(borrow=True) #print images.get_value(borrow=True)
neibs = f() neibs = f()
...@@ -107,7 +108,6 @@ class T_Images2Neibs(unittest_tools.InferShapeTester): ...@@ -107,7 +108,6 @@ class T_Images2Neibs(unittest_tools.InferShapeTester):
mode=self.mode) mode=self.mode)
neibs = f() neibs = f()
if border in ['valid']:
assert self.op in [type(node.op) assert self.op in [type(node.op)
for node in f.maker.fgraph.toposort()] for node in f.maker.fgraph.toposort()]
...@@ -162,6 +162,8 @@ class T_Images2Neibs(unittest_tools.InferShapeTester): ...@@ -162,6 +162,8 @@ class T_Images2Neibs(unittest_tools.InferShapeTester):
images2neibs(images, neib_shape, images2neibs(images, neib_shape,
mode='ignore_borders'), mode='ignore_borders'),
mode=self.mode) mode=self.mode)
assert self.op in [type(node.op)
for node in f.maker.fgraph.toposort()]
f() f()
def test_neibs_wrap_centered_step_manual(self): def test_neibs_wrap_centered_step_manual(self):
......
...@@ -7918,6 +7918,15 @@ class Dot(Op): ...@@ -7918,6 +7918,15 @@ class Dot(Op):
xgrad = dot(gz, y.T) xgrad = dot(gz, y.T)
ygrad = dot(x.T, gz) ygrad = dot(x.T, gz)
# If x or y contain broadcastable dimensions but only one of
# them knows that a matching dimension is broadcastable, the
# above code doesn't always return the right broadcast pattern.
# This causes problems down the road. See gh-1461.
if xgrad.broadcastable != x.broadcastable:
xgrad = patternbroadcast(xgrad, x.broadcastable)
if ygrad.broadcastable != y.broadcastable:
ygrad = patternbroadcast(ygrad, y.broadcastable)
rval = xgrad, ygrad rval = xgrad, ygrad
for elem in rval: for elem in rval:
......
...@@ -4956,15 +4956,15 @@ class t_dot(unittest.TestCase): ...@@ -4956,15 +4956,15 @@ class t_dot(unittest.TestCase):
def test_broadcastable_patterns(self): def test_broadcastable_patterns(self):
# #
# These examples should all work because we broadcastable or no, all dimensions of all # These examples should all work because we broadcastable or
# results have size 1. # no, all dimensions of all results have size 1.
# #
def val_for(r): def val_for(r):
if r.dtype.startswith('complex'): if r.dtype.startswith('complex'):
# We want to test complex at the same time, so we give a value # We want to test complex at the same time, so we give a value
# To the imaginary component. # To the imaginary component.
# This strange way of doing things is the only way that worked on # This strange way of doing things is the only way that worked
# numpy 1.4.1 # on numpy 1.4.1
if r.ndim == 0: if r.ndim == 0:
return numpy.asarray(numpy.complex(1.1, 2.1), return numpy.asarray(numpy.complex(1.1, 2.1),
dtype=r.dtype) dtype=r.dtype)
...@@ -4989,9 +4989,11 @@ class t_dot(unittest.TestCase): ...@@ -4989,9 +4989,11 @@ class t_dot(unittest.TestCase):
for dtype0 in ('float32', 'float64', 'complex64', 'complex128'): for dtype0 in ('float32', 'float64', 'complex64', 'complex128'):
for dtype1 in ('float32', 'float64', 'complex64', 'complex128'): for dtype1 in ('float32', 'float64', 'complex64', 'complex128'):
for bc0 in ((True,), (False,), (True, True), (True, False), (False, True), for bc0 in ((True,), (False,), (True, True),
(True, False), (False, True),
(False, False)): (False, False)):
for bc1 in ((True,), (False,), (True, True), (True, False), (False, True), for bc1 in ((True,), (False,), (True, True),
(True, False), (False, True),
(False, False)): (False, False)):
x = TensorType(dtype=dtype0, broadcastable=bc0)() x = TensorType(dtype=dtype0, broadcastable=bc0)()
...@@ -5007,6 +5009,12 @@ class t_dot(unittest.TestCase): ...@@ -5007,6 +5009,12 @@ class t_dot(unittest.TestCase):
tval = val_for(t) tval = val_for(t)
f(xval, yval, tval) # debugmode checks result f(xval, yval, tval) # debugmode checks result
if (dtype0.startswith('float') and
dtype1.startswith('float')):
g = grad(z.sum(), x)
assert g.broadcastable == x.broadcastable
g = grad(z.sum(), y)
assert g.broadcastable == y.broadcastable
class T_tensorfromscalar(unittest.TestCase): class T_tensorfromscalar(unittest.TestCase):
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论