提交 6bbc69e0 authored 作者: lamblin's avatar lamblin

Merge pull request #1466 from nouiz/gh1461

Gh1461
......@@ -13,8 +13,9 @@ if cuda_available:
class GpuImages2Neibs(Images2Neibs, GpuOp):
def __init__(self, mode='valid'):
if mode not in ['valid', 'wrap_centered']:
raise NotImplementedError("Only the mode valid and wrap_centered"
if mode not in ['valid', 'ignore_borders', 'wrap_centered']:
raise NotImplementedError("Only the mode valid, ignore_borders"
" and wrap_centered"
" have been implemented for the op"
" GpuImages2Neibs")
self.mode = mode
......@@ -277,6 +278,11 @@ class GpuImages2Neibs(Images2Neibs, GpuOp):
grid_c = 1+(((CudaNdarray_HOST_DIMS(%(ten4)s))[2]-c)/step_x);
//number of patch in width
grid_d = 1+(((CudaNdarray_HOST_DIMS(%(ten4)s))[3]-d)/step_y);
}else if ( "%(mode)s" == "ignore_borders") {
//number of patch in height
grid_c = 1+(((CudaNdarray_HOST_DIMS(%(ten4)s))[2]-c)/step_x);
//number of patch in width
grid_d = 1+(((CudaNdarray_HOST_DIMS(%(ten4)s))[3]-d)/step_y);
}else{
PyErr_Format(PyExc_TypeError,
"Images2Neibs: unknow mode '%(mode)s'");
......@@ -403,7 +409,8 @@ def gpu_images2neibs(ten4, neib_shape, neib_step=None, mode='valid'):
def use_gpu_images2neibs(node):
if (type(node.op) is Images2Neibs and
node.inputs[0].dtype == 'float32' and
node.op.mode in ['valid', 'wrap_centered']):
node.op.mode in ['valid', 'ignore_borders',
'wrap_centered']):
return [host_from_gpu(gpu_images2neibs(gpu_from_host(node.inputs[0]),
node.inputs[1], node.inputs[2],
mode=node.op.mode))]
......
......@@ -1337,6 +1337,8 @@ def local_gpualloc(node):
for c, idx in node.outputs[0].clients]):
# if the client is a subtensor with input on gpu or alloc
replace = True
if replace and node.inputs[0].dtype != 'float32':
replace = False
if replace:
val = node.inputs[0]
shp = node.inputs[1:]
......
#!/usr/bin/python
"""WARNING: This code is not recommended. It is not finished, it is
slower than the version in sandbox/neighbours.py, and it does not work
on the GPU.
We only keep this version here as it is a little bit more generic, so
it covers more cases. But those cases aren't needed frequently, so you
probably don't want to use this version, go see neighbours.py!!!!!!!
"""
import theano
from theano import gof, Op, tensor, Variable, Apply
......@@ -150,6 +158,7 @@ class NeighbourhoodsFromImages(Op):
return out_dims, num_strides
def make_node(self, x):
x = theano.tensor.as_tensor_variable(x)
if self.inverse:
# +1 in the inverse case
if x.type.ndim != (self.n_dims_before + \
......
......@@ -41,9 +41,8 @@ class T_Images2Neibs(unittest_tools.InferShapeTester):
g = function([],
neibs2images(neibs, neib_shape, images.shape),
mode=self.mode)
if border in ['valid']:
assert any([isinstance(node.op, self.op)
for node in f.maker.fgraph.toposort()])
assert any([isinstance(node.op, self.op)
for node in f.maker.fgraph.toposort()])
#print g()
assert numpy.allclose(images.get_value(borrow=True), g())
......@@ -59,6 +58,8 @@ class T_Images2Neibs(unittest_tools.InferShapeTester):
for border in ['valid', 'ignore_borders']:
f = function([], images2neibs(images, neib_shape, mode=border),
mode=self.mode)
assert any([isinstance(node.op, self.op)
for node in f.maker.fgraph.toposort()])
#print images.get_value(borrow=True)
neibs = f()
......@@ -107,9 +108,8 @@ class T_Images2Neibs(unittest_tools.InferShapeTester):
mode=self.mode)
neibs = f()
if border in ['valid']:
assert self.op in [type(node.op)
for node in f.maker.fgraph.toposort()]
assert self.op in [type(node.op)
for node in f.maker.fgraph.toposort()]
assert numpy.allclose(neibs,
[[ 0, 1, 2, 5, 6, 7, 10, 11, 12],
......@@ -162,6 +162,8 @@ class T_Images2Neibs(unittest_tools.InferShapeTester):
images2neibs(images, neib_shape,
mode='ignore_borders'),
mode=self.mode)
assert self.op in [type(node.op)
for node in f.maker.fgraph.toposort()]
f()
def test_neibs_wrap_centered_step_manual(self):
......
......@@ -7918,6 +7918,15 @@ class Dot(Op):
xgrad = dot(gz, y.T)
ygrad = dot(x.T, gz)
# If x or y contain broadcastable dimensions but only one of
# them knows that a matching dimension is broadcastable, the
# above code doesn't always return the right broadcast pattern.
# This causes problems down the road. See gh-1461.
if xgrad.broadcastable != x.broadcastable:
xgrad = patternbroadcast(xgrad, x.broadcastable)
if ygrad.broadcastable != y.broadcastable:
ygrad = patternbroadcast(ygrad, y.broadcastable)
rval = xgrad, ygrad
for elem in rval:
......
......@@ -4956,15 +4956,15 @@ class t_dot(unittest.TestCase):
def test_broadcastable_patterns(self):
#
# These examples should all work because we broadcastable or no, all dimensions of all
# results have size 1.
# These examples should all work because, broadcastable or
# not, all dimensions of all results have size 1.
#
def val_for(r):
if r.dtype.startswith('complex'):
# We want to test complex at the same time, so we give a value
# to the imaginary component.
# This strange way of doing things is the only way that worked on
# numpy 1.4.1
# This strange way of doing things is the only way that worked
# on numpy 1.4.1
if r.ndim == 0:
return numpy.asarray(numpy.complex(1.1, 2.1),
dtype=r.dtype)
......@@ -4989,10 +4989,12 @@ class t_dot(unittest.TestCase):
for dtype0 in ('float32', 'float64', 'complex64', 'complex128'):
for dtype1 in ('float32', 'float64', 'complex64', 'complex128'):
for bc0 in ((True,), (False,), (True, True), (True, False), (False, True),
(False, False)):
for bc1 in ((True,), (False,), (True, True), (True, False), (False, True),
for bc0 in ((True,), (False,), (True, True),
(True, False), (False, True),
(False, False)):
for bc1 in ((True,), (False,), (True, True),
(True, False), (False, True),
(False, False)):
x = TensorType(dtype=dtype0, broadcastable=bc0)()
y = TensorType(dtype=dtype1, broadcastable=bc1)()
......@@ -5007,6 +5009,12 @@ class t_dot(unittest.TestCase):
tval = val_for(t)
f(xval, yval, tval) # debugmode checks result
if (dtype0.startswith('float') and
dtype1.startswith('float')):
g = grad(z.sum(), x)
assert g.broadcastable == x.broadcastable
g = grad(z.sum(), y)
assert g.broadcastable == y.broadcastable
class T_tensorfromscalar(unittest.TestCase):
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论