Merge pull request #1466 from nouiz/gh1461

Gh1461

Merge pull request #1466 from nouiz/gh1461
6bbc69e0 · lamblin · 39db1f8e · 2c258623 · 6bbc69e0 · 6bbc69e0
--- a/theano/sandbox/cuda/neighbours.py
+++ b/theano/sandbox/cuda/neighbours.py
@@ -13,8 +13,9 @@ if cuda_available:
 class GpuImages2Neibs(Images2Neibs, GpuOp):
    def __init__(self, mode='valid'):
-        if mode not in ['valid', 'wrap_centered']:
+        if mode not in ['valid', 'ignore_borders', 'wrap_centered']:
-            raise NotImplementedError("Only the mode valid and wrap_centered"
+            raise NotImplementedError("Only the mode valid, ignore_borders"
+                                      " and wrap_centered"
                                      " have been implemented for the op"
                                      " GpuImages2Neibs")
        self.mode = mode
@@ -277,6 +278,11 @@ class GpuImages2Neibs(Images2Neibs, GpuOp):
                grid_c = 1+(((CudaNdarray_HOST_DIMS(%(ten4)s))[2]-c)/step_x);
                //number of patch in width
                grid_d = 1+(((CudaNdarray_HOST_DIMS(%(ten4)s))[3]-d)/step_y);
+            }else if ( "%(mode)s" == "ignore_borders") {
+                //number of patch in height
+                grid_c = 1+(((CudaNdarray_HOST_DIMS(%(ten4)s))[2]-c)/step_x);
+                //number of patch in width
+                grid_d = 1+(((CudaNdarray_HOST_DIMS(%(ten4)s))[3]-d)/step_y);
            }else{
                PyErr_Format(PyExc_TypeError,
                             "Images2Neibs: unknow mode '%(mode)s'");
@@ -403,7 +409,8 @@ def gpu_images2neibs(ten4, neib_shape, neib_step=None, mode='valid'):
 def use_gpu_images2neibs(node):
    if (type(node.op) is Images2Neibs and
        node.inputs[0].dtype == 'float32' and
-        node.op.mode in ['valid', 'wrap_centered']):
+        node.op.mode in ['valid', 'ignore_borders',
+                         'wrap_centered']):
        return [host_from_gpu(gpu_images2neibs(gpu_from_host(node.inputs[0]),
                                               node.inputs[1], node.inputs[2],
                                               mode=node.op.mode))]

--- a/theano/sandbox/cuda/opt.py
+++ b/theano/sandbox/cuda/opt.py
@@ -1337,6 +1337,8 @@ def local_gpualloc(node):
                for c, idx in node.outputs[0].clients]):
            # if the client is a subtensor with input on gpu or alloc
            replace = True
+        if replace and node.inputs[0].dtype != 'float32':
+            replace = False
    if replace:
        val = node.inputs[0]
        shp = node.inputs[1:]

--- a/theano/sandbox/neighbourhoods.py
+++ b/theano/sandbox/neighbourhoods.py
 #!/usr/bin/python
+"""WARNING: This code is not recommended. It is not finished, it is
+slower than the version in sandbox/neighbours.py, and it does not work
+on the GPU.
+We only keep this version here as it is a little bit more generic, so
+it covers more cases. But those cases aren't needed frequently, so you
+probably don't want to use this version; go see neighbours.py!
+"""
 import theano
 from theano import gof, Op, tensor, Variable, Apply
@@ -150,6 +158,7 @@ class NeighbourhoodsFromImages(Op):
        return out_dims, num_strides
    def make_node(self, x):
+        x = theano.tensor.as_tensor_variable(x)
        if self.inverse:
            # +1 in the inverse case
            if x.type.ndim != (self.n_dims_before + \

--- a/theano/sandbox/test_neighbours.py
+++ b/theano/sandbox/test_neighbours.py
@@ -41,7 +41,6 @@ class T_Images2Neibs(unittest_tools.InferShapeTester):
                    g = function([],
                                 neibs2images(neibs, neib_shape, images.shape),
                                 mode=self.mode)
-                    if border in ['valid']:
                    assert any([isinstance(node.op, self.op)
                                for node in f.maker.fgraph.toposort()])
@@ -59,6 +58,8 @@ class T_Images2Neibs(unittest_tools.InferShapeTester):
            for border in ['valid', 'ignore_borders']:
                f = function([], images2neibs(images, neib_shape, mode=border),
                             mode=self.mode)
+                assert any([isinstance(node.op, self.op)
+                            for node in f.maker.fgraph.toposort()])
                #print images.get_value(borrow=True)
                neibs = f()
@@ -107,7 +108,6 @@ class T_Images2Neibs(unittest_tools.InferShapeTester):
                             mode=self.mode)
                neibs = f()
-                if border in ['valid']:
                assert self.op in [type(node.op)
                                   for node in f.maker.fgraph.toposort()]
@@ -162,6 +162,8 @@ class T_Images2Neibs(unittest_tools.InferShapeTester):
                             images2neibs(images, neib_shape,
                                          mode='ignore_borders'),
                             mode=self.mode)
+                assert self.op in [type(node.op)
+                                   for node in f.maker.fgraph.toposort()]
                f()
    def test_neibs_wrap_centered_step_manual(self):

--- a/theano/tensor/basic.py
+++ b/theano/tensor/basic.py
@@ -7918,6 +7918,15 @@ class Dot(Op):
            xgrad = dot(gz, y.T)
            ygrad = dot(x.T, gz)
+        # If x or y contains broadcastable dimensions but only one of
+        # them knows that a matching dimension is broadcastable, the
+        # above code doesn't always return the right broadcast pattern.
+        # This causes problems down the road. See gh-1461.
+        if xgrad.broadcastable != x.broadcastable:
+            xgrad = patternbroadcast(xgrad, x.broadcastable)
+        if ygrad.broadcastable != y.broadcastable:
+            ygrad = patternbroadcast(ygrad, y.broadcastable)
        rval = xgrad, ygrad
        for elem in rval:

--- a/theano/tensor/tests/test_basic.py
+++ b/theano/tensor/tests/test_basic.py
@@ -4956,15 +4956,15 @@ class t_dot(unittest.TestCase):
    def test_broadcastable_patterns(self):
        #
-        # These examples should all work because we broadcastable or no, all dimensions of all
+        # These examples should all work because, whether broadcastable or
-        # results have size 1.
+        # not, all dimensions of all results have size 1.
        #
        def val_for(r):
            if r.dtype.startswith('complex'):
                # We want to test complex at the same time, so we give a value
                # To the imaginary component.
-                # This strange way of doing things is the only way that worked on
+                # This strange way of doing things is the only way that worked
-                # numpy 1.4.1
+                # on numpy 1.4.1
                if r.ndim == 0:
                    return numpy.asarray(numpy.complex(1.1, 2.1),
                                         dtype=r.dtype)
@@ -4989,9 +4989,11 @@ class t_dot(unittest.TestCase):
        for dtype0 in ('float32', 'float64', 'complex64', 'complex128'):
            for dtype1 in ('float32', 'float64', 'complex64', 'complex128'):
-                for bc0 in ((True,), (False,), (True, True), (True, False), (False, True),
+                for bc0 in ((True,), (False,), (True, True),
+                            (True, False), (False, True),
                            (False, False)):
-                    for bc1 in ((True,), (False,), (True, True), (True, False), (False, True),
+                    for bc1 in ((True,), (False,), (True, True),
+                                (True, False), (False, True),
                                (False, False)):
                        x = TensorType(dtype=dtype0, broadcastable=bc0)()
@@ -5007,6 +5009,12 @@ class t_dot(unittest.TestCase):
                        tval = val_for(t)
                        f(xval, yval, tval)  # debugmode checks result
+                        if (dtype0.startswith('float') and
+                            dtype1.startswith('float')):
+                            g = grad(z.sum(), x)
+                            assert g.broadcastable == x.broadcastable
+                            g = grad(z.sum(), y)
+                            assert g.broadcastable == y.broadcastable
 class T_tensorfromscalar(unittest.TestCase):