提交 260fc9a7 authored 作者: Frédéric Bastien

Merge pull request #4258 from lamblin/fix_debugmode_dnn

Fixes for debugmode in dnn and gpuarray
......@@ -1322,9 +1322,16 @@ class GpuDnnPoolDesc(GpuOp):
if self.pad != (0, 0) and version() == -1:
raise RuntimeError("CuDNN pooling with padding requires CuDNN v2")
return Apply(self, [],
node = Apply(self, [],
[CDataType("cudnnPoolingDescriptor_t",
freefunc="cudnnDestroyPoolingDescriptor")()])
# DebugMode cannot compare the values of CDataType variables, so by
# default it returns False all the time. To prevent DebugMode from
# complaining because of the MergeOptimizer, we make this variable
# always compare to True.
out = node.outputs[0]
out.tag.values_eq_approx = tensor.type.values_eq_approx_always_true
return node
def c_code(self, node, name, inputs, outputs, sub):
desc, = outputs
......
......@@ -412,6 +412,13 @@ def test_pooling3d():
if not cuda.dnn.dnn_available() or cuda.dnn.version() < (3000, 3000):
raise SkipTest(cuda.dnn.dnn_available.msg)
# For max pooling pool3d2d explicitly pads the input with
# -inf. Because of this, the compilation mode for the function
# that uses pool3d2d should not check for infinite values or
# it will falsely believe there is a error in the graph.
mode_without_gpu2 = mode_without_gpu.including()
mode_without_gpu2.check_isfinite = False
# 'average_exc_pad' is disabled for versions < 4004
if cuda.dnn.version() < (4004, 4004):
modes = ('max', 'average_inc_pad')
......@@ -447,13 +454,6 @@ def test_pooling3d():
strides=(stride, stride, stride),
pad=pad, pool_func=func)
# For max pooling pool3d2d explicitly pads the input with
# -inf. Because of this, the compilation mode for the function
# that uses pool3d2d should not check for infinite values or
# it will falsely believe there is a error in the graph.
mode_without_gpu2 = mode_without_gpu.including()
mode_without_gpu2.check_isfinite = False
f1 = theano.function([x], out1, mode=mode_with_gpu)
assert any([isinstance(node.op, cuda.dnn.GpuDnnPool)
for node in f1.maker.fgraph.apply_nodes])
......@@ -512,7 +512,7 @@ def test_pooling3d():
strides=(stride, stride, stride),
pad=pad, pool_func=func)
fc = theano.function([x], theano.grad(out.sum(), x),
mode=mode_without_gpu)
mode=mode_without_gpu2)
c_out = fc(data)
utt.assert_allclose(c_out, g_out)
......
......@@ -996,6 +996,8 @@ class GpuJoin(HideC, Join):
def perform(self, node, axis_and_tensors, out_, ctx):
    """Concatenate the input tensors along the requested axis on the GPU.

    ``axis_and_tensors[0]`` is the join axis (may be negative); the
    remaining elements are the tensors to concatenate.  The concatenated
    array is cast to the dtype of the node's output and stored in the
    output storage cell.
    """
    storage, = out_
    join_axis = int(axis_and_tensors[0])
    tensors = axis_and_tensors[1:]
    # Normalize a negative axis against the rank of the first tensor,
    # since pygpu.concatenate expects a non-negative axis.
    if join_axis < 0:
        join_axis += tensors[0].ndim
    joined = pygpu.concatenate(tensors, axis=join_axis, context=ctx)
    storage[0] = joined.astype(node.outputs[0].dtype)
......
......@@ -287,7 +287,8 @@ class GpuDot22(BlasOp):
def perform(self, node, inputs, outputs):
    """Compute the matrix product ``x @ y`` on the GPU via gemm.

    The scraped diff left both the old and the new allocation statements
    in place; only the version passing ``context=x.context`` is kept.
    Allocating the output on the same GPU context as ``x`` is required —
    without an explicit context, pygpu would allocate on the default
    context, which is wrong whenever more than one context is in use.
    """
    x, y = inputs
    # NOTE(review): assumes x and y share a dtype and context — presumably
    # enforced by make_node; confirm upstream.
    out = pygpu.empty((x.shape[0], y.shape[1]), dtype=x.dtype,
                      context=x.context)
    # overwrite_c=True lets gemm write directly into the freshly
    # allocated buffer instead of copying it.
    outputs[0][0] = blas.gemm(1., x, y, 0., out,
                              overwrite_c=True)
......
......@@ -307,9 +307,16 @@ class GpuDnnConvDesc(COp):
if kern_shape.type.ndim != 1 or kern_shape.type.dtype != 'int64':
raise TypeError('kern must be 1D shape tensor')
return Apply(self, [kern_shape],
node = Apply(self, [kern_shape],
[CDataType("cudnnConvolutionDescriptor_t",
freefunc="cudnnDestroyConvolutionDescriptor")()])
# DebugMode cannot compare the values of CDataType variables, so by
# default it returns False all the time. To prevent DebugMode from
# complaining because of the MergeOptimizer, we make this variable
# always compare to True.
out = node.outputs[0]
out.tag.values_eq_approx = tensor.type.values_eq_approx_always_true
return node
def get_op_params(self):
pad0 = '0'
......@@ -998,9 +1005,16 @@ class GpuDnnPoolDesc(Op):
self.pad = (0, 0)
def make_node(self):
    """Build an Apply node producing a ``cudnnPoolingDescriptor_t``.

    The scraped diff kept both the removed ``return Apply(...)`` and the
    added ``node = Apply(...)`` statements; only the post-merge form is
    kept here.
    """
    node = Apply(self, [],
                 [CDataType("cudnnPoolingDescriptor_t",
                            freefunc="cudnnDestroyPoolingDescriptor")()])
    # DebugMode cannot compare the values of CDataType variables, so by
    # default it returns False all the time. To prevent DebugMode from
    # complaining because of the MergeOptimizer, we make this variable
    # always compare to True.
    out = node.outputs[0]
    out.tag.values_eq_approx = tensor.type.values_eq_approx_always_true
    return node
def c_code(self, node, name, inputs, outputs, sub):
desc, = outputs
......
......@@ -462,6 +462,10 @@ class GpuImages2Neibs(GpuKernelBase, Images2Neibs, Op):
} // END NESTED SCOPE
""" % locals()
def perform(self, node, inp, out, ctx):
    """Python-mode execution is intentionally not supported for this op."""
    # Disable the perform method from the CPU version: delegate straight
    # to the base ``Op.perform`` (presumably raising NotImplementedError —
    # confirm against the Op base class), so this GPU op is only ever
    # executed through its generated kernel code.
    Op.perform(self, node, inp, out, ctx)
@op_lifter([Images2Neibs])
def use_gpu_images2neibs(node, context_name):
......
......@@ -3884,11 +3884,7 @@ class T_Join_and_Split(unittest.TestCase):
got = f(-2)
assert numpy.allclose(got, want)
try:
got = f(-3)
assert False
except IndexError:
pass
self.assertRaises((IndexError, OverflowError), f, -3)
def test_join_matrixC_negative_axis(self):
"""constant join negative axis"""
......@@ -3920,11 +3916,7 @@ class T_Join_and_Split(unittest.TestCase):
got = f()
assert numpy.allclose(got, want)
try:
s = join(-3, a, b)
assert False
except IndexError:
pass
self.assertRaises((IndexError, OverflowError), join, -3, a, b)
utt.verify_grad(lambda a, b: join(-1, a, b), [v, 2 * v],
mode=self.mode)
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论