Commit 81727020 authored by Frédéric Bastien

Merge pull request #2741 from abergeron/fix_allocempty

Fix allocempty
......@@ -721,6 +721,13 @@ class DataDestroyed():
data_destroyed = DataDestroyed()
def check_eq(var, val1, val2):
    """Compare two values for a variable, approximately.

    A ``values_eq_approx`` attached to the variable's tag takes
    precedence; otherwise the one defined on the variable's type is
    used.
    """
    _missing = object()
    comparator = getattr(var.tag, 'values_eq_approx', _missing)
    if comparator is _missing:
        comparator = var.type.values_eq_approx
    return comparator(val1, val2)
def _check_inputs(node, storage_map, r_vals, dr_vals, active_nodes,
clobber_dr_vals=True,
perform=None, warn_input_not_reused=True):
......@@ -945,7 +952,10 @@ def _find_bad_optimizations0(order, reasons, r_vals):
r_val = r_vals[r]
assert r.type == new_r.type
if hasattr(new_r, 'values_eq_approx'):
if hasattr(new_r.tag, 'values_eq_approx'):
check = new_r.tag.values_eq_approx(r_val, new_r_val)
elif hasattr(new_r, 'values_eq_approx'):
# This way will be deprecated later, but not right now
check = new_r.values_eq_approx(r_val, new_r_val)
else:
check = r.type.values_eq_approx(r_val, new_r_val)
......@@ -1372,7 +1382,7 @@ def _check_preallocated_output(node, thunk, prealloc_modes, def_val,
_check_viewmap(node, storage_map)
for r in node.outputs:
if not r.type.values_eq_approx(r_vals[r], storage_map[r][0]):
if not check_eq(r, r_vals[r], storage_map[r][0]):
# TODO: indicate it is not a C/Py problem
inputs_val = [storage_map[inp][0] for inp in r.owner.inputs]
raise BadThunkOutput(r,
......@@ -2002,22 +2012,20 @@ class _Linker(gof.link.LocalLinker):
# Check with Python result
for r in node.outputs:
if r in r_vals:
#print >> sys.stderr, i, "DEBUGMODE clearing output", r
# compares the version from thunk_py (in r_vals)
# to the version produced by thunk_c (in storage_map)
if not r.type.values_eq_approx(r_vals[r], storage_map[r][0]):
#import pdb; pdb.set_trace()
#r.type.values_eq_approx(r_vals[r], storage_map[r][0])
# compares the version from thunk_py
# (in r_vals) to the version produced
# by thunk_c (in storage_map)
if not check_eq(r, r_vals[r], storage_map[r][0]):
inputs_val = [storage_map[inp][0] for inp in r.owner.inputs]
raise BadThunkOutput(r,
thunk1='perform', val1=r_vals[r],
thunk2='c_code', val2=storage_map[r][0],
inputs_val=inputs_val)
raise BadThunkOutput(
r, thunk1='perform', val1=r_vals[r],
thunk2='c_code', val2=storage_map[r][0],
inputs_val=inputs_val)
else:
#print >> sys.stderr, i, "DEBUGMODE storing reference output %x" % id(storage_map[r][0])
# retrieve each output from the storage_map
r_vals[r] = storage_map[r][0]
storage_map[r][0] = None # clear the storage_map for the thunk_c
# clear the storage_map for the thunk_c
storage_map[r][0] = None
if self.maker.mode.check_preallocated_output:
prealloc_modes = \
......
......@@ -450,6 +450,9 @@ class Constant(Variable):
def signature(self):
    """Return the (type, data) pair identifying this constant.

    Constants with equal signatures are candidates for merging.
    """
    return self.type, self.data
def merge_signature(self):
    """Signature used by the merge optimization.

    Defaults to the plain :meth:`signature`; subclasses may override
    it (e.g. to compare constants by identity instead of by value).
    """
    return self.signature()
def __str__(self):
if self.name is not None:
return self.name
......
......@@ -499,7 +499,7 @@ class MergeFeature(object):
"""Check if a constant can be merged, and queue that replacement"""
if id(c) in self.seen_constants:
return
sig = c.signature()
sig = c.merge_signature()
other_c = self.const_sig_inv.get(sig, None)
if other_c is not None:
# multiple names will clobber each other..
......
......@@ -627,11 +627,13 @@ if (py_%(name)s == NULL) { %(freefunc)s(%(name)s); }
class CDataTypeConstant(graph.Constant):
    # Constant holding an opaque C data pointer (CDataType).

    def merge_signature(self):
        # We don't want to merge constants that don't point to the
        # same underlying object, so the merge signature is the
        # identity of the wrapped data.
        return id(self.data)

    def signature(self):
        # The Op.c_code* methods can't access the data, so the
        # generated code cannot depend on it; there is no need to put
        # it in the signature.  Also, under Python 2, PyCObjects aren't
        # picklable, so using the PyCObject in the signature would
        # disable the C code cache for ops that take one as input.
        # There is no way to put the data in the signature, so we
        # don't even try.
        return (self.type,)


CDataType.Constant = CDataTypeConstant
......@@ -3291,7 +3291,7 @@ class GpuAllocEmpty(GpuOp):
def make_node(self, *shape):
shape, output = self.validate_shape(shape)
output.values_eq_approx = tensor.type.values_eq_approx_always_true
output.tag.values_eq_approx = tensor.type.values_eq_approx_always_true
return Apply(self, shape, [output])
def perform(self, node, inputs, out_):
......
......@@ -28,6 +28,26 @@ else:
mode_without_gpu = theano.compile.mode.get_default_mode().excluding('gpu')
def test_dnn_conv_desc_merge():
    """Descriptors built with different parameters must remain distinct
    outputs — the merge optimization must not unify them."""
    def as_i64_tensor(values):
        # Shapes are passed to GpuDnnConvDesc as int64 tensor variables.
        return T.as_tensor_variable(
            numpy.asarray(values).astype('int64'))

    image_shape = as_i64_tensor([2, 1, 8, 8])
    kernel_shape = as_i64_tensor([3, 1, 2, 2])
    valid_desc = dnn.GpuDnnConvDesc(
        border_mode='valid', subsample=(2, 2),
        conv_mode='conv')(image_shape, kernel_shape)
    full_desc = dnn.GpuDnnConvDesc(
        border_mode='full', subsample=(1, 1),
        conv_mode='cross')(image_shape, kernel_shape)
    # CDataType is not DeepCopyable so this will crash if we don't use
    # borrow=True
    fn = theano.function([], [theano.Out(valid_desc, borrow=True),
                              theano.Out(full_desc, borrow=True)])
    out1, out2 = fn()
    # Equal outputs would mean the two descriptors were merged, which
    # would be bad.
    assert out1 != out2
def pool_2d_i2n(input, ds=(2, 2), strides=None,
pad=(0, 0),
pool_function=T.max, mode='ignore_borders'):
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论