Merge pull request #5854 from lamblin/fix_div0_crash

Fix hard crash when dividing by 0

Merge pull request #5854 from lamblin/fix_div0_crash
318dd6af · Pascal Lamblin · GitHub · d2c753f8 · 77205b27 · 318dd6af
--- a/theano/scalar/basic.py
+++ b/theano/scalar/basic.py
@@ -1924,8 +1924,20 @@ class IntDiv(BinaryScalarOp):
    def c_code(self, node, name, inputs, outputs, sub):
        (x, y) = inputs
        (z,) = outputs
+        fail = sub['fail']
        t = node.inputs[0].type.upcast(*[i.type for i in node.inputs[1:]])
        if t in imap(str, discrete_types):
+            # If we are in a gpuarray kernel, %(fail)s exits the kernel,
+            # and we do not get any error report, and we cannot set
+            # Python error messages either, so for now we just call the
+            # cuda function, which returns a binary pattern of all 1s.
+            check = dedent('''
+                #ifndef KERNEL
+                    PyErr_SetString(PyExc_ZeroDivisionError, "integer division by zero");
+                    %(fail)s
+                #endif
+                ''') % locals()
            x_div_y_pp = '(%(x)s / %(y)s)' % locals()
            x_div_y_mp = '((-%(x)s) / %(y)s)' % locals()
            x_mod_y_mp = 'THEANO_MACRO_MOD((-%(x)s), %(y)s)' % locals()
@@ -1944,6 +1956,7 @@ class IntDiv(BinaryScalarOp):
            else:
                raise NotImplementedError('type not supported', t)
+            check = ''
            x_div_y_pp = '%(floor)s(%(x)s / %(y)s)' % locals()
            x_div_y_mp = '%(floor)s((-%(x)s) / %(y)s)' % locals()
            x_mod_y_mp = '%(fmod)s((-%(x)s), %(y)s)' % locals()
@@ -1956,15 +1969,18 @@ class IntDiv(BinaryScalarOp):
            raise NotImplementedError('type not supported', t)
        return dedent("""
-            if (%(x)s < 0) {
+            if (%(y)s == 0) {
-                if (%(y)s < 0) {
+                %(check)s
+                %(z)s = %(x_div_y_pp)s;
+            } else if (%(y)s < 0) {
+                if (%(x)s < 0) {
                    %(z)s = %(x_div_y_mm)s;
                } else {
-                    %(z)s = - %(x_div_y_mp)s - ((%(x_mod_y_mp)s == 0) ? 0 : 1);
+                    %(z)s = - %(x_div_y_pm)s - ((%(x_mod_y_pm)s == 0) ? 0 : 1);
                }
            } else {
-                if (%(y)s < 0) {
+                if (%(x)s < 0) {
-                    %(z)s = - %(x_div_y_pm)s - ((%(x_mod_y_pm)s == 0) ? 0 : 1);
+                    %(z)s = - %(x_div_y_mp)s - ((%(x_mod_y_mp)s == 0) ? 0 : 1);
                } else {
                    %(z)s = %(x_div_y_pp)s;
                }
@@ -1972,7 +1988,7 @@ class IntDiv(BinaryScalarOp):
            """) % locals()
    def c_code_cache_version(self):
-        return (2,)
+        return (5,)
    def grad(self, inputs, g_output):
        return [inp.zeros_like(dtype=theano.config.floatX)
@@ -2004,13 +2020,13 @@ class Mod(BinaryScalarOp):
        return x % y
    def c_code_cache_version(self):
-        return (5,)
+        return (8,)
    def c_support_code(self):
        # We use a macro as python use % as a special string character,
        # and the output of c_code may be run through another level
        # of string formatting.
-        return "#define THEANO_MACRO_MOD(x,y) (x % y)"
+        return "#define THEANO_MACRO_MOD(x, y) (x % y)"
    def c_code(self, node, name, inputs, outputs, sub):
        """
@@ -2020,6 +2036,7 @@ class Mod(BinaryScalarOp):
        """
        (x, y) = inputs
        (z,) = outputs
+        fail = sub['fail']
        t = node.inputs[0].type.upcast(*[i.type for i in node.inputs[1:]])
        if (str(t) in imap(str, discrete_types) or
                t in ['uint8', 'int8', 'uint16', 'int16'] or
@@ -2029,6 +2046,16 @@ class Mod(BinaryScalarOp):
            # keep them out of safety, and verify they are useless with an
            # assert.
            assert str(t) in imap(str, discrete_types)
+            # If we are in a gpuarray kernel, %(fail)s exits the kernel,
+            # and we do not get any error report, and we cannot set
+            # Python error messages either, so for now we just call the
+            # cuda function, which returns a dtype-dependent binary pattern.
+            check = dedent('''
+                #ifndef KERNEL
+                    PyErr_SetString(PyExc_ZeroDivisionError, "integer modulo by zero");
+                    %(fail)s
+                #endif
+                ''') % locals()
            x_mod_y = "THEANO_MACRO_MOD(%(x)s, %(y)s)" % locals()
            x_mod_ymm = "THEANO_MACRO_MOD(-%(x)s, -%(y)s)" % locals()
            x_mod_ypm = "THEANO_MACRO_MOD(%(x)s, -%(y)s)" % locals()
@@ -2040,26 +2067,32 @@ class Mod(BinaryScalarOp):
            # keep them out of safety, and verify they are useless with an
            # assert.
            assert str(t) in imap(str, float_types)
-            x_mod_y = "fmod(%(x)s,%(y)s)" % locals()
+            check = ''
-            x_mod_ymm = "fmod(-%(x)s,-%(y)s)" % locals()
+            x_mod_y = "fmod(%(x)s, %(y)s)" % locals()
-            x_mod_ypm = "fmod(%(x)s,-%(y)s)" % locals()
+            x_mod_ymm = "fmod(-%(x)s, -%(y)s)" % locals()
-            x_mod_ymp = "fmod(-%(x)s,%(y)s)" % locals()
+            x_mod_ypm = "fmod(%(x)s, -%(y)s)" % locals()
+            x_mod_ymp = "fmod(-%(x)s, %(y)s)" % locals()
        elif str(t) in imap(str, complex_types):
            raise self.complex_error
        else:
            raise NotImplementedError('type not supported', t)
        return dedent("""
-            if (%(x)s < 0){
+            if (%(y)s == 0) {
-               if (%(y)s < 0){
+                %(check)s
-                  %(z)s = -(%(x_mod_ymm)s);
+                %(z)s = %(x_mod_y)s;
-               }else{
+            } else if (%(y)s < 0){
-                  %(z)s = - %(x_mod_ymp)s + (%(x_mod_ymp)s != 0 ? %(y)s : 0);
+                if (%(x)s < 0){
-               }
+                    %(z)s = -(%(x_mod_ymm)s);
-            }else if (%(y)s < 0){
+                } else {
-               %(z)s = (%(x_mod_ypm)s) + (%(x_mod_ypm)s != 0 ? %(y)s : 0);
+                    %(z)s = (%(x_mod_ypm)s) + (%(x_mod_ypm)s != 0 ? %(y)s : 0);
-            }else{
+                }
-               %(z)s = %(x_mod_y)s;
+            } else {
+                if (%(x)s < 0){
+                    %(z)s = - %(x_mod_ymp)s + (%(x_mod_ymp)s != 0 ? %(y)s : 0);
+                } else {
+                    %(z)s = %(x_mod_y)s;
+                }
            }
            """) % locals()
@@ -3696,8 +3729,9 @@ class Composite(ScalarOp):
    def init_c_code(self):
        """
-        Return the C code for this Composite Op.
+        Assemble the C code for this Composite Op.
+        The result is assigned to `self._c_code`.
        """
        # It was already called
        if hasattr(self, '_c_code'):

--- a/theano/tensor/basic.py
+++ b/theano/tensor/basic.py
@@ -4822,33 +4822,46 @@ class Reshape(Op):
            return [(1,) * self.ndim]
        requ = node.inputs[1]
+        input_size = mul(*ishapes[0])
        if isinstance(requ, theano.tensor.TensorConstant):
            requ = list(requ.data)
            requ_part = [ele for ele in requ if ele != -1]
            crit = len(requ) - len(requ_part)
            if crit == 1 and len(requ_part) > 0:
-                missing = mul(*ishapes[0]) // mul(*requ_part)
+                # If there are both 0 and -1 in requ, it is impossible
+                # to determine the correct output, but we can at least
+                # prevent a division by 0. We do not want to keep a negative
+                # size here as it could lead to further weird errors
+                # after other optimizations.
+                requ_size = mul(*requ_part)
+                missing = input_size // (1 if requ_size == 0 else requ_size)
                for i, ele in enumerate(requ):
                    if ele == -1:
                        requ[i] = missing
            elif crit == 1:  # we reshape to -1
-                requ = [mul(*ishapes[0])] if ishapes[0] else [1]
+                requ = [input_size] if ishapes[0] else [1]
            elif crit > 1:
                raise ValueError('shape argument to Reshape.perform'
                                 ' must have at most one entry equal to -1')
            return [requ]
        else:
-            new_dims = [node.inputs[1][i] for i in xrange(self.ndim)]
+            requ = [requ[i] for i in xrange(self.ndim)]
            # since new_dims can have negative value (-1), the
            # multiplication of all values should be negated
            # to give a positive value.
            # To avoid optimization complexity, we avoid checking
            # for the case when there are two or more '-1' values.
            if self.ndim:
-                rest_size = (mul(*ishapes[0]) // -mul(*new_dims))
+                requ_size = -mul(*requ)
-            return [tuple([switch(eq(new_dims[i], -1),
+                # If there are both 0 and -1 in requ, it is impossible
+                # to determine the correct output, but we can at least
+                # prevent a division by 0. We do not want to keep a negative
+                # size here as it could lead to further weird errors
+                # after other optimizations.
+                rest_size = input_size // maximum(requ_size, 1)
+            return [tuple([switch(eq(requ[i], -1),
                                  rest_size,
-                                  new_dims[i])
+                                  requ[i])
                           for i in xrange(self.ndim)])]
    def c_code_cache_version(self):

--- a/theano/tensor/nnet/tests/test_neighbours.py
+++ b/theano/tensor/nnet/tests/test_neighbours.py
@@ -363,7 +363,8 @@ class T_Images2Neibs(unittest_tools.InferShapeTester):
            self.assertRaises(ValueError, f, neibs,
                              (1, 1, 3, 320, 320))
            # End up with a step of 0
-            self.assertRaises(ValueError, f, neibs,
+            # This can lead to division by zero in DebugMode
+            self.assertRaises((ValueError, ZeroDivisionError), f, neibs,
                              (3, 320, 320, 1))
    def speed_neibs(self):

--- a/theano/tensor/opt.py
+++ b/theano/tensor/opt.py
@@ -7154,7 +7154,7 @@ def local_elemwise_fusion_op(OP, max_input_fct=lambda node: 32,
                                                "test_presence_of_c_code",
                                                ["x" for x in i.owner.inputs],
                                                ["z" for z in i.owner.outputs],
-                                                {})
+                                                {"fail": "%(fail)s"})
                except MethodNotDefined:
                    catch = True
                except NotImplementedError:
@@ -7218,7 +7218,8 @@ your code will run correctly, but may be slower.""")
            s_new_out[0].owner.op.c_code(s_new_out[0].owner,
                                         "test_presence_of_c_code",
                                         ["x" for x in s_g],
-                                         ["z" for x in s_new_out], {})
+                                         ["z" for x in s_new_out],
+                                         {"fail": "%(fail)s"})
        except MethodNotDefined:
            _logger.info(("%s does not implement the c_code function."
                          " As well as being potentially slow, this disables "