提交 318dd6af 作者: Pascal Lamblin 提交者: GitHub

Merge pull request #5854 from lamblin/fix_div0_crash

Fix hard crash when dividing by 0
...@@ -1924,8 +1924,20 @@ class IntDiv(BinaryScalarOp): ...@@ -1924,8 +1924,20 @@ class IntDiv(BinaryScalarOp):
def c_code(self, node, name, inputs, outputs, sub): def c_code(self, node, name, inputs, outputs, sub):
(x, y) = inputs (x, y) = inputs
(z,) = outputs (z,) = outputs
fail = sub['fail']
t = node.inputs[0].type.upcast(*[i.type for i in node.inputs[1:]]) t = node.inputs[0].type.upcast(*[i.type for i in node.inputs[1:]])
if t in imap(str, discrete_types): if t in imap(str, discrete_types):
# If we are in a gpuarray kernel, %(fail)s exits the kernel,
# and we do not have any error report, and we cannot set
# Python error messages either, so for now we just call the
# cuda function, which returns a binary pattern of all 1s.
check = dedent('''
#ifndef KERNEL
PyErr_SetString(PyExc_ZeroDivisionError, "integer division by zero");
%(fail)s
#endif
''') % locals()
x_div_y_pp = '(%(x)s / %(y)s)' % locals() x_div_y_pp = '(%(x)s / %(y)s)' % locals()
x_div_y_mp = '((-%(x)s) / %(y)s)' % locals() x_div_y_mp = '((-%(x)s) / %(y)s)' % locals()
x_mod_y_mp = 'THEANO_MACRO_MOD((-%(x)s), %(y)s)' % locals() x_mod_y_mp = 'THEANO_MACRO_MOD((-%(x)s), %(y)s)' % locals()
...@@ -1944,6 +1956,7 @@ class IntDiv(BinaryScalarOp): ...@@ -1944,6 +1956,7 @@ class IntDiv(BinaryScalarOp):
else: else:
raise NotImplementedError('type not supported', t) raise NotImplementedError('type not supported', t)
check = ''
x_div_y_pp = '%(floor)s(%(x)s / %(y)s)' % locals() x_div_y_pp = '%(floor)s(%(x)s / %(y)s)' % locals()
x_div_y_mp = '%(floor)s((-%(x)s) / %(y)s)' % locals() x_div_y_mp = '%(floor)s((-%(x)s) / %(y)s)' % locals()
x_mod_y_mp = '%(fmod)s((-%(x)s), %(y)s)' % locals() x_mod_y_mp = '%(fmod)s((-%(x)s), %(y)s)' % locals()
...@@ -1956,15 +1969,18 @@ class IntDiv(BinaryScalarOp): ...@@ -1956,15 +1969,18 @@ class IntDiv(BinaryScalarOp):
raise NotImplementedError('type not supported', t) raise NotImplementedError('type not supported', t)
return dedent(""" return dedent("""
if (%(x)s < 0) { if (%(y)s == 0) {
if (%(y)s < 0) { %(check)s
%(z)s = %(x_div_y_pp)s;
} else if (%(y)s < 0) {
if (%(x)s < 0) {
%(z)s = %(x_div_y_mm)s; %(z)s = %(x_div_y_mm)s;
} else { } else {
%(z)s = - %(x_div_y_mp)s - ((%(x_mod_y_mp)s == 0) ? 0 : 1); %(z)s = - %(x_div_y_pm)s - ((%(x_mod_y_pm)s == 0) ? 0 : 1);
} }
} else { } else {
if (%(y)s < 0) { if (%(x)s < 0) {
%(z)s = - %(x_div_y_pm)s - ((%(x_mod_y_pm)s == 0) ? 0 : 1); %(z)s = - %(x_div_y_mp)s - ((%(x_mod_y_mp)s == 0) ? 0 : 1);
} else { } else {
%(z)s = %(x_div_y_pp)s; %(z)s = %(x_div_y_pp)s;
} }
...@@ -1972,7 +1988,7 @@ class IntDiv(BinaryScalarOp): ...@@ -1972,7 +1988,7 @@ class IntDiv(BinaryScalarOp):
""") % locals() """) % locals()
def c_code_cache_version(self): def c_code_cache_version(self):
return (2,) return (5,)
def grad(self, inputs, g_output): def grad(self, inputs, g_output):
return [inp.zeros_like(dtype=theano.config.floatX) return [inp.zeros_like(dtype=theano.config.floatX)
...@@ -2004,13 +2020,13 @@ class Mod(BinaryScalarOp): ...@@ -2004,13 +2020,13 @@ class Mod(BinaryScalarOp):
return x % y return x % y
def c_code_cache_version(self): def c_code_cache_version(self):
return (5,) return (8,)
def c_support_code(self): def c_support_code(self):
# We use a macro as python use % as a special string character, # We use a macro as python use % as a special string character,
# and the output of c_code may be run through another level # and the output of c_code may be run through another level
# of string formatting. # of string formatting.
return "#define THEANO_MACRO_MOD(x,y) (x % y)" return "#define THEANO_MACRO_MOD(x, y) (x % y)"
def c_code(self, node, name, inputs, outputs, sub): def c_code(self, node, name, inputs, outputs, sub):
""" """
...@@ -2020,6 +2036,7 @@ class Mod(BinaryScalarOp): ...@@ -2020,6 +2036,7 @@ class Mod(BinaryScalarOp):
""" """
(x, y) = inputs (x, y) = inputs
(z,) = outputs (z,) = outputs
fail = sub['fail']
t = node.inputs[0].type.upcast(*[i.type for i in node.inputs[1:]]) t = node.inputs[0].type.upcast(*[i.type for i in node.inputs[1:]])
if (str(t) in imap(str, discrete_types) or if (str(t) in imap(str, discrete_types) or
t in ['uint8', 'int8', 'uint16', 'int16'] or t in ['uint8', 'int8', 'uint16', 'int16'] or
...@@ -2029,6 +2046,16 @@ class Mod(BinaryScalarOp): ...@@ -2029,6 +2046,16 @@ class Mod(BinaryScalarOp):
# keep them out of safety, and verify they are useless with an # keep them out of safety, and verify they are useless with an
# assert. # assert.
assert str(t) in imap(str, discrete_types) assert str(t) in imap(str, discrete_types)
# If we are in a gpuarray kernel, %(fail)s exits the kernel,
# and we do not have any error report, and we cannot set
# Python error messages either, so for now we just call the
# cuda function, returning a binary pattern depending on dtype
check = dedent('''
#ifndef KERNEL
PyErr_SetString(PyExc_ZeroDivisionError, "integer modulo by zero");
%(fail)s
#endif
''') % locals()
x_mod_y = "THEANO_MACRO_MOD(%(x)s, %(y)s)" % locals() x_mod_y = "THEANO_MACRO_MOD(%(x)s, %(y)s)" % locals()
x_mod_ymm = "THEANO_MACRO_MOD(-%(x)s, -%(y)s)" % locals() x_mod_ymm = "THEANO_MACRO_MOD(-%(x)s, -%(y)s)" % locals()
x_mod_ypm = "THEANO_MACRO_MOD(%(x)s, -%(y)s)" % locals() x_mod_ypm = "THEANO_MACRO_MOD(%(x)s, -%(y)s)" % locals()
...@@ -2040,26 +2067,32 @@ class Mod(BinaryScalarOp): ...@@ -2040,26 +2067,32 @@ class Mod(BinaryScalarOp):
# keep them out of safety, and verify they are useless with an # keep them out of safety, and verify they are useless with an
# assert. # assert.
assert str(t) in imap(str, float_types) assert str(t) in imap(str, float_types)
x_mod_y = "fmod(%(x)s,%(y)s)" % locals() check = ''
x_mod_ymm = "fmod(-%(x)s,-%(y)s)" % locals() x_mod_y = "fmod(%(x)s, %(y)s)" % locals()
x_mod_ypm = "fmod(%(x)s,-%(y)s)" % locals() x_mod_ymm = "fmod(-%(x)s, -%(y)s)" % locals()
x_mod_ymp = "fmod(-%(x)s,%(y)s)" % locals() x_mod_ypm = "fmod(%(x)s, -%(y)s)" % locals()
x_mod_ymp = "fmod(-%(x)s, %(y)s)" % locals()
elif str(t) in imap(str, complex_types): elif str(t) in imap(str, complex_types):
raise self.complex_error raise self.complex_error
else: else:
raise NotImplementedError('type not supported', t) raise NotImplementedError('type not supported', t)
return dedent(""" return dedent("""
if (%(x)s < 0){ if (%(y)s == 0) {
if (%(y)s < 0){ %(check)s
%(z)s = -(%(x_mod_ymm)s); %(z)s = %(x_mod_y)s;
}else{ } else if (%(y)s < 0){
%(z)s = - %(x_mod_ymp)s + (%(x_mod_ymp)s != 0 ? %(y)s : 0); if (%(x)s < 0){
} %(z)s = -(%(x_mod_ymm)s);
}else if (%(y)s < 0){ } else {
%(z)s = (%(x_mod_ypm)s) + (%(x_mod_ypm)s != 0 ? %(y)s : 0); %(z)s = (%(x_mod_ypm)s) + (%(x_mod_ypm)s != 0 ? %(y)s : 0);
}else{ }
%(z)s = %(x_mod_y)s; } else {
if (%(x)s < 0){
%(z)s = - %(x_mod_ymp)s + (%(x_mod_ymp)s != 0 ? %(y)s : 0);
} else {
%(z)s = %(x_mod_y)s;
}
} }
""") % locals() """) % locals()
...@@ -3696,8 +3729,9 @@ class Composite(ScalarOp): ...@@ -3696,8 +3729,9 @@ class Composite(ScalarOp):
def init_c_code(self): def init_c_code(self):
""" """
Return the C code for this Composite Op. Assemble the C code for this Composite Op.
The result is assigned to `self._c_code`.
""" """
# It was already called # It was already called
if hasattr(self, '_c_code'): if hasattr(self, '_c_code'):
......
...@@ -4822,33 +4822,46 @@ class Reshape(Op): ...@@ -4822,33 +4822,46 @@ class Reshape(Op):
return [(1,) * self.ndim] return [(1,) * self.ndim]
requ = node.inputs[1] requ = node.inputs[1]
input_size = mul(*ishapes[0])
if isinstance(requ, theano.tensor.TensorConstant): if isinstance(requ, theano.tensor.TensorConstant):
requ = list(requ.data) requ = list(requ.data)
requ_part = [ele for ele in requ if ele != -1] requ_part = [ele for ele in requ if ele != -1]
crit = len(requ) - len(requ_part) crit = len(requ) - len(requ_part)
if crit == 1 and len(requ_part) > 0: if crit == 1 and len(requ_part) > 0:
missing = mul(*ishapes[0]) // mul(*requ_part) # If there are both 0 and -1 in requ_size, it is impossible
# to determine a right output, but we can at least prevent
# a division by 0. We do not want to keep a negative
# size here as it could lead to further weird errors
# after other optimizations.
requ_size = mul(*requ_part)
missing = input_size // (1 if requ_size == 0 else requ_size)
for i, ele in enumerate(requ): for i, ele in enumerate(requ):
if ele == -1: if ele == -1:
requ[i] = missing requ[i] = missing
elif crit == 1: # we reshape to -1 elif crit == 1: # we reshape to -1
requ = [mul(*ishapes[0])] if ishapes[0] else [1] requ = [input_size] if ishapes[0] else [1]
elif crit > 1: elif crit > 1:
raise ValueError('shape argument to Reshape.perform' raise ValueError('shape argument to Reshape.perform'
' must have at most one entry equal to -1') ' must have at most one entry equal to -1')
return [requ] return [requ]
else: else:
new_dims = [node.inputs[1][i] for i in xrange(self.ndim)] requ = [requ[i] for i in xrange(self.ndim)]
# since new_dims can have negative value (-1), the # since new_dims can have negative value (-1), the
# multiplication of all values should be negated # multiplication of all values should be negated
# to give a positive value. # to give a positive value.
# To avoid optimization complexity, we avoid checking # To avoid optimization complexity, we avoid checking
# for the case when there are two or more '-1' values. # for the case when there are two or more '-1' values.
if self.ndim: if self.ndim:
rest_size = (mul(*ishapes[0]) // -mul(*new_dims)) requ_size = -mul(*requ)
return [tuple([switch(eq(new_dims[i], -1), # If there are both 0 and -1 in requ_size, it is impossible
# to determine a right output, but we can at least prevent
# a division by 0. We do not want to keep a negative
# size here as it could lead to further weird errors
# after other optimizations.
rest_size = input_size // maximum(requ_size, 1)
return [tuple([switch(eq(requ[i], -1),
rest_size, rest_size,
new_dims[i]) requ[i])
for i in xrange(self.ndim)])] for i in xrange(self.ndim)])]
def c_code_cache_version(self): def c_code_cache_version(self):
......
...@@ -363,7 +363,8 @@ class T_Images2Neibs(unittest_tools.InferShapeTester): ...@@ -363,7 +363,8 @@ class T_Images2Neibs(unittest_tools.InferShapeTester):
self.assertRaises(ValueError, f, neibs, self.assertRaises(ValueError, f, neibs,
(1, 1, 3, 320, 320)) (1, 1, 3, 320, 320))
# End up with a step of 0 # End up with a step of 0
self.assertRaises(ValueError, f, neibs, # This can lead to division by zero in DebugMode
self.assertRaises((ValueError, ZeroDivisionError), f, neibs,
(3, 320, 320, 1)) (3, 320, 320, 1))
def speed_neibs(self): def speed_neibs(self):
......
...@@ -7154,7 +7154,7 @@ def local_elemwise_fusion_op(OP, max_input_fct=lambda node: 32, ...@@ -7154,7 +7154,7 @@ def local_elemwise_fusion_op(OP, max_input_fct=lambda node: 32,
"test_presence_of_c_code", "test_presence_of_c_code",
["x" for x in i.owner.inputs], ["x" for x in i.owner.inputs],
["z" for z in i.owner.outputs], ["z" for z in i.owner.outputs],
{}) {"fail": "%(fail)s"})
except MethodNotDefined: except MethodNotDefined:
catch = True catch = True
except NotImplementedError: except NotImplementedError:
...@@ -7218,7 +7218,8 @@ your code will run correctly, but may be slower.""") ...@@ -7218,7 +7218,8 @@ your code will run correctly, but may be slower.""")
s_new_out[0].owner.op.c_code(s_new_out[0].owner, s_new_out[0].owner.op.c_code(s_new_out[0].owner,
"test_presence_of_c_code", "test_presence_of_c_code",
["x" for x in s_g], ["x" for x in s_g],
["z" for x in s_new_out], {}) ["z" for x in s_new_out],
{"fail": "%(fail)s"})
except MethodNotDefined: except MethodNotDefined:
_logger.info(("%s does not implement the c_code function." _logger.info(("%s does not implement the c_code function."
" As well as being potentially slow, this disables " " As well as being potentially slow, this disables "
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论