提交 0a744788 authored 作者: nouiz's avatar nouiz

Merge pull request #381 from lamblin/intdiv_c_code

C code for integer division
...@@ -16,6 +16,7 @@ import math ...@@ -16,6 +16,7 @@ import math
import warnings import warnings
from copy import copy from copy import copy
from itertools import imap from itertools import imap
from textwrap import dedent
import numpy import numpy
...@@ -1305,15 +1306,69 @@ true_div = TrueDiv(upcast_out, name='true_div') ...@@ -1305,15 +1306,69 @@ true_div = TrueDiv(upcast_out, name='true_div')
class IntDiv(BinaryScalarOp): class IntDiv(BinaryScalarOp):
complex_error = ComplexError(
"Theano does not support integer division (//) on "
"complex numbers, since numpy deprecated it.")
def impl(self, x, y): def impl(self, x, y):
return x // y return x // y
def c_support_code(self):
# We use a macro as python use % as a special string character,
# and the output of c_code may be run through another level
# of string formatting.
return "#define THEANO_MACRO_MOD(x,y) (x % y)"
def c_code(self, node, name, (x, y), (z,), sub): def c_code(self, node, name, (x, y), (z,), sub):
raise NotImplementedError("For integer arguments the behavior of" t = node.inputs[0].type.upcast(*[i.type for i in node.inputs[1:]])
" division in C and in Python [can] differ" if t in imap(str, discrete_types):
" when the quotient is negative. C actually" x_div_y_pp = '(%(x)s / %(y)s)' % locals()
" does not even specify a correct behaviour" x_div_y_mp = '((-%(x)s) / %(y)s)' % locals()
" in this case, it is up to the chip.") x_mod_y_mp = 'THEANO_MACRO_MOD((-%(x)s), %(y)s)' % locals()
x_div_y_pm = '(%(x)s / (-%(y)s))' % locals()
x_mod_y_pm = 'THEANO_MACRO_MOD(%(x)s, (-%(y)s))' % locals()
x_div_y_mm = '((-%(x)s) / (-%(y)s))' % locals()
elif t in imap(str, float_types):
# We need to call different functions of math.h
# depending on the type
if t == 'float32':
floor = 'floorf'
fmod = 'fmodf'
elif t == 'float64':
floor = 'floor'
fmod = 'fmod'
else:
raise NotImplementedError('type not supported', t)
x_div_y_pp = '%(floor)s(%(x)s / %(y)s)' % locals()
x_div_y_mp = '%(floor)s((-%(x)s) / %(y)s)' % locals()
x_mod_y_mp = '%(fmod)s((-%(x)s), %(y)s)' % locals()
x_div_y_pm = '%(floor)s(%(x)s / (-%(y)s))' % locals()
x_mod_y_pm = '%(fmod)s(%(x)s, (-%(y)s))' % locals()
x_div_y_mm = '%(floor)s((-%(x)s) / (-%(y)s))' % locals()
elif t in complex_types:
raise self.complex_error
else:
raise NotImplementedError('type not supported', t)
return dedent("""
if (%(x)s < 0) {
if (%(y)s < 0) {
%(z)s = %(x_div_y_mm)s;
} else {
%(z)s = - %(x_div_y_mp)s - ((%(x_mod_y_mp)s == 0) ? 0 : 1);
}
} else {
if (%(y)s < 0) {
%(z)s = - %(x_div_y_pm)s - ((%(x_mod_y_pm)s == 0) ? 0 : 1);
} else {
%(z)s = %(x_div_y_pp)s;
}
}
""") % locals()
def c_code_cache_version(self):
return (2,)
def grad(self, inputs, g_output): def grad(self, inputs, g_output):
return [None] * len(inputs) return [None] * len(inputs)
...@@ -1346,7 +1401,9 @@ class Mod(BinaryScalarOp): ...@@ -1346,7 +1401,9 @@ class Mod(BinaryScalarOp):
return (5,) return (5,)
def c_support_code(self): def c_support_code(self):
#We use a macro as python use % as a special string caractere. # We use a macro as python use % as a special string character,
# and the output of c_code may be run through another level
# of string formatting.
return "#define THEANO_MACRO_MOD(x,y) (x % y)" return "#define THEANO_MACRO_MOD(x,y) (x % y)"
def c_code(self, node, name, (x, y), (z, ), sub): def c_code(self, node, name, (x, y), (z, ), sub):
...@@ -1383,21 +1440,21 @@ class Mod(BinaryScalarOp): ...@@ -1383,21 +1440,21 @@ class Mod(BinaryScalarOp):
elif str(t) in imap(str, complex_types): elif str(t) in imap(str, complex_types):
raise self.complex_error raise self.complex_error
else: else:
raise NotImplementedError('type not supported', type) raise NotImplementedError('type not supported', t)
return """ return dedent("""
if (%(x)s < 0){ if (%(x)s < 0){
if (%(y)s < 0){ if (%(y)s < 0){
%(z)s = -(%(x_mod_ymm)s); %(z)s = -(%(x_mod_ymm)s);
}else{ }else{
%(z)s = - %(x_mod_ymp)s + (%(x_mod_ymp)s != 0 ? %(y)s : 0); %(z)s = - %(x_mod_ymp)s + (%(x_mod_ymp)s != 0 ? %(y)s : 0);
} }
}else if (%(y)s < 0){ }else if (%(y)s < 0){
%(z)s = (%(x_mod_ypm)s) + (%(x_mod_ypm)s != 0 ? %(y)s : 0); %(z)s = (%(x_mod_ypm)s) + (%(x_mod_ypm)s != 0 ? %(y)s : 0);
}else{ }else{
%(z)s = %(x_mod_y)s; %(z)s = %(x_mod_y)s;
} }
""" % locals() """) % locals()
def grad(self, (x, y), (gz, )): def grad(self, (x, y), (gz, )):
return None, None return None, None
...@@ -1660,51 +1717,51 @@ class RoundHalfToEven(UnaryScalarOp): ...@@ -1660,51 +1717,51 @@ class RoundHalfToEven(UnaryScalarOp):
if not node.outputs[0].type.dtype in ['float32', 'float64']: if not node.outputs[0].type.dtype in ['float32', 'float64']:
Exception("The output should be float32 or float64") Exception("The output should be float32 or float64")
return """ return dedent("""
#ifndef ROUNDING_EPSILON #ifndef ROUNDING_EPSILON
#define ROUNDING_EPSILON 0.0000001 #define ROUNDING_EPSILON 0.0000001
#endif #endif
if (%(x)s < 0.0){ if (%(x)s < 0.0){
// We implement the else part like that: -else( -%(x)s); // We implement the else part like that: -else( -%(x)s);
%(typ)s i; %(typ)s i;
std::modf( -%(x)s, &i ); std::modf( -%(x)s, &i );
// If %(x)s is exactly halfway between two integers // If %(x)s is exactly halfway between two integers
if ((-%(x)s -(i +0.5)) < epsilon){ if ((-%(x)s -(i +0.5)) < epsilon){
// If 'i' is even then return 'i' // If 'i' is even then return 'i'
if (std::fmod( i, 2.0 ) < epsilon){ if (std::fmod( i, 2.0 ) < epsilon){
%(z)s = - i; %(z)s = - i;
}else{ }else{
// Else return the nearest even integer // Else return the nearest even integer
%(z)s = - ceil( i +0.5 ); %(z)s = - ceil( i +0.5 );
} }
}else{ }else{
// round to closest // round to closest
%(z)s = - round(%(x)s+5); %(z)s = - round(%(x)s+5);
} }
}else{ }else{
%(typ)s i; %(typ)s i;
std::modf( %(x)s, &i ); std::modf( %(x)s, &i );
// If %(x)s is exactly halfway between two integers // If %(x)s is exactly halfway between two integers
if ((%(x)s -(i +0.5)) < epsilon){ if ((%(x)s -(i +0.5)) < epsilon){
// If 'i' is even then return 'i' // If 'i' is even then return 'i'
if (std::fmod( i, 2.0 ) < epsilon){ if (std::fmod( i, 2.0 ) < epsilon){
%(z)s = i; %(z)s = i;
}else{ }else{
// Else return the nearest even integer // Else return the nearest even integer
%(z)s = ceil( i +0.5 ); %(z)s = ceil( i +0.5 );
} }
}else{ }else{
// round to closest // round to closest
%(z)s = round(%(x)s+5); %(z)s = round(%(x)s+5);
} }
} }
#undef ROUNDING_EPSILON #undef ROUNDING_EPSILON
""" """)
round_half_to_even = RoundHalfToEven(same_out_float_only) round_half_to_even = RoundHalfToEven(same_out_float_only)
......
...@@ -567,11 +567,9 @@ _good_broadcast_div_mod_normal_float_no_complex = dict( ...@@ -567,11 +567,9 @@ _good_broadcast_div_mod_normal_float_no_complex = dict(
column=(rand(2, 3), rand(2, 1)), column=(rand(2, 3), rand(2, 1)),
dtype_mixup_1=(rand(2, 3), randint_nonzero(2, 3)), dtype_mixup_1=(rand(2, 3), randint_nonzero(2, 3)),
dtype_mixup_2=(randint_nonzero(2, 3), rand(2, 3)), dtype_mixup_2=(randint_nonzero(2, 3), rand(2, 3)),
# Fix problem with integers and uintegers and add them. integer=(randint(2, 3), randint_nonzero(2, 3)),
# Then remove their specific addition to CeilIntDivTester tests. uinteger=(randint(2, 3).astype("uint8"),
# integer=(randint(2, 3), randint_nonzero(2, 3)), randint_nonzero(2, 3).astype("uint8")),
# uinteger=(randint(2, 3).astype("uint8"),
# randint_nonzero(2, 3).astype("uint8")),
# This empty2 doesn't work for some tests. I don't remember why # This empty2 doesn't work for some tests. I don't remember why
#empty2=(numpy.asarray([0]), numpy.asarray([])), #empty2=(numpy.asarray([0]), numpy.asarray([])),
) )
...@@ -610,31 +608,32 @@ if config.floatX=='float32': ...@@ -610,31 +608,32 @@ if config.floatX=='float32':
# float32. # float32.
# This is probably caused by our way of computing the gradient error. # This is probably caused by our way of computing the gradient error.
div_grad_rtol=0.025 div_grad_rtol=0.025
TrueDivTester = makeBroadcastTester(op = tensor.true_div,
expected = lambda x, y: check_floatX((x, y), x / y), TrueDivTester = makeBroadcastTester(
good = _good_broadcast_div_mod_normal_float, op=tensor.true_div,
# integers = (randint(2, 3), randint_nonzero(2, 3)), expected = (lambda x, y:
# dtype_mixup_1 = (rand(2, 3), randint_nonzero(2, 3)), check_floatX((x, y), numpy.true_divide(x, y))),
# dtype_mixup_2 = (randint_nonzero(2, 3), rand(2, 3))), good=_good_broadcast_div_mod_normal_float,
grad = _grad_broadcast_div_mod_normal, grad=_grad_broadcast_div_mod_normal,
grad_rtol=div_grad_rtol, grad_rtol=div_grad_rtol,
) )
TrueDivInplaceTester = makeBroadcastTester(op = inplace.true_div_inplace,
expected = lambda x, y: x / y, TrueDivInplaceTester = makeBroadcastTester(
good = _good_broadcast_div_mod_normal_float_inplace, op=inplace.true_div_inplace,
grad = _grad_broadcast_div_mod_normal, expected=(lambda x, y: numpy.true_divide(x, y)),
grad_rtol=div_grad_rtol, good=copymod(
inplace = True) _good_broadcast_div_mod_normal_float_inplace,
# The output is now in float, we cannot work inplace on an int.
without=['integer', 'uinteger']),
grad=_grad_broadcast_div_mod_normal,
grad_rtol=div_grad_rtol,
inplace=True)
CeilIntDivTester = makeBroadcastTester( CeilIntDivTester = makeBroadcastTester(
op=tensor.ceil_intdiv, op=tensor.ceil_intdiv,
expected=lambda x, y: check_floatX((x, y), (x // y) + ((x % y) != 0)), expected=lambda x, y: check_floatX((x, y), (x // y) + ((x % y) != 0)),
good=copymod(_good_broadcast_div_mod_normal_float_no_complex, good=_good_broadcast_div_mod_normal_float_no_complex,
integer=(randint(2, 3), randint_nonzero(2, 3)),
uinteger=(randint(2, 3).astype("uint8"),
randint_nonzero(2, 3).astype("uint8")),
),
name='CeilIntDiv', name='CeilIntDiv',
# As we implement this function with neq, the gradient returned is always 0. # As we implement this function with neq, the gradient returned is always 0.
# grad=_grad_broadcast_div_mod_normal, # grad=_grad_broadcast_div_mod_normal,
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论