Merge pull request #381 from lamblin/intdiv_c_code

C code for integer division

Merge pull request #381 from lamblin/intdiv_c_code
0a744788 · nouiz · bc578962 · 96211a04 · 0a744788 · 0a744788
--- a/theano/scalar/basic.py
+++ b/theano/scalar/basic.py
@@ -16,6 +16,7 @@ import math
 import warnings
 from copy import copy
 from itertools import imap
+from textwrap import dedent
 import numpy
@@ -1305,15 +1306,69 @@ true_div = TrueDiv(upcast_out, name='true_div')
 class IntDiv(BinaryScalarOp):
+    complex_error = ComplexError(
+                "Theano does not support integer division (//) on "
+                "complex numbers, since numpy deprecated it.")
    def impl(self, x, y):
        return x // y
+    def c_support_code(self):
+        # We use a macro as python use % as a special string character,
+        # and the output of c_code may be run through another level
+        # of string formatting.
+        return "#define THEANO_MACRO_MOD(x,y) (x % y)"
    def c_code(self, node, name, (x, y), (z,), sub):
-        raise NotImplementedError("For integer arguments the behavior of"
+        t = node.inputs[0].type.upcast(*[i.type for i in node.inputs[1:]])
-                                  " division in C and in Python [can] differ"
+        if t in imap(str, discrete_types):
-                                  " when the quotient is negative.  C actually"
+            x_div_y_pp = '(%(x)s / %(y)s)' % locals()
-                                  " does not even specify a correct behaviour"
+            x_div_y_mp = '((-%(x)s) / %(y)s)' % locals()
-                                  " in this case, it is up to the chip.")
+            x_mod_y_mp = 'THEANO_MACRO_MOD((-%(x)s), %(y)s)' % locals()
+            x_div_y_pm = '(%(x)s / (-%(y)s))' % locals()
+            x_mod_y_pm = 'THEANO_MACRO_MOD(%(x)s, (-%(y)s))' % locals()
+            x_div_y_mm = '((-%(x)s) / (-%(y)s))' % locals()
+        elif t in imap(str, float_types):
+            # We need to call different functions of math.h
+            # depending on the type
+            if t == 'float32':
+                floor = 'floorf'
+                fmod = 'fmodf'
+            elif t == 'float64':
+                floor = 'floor'
+                fmod = 'fmod'
+            else:
+                raise NotImplementedError('type not supported', t)
+            x_div_y_pp = '%(floor)s(%(x)s / %(y)s)' % locals()
+            x_div_y_mp = '%(floor)s((-%(x)s) / %(y)s)' % locals()
+            x_mod_y_mp = '%(fmod)s((-%(x)s), %(y)s)' % locals()
+            x_div_y_pm = '%(floor)s(%(x)s / (-%(y)s))' % locals()
+            x_mod_y_pm = '%(fmod)s(%(x)s, (-%(y)s))' % locals()
+            x_div_y_mm = '%(floor)s((-%(x)s) / (-%(y)s))' % locals()
+        elif t in complex_types:
+            raise self.complex_error
+        else:
+            raise NotImplementedError('type not supported', t)
+        return dedent("""
+            if (%(x)s < 0) {
+                if (%(y)s < 0) {
+                    %(z)s = %(x_div_y_mm)s;
+                } else {
+                    %(z)s = - %(x_div_y_mp)s - ((%(x_mod_y_mp)s == 0) ? 0 : 1);
+                }
+            } else {
+                if (%(y)s < 0) {
+                    %(z)s = - %(x_div_y_pm)s - ((%(x_mod_y_pm)s == 0) ? 0 : 1);
+                } else {
+                    %(z)s = %(x_div_y_pp)s;
+                }
+            }
+            """) % locals()
+    def c_code_cache_version(self):
+        return (2,)
    def grad(self, inputs, g_output):
        return [None] * len(inputs)
@@ -1346,7 +1401,9 @@ class Mod(BinaryScalarOp):
        return (5,)
    def c_support_code(self):
-        #We use a macro as python use % as a special string caractere.
+        # We use a macro as python use % as a special string character,
+        # and the output of c_code may be run through another level
+        # of string formatting.
        return "#define THEANO_MACRO_MOD(x,y) (x % y)"
    def c_code(self, node, name, (x, y), (z, ), sub):
@@ -1383,21 +1440,21 @@ class Mod(BinaryScalarOp):
        elif str(t) in imap(str, complex_types):
            raise self.complex_error
        else:
-            raise NotImplementedError('type not supported', type)
+            raise NotImplementedError('type not supported', t)
-        return """
+        return dedent("""
-if (%(x)s < 0){
+            if (%(x)s < 0){
-   if (%(y)s < 0){
+               if (%(y)s < 0){
-      %(z)s = -(%(x_mod_ymm)s);
+                  %(z)s = -(%(x_mod_ymm)s);
-   }else{
+               }else{
-      %(z)s = - %(x_mod_ymp)s + (%(x_mod_ymp)s != 0 ? %(y)s : 0);
+                  %(z)s = - %(x_mod_ymp)s + (%(x_mod_ymp)s != 0 ? %(y)s : 0);
-   }
+               }
-}else if (%(y)s < 0){
+            }else if (%(y)s < 0){
-   %(z)s = (%(x_mod_ypm)s) + (%(x_mod_ypm)s != 0 ? %(y)s : 0);
+               %(z)s = (%(x_mod_ypm)s) + (%(x_mod_ypm)s != 0 ? %(y)s : 0);
-}else{
+            }else{
-   %(z)s = %(x_mod_y)s;
+               %(z)s = %(x_mod_y)s;
-}
+            }
-        """ % locals()
+            """) % locals()
    def grad(self, (x, y), (gz, )):
        return None, None
@@ -1660,51 +1717,51 @@ class RoundHalfToEven(UnaryScalarOp):
        if not node.outputs[0].type.dtype in ['float32', 'float64']:
            Exception("The output should be float32 or float64")
-        return """
+        return dedent("""
-#ifndef ROUNDING_EPSILON
+            #ifndef ROUNDING_EPSILON
-#define ROUNDING_EPSILON 0.0000001
+            #define ROUNDING_EPSILON 0.0000001
-#endif
+            #endif
-    if (%(x)s < 0.0){
+            if (%(x)s < 0.0){
-      // We implement the else part like that: -else( -%(x)s);
+              // We implement the else part like that: -else( -%(x)s);
-      %(typ)s i;
+              %(typ)s i;
-      std::modf( -%(x)s, &i );
+              std::modf( -%(x)s, &i );
-      // If %(x)s is exactly halfway between two integers
+              // If %(x)s is exactly halfway between two integers
-      if ((-%(x)s -(i +0.5)) < epsilon){
+              if ((-%(x)s -(i +0.5)) < epsilon){
-          // If 'i' is even then return 'i'
+                  // If 'i' is even then return 'i'
-        if (std::fmod( i, 2.0 ) < epsilon){
+                if (std::fmod( i, 2.0 ) < epsilon){
-          %(z)s = - i;
+                  %(z)s = - i;
-        }else{
+                }else{
-          // Else return the nearest even integer
+                  // Else return the nearest even integer
-          %(z)s = - ceil( i +0.5 );
+                  %(z)s = - ceil( i +0.5 );
-        }
+                }
-      }else{
+              }else{
-        // round to closest
+                // round to closest
-        %(z)s = - round(%(x)s+5);
+                %(z)s = - round(%(x)s+5);
-      }
+              }
-    }else{
+            }else{
-      %(typ)s i;
+              %(typ)s i;
-      std::modf( %(x)s, &i );
+              std::modf( %(x)s, &i );
-      // If %(x)s is exactly halfway between two integers
+              // If %(x)s is exactly halfway between two integers
-      if ((%(x)s -(i +0.5)) < epsilon){
+              if ((%(x)s -(i +0.5)) < epsilon){
-          // If 'i' is even then return 'i'
+                  // If 'i' is even then return 'i'
-        if (std::fmod( i, 2.0 ) < epsilon){
+                if (std::fmod( i, 2.0 ) < epsilon){
-          %(z)s = i;
+                  %(z)s = i;
-        }else{
+                }else{
-          // Else return the nearest even integer
+                  // Else return the nearest even integer
-          %(z)s =  ceil( i +0.5 );
+                  %(z)s =  ceil( i +0.5 );
-        }
+                }
-      }else{
+              }else{
-        // round to closest
+                // round to closest
-        %(z)s = round(%(x)s+5);
+                %(z)s = round(%(x)s+5);
-      }
+              }
-    }
+            }
-#undef ROUNDING_EPSILON
+            #undef ROUNDING_EPSILON
-        """
+            """)
 round_half_to_even = RoundHalfToEven(same_out_float_only)

--- a/theano/tensor/tests/test_basic.py
+++ b/theano/tensor/tests/test_basic.py
@@ -567,11 +567,9 @@ _good_broadcast_div_mod_normal_float_no_complex = dict(
    column=(rand(2, 3), rand(2, 1)),
    dtype_mixup_1=(rand(2, 3), randint_nonzero(2, 3)),
    dtype_mixup_2=(randint_nonzero(2, 3), rand(2, 3)),
-# Fix problem with integers and uintegers and add them.
+    integer=(randint(2, 3), randint_nonzero(2, 3)),
-# Then remove their specific addition to CeilIntDivTester tests.
+    uinteger=(randint(2, 3).astype("uint8"),
-#    integer=(randint(2, 3), randint_nonzero(2, 3)),
+              randint_nonzero(2, 3).astype("uint8")),
-#    uinteger=(randint(2, 3).astype("uint8"),
-#              randint_nonzero(2, 3).astype("uint8")),
    # This empty2 doesn't work for some tests. I don't remember why
    #empty2=(numpy.asarray([0]), numpy.asarray([])),
    )
@@ -610,31 +608,32 @@ if config.floatX=='float32':
    # float32.
    # This is probably caused by our way of computing the gradient error.
    div_grad_rtol=0.025
-TrueDivTester = makeBroadcastTester(op = tensor.true_div,
-                                  expected = lambda x, y: check_floatX((x, y), x / y),
+TrueDivTester = makeBroadcastTester(
-                                  good = _good_broadcast_div_mod_normal_float,
+        op=tensor.true_div,
-#                                               integers = (randint(2, 3), randint_nonzero(2, 3)),
+        expected = (lambda x, y:
-#                                               dtype_mixup_1 = (rand(2, 3), randint_nonzero(2, 3)),
+            check_floatX((x, y), numpy.true_divide(x, y))),
-#                                               dtype_mixup_2 = (randint_nonzero(2, 3), rand(2, 3))),
+        good=_good_broadcast_div_mod_normal_float,
-                                  grad = _grad_broadcast_div_mod_normal,
+        grad=_grad_broadcast_div_mod_normal,
-                                  grad_rtol=div_grad_rtol,
+        grad_rtol=div_grad_rtol,
-                                )
+        )
-TrueDivInplaceTester = makeBroadcastTester(op = inplace.true_div_inplace,
-                                         expected = lambda x, y: x / y,
+TrueDivInplaceTester = makeBroadcastTester(
-                                         good = _good_broadcast_div_mod_normal_float_inplace,
+        op=inplace.true_div_inplace,
-                                         grad = _grad_broadcast_div_mod_normal,
+        expected=(lambda x, y: numpy.true_divide(x, y)),
-                                         grad_rtol=div_grad_rtol,
+        good=copymod(
-                                         inplace = True)
+            _good_broadcast_div_mod_normal_float_inplace,
+            # The output is now in float, we cannot work inplace on an int.
+            without=['integer', 'uinteger']),
+        grad=_grad_broadcast_div_mod_normal,
+        grad_rtol=div_grad_rtol,
+        inplace=True)
 CeilIntDivTester = makeBroadcastTester(
    op=tensor.ceil_intdiv,
    expected=lambda x, y: check_floatX((x, y), (x // y) + ((x % y) != 0)),
-    good=copymod(_good_broadcast_div_mod_normal_float_no_complex,
+    good=_good_broadcast_div_mod_normal_float_no_complex,
-                 integer=(randint(2, 3), randint_nonzero(2, 3)),
-                 uinteger=(randint(2, 3).astype("uint8"),
-                           randint_nonzero(2, 3).astype("uint8")),
-                 ),
    name='CeilIntDiv',
    # As we implement this function with neq, the gradient returned is always 0.
 #    grad=_grad_broadcast_div_mod_normal,