Second try at fixing the integer-division bug: split div into true_div and int_div, dispatched by div_proxy

546cf660 · James Bergstra · d7c2cfcc · 546cf660 · 546cf660 · 546cf660
--- a/theano/gof/cc.py
+++ b/theano/gof/cc.py
@@ -770,9 +770,13 @@ def _execute(cthunk, init_tasks, tasks, error_storage):
                trace = task.trace
            except AttributeError:
                trace = ()
-            exc_type, _exc_value, exc_trace = error_storage
-            exc_value = exc_type(_exc_value, task)
-            exc_value.__thunk_trace__ = trace # this can be used to retrieve the location the Op was declared
+            try:
+                exc_type, _exc_value, exc_trace = error_storage
+                exc_value = exc_type(_exc_value, task)
+                exc_value.__thunk_trace__ = trace # this can be used to retrieve the location the Op was declared
+            except:
+                print >> sys.stderr, 'ERROR retrieving error_storage', error_storage
+                raise
            raise exc_type, exc_value, exc_trace
    execute.cthunk = cthunk
    return execute

--- a/theano/scalar/basic.py
+++ b/theano/scalar/basic.py
@@ -213,7 +213,7 @@ class _scalar_py_operators:
    def __add__(self,other): return add(self,other)
    def __sub__(self,other): return sub(self,other)
    def __mul__(self,other): return mul(self,other)
-    def __div__(self,other): return div(self,other)
+    def __div__(self,other): return div_proxy(self,other)
    def __mod__(self,other): return mod(self,other)
    def __pow__(self,other): return pow(self,other)

@@ -221,7 +221,7 @@ class _scalar_py_operators:
    def __radd__(self,other): return add(other,self)
    def __rsub__(self,other): return sub(other,self)
    def __rmul__(self,other): return mul(other,self)
-    def __rdiv__(self,other): return div(other,self)
+    def __rdiv__(self,other): return div_proxy(other,self)
    def __rmod__(self,other): return mod(other,self)
    def __rpow__(self,other): return pow(other,self)

@@ -567,16 +567,44 @@ class Sub(BinaryScalarOp):
        return gz, -gz
 sub = Sub(upcast_out, name = 'sub')

-class Div(BinaryScalarOp):
+def div_proxy(x, y):
+    """Dispatch to int_div when both x and y have a dtype starting with 'int', else to true_div.
+    """
+    if as_scalar(x).type.dtype.startswith('int') and as_scalar(y).type.dtype.startswith('int'):
+        return int_div(x, y)
+    else:
+        return true_div(x, y)
+
+class TrueDiv(BinaryScalarOp):
+    def output_types(self, types):
+        if all(t.dtype.startswith('int') for t in types):
+            return [float64]
+        else:
+            return super(TrueDiv, self).output_types(types)
    def impl(self, x, y):
-        return x / y
+        x = numpy.asarray(x)
+        y = numpy.asarray(y)
+        if str(x.dtype).startswith('int') and str(y.dtype).startswith('int'):
+            return float(x) / y
+        else:
+            return x / y
    def c_code(self, node, name, (x, y), (z, ), sub):
        if node.inputs[0].type in int_types and node.inputs[1].type in int_types:
-            raise NotImplementedError("For integer arguments the behavior of division in C and in Python differ when the quotient is negative (to implement).")
+            return "%(z)s = ((double)%(x)s) / %(y)s;" % locals()
        return "%(z)s = %(x)s / %(y)s;" % locals()
    def grad(self, (x, y), (gz, )):
        return gz / y, -(gz * x) / (y * y)
-div = Div(upcast_out, name = 'div')
+true_div = TrueDiv(upcast_out, name = 'true_div')
+
+class IntDiv(BinaryScalarOp):
+    def impl(self, x, y):
+        return x // y
+    def c_code(self, node, name, (x,y), (z,), sub):
+        raise NotImplementedError("For integer arguments the behavior of division in C and in Python [can] differ when the quotient is negative.  C actually does not even specify a correct behaviour in this case, it is up to the chip.")
+    def grad(self, inputs, g_output):
+        return [None] * len(inputs)
+int_div = IntDiv(upcast_out, name = 'int_div')
+

 class Mod(BinaryScalarOp):
    def impl(self, x, y):

--- a/theano/scalar/tests/test_basic.py
+++ b/theano/scalar/tests/test_basic.py
@@ -11,6 +11,7 @@ If you do want to rewrite these tests, bear in mind:

 import unittest

+import theano
 from theano.gof import Variable, Op, Env
 from theano import gof

@@ -25,7 +26,7 @@ class test_ScalarOps(unittest.TestCase):

    def test_straightforward(self):
        x, y, z = inputs()
-        e = mul(add(x, y), div(x, y))
+        e = mul(add(x, y), div_proxy(x, y))
        g = Env([x, y], [e])
        fn = gof.DualLinker().accept(g).make_function()
        assert fn(1.0, 2.0) == 1.5
@@ -35,7 +36,7 @@ class test_composite(unittest.TestCase):

    def test_straightforward(self):
        x, y, z = inputs()
-        e = mul(add(x, y), div(x, y))
+        e = mul(add(x, y), div_proxy(x, y))
        C = Composite([x, y], [e])
        c = C.make_node(x, y)
        # print c.c_code(['x', 'y'], ['z'], dict(id = 0))
@@ -59,7 +60,7 @@ class test_composite(unittest.TestCase):

    def test_with_constants(self):
        x, y, z = inputs()
-        e = mul(add(70.0, y), div(x, y))
+        e = mul(add(70.0, y), div_proxy(x, y))
        C = Composite([x, y], [e])
        c = C.make_node(x, y)
        assert "70.0" in c.op.c_code(c, 'dummy', ['x', 'y'], ['z'], dict(id = 0))
@@ -153,6 +154,27 @@ class test_logical(unittest.TestCase):
        for a,b in ((0,1), (0,0), (1,0), (1,1)):
            self.failUnless(fn(a,b) == ~a, (a,))

+
+class test_div(unittest.TestCase):
+    def test_0(self):
+        a = int8()
+        b = int32()
+        c = complex64()
+        d = float64()
+        f = float32()
+
+        print (a/b).owner.op
+        assert isinstance((a/b).owner.op, IntDiv)
+        assert isinstance((b/a).owner.op, IntDiv)
+        assert isinstance((b/d).owner.op, TrueDiv)
+        assert isinstance((b/f).owner.op, TrueDiv)
+        assert isinstance((f/a).owner.op, TrueDiv)
+        assert isinstance((d/b).owner.op, TrueDiv)
+        assert isinstance((d/f).owner.op, TrueDiv)
+        assert isinstance((f/c).owner.op, TrueDiv)
+        assert isinstance((a/c).owner.op, TrueDiv)
+
+
 if __name__ == '__main__':
    unittest.main()


--- a/theano/tensor/basic.py
+++ b/theano/tensor/basic.py
@@ -573,7 +573,7 @@ class _tensor_py_operators:
            return NotImplemented
    def __div__(self,other): 
        try: 
-            return div(self,other)
+            return div_proxy(self,other)
        except Exception, e:
            return NotImplemented
    def __pow__(self,other): 
@@ -599,7 +599,7 @@ class _tensor_py_operators:
    def __radd__(self,other): return add(other,self)
    def __rsub__(self,other): return sub(other,self)
    def __rmul__(self,other): return mul(other,self)
-    def __rdiv__(self,other): return div(other,self)
+    def __rdiv__(self,other): return div_proxy(other,self)
    def __rmod__(self,other): return mod(other,self)
    def __rpow__(self,other): return pow(other,self)

@@ -1292,6 +1292,14 @@ repeat = Repeat()
 # Arithmetics
 ##########################

+def div_proxy(x, y):
+    """Dispatch to int_div when both x and y have a dtype starting with 'int', else to true_div.
+    """
+    if as_tensor_variable(x).type.dtype.startswith('int') and as_tensor_variable(y).type.dtype.startswith('int'):
+        return int_div(x, y)
+    else:
+        return true_div(x, y)
+
 @_scal_elemwise
 def add(a, b):
    """elementwise addition"""
@@ -1305,8 +1313,12 @@ def mul(a, b):
    """elementwise multiplication"""

 @_scal_elemwise
-def div(a, b):
-    """elementwise division"""
+def true_div(a, b):
+    """elementwise [true] division (inverse of multiplication)"""
+
+@_scal_elemwise
+def int_div(a, b):
+    """elementwise integer-division"""

 @_scal_elemwise
 def mod(a, b):
@@ -1324,7 +1336,8 @@ pprint.assign(add, printing.OperatorPrinter('+', -2, 'either'))
 pprint.assign(mul, printing.OperatorPrinter('*', -1, 'either'))
 pprint.assign(sub, printing.OperatorPrinter('-', -2, 'left'))
 pprint.assign(neg, printing.OperatorPrinter('-',  0, 'either'))
-pprint.assign(div, printing.OperatorPrinter('/', -1, 'left'))
+pprint.assign(true_div, printing.OperatorPrinter('/', -1, 'left'))
+pprint.assign(int_div, printing.OperatorPrinter('//', -1, 'left'))
 pprint.assign(pow, printing.OperatorPrinter('**', 1, 'right'))



--- a/theano/tensor/elemwise.py
+++ b/theano/tensor/elemwise.py
@@ -324,8 +324,8 @@ class Elemwise(Op):
      Elemwise(add, {0 : 0}) # represents the += operation (x += y)
      Elemwise(add, {0 : 1}) # represents += on the second argument (y += x)
      Elemwise(mul)(rand(10, 5), rand(1, 5)) # the second input is completed along the first dimension to match the first input
-      Elemwise(div)(rand(10, 5), rand(10, 1)) # same but along the second dimension
-      Elemwise(div)(rand(1, 5), rand(10, 1)) # the output has size (10, 5)
+      Elemwise(true_div)(rand(10, 5), rand(10, 1)) # same but along the second dimension
+      Elemwise(int_div)(rand(1, 5), rand(10, 1)) # the output has size (10, 5)
      Elemwise(log)(rand(3, 4, 5))
    """


--- a/theano/tensor/inplace.py
+++ b/theano/tensor/inplace.py
@@ -168,7 +168,11 @@ def mul_inplace(a, b):
    """elementwise multiplication (inplace on `a`)"""

 @_scal_inplace
-def div_inplace(a, b):
+def true_div_inplace(a, b):
+    """elementwise [true] division (inplace on `a`)"""
+
+@_scal_inplace
+def int_div_inplace(a, b):
    """elementwise division (inplace on `a`)"""

 @_scal_inplace
@@ -183,7 +187,8 @@ pprint.assign(add_inplace, printing.OperatorPrinter('+=', -2, 'either'))
 pprint.assign(mul_inplace, printing.OperatorPrinter('*=', -1, 'either'))
 pprint.assign(sub_inplace, printing.OperatorPrinter('-=', -2, 'left'))
 pprint.assign(neg_inplace, printing.OperatorPrinter('-=',  0, 'either'))
-pprint.assign(div_inplace, printing.OperatorPrinter('/=', -1, 'left'))
+pprint.assign(true_div_inplace, printing.OperatorPrinter('/=', -1, 'left'))
+pprint.assign(int_div_inplace, printing.OperatorPrinter('//=', -1, 'left'))
 pprint.assign(pow_inplace, printing.OperatorPrinter('**=', 1, 'right'))



--- a/theano/tensor/opt.py
+++ b/theano/tensor/opt.py
@@ -392,7 +392,7 @@ class Canonizer(gof.LocalOptimizer):
            takes one to an arbitrary number of inputs, e.g. add or
            mul
    * inverse: an Op class such that inverse(main(x, y), y) == x
-               e.g. sub or div
+               e.g. sub or true_div
    * reciprocal: a function such that main(x, reciprocal(y)) ==
                  inverse(x, y) e.g. neg or inv

@@ -410,7 +410,7 @@ class Canonizer(gof.LocalOptimizer):
    Examples:
      T = theano.tensor
      add_canonizer = Canonizer(T.add, T.sub, T.neg, lambda n, d: sum(n) - sum(d))
-      mul_canonizer = Canonizer(T.mul, T.div, T.inv, lambda n, d: prod(n) / prod(d))
+      mul_canonizer = Canonizer(T.mul, T.true_div, T.inv, lambda n, d: prod(n) / prod(d))
    
    Examples of optimizations mul_canonizer can perform:
      x / x -> 1
@@ -738,7 +738,7 @@ def mul_calculate(num, denum, aslist = False):
            return [v]
    return v

-local_mul_canonizer = Canonizer(T.mul, T.div, T.inv, mul_calculate, False)
+local_mul_canonizer = Canonizer(T.mul, T.true_div, T.inv, mul_calculate, False)
 register_canonicalize(local_mul_canonizer, name = 'local_mul_canonizer')

 @gof.local_optimizer([T.neg])
@@ -757,9 +757,9 @@ def local_mul_to_neg(node):
        return False
 register_specialize(local_mul_to_neg)

-@gof.local_optimizer([T.div])
+@gof.local_optimizer([T.true_div])
 def local_div_to_inv(node):
-    if node.op == T.div and N.all(local_mul_canonizer.get_constant(node.inputs[0]) == 1.0):
+    if node.op == T.true_div and N.all(local_mul_canonizer.get_constant(node.inputs[0]) == 1.0):
        return [T.inv(local_mul_canonizer.merge_num_denum(node.inputs[1:], []))]
    else:
        return False
@@ -971,7 +971,7 @@ def attempt_distribution(factor, num, denum):
            list(itertools.starmap(local_mul_canonizer.merge_num_denum, neg_pairs))), num, denum

 @gof.local_optimizer([T.mul, T.add, T.mul], [T.mul, T.sub, T.mul],
-                     [T.mul, T.add, T.div], [T.mul, T.sub, T.div])
+                     [T.mul, T.add, T.true_div], [T.mul, T.sub, T.true_div])
 def local_greedy_distributor(node):
    """
    This optimization tries to apply distributivity of multiplication

--- a/theano/tensor/tests/test_basic.py
+++ b/theano/tensor/tests/test_basic.py
@@ -268,7 +268,7 @@ MulInplaceTester = makeBroadcastTester(op = inplace.mul_inplace,
                                         grad = _grad_broadcast_binary_normal,
                                         inplace = True)

-DivTester = makeBroadcastTester(op = div,
+DivTester = makeBroadcastTester(op = true_div,
                                  expected = lambda x, y: x / y,
                                  good = dict(same_shapes = (rand(2, 3), rand(2, 3)),
                                              scalar = (rand(2, 3), rand(1, 1)),
@@ -286,7 +286,7 @@ DivTester = makeBroadcastTester(op = div,
                                              scalar = (rand(2, 3), rand(1, 1)),
                                              row = (rand(2, 3), rand(1, 3)),
                                              column = (rand(2, 3), rand(2, 1))))
-DivInplaceTester = makeBroadcastTester(op = inplace.div_inplace,
+DivInplaceTester = makeBroadcastTester(op = inplace.true_div_inplace,
                                         expected = lambda x, y: x / y,
                                         good = dict(same_shapes = (rand(2, 3), rand(2, 3)),
                                                     scalar = (rand(2, 3), rand(1, 1)),
@@ -1136,6 +1136,31 @@ class T_exp(unittest.TestCase):
            numpy.asarray([[ 1.5089518 ,  1.48439076, -4.7820262 ],
            [ 2.04832468,  0.50791564, -1.58892269]])])

+class T_divimpl(unittest.TestCase):
+    def test_impls(self):
+        i = iscalar()
+        ii = lscalar()
+        d = dscalar()
+        f = fscalar()
+        c = cscalar()
+
+        assert numpy.allclose(function([i, ii, d, f, c], i/d)(5, 3, 7.0, 11.0, complex(5,3)),
+                (5.0/7.0))
+        assert numpy.allclose(function([i, ii, d, f, c], d/i)(5, 3, 7.0, 11.0, complex(5,3)),
+                (7.0/5.0))
+        assert numpy.allclose(function([i, ii, d, f, c], i/f)(5, 3, 7.0, 11.0, complex(5,3)),
+                (5.0/11.0))
+        assert numpy.allclose(function([i, ii, d, f, c], f/i)(5, 3, 7.0, 11.0, complex(5,3)),
+                (11.0/5.0))
+        assert numpy.allclose(function([i, ii, d, f, c], i/ii)(5, 3, 7.0, 11.0, complex(5,3)),
+                (5/3))
+        assert numpy.allclose(function([i, ii, d, f, c], ii/i)(5, 3, 7.0, 11.0, complex(5,3)),
+                (3/5))
+        assert numpy.allclose(function([i, ii, d, f, c], true_div(i,ii))(5, 3, 7.0, 11.0, complex(5,3)),
+                (5./3.))
+        assert numpy.allclose(function([i, ii, d, f, c], true_div(ii,i))(5, 3, 7.0, 11.0, complex(5,3)),
+                (3./5.))
+
 # class T_abs(unittest.TestCase):
 #     def test_impl(self):
 #         t = as_tensor_variable(1.0)