injected new elemwise in tensor.py

af1b2de4 · olivier@olivier-desktop · 761c7f9f · af1b2de4 · af1b2de4 · af1b2de4
--- a/_test_elemwise2.py
+++ b/_test_elemwise2.py
@@ -22,19 +22,37 @@ def env(inputs, outputs, validate = True, features = []):
 class _test_DimShuffle(unittest.TestCase):
-    def test_straightforward(self):
+    def with_linker(self, linker):
-        x, y, z = inputs()
+        for xsh, shuffle, zsh in [((2, 3), (1, 'x', 0), (3, 1, 2)),
-        e0 = DimShuffle(x, [1, 'x', 0]).out
+                                  ((1, 2, 3), (1, 2), (2, 3)),
-        f = gof.PerformLinker(env([x], [e0])).make_function(inplace=True)
+                                  ((1, 2, 1, 3), (1, 3), (2, 3)),
-        assert f(numpy.ones((2, 3))).shape == (3, 1, 2)
+                                  ((2, 3, 4), (2, 1, 0), (4, 3, 2)),
+                                  ((2, 3, 4), ('x', 2, 1, 0, 'x'), (1, 4, 3, 2, 1)),
+                                  ((1, 4, 3, 2, 1), (3, 2, 1), (2, 3, 4)),
+                                  ((1, 1, 4), (1, 2), (1, 4))]:
+            x = modes.build(Tensor('float64', [1 * (entry == 1) for entry in xsh], name = 'x'))
+            e = DimShuffle(x, shuffle).out
+#             print shuffle, e.owner.grad(e.owner.inputs, e.owner.outputs).owner.new_order
+            f = linker(env([x], [e])).make_function(inplace=False)
+            assert f(numpy.ones(xsh)).shape == zsh
+    def test_perform(self):
+        self.with_linker(gof.PerformLinker)
+#     def test_straightforward(self):
+#         x, y, z = inputs()
+#         e0 = DimShuffle(x, [1, 'x', 0]).out
+#         f = gof.PerformLinker(env([x], [e0])).make_function(inplace=True)
+#         assert f(numpy.ones((2, 3))).shape == (3, 1, 2)
 class _test_Broadcast(unittest.TestCase):
    def with_linker(self, linker):
-        for xsh, ysh in [((5, 5), (5, 5)),
+        for xsh, ysh in [((3, 5), (3, 5)),
-                         ((5, 5), (1, 5)),
+                         ((3, 5), (1, 5)),
-                         ((5, 5), (5, 1)),
+                         ((3, 5), (3, 1)),
                         ((1, 5), (5, 1)),
                         ((1, 1), (1, 1)),
                         ((2, 3, 4, 5), (2, 3, 4, 5)),
@@ -53,6 +71,10 @@ class _test_Broadcast(unittest.TestCase):
            yv = numpy.asarray(numpy.random.rand(*ysh))
            zv = xv + yv
+#             print "AAAAAAAAAAAAAAAAAA"
+#             print f(xv, yv)
+#             print zv
+#             print "BBBBBBBBBBBBBBBBBB"
            self.failUnless((f(xv, yv) == zv).all())
    def with_linker_inplace(self, linker):
@@ -105,7 +127,9 @@ class _test_CAReduce(unittest.TestCase):
        for xsh, tosum in [((5, 6), (0, 1)),
                           ((5, 6), (0, )),
                           ((5, 6), (1, )),
-                           ((2, 3, 4, 5), (0, 1, 3))]:
+                           ((5, 6), ()),
+                           ((2, 3, 4, 5), (0, 1, 3)),
+                           ((), ())]:
            x = modes.build(Tensor('float64', [1 * (entry == 1) for entry in xsh], name = 'x'))
            e = CAReduce(Add, [x], dimensions_to_reduce = tosum).out
            f = linker(env([x], [e])).make_function(inplace = False)
@@ -113,7 +137,13 @@ class _test_CAReduce(unittest.TestCase):
            zv = xv
            for axis in reversed(sorted(tosum)):
                zv = numpy.add.reduce(zv, axis)
-            self.failUnless((f(xv) - zv < 1e-10).all())
+#             print "AAAAAAAAAAAAAAAAAA"
+#             print xsh, tosum
+#             print f(xv)
+#             print zv
+#             print f(xv) - zv
+#             print "BBBBBBBBBBBBBBBBBB"
+            self.failUnless((numpy.abs(f(xv) - zv) < 1e-10).all())
    def test_perform(self):
        self.with_linker(gof.PerformLinker)
@@ -123,27 +153,27 @@ class _test_CAReduce(unittest.TestCase):
 if __name__ == '__main__':
-#    unittest.main()
+    unittest.main()
-    x = modes.build(Tensor('float64', [0, 0], name = 'x'))
+#     x = modes.build(Tensor('float64', [0, 0], name = 'x'))
-    y = modes.build(Tensor('float64', [0, 0], name = 'y'))
+#     y = modes.build(Tensor('float64', [0, 0], name = 'y'))
-    e = Broadcast(SquareDiff, (x, y), {0:0}).out
+#     e = Broadcast(SquareDiff, (x, y), {0:0}).out
-    f = gof.CLinker(env([x, y], [e])).make_function(inplace = False)
+#     f = gof.CLinker(env([x, y], [e])).make_function(inplace = False)
-    xv = numpy.random.rand(1000, 1000)
+#     xv = numpy.random.rand(1000, 1000)
-    yv = numpy.random.rand(1000, 1000)
+#     yv = numpy.random.rand(1000, 1000)
-    zv = numpy.random.rand(1000, 1000)
+#     zv = numpy.random.rand(1000, 1000)
-    add = numpy.frompyfunc(lambda x, y: x + y, 2, 1)
+#     add = numpy.frompyfunc(lambda x, y: x + y, 2, 1)
-    t0 = time.time()
+#     t0 = time.time()
-    for i in xrange(100):
+#     for i in xrange(100):
-        xv -= yv
+#         xv -= yv
-        xv *= xv
+#         xv *= xv
-#        xv += yv
+# #        xv += yv
-    print time.time() - t0
+#     print time.time() - t0
-    t0 = time.time()
+#     t0 = time.time()
-    for i in xrange(100):
+#     for i in xrange(100):
-        f(xv, yv)
+#         f(xv, yv)
-    print time.time() - t0
+#     print time.time() - t0

--- a/_test_tensor.py
+++ b/_test_tensor.py
@@ -7,7 +7,9 @@ from compile import Function, eval_outputs
 import gradient
 import gof, gof.graph
 from gof.python25 import any
+import gof
+from elemwise2 import DimShuffle
 def _numpy_checker(x, y):
    """
@@ -58,6 +60,15 @@ def verify_grad(testcase, op_cls, pt, n_tests=1, rng=numpy.random, eps=0.0000001
        if not isinstance(analytic_grad, (list, tuple)):
            analytic_grad = [analytic_grad]
+#         if num_grad.max_err(analytic_grad) > 1.0e-4:
+#             print "aaaaaaaaaa"
+#             print gof.Env(tensor_pt, [cost])
+#             print gof.Env(tensor_pt, symbolic_grad)
+#             print analytic_grad
+#             print num_grad.gf
+#             print num_grad.max_err(analytic_grad)
+#             print "bbbbbbbbbb"
        if num_grad.max_err(analytic_grad) > 1.0e-4:
            raise Exception(verify_grad.E_grad)
 verify_grad.E_grad = 'gradient error exceeded tolerance'
@@ -361,6 +372,15 @@ class T_add(unittest.TestCase):
                f = Function([a,b], [fn(a, b)], linker_cls = gof.CLinker)
                self.failUnless(numpy.all(fn(a.data, b.data) == f(a.data, b.data)))
+    def test_grad_scalar_l(self):
+        verify_grad(self, Add, [numpy.asarray([3.0]), numpy.random.rand(3)])
+    def test_grad_scalar_r(self):
+        verify_grad(self, Add, [numpy.random.rand(3), numpy.asarray([3.0])])
+    def test_grad_row(self):
+        verify_grad(self, Add, [numpy.random.rand(3, 5), numpy.random.rand(1, 5)])
+    def test_grad_col(self):
+        verify_grad(self, Add, [numpy.random.rand(3, 5), numpy.random.rand(3, 1)])
 class T_abs(unittest.TestCase):
    def test_impl(self):
@@ -381,8 +401,8 @@ class T_abs(unittest.TestCase):
    class AbsBadGrad(tensor._Elemwise):
        def impl(self, x):
            return numpy.abs(x)
-        def grad(self, x, gz):
+        def grad(self, (x, ), (gz, )):
-            return scale(gz * sgn(x),0.9)
+            return mul(gz * sgn(x),0.9),
        def c_foreach(self, (x_i, ), (z_i, )):
            return "z_i = abs(x_i);"
@@ -401,7 +421,7 @@ class T_fill(unittest.TestCase):
        o = t.owner
        self.failUnless(o.inputs[0].broadcastable == (0,))
 #        self.failUnless(o.inputs[0].dtype[0:3] == 'int')
-        self.failUnless(o.inputs[1].broadcastable == ())
+        self.failUnless(o.inputs[1].broadcastable == (1,))
 #        self.failUnless(o.inputs[1].dtype[0:3] == 'flo')
        self.failUnless(o.outputs[0].broadcastable == (0,))
 #        self.failUnless(o.outputs[0].dtype[0:3] == 'flo')
@@ -432,47 +452,70 @@ class T_mul(unittest.TestCase):
    def test_elemwise(self):
        a = astensor(0.0)
        b = astensor(0.0)
-        check_eq2_both(self, [a,b], mul_elemwise(a,b), [3.0, 4.0], 12.0)
+        check_eq2_both(self, [a,b], mul(a,b), [3.0, 4.0], 12.0)
-        check_eq2_both(self, [a,b], mul_elemwise(b,a), [-1.0,2.0], -2.0)
+        check_eq2_both(self, [a,b], mul(b,a), [-1.0,2.0], -2.0)
-        self.failUnless(isinstance(mul(a,b).owner, Scale))
+        #self.failUnless(isinstance(mul(a,b).owner, Scale))
        a = astensor(numpy.ones(2))
        b = astensor(numpy.ones(2))
        aa = numpy.asarray([-0.5, 4.0])
        bb = numpy.asarray([-0.5, 2.0])
-        check_eq2_both(self, [a,b], mul_elemwise(a,b), [aa,bb], numpy.asarray([0.25, 8.0]))
+        check_eq2_both(self, [a,b], mul(a,b), [aa,bb], numpy.asarray([0.25, 8.0]))
-        check_eq2_both(self, [a,b], mul_elemwise(a,b), [bb,aa], numpy.asarray([0.25, 8.0]))
+        check_eq2_both(self, [a,b], mul(a,b), [bb,aa], numpy.asarray([0.25, 8.0]))
-        self.failUnless(isinstance(mul(a,b).owner, MulElemwise))
+        #self.failUnless(isinstance(mul(a,b).owner, MulElemwise))
    def test_scalar(self):
        r = numpy.random.rand(2,3)
        a = astensor(r)
        b = astensor(2.0)
-        check_eq2_both(self, [a,b], scale(a,b), [r, 2.0], r*2.0)
+        check_eq2_both(self, [a,b], mul(a,b), [r, 2.0], r*2.0)
-        check_eq2_both(self, [a,b], scale(a,b), [r, 4.0], r*4.0)
+        check_eq2_both(self, [a,b], mul(a,b), [r, 4.0], r*4.0)
        self.failUnless(b.data == 2.0)
-    def test_operator(self):
+    def test_rowcol(self):
-        a = astensor([1,1])
+        r1 = numpy.random.rand(3,5)
-        aa = astensor([1,1])
+        r2 = numpy.random.rand(1,5)
-        b = astensor(4)
+        r3 = numpy.random.rand(3,1)
-        self.failUnless(isinstance((a*b).owner, Scale))
+        a1, a2, a3 = astensor(r1), astensor(r2), astensor(r3)
-        self.failUnless(isinstance((b*a).owner, Scale))
+        check_eq2_both(self, [a1,a2], mul(a1,a2), [r1, r2], r1*r2)
-        self.failUnless(isinstance((a*aa).owner, MulElemwise))
+        check_eq2_both(self, [a1,a3], mul(a1,a3), [r1, r3], r1*r3)
-        self.failUnless(isinstance((aa*a).owner, MulElemwise))
+    def test_grad_elemwise(self):
+        verify_grad(self, Mul, [numpy.random.rand(3,4), numpy.random.rand(3,4)])
+    def test_grad_scalar_l(self):
+        verify_grad(self, Mul, [numpy.asarray([3.0]), numpy.random.rand(3)])
+    def test_grad_scalar_r(self):
+        verify_grad(self, Mul, [numpy.random.rand(3), numpy.asarray([3.0])])
+    def test_grad_row(self):
+        verify_grad(self, Mul, [numpy.random.rand(3, 5), numpy.random.rand(1, 5)])
+    def test_grad_row2(self):
+        op = lambda x, y: Mul(x, DimShuffle(y, ['x', 0]).out)
+        verify_grad(self, op, [numpy.random.rand(3, 5), numpy.random.rand(5)])
+    def test_grad_col(self):
+        verify_grad(self, Mul, [numpy.random.rand(3, 5), numpy.random.rand(3, 1)])
+#     def test_operator(self):
+#         a = astensor([1,1])
+#         aa = astensor([1,1])
+#         b = astensor(4)
+#         self.failUnless(isinstance((a*b).owner, Scale))
+#         self.failUnless(isinstance((b*a).owner, Scale))
+#         self.failUnless(isinstance((a*aa).owner, MulElemwise))
+#         self.failUnless(isinstance((aa*a).owner, MulElemwise))
    def test_wrong_shapes(self):
        a = astensor(numpy.ones(3))
        b = astensor(numpy.ones(4))
        try:
-            check_eq2(self, [a,b], MulElemwise(a,b).out,
+            check_eq2(self, [a,b], Mul(a,b).out,
                      [numpy.ones(3), numpy.ones(4)], 1.0)
            self.fail()
        except ValueError, e:
-            self.failUnless(e[0] is tensor._assert_same_shapes.E_shape)
+            self.failUnless('shape mismatch' in str(e))
        try:
-            check_eq2_c(self, [a,b], MulElemwise(a,b).out,
+            check_eq2_c(self, [a,b], Mul(a,b).out,
                        [numpy.ones(3), numpy.ones(4)], 1.0)
            self.fail()
        except ValueError, e:
@@ -482,14 +525,14 @@ class T_div(unittest.TestCase):
    def setUp(self):
        numpy.random.seed(9999)
    def test_grad_e(self):
-        verify_grad(self, DivElemwise, [numpy.ones(()), numpy.ones(())])
+        verify_grad(self, Div, [numpy.random.rand(3), numpy.ones(3)])
-        verify_grad(self, DivElemwise, [numpy.random.rand(3), numpy.ones(3)])
+        verify_grad(self, Div, [numpy.random.rand(3,5), numpy.random.rand(3,5)+0.1])
-        verify_grad(self, DivElemwise, [numpy.random.rand(3,5), numpy.random.rand(3,5)+0.1])
+        verify_grad(self, Div, [numpy.ones(()), numpy.ones(())])
    def test_grad_sl(self):
-        verify_grad(self, DivElemwise, [numpy.ones(()), numpy.ones(())])
+        verify_grad(self, Div, [numpy.ones((3, 5)), numpy.ones((1, 1))])
-        verify_grad(self, DivElemwise, [numpy.random.rand(3), numpy.ones(3)])
+        verify_grad(self, Div, [numpy.random.rand(3), numpy.ones((1, ))])
-        verify_grad(self, DivElemwise, [numpy.random.rand(3,5), numpy.random.rand(3,5)+0.1])
+        verify_grad(self, Div, [numpy.random.rand(3,5), numpy.random.rand(1,1)])
 class T_log2(unittest.TestCase):
    def test0(self):
@@ -509,12 +552,16 @@ class T_pow(unittest.TestCase):
    def setUp(self):
        numpy.random.seed(9999)
    def test_elemwise(self):
-        verify_grad(self, DivElemwise, [numpy.random.rand(3,4), numpy.random.rand(3,4)+0.1])
+        verify_grad(self, Div, [numpy.random.rand(3,4), numpy.random.rand(3,4)+0.1])
-        verify_grad(self, PowElemwise, [numpy.random.rand(3,4), numpy.random.rand(3,4)])
+        verify_grad(self, Pow, [numpy.random.rand(3,4), numpy.random.rand(3,4)])
    def test_scalar_l(self):
-        verify_grad(self, PowScalarL, [numpy.random.rand(3), numpy.asarray(3.0)])
+        verify_grad(self, Pow, [numpy.asarray([3.0]), numpy.random.rand(3)])
    def test_scalar_r(self):
-        verify_grad(self, PowScalarR, [numpy.random.rand(3), numpy.asarray(3.0)])
+        verify_grad(self, Pow, [numpy.random.rand(3), numpy.asarray([3.0])])
+    def test_row(self):
+        verify_grad(self, Pow, [numpy.random.rand(3, 5), numpy.random.rand(1, 5)])
+    def test_col(self):
+        verify_grad(self, Pow, [numpy.random.rand(3, 5), numpy.random.rand(3, 1)])
 class _testCase_matinv(unittest.TestCase):

--- a/base_tensor.py
+++ b/base_tensor.py
@@ -94,7 +94,7 @@ class BaseTensor(ResultBase):
                    'complex128': (complex, 'theano_complex128', 'NPY_COMPLEX128'),
                    'complex64': (complex, 'theano_complex64', 'NPY_COMPLEX64')}[self.dtype]
        except KeyError:
-            raise TypeError("Unsupported dtype for BaseTensor: %s" % self.dtype)
+            raise TypeError("Unsupported dtype for %s: %s" % (self.__class__.__name__, self.dtype))
    #
    # Hash for constant folding

--- a/elemwise2.py
+++ b/elemwise2.py
@@ -3,12 +3,16 @@ import elemwise_cgen as cgen
 import numpy
 from gof import Op, Viewer, Destroyer
-from tensor import Tensor
+from base_tensor import BaseTensor as Tensor
 from scalar import upcast, Scalar
 import scalar_ops
 import gof
+def astensor(data):
+    assert isinstance(data, Tensor)
+    return data
 ##################
 ### DimShuffle ###
@@ -18,6 +22,8 @@ class DimShuffle(Op, Viewer):
    def __init__(self, input, new_order, inplace = True):
+        input = astensor(input)
        ib = input.broadcastable
        ob = []
        for value in new_order:
@@ -36,12 +42,22 @@ class DimShuffle(Op, Viewer):
        self.inplace = inplace
-        self.numorder = [x for x in new_order if type(x) == int]
+        self.drop = []
-        self.is_transposition = sorted(new_order) == range(len(ib))
+        self.augment = []
-        self.dup_dims = len(set(self.numorder)) != len(self.numorder)
+        i2j = {}
-        self.all_dims = len(set(self.numorder)) == len(ib)
+        j = 0
-        if self.dup_dims or not self.all_dims:
+        for i, b in enumerate(ib):
-            raise NotImplementedError("You must provide a permutation of *all* the input dimensions with *no duplicates*.")
+            if i not in new_order:
+                if b == 1:
+                    self.drop.append(i)
+                else:
+                    raise NotImplementedError("You cannot drop a non-broadcastable dimension.")
+            else:
+                i2j[i] = j
+                j += 1
+        self.shuffle = [i2j[x] for x in new_order if x != 'x']
+        self.augment = [i for i, x in enumerate(new_order) if x == 'x']
    def clone_with_new_inputs(self, *new_inputs):
        return DimShuffle(new_inputs[0], self.new_order, self.inplace)
@@ -53,19 +69,31 @@ class DimShuffle(Op, Viewer):
            return {}
    def perform(self):
-        res = self.inputs[0].data.transpose(self.numorder)
+        res = self.inputs[0].data
        shape = list(res.shape)
-        new_shape = []
+        for drop in reversed(self.drop):
-        for entry in self.new_order:
+            shape.pop(drop)
-            if entry == 'x':
+        res = res.reshape(shape)
-                new_shape.append(1)
-            else:
+        res = res.transpose(self.shuffle)
-                new_shape.append(shape.pop(0))
-        res = res.reshape(new_shape)
+        shape = list(res.shape)
+        for augm in self.augment:
+            shape.insert(augm, 1)
+        res = res.reshape(shape)
        if not self.inplace:
            res = numpy.copy(res)
        self.outputs[0].data = res
+    def grad(self, (x, ), (gz, )):
+        grad_order = ['x'] * len(self.inputs[0].broadcastable)
+        for i, x in enumerate(self.new_order):
+            if x != 'x':
+                grad_order[x] = i
+        return DimShuffle(gz, grad_order).out,
    def __str__(self):
        return "%s(%s, %s)" % (self.__class__.__name__, str(self.inputs[0]), self.new_order)
@@ -90,6 +118,9 @@ class Transpose(DimShuffle):
 class Broadcast(Op, Destroyer):
    def __init__(self, scalar_opclass, inputs, inplace_pattern = {}):
+        inputs = map(astensor, inputs)
        try:
            assert len(set([len(input.broadcastable) for input in inputs])) == 1
        except (AssertionError, AttributeError):
@@ -141,15 +172,29 @@ class Broadcast(Op, Destroyer):
            if r in scalar_ograds:
                return ograds[scalar_ograds.index(r)]
            op = r.owner
+            if op is None:
+                b = [1] * len(inputs[0].broadcastable)
+                res = astensor(numpy.asarray(r.data).reshape(b),
+                               broadcastable = b)
+                return res
            op_class = op.__class__
-            bcasted = Broadcast(op_class, [transform(input) for input in op.inputs], {})
+            bcasted = Broadcast(op_class, [transform(input) for input in op.inputs], {}).out
            return bcasted
        ret = []
        for scalar_igrad, input in zip(scalar_igrads, inputs):
            r = transform(scalar_igrad)
            to_sum = [i for i, bcast in enumerate(input.broadcastable) if bcast]
            if to_sum:
+                shuffle = []
+                j = 0
+                for bcast in input.broadcastable:
+                    if bcast == 1:
+                        shuffle.append('x')
+                    else:
+                        shuffle.append(j)
+                        j += 1
                sr = Sum(r, axis = to_sum).out
+                sr = DimShuffle(sr, shuffle).out
                ret.append(sr)
            else:
                ret.append(r)
@@ -269,16 +314,19 @@ def make_broadcast(scalar_opclass, inplace_pattern = {}, name = None):
        New.__name__ = "Tensor" + scalar_opclass.__name__
    return New
-def broadcast(op):
+def wrap_broadcast(op):
    def instantiate(*inputs):
+        inputs = map(astensor, inputs)
        target_length = max([len(input.broadcastable) for input in inputs])
        args = []
        for input in inputs:
-            difference = target_length - len(input.broadcastable)
+            length = len(input.broadcastable)
+            difference = target_length - length
            if not difference:
                args.append(input)
            else:
-                args.append(DimShuffle(input, ['x']*difference + range(length)))
+                args.append(DimShuffle(input, ['x']*difference + range(length)).out)
        return op(*args)
    return instantiate
@@ -319,6 +367,8 @@ class CAReduce(Op):
    """
    def __init__(self, scalar_opclass, inputs, dimensions_to_reduce = None):
+        inputs = map(astensor, inputs)
        if scalar_opclass.nin != 2 or scalar_opclass.nout != 1:
            raise NotImplementedError("CAReduce only supports binary functions with a single output.")
        if len(inputs) != 1:
@@ -346,9 +396,13 @@ class CAReduce(Op):
    def perform(self):
        result = self.inputs[0].data
-        for dimension in reversed(sorted(self.dimensions_to_reduce)):
+        # Materialise the axes as a real list: a bare reversed() iterator does
+        # not reliably report emptiness in a truth test (it is always true on
+        # Python >= 2.5), which would make the copy branch below unreachable.
+        to_reduce = sorted(self.dimensions_to_reduce, reverse = True)
+        if to_reduce:
+            # Reduce the highest axis first so the remaining axis numbers stay valid.
+            for dimension in to_reduce:
                result = self.ufunc.reduce(result, dimension)
            self.outputs[0].data = result
+        else:
+            # Nothing to reduce: store a copy rather than the input array itself.
+            self.outputs[0].data = numpy.copy(result)
    def _c_all(self, inames, onames, sub):
@@ -363,6 +417,9 @@ class CAReduce(Op):
        tosum = self.dimensions_to_reduce
+        if tosum == ():
+            return Broadcast(scalar_ops.Identity, (input, ))._c_all(inames, onames, sub)
        order1 = [i for i in xrange(len(input.broadcastable)) if i not in tosum]
        order = order1 + list(tosum)
@@ -459,7 +516,19 @@ def make_reduce(scalar_opclass, name = None):
        New.__name__ = "Reduce" + scalar_opclass.__name__
    return New
-Sum = make_reduce(scalar_ops.Add, name = 'Sum')
+class Sum(make_reduce(scalar_ops.Add)):
+    def grad(self, (x, ), (gz, )):
+        if self.dimensions_to_reduce == ():
+            return gz,
+        new_dims = []
+        i = 0
+        for j, _ in enumerate(x.broadcastable):
+            if j in self.dimensions_to_reduce:
+                new_dims.append('x')
+            else:
+                new_dims.append(i)
+                i += 1
+        return Broadcast(scalar_ops.Second, (x, DimShuffle(gz, new_dims).out)).out, 
 def reduce(op):

--- a/gof/cc.py
+++ b/gof/cc.py
@@ -832,8 +832,14 @@ class DualLinker(Linker):
        op_order_1 = env1.toposort()
        op_order_2 = [equiv[op.outputs[0]].owner for op in op_order_1] # we need to have the exact same order so we can compare each step
+        def c_make_thunk(op):
+            try:
+                return CLinker(op).make_thunk(True)[0]
+            except AbstractFunctionError:
+                return op.perform
        thunks1 = [op.perform for op in op_order_1]
-        thunks2 = [CLinker(op).make_thunk(True)[0] for op in op_order_2]
+        thunks2 = [c_make_thunk(op) for op in op_order_2]
        def f():
            for input1, input2 in zip(env1.inputs, env2.inputs):

--- a/gradient.py
+++ b/gradient.py
@@ -76,14 +76,17 @@ def grad_sources_inputs(sources, graph_inputs):
        #if all output gradients are None, continue
        if all(map(lambda x:x is None, g_outputs)): continue
-        output_arg = _unpack_result(g_outputs)
+#         output_arg = _unpack_result(g_outputs)
-        input_arg = _unpack_result(op.inputs)
+#         input_arg = _unpack_result(op.inputs)
+        output_arg = g_outputs
+        input_arg = op.inputs
        op_grad = op.grad(input_arg, output_arg)
        if op_grad is None:
            raise ValueError(_msg_retNone, op.__class__)
        if isinstance(op_grad, float):
            raise TypeError('wtf!!!!!!!!', op)
-        g_inputs = _pack_result(op_grad)
+        g_inputs = op_grad #_pack_result(op_grad)
        assert isinstance(g_inputs, (list, tuple))
        if len(g_inputs) != len(op.inputs):
            raise ValueError(_msg_badlen, 
@@ -123,6 +126,10 @@ class numeric_grad:
        """
        gf = [numpy.ndarray(x.shape) for x in pt]
        f_pt = f(*pt)
+        if isinstance(f, (list, tuple)):
+            f_pt = [numpy.copy(x) for x in f_pt]
+        else:
+            f_pt = numpy.copy(f_pt)
        for idx in xrange(len(gf)):
            if len(pt[idx].shape) == 0:

--- a/scalar.py
+++ b/scalar.py
@@ -12,6 +12,10 @@ def as_scalar(x, name = None):
        s = Scalar('float64', name = name)
        s.data = x
        return s
+    if isinstance(x, int):
+        s = Scalar('int32', name = name)
+        s.data = x
+        return s
    if isinstance(x, Scalar):
        return x
@@ -45,7 +49,8 @@ class Scalar(ResultBase):
 #             and self.data == other.data
    def dtype_specs(self):
-        return {'float64': (float, 'double', 'PyFloat_Check', 'PyFloat_AsDouble', 'PyFloat_FromDouble')}[self.dtype]
+        return {'float64': (float, 'npy_float64', 'PyFloat_Check', 'PyFloat_AsDouble', 'PyFloat_FromDouble'),
+                'int32': (int, 'npy_int32', 'PyInt_Check', 'PyInt_AsLong', 'PyInt_FromLong')}[self.dtype]
    def c_declare(self, name, sub):
        return """

--- a/scalar_ops.py
+++ b/scalar_ops.py
@@ -18,7 +18,7 @@ class Sub(BinaryScalarOp):
    def c_code(self, (x, y), (z, ), sub):
        return "%(z)s = %(x)s - %(y)s;" % locals()
    def grad(self, (x, y), (gz, )):
-        return gz, -gz
+        return gz, neg(gz)
 class Mul(BinaryScalarOp):
    def impl(self, x, y):
@@ -34,62 +34,119 @@ class Div(BinaryScalarOp):
    def c_code(self, (x, y), (z, ), sub):
        return "%(z)s = %(x)s / %(y)s;" % locals()
    def grad(self, (x, y), (gz, )):
-        return div(gz, y), -div(mul(x, gz), y*y)
+        return div(gz, y), neg(div(mul(x, gz), mul(y, y)))
 class Pow(BinaryScalarOp):
    def impl(self, x, y):
        return x ** y
    def c_code(self, (x, y), (z, ), sub):
        return "%(z)s = pow(%(x)s, %(y)s);" % locals()
+    def grad(self, (x, y), (gz, )):
+        return mul(gz, mul(y, pow(x, sub(y, as_scalar(1))))), mul(gz, mul(log(x), pow(x, y)))
 class First(BinaryScalarOp):
    def impl(self, x, y):
        return x
    def c_code(self, (x, y), (z, ), sub):
        return "%(z)s = %(x)s;" % locals()
+    def grad(self, (x, y), (gz, )):
+        return gz, None
 class Second(BinaryScalarOp):
    def impl(self, x, y):
        return y
    def c_code(self, (x, y), (z, ), sub):
        return "%(z)s = %(y)s;" % locals()
+    def grad(self, (x, y), (gz, )):
+        return None, gz
-class SquareDiff(BinaryScalarOp):
+# class SquareDiff(BinaryScalarOp):
-    def impl(self, x, y):
+#     def impl(self, x, y):
-        diff = (x - y)
+#         diff = (x - y)
-        return diff * diff
+#         return diff * diff
-    def c_code(self, (x, y), (z, ), sub):
+#     def c_code(self, (x, y), (z, ), sub):
-        return "%(z)s = %(x)s - %(y)s; %(z)s *= %(z)s;" % locals()
+#         return "%(z)s = %(x)s - %(y)s; %(z)s *= %(z)s;" % locals()
+class Identity(UnaryScalarOp):
+    # Scalar op that returns its single input unchanged.
+    def impl(self, x):
+        return x
+    def c_code(self, (x, ), (z, ), sub):
+        return "%(z)s = %(x)s;" % locals()
+    def grad(self, (x, ), (gz, )):
+        # One input, like the other unary ops in this file; a two-name
+        # pattern cannot unpack a single-element input list.
+        # The output gradient is passed straight through.
+        return gz,
 class Neg(UnaryScalarOp):
    def impl(self, x):
        return -x
    def grad(self, (x, ), (gz, )):
-        return -gz
+        return neg(gz),
    def c_code(self, (x, ), (z, ), sub):
        return "%(z)s = -%(x)s;" % locals()
+class Abs(UnaryScalarOp):
+    def impl(self, x):
+        return numpy.abs(x)
+    def grad(self, (x, ), (gz, )):
+        return mul(gz, sgn(x)),
+    def c_code(self, (x, ), (z, ), sub):
+        return "%(z)s = abs(%(x)s);" % locals()
+class Sgn(UnaryScalarOp):
+    def impl(self, x):
+        return numpy.abs(x) / x
+    def grad(self, (x, ), (gz, )):
+        return None,
+    def c_code(self, (x, ), (z, ), sub):
+        return "%(z)s = %(x)s/abs(%(x)s);" % locals() # TODO: C use copysign
 class Inv(UnaryScalarOp):
    def impl(self, x):
        return 1 / x
    def grad(self, (x, ), (gz, )):
-        return -gz / (x*x)
+        return div(neg(gz), mul(x, x)),
    def c_code(self, (x, ), (z, ), sub):
        return "%(z)s = 1 / %(x)s;" % locals()
 class Log(UnaryScalarOp):
    def impl(self, x):
        return math.log(x)
+    def grad(self, (x, ), (gz, )):
+        return div(gz, x),
    def c_code(self, (x, ), (z, ), sub):
        return "%(z)s = log(%(x)s);" % locals()
+class Log2(UnaryScalarOp):
+    def impl(self, x):
+        return numpy.log2(x)
+    def grad(self, (x, ), (gz, )):
+        return div(gz, mul(x, as_scalar(math.log(2.0)))),
+    def c_code(self, (x, ), (z, ), sub):
+        return "%(z)s = log2(%(x)s);" % locals()
 class Exp(UnaryScalarOp):
    def impl(self, x):
        return math.exp(x)
+    def grad(self, (x, ), (gz, )):
+        return mul(gz, exp(x)),
    def c_code(self, (x, ), (z, ), sub):
        return "%(z)s = exp(%(x)s);" % locals()
+class Sqr(UnaryScalarOp):
+    def impl(self, x):
+        return x*x
+    def grad(self, (x, ), (gz, )):
+        return mul(gz, mul(x, as_scalar(2))),
+    def c_code(self, (x, ), (z, ), sub):
+        return "%(z)s = %(x)s * %(x)s;" % locals()
+class Sqrt(UnaryScalarOp):
+    def impl(self, x):
+        return math.sqrt(x)
+    def grad(self, (x, ), (gz, )):
+        return div(mul(gz, as_scalar(0.5)), sqrt(x)),
+    def c_code(self, (x, ), (z, ), sub):
+        return "%(z)s = sqrt(%(x)s);" % locals()
 # class Sigmoid(UnaryComposite):
 #     def expand_impl(self, x):

--- a/tensor.py
+++ b/tensor.py