Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
P
pytensor
项目
项目
详情
活动
周期分析
仓库
仓库
文件
提交
分支
标签
贡献者
图表
比较
统计图
议题
0
议题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
日程
统计图
Wiki
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
testgroup
pytensor
Commits
b51535d9
提交
b51535d9
authored
3月 13, 2008
作者:
Olivier Breuleux
浏览文件
操作
浏览文件
下载
差异文件
merge
上级
cf18fe68
d99f7648
显示空白字符变更
内嵌
并排
正在显示
15 个修改的文件
包含
1253 行增加
和
619 行删除
+1253
-619
_test_gradient.py
_test_gradient.py
+0
-12
_test_tensor.py
_test_tensor.py
+217
-0
compile.py
compile.py
+1
-1
_test_cc.py
gof/_test_cc.py
+2
-1
_test_ext.py
gof/_test_ext.py
+2
-1
_test_graph.py
gof/_test_graph.py
+2
-1
_test_link.py
gof/_test_link.py
+31
-1
_test_modes.py
gof/_test_modes.py
+2
-1
_test_op.py
gof/_test_op.py
+2
-1
_test_opt.py
gof/_test_opt.py
+2
-1
_test_toolbox.py
gof/_test_toolbox.py
+2
-1
result.py
gof/result.py
+1
-2
gradient.py
gradient.py
+56
-0
tensor.py
tensor.py
+722
-38
tensor_ops.py
tensor_ops.py
+211
-558
没有找到文件。
_test_gradient.py
浏览文件 @
b51535d9
...
@@ -4,23 +4,11 @@
...
@@ -4,23 +4,11 @@
#
#
import
unittest
import
unittest
import
numpy
import
numpy
import
tensor_ops
as
T
import
tensor
import
gof
import
gof
from
gradient
import
*
from
gradient
import
*
import
gradient
import
gradient
class
posneg
(
T
.
TensorOp
):
nout
=
2
def
impl
(
self
,
x
):
return
x
,
-
x
def
grad
(
self
,
x
,
(
gpos
,
gneg
)):
return
gpos
-
gneg
class
posnegzero
(
T
.
TensorOp
):
nout
=
3
def
impl
(
self
,
x
):
return
x
,
-
x
,
0.0
def
grad
(
self
,
x
,
(
gpos
,
gneg
,
gzero
)):
return
gpos
-
gneg
class
_test_grad_sources_inputs
(
unittest
.
TestCase
):
class
_test_grad_sources_inputs
(
unittest
.
TestCase
):
def
test_retNone1
(
self
):
def
test_retNone1
(
self
):
"""Test that it is not ok to return None from op.grad()"""
"""Test that it is not ok to return None from op.grad()"""
...
...
_test_tensor.py
0 → 100644
浏览文件 @
b51535d9
from
tensor
import
*
import
tensor
as
T
import
unittest
from
copy
import
copy
from
compile
import
Function
import
gradient
#TODO: consider moving this function / functionality to gradient.py
# rationale: it's tricky, and necessary everytime you want to verify
# gradient numerically
def
verify_grad
(
testcase
,
op_cls
,
pt_list
,
n_tests
=
1
,
rng
=
numpy
.
random
,
eps
=
0.0000001
,
tol
=
0.0001
):
"""testcase.failUnless( analytic gradient matches finite-diff gradient) """
for
test_num
in
xrange
(
n_tests
):
for
pt
in
pt_list
:
tensor_pt
=
[
tensor
(
p
)
for
p
in
pt
]
o
=
op_cls
(
*
tensor_pt
)
if
len
(
o
.
outputs
)
>
1
:
raise
NotImplementedError
(
'cant (yet) autotest gradient of op with multiple outputs'
)
# we could make loop over outputs making random projections R for each,
# but this doesn't handle the case where not all the outputs are
# differentiable... so I leave this as TODO for now -jsb.
o_fn
=
Function
(
tensor_pt
,
o
.
outputs
)
o_fn_out
=
o_fn
(
*
pt
)
random_projection
=
rng
.
rand
(
*
o_fn_out
.
shape
)
t_r
=
tensor
(
random_projection
)
#random projection of o onto t_r
cost
=
sum
(
t_r
*
o
.
outputs
[
0
])
cost_fn
=
Function
(
tensor_pt
,
[
cost
])
num_grad
=
gradient
.
numeric_grad
(
cost_fn
,
pt
)
grad_fn
=
Function
(
tensor_pt
,
gradient
.
grad
(
cost
,
tensor_pt
))
analytic_grad
=
grad_fn
()
if
not
isinstance
(
analytic_grad
,
(
list
,
tuple
)):
analytic_grad
=
[
analytic_grad
]
if
num_grad
.
max_err
(
analytic_grad
)
>
1.0e-4
:
raise
Exception
(
verify_grad
.
E_grad
)
verify_grad
.
E_grad
=
'gradient error exceeded tolerance'
class
T_tensor
(
unittest
.
TestCase
):
def
test0
(
self
):
# allocate from a scalar float
t
=
tensor
(
1.0
)
self
.
failUnless
(
isinstance
(
t
,
Tensor
))
self
.
failUnless
(
t
.
dtype
==
'float64'
)
self
.
failUnless
(
t
.
broadcastable
==
())
self
.
failUnless
(
t
.
role
==
None
)
self
.
failUnless
(
isinstance
(
t
.
data
,
numpy
.
ndarray
))
self
.
failUnless
(
str
(
t
.
data
.
dtype
)
==
'float64'
)
self
.
failUnless
(
t
.
data
==
1.0
)
def
test0_int
(
self
):
# allocate from a scalar float
t
=
tensor
(
1
)
self
.
failUnless
(
isinstance
(
t
,
Tensor
))
self
.
failUnless
(
t
.
dtype
==
'int64'
)
def
test1
(
self
):
# allocate from a vector of ints, not broadcastable
t
=
tensor
(
numpy
.
ones
(
5
,
dtype
=
'int32'
))
self
.
failUnless
(
isinstance
(
t
,
Tensor
))
self
.
failUnless
(
t
.
dtype
==
'int32'
)
self
.
failUnless
(
t
.
broadcastable
==
(
0
,))
self
.
failUnless
(
isinstance
(
t
.
data
,
numpy
.
ndarray
))
self
.
failUnless
(
str
(
t
.
data
.
dtype
)
==
'int32'
)
def
test2
(
self
):
# allocate from a column matrix of complex with name
t
=
tensor
(
numpy
.
ones
((
5
,
1
),
dtype
=
'complex64'
),
name
=
'bart'
)
self
.
failUnless
(
isinstance
(
t
,
Tensor
))
self
.
failUnless
(
t
.
dtype
==
'complex64'
)
self
.
failUnless
(
t
.
broadcastable
==
(
0
,
1
))
self
.
failUnless
(
isinstance
(
t
.
data
,
numpy
.
ndarray
))
self
.
failUnless
(
t
.
name
==
'bart'
)
def
test2b
(
self
):
# allocate from a column matrix, not broadcastable
t
=
tensor
(
numpy
.
ones
((
5
,
1
),
dtype
=
'complex64'
),
broadcastable
=
0
)
self
.
failUnless
(
isinstance
(
t
,
Tensor
))
self
.
failUnless
(
t
.
dtype
==
'complex64'
)
self
.
failUnless
(
t
.
broadcastable
==
(
0
,
0
))
self
.
failUnless
(
isinstance
(
t
.
data
,
numpy
.
ndarray
))
def
test_data_normal
(
self
):
#test that assigning to .data works when it should
t
=
tensor
(
numpy
.
ones
((
5
,
1
),
dtype
=
'complex64'
),
broadcastable
=
0
)
o27
=
numpy
.
ones
((
2
,
7
))
t
.
data
=
o27
lst
=
t
.
_data
self
.
failUnless
(
t
.
data
.
shape
==
(
2
,
7
))
self
.
failUnless
(
t
.
data
is
o27
)
self
.
failUnless
(
t
.
_data
is
lst
)
def
test_data_badrank0
(
self
):
t
=
tensor
(
numpy
.
ones
((
5
,
1
),
dtype
=
'complex64'
),
broadcastable
=
0
)
try
:
t
.
data
=
numpy
.
ones
((
2
,
7
,
1
))
self
.
fail
()
except
ValueError
,
e
:
self
.
failUnless
(
e
[
0
]
is
Tensor
.
filter
.
E_rank
)
try
:
t
.
data
=
numpy
.
ones
(
1
)
self
.
fail
()
except
ValueError
,
e
:
self
.
failUnless
(
e
[
0
]
is
Tensor
.
filter
.
E_rank
)
def
test_data_badrank1
(
self
):
t
=
tensor
(
numpy
.
ones
((
1
,
1
),
dtype
=
'complex64'
),
broadcastable
=
1
)
try
:
t
.
data
=
numpy
.
ones
((
1
,
1
,
1
))
self
.
fail
()
except
ValueError
,
e
:
self
.
failUnless
(
e
[
0
]
is
Tensor
.
filter
.
E_rank
)
try
:
t
.
data
=
numpy
.
ones
(
1
)
self
.
fail
()
except
ValueError
,
e
:
self
.
failUnless
(
e
[
0
]
is
Tensor
.
filter
.
E_rank
)
def
test_data_badshape0
(
self
):
t
=
tensor
(
numpy
.
ones
((
1
,
1
),
dtype
=
'complex64'
),
broadcastable
=
1
)
try
:
t
.
data
=
numpy
.
ones
((
1
,
2
))
self
.
fail
()
except
ValueError
,
e
:
self
.
failUnless
(
e
[
0
]
is
Tensor
.
filter
.
E_shape
)
try
:
t
.
data
=
numpy
.
ones
((
0
,
1
))
self
.
fail
()
except
ValueError
,
e
:
self
.
failUnless
(
e
[
0
]
is
Tensor
.
filter
.
E_shape
)
class
T_stdlib
(
unittest
.
TestCase
):
def
test0
(
self
):
t
=
tensor
(
1.0
)
tt
=
copy
(
t
)
self
.
failUnless
(
t
.
dtype
==
tt
.
dtype
)
self
.
failUnless
(
t
.
broadcastable
==
tt
.
broadcastable
)
self
.
failUnless
(
t
.
broadcastable
is
tt
.
broadcastable
)
self
.
failIf
(
t
.
data
is
tt
.
data
)
def
check_eq
(
self
,
node_in
,
node_out
,
arg_in
,
arg_out
):
fn
=
Function
([
node_in
],
[
node_out
])
self
.
failUnless
(
numpy
.
all
(
fn
(
arg_in
)
==
arg_out
),
(
arg_in
,
arg_out
))
def
check_eq2
(
self
,
inputs
,
output
,
args_in
,
arg_out
):
fn
=
Function
(
inputs
,
[
output
])
val
=
fn
(
*
args_in
)
self
.
failUnless
(
numpy
.
all
(
val
==
arg_out
),
(
val
,
arg_out
))
class
T_abs
(
unittest
.
TestCase
):
def
test_impl
(
self
):
t
=
tensor
(
1.0
)
check_eq
(
self
,
t
,
abs
(
t
),
1.0
,
1.0
)
check_eq
(
self
,
t
,
abs
(
t
),
-
1.0
,
1.0
)
t
=
tensor
([
0.0
,
0.0
])
d
=
numpy
.
asarray
([
-
0.4
,
1.2
])
check_eq
(
self
,
t
,
abs
(
t
),
d
,
abs
(
d
))
check_eq
(
self
,
t
,
abs
(
t
),
-
d
,
abs
(
-
d
))
def
test_grad
(
self
):
verify_grad
(
self
,
Abs
,
[[
numpy
.
ones
(())],
[
numpy
.
ones
(
3
)]])
class
AbsBadGrad
(
T
.
_Elemwise
):
def
impl
(
self
,
x
):
return
numpy
.
abs
(
x
)
def
grad
(
self
,
x
,
gz
):
return
-
gz
*
sgn
(
x
)
def
c_foreach
(
self
,
(
x_i
,
),
(
z_i
,
)):
return
"z_i = abs(x_i);"
def
test_badgrad
(
self
):
try
:
verify_grad
(
self
,
T_abs
.
AbsBadGrad
,
[[
numpy
.
ones
(())],
[
numpy
.
ones
(
3
)]])
self
.
fail
()
except
Exception
,
e
:
self
.
failUnless
(
str
(
e
)
==
verify_grad
.
E_grad
,
str
(
e
))
class
T_sum
(
unittest
.
TestCase
):
def
test_impl
(
self
):
t
=
tensor
(
0.0
)
check_eq
(
self
,
t
,
Sum
(
t
)
.
out
,
1.0
,
1.0
)
check_eq
(
self
,
t
,
Sum
(
t
)
.
out
,
-
1.0
,
-
1.0
)
t
=
tensor
([
0.0
,
0.0
])
d
=
numpy
.
asarray
([
-
0.4
,
1.2
])
check_eq
(
self
,
t
,
Sum
(
t
)
.
out
,
d
,
numpy
.
sum
(
d
))
check_eq
(
self
,
t
,
Sum
(
t
)
.
out
,
-
d
,
-
numpy
.
sum
(
d
))
class
T_mul
(
unittest
.
TestCase
):
def
test_elemwise
(
self
):
a
=
tensor
(
0.0
)
b
=
tensor
(
0.0
)
check_eq2
(
self
,
[
a
,
b
],
mul_elemwise
(
a
,
b
),
[
3.0
,
4.0
],
12.0
)
check_eq2
(
self
,
[
a
,
b
],
mul_elemwise
(
a
,
a
),
[
-
1.0
,
2.0
],
1.0
)
check_eq2
(
self
,
[
a
,
b
],
mul
(
a
,
b
),
[
3.0
,
4.0
],
12.0
)
check_eq2
(
self
,
[
a
,
b
],
mul
(
a
,
a
),
[
-
1.0
,
2.0
],
1.0
)
a
=
tensor
(
numpy
.
ones
(
2
))
b
=
tensor
(
numpy
.
ones
(
2
))
aa
=
numpy
.
asarray
([
-
0.5
,
4.0
])
bb
=
numpy
.
asarray
([
-
0.5
,
2.0
])
check_eq2
(
self
,
[
a
,
b
],
mul_elemwise
(
a
,
b
),
[
aa
,
bb
],
numpy
.
asarray
([
0.25
,
8.0
]))
check_eq2
(
self
,
[
a
,
b
],
mul_elemwise
(
a
,
b
),
[
aa
,
aa
],
numpy
.
asarray
([
0.25
,
16.0
]))
check_eq2
(
self
,
[
a
,
b
],
mul
(
a
,
b
),
[
aa
,
bb
],
numpy
.
asarray
([
0.25
,
8.0
]))
check_eq2
(
self
,
[
a
,
b
],
mul
(
a
,
b
),
[
aa
,
aa
],
numpy
.
asarray
([
0.25
,
16.0
]))
def
test_wrong_shapes
(
self
):
a
=
tensor
(
numpy
.
ones
(
3
))
b
=
tensor
(
numpy
.
ones
(
4
))
try
:
check_eq2
(
self
,
[
a
,
b
],
MulElemwise
(
a
,
b
)
.
out
,
[
numpy
.
ones
(
3
),
numpy
.
ones
(
4
)],
1.0
)
self
.
fail
()
except
ValueError
,
e
:
self
.
failUnless
(
e
is
T
.
_assert_same_shapes
.
E_shape
)
if
__name__
==
'__main__'
:
unittest
.
main
()
compile.py
浏览文件 @
b51535d9
...
@@ -2,7 +2,7 @@
...
@@ -2,7 +2,7 @@
import
gof
import
gof
#TODO: put together some default optimizations
#TODO: put together some default optimizations
(TRAC #67)
_optimizations
=
None
_optimizations
=
None
...
...
gof/_test_cc.py
浏览文件 @
b51535d9
...
@@ -9,8 +9,9 @@ from env import Env
...
@@ -9,8 +9,9 @@ from env import Env
class
Double
(
ResultBase
):
class
Double
(
ResultBase
):
def
__init__
(
self
,
data
,
name
=
"oignon"
):
def
__init__
(
self
,
data
,
name
=
"oignon"
):
ResultBase
.
__init__
(
self
,
role
=
None
,
name
=
name
)
assert
isinstance
(
data
,
float
)
assert
isinstance
(
data
,
float
)
ResultBase
.
__init__
(
self
,
role
=
None
,
data
=
data
,
name
=
name
)
self
.
data
=
data
def
__str__
(
self
):
def
__str__
(
self
):
return
self
.
name
return
self
.
name
...
...
gof/_test_ext.py
浏览文件 @
b51535d9
...
@@ -13,7 +13,8 @@ from toolbox import EquivTool
...
@@ -13,7 +13,8 @@ from toolbox import EquivTool
class
MyResult
(
ResultBase
):
class
MyResult
(
ResultBase
):
def
__init__
(
self
,
name
):
def
__init__
(
self
,
name
):
ResultBase
.
__init__
(
self
,
role
=
None
,
data
=
[
1000
],
name
=
name
)
ResultBase
.
__init__
(
self
,
role
=
None
,
name
=
name
)
self
.
data
=
[
1000
]
def
__str__
(
self
):
def
__str__
(
self
):
return
self
.
name
return
self
.
name
...
...
gof/_test_graph.py
浏览文件 @
b51535d9
...
@@ -11,7 +11,8 @@ class MyResult(ResultBase):
...
@@ -11,7 +11,8 @@ class MyResult(ResultBase):
def
__init__
(
self
,
thingy
):
def
__init__
(
self
,
thingy
):
self
.
thingy
=
thingy
self
.
thingy
=
thingy
ResultBase
.
__init__
(
self
,
role
=
None
,
data
=
[
self
.
thingy
])
ResultBase
.
__init__
(
self
,
role
=
None
)
self
.
data
=
[
self
.
thingy
]
def
__eq__
(
self
,
other
):
def
__eq__
(
self
,
other
):
return
self
.
same_properties
(
other
)
return
self
.
same_properties
(
other
)
...
...
gof/_test_link.py
浏览文件 @
b51535d9
...
@@ -11,8 +11,9 @@ from link import *
...
@@ -11,8 +11,9 @@ from link import *
class
Double
(
ResultBase
):
class
Double
(
ResultBase
):
def
__init__
(
self
,
data
,
name
=
"oignon"
):
def
__init__
(
self
,
data
,
name
=
"oignon"
):
ResultBase
.
__init__
(
self
,
role
=
None
,
name
=
name
)
assert
isinstance
(
data
,
float
)
assert
isinstance
(
data
,
float
)
ResultBase
.
__init__
(
self
,
role
=
None
,
data
=
data
,
name
=
name
)
self
.
data
=
data
def
__str__
(
self
):
def
__str__
(
self
):
return
self
.
name
return
self
.
name
...
@@ -60,6 +61,10 @@ class Div(Binary):
...
@@ -60,6 +61,10 @@ class Div(Binary):
def
impl
(
self
,
x
,
y
):
def
impl
(
self
,
x
,
y
):
return
x
/
y
return
x
/
y
class
RaiseErr
(
Unary
):
def
impl
(
self
,
x
):
raise
NotImplementedError
()
import
modes
import
modes
modes
.
make_constructors
(
globals
())
modes
.
make_constructors
(
globals
())
...
@@ -103,6 +108,31 @@ class _test_PerformLinker(unittest.TestCase):
...
@@ -103,6 +108,31 @@ class _test_PerformLinker(unittest.TestCase):
assert
fn
(
1.0
,
2.0
,
3.0
)
==
1.5
assert
fn
(
1.0
,
2.0
,
3.0
)
==
1.5
assert
e
.
data
!=
1.5
assert
e
.
data
!=
1.5
def
test_input_output_same
(
self
):
x
,
y
,
z
=
inputs
()
a
,
d
=
add
(
x
,
y
),
div
(
x
,
y
)
e
=
mul
(
a
,
d
)
fn
=
perform_linker
(
env
([
e
],
[
e
]))
.
make_function
()
self
.
failUnless
(
1
is
fn
(
1
))
def
test_input_dependency0
(
self
):
x
,
y
,
z
=
inputs
()
a
,
d
=
add
(
x
,
y
),
div
(
x
,
y
)
e
=
mul
(
a
,
d
)
fn
=
perform_linker
(
env
([
x
,
a
],
[
e
]))
.
make_function
()
#perform linker should have recognized that one input is a function of
#the other one, which makes no sense
self
.
fail
(
'this graph should not have been compiled'
)
def
test_skiphole
(
self
):
x
,
y
,
z
=
inputs
()
a
=
add
(
x
,
y
)
r
=
RaiseErr
(
a
)
.
out
e
=
add
(
r
,
a
)
fn
=
perform_linker
(
env
([
x
,
y
,
r
],
[
e
]))
.
make_function
()
self
.
failUnless
(
fn
(
1.0
,
2.0
,
4.5
)
==
7.5
)
if
__name__
==
'__main__'
:
if
__name__
==
'__main__'
:
unittest
.
main
()
unittest
.
main
()
...
...
gof/_test_modes.py
浏览文件 @
b51535d9
...
@@ -12,8 +12,9 @@ from env import Env
...
@@ -12,8 +12,9 @@ from env import Env
class
Double
(
ResultBase
):
class
Double
(
ResultBase
):
def
__init__
(
self
,
data
,
name
=
"oignon"
):
def
__init__
(
self
,
data
,
name
=
"oignon"
):
ResultBase
.
__init__
(
self
,
role
=
None
,
name
=
name
)
assert
isinstance
(
data
,
float
)
assert
isinstance
(
data
,
float
)
ResultBase
.
__init__
(
self
,
role
=
None
,
data
=
data
,
name
=
name
)
self
.
data
=
data
def
__str__
(
self
):
def
__str__
(
self
):
return
self
.
name
return
self
.
name
...
...
gof/_test_op.py
浏览文件 @
b51535d9
...
@@ -9,7 +9,8 @@ class MyResult(ResultBase):
...
@@ -9,7 +9,8 @@ class MyResult(ResultBase):
def
__init__
(
self
,
thingy
):
def
__init__
(
self
,
thingy
):
self
.
thingy
=
thingy
self
.
thingy
=
thingy
ResultBase
.
__init__
(
self
,
role
=
None
,
data
=
[
self
.
thingy
])
ResultBase
.
__init__
(
self
,
role
=
None
)
self
.
data
=
[
self
.
thingy
]
def
__eq__
(
self
,
other
):
def
__eq__
(
self
,
other
):
return
self
.
same_properties
(
other
)
return
self
.
same_properties
(
other
)
...
...
gof/_test_opt.py
浏览文件 @
b51535d9
...
@@ -11,7 +11,8 @@ from toolbox import *
...
@@ -11,7 +11,8 @@ from toolbox import *
class
MyResult
(
ResultBase
):
class
MyResult
(
ResultBase
):
def
__init__
(
self
,
name
):
def
__init__
(
self
,
name
):
ResultBase
.
__init__
(
self
,
role
=
None
,
data
=
[
1000
],
name
=
name
)
ResultBase
.
__init__
(
self
,
role
=
None
,
name
=
name
)
self
.
data
=
[
1000
]
def
__str__
(
self
):
def
__str__
(
self
):
return
self
.
name
return
self
.
name
...
...
gof/_test_toolbox.py
浏览文件 @
b51535d9
...
@@ -13,7 +13,8 @@ from toolbox import *
...
@@ -13,7 +13,8 @@ from toolbox import *
class
MyResult
(
ResultBase
):
class
MyResult
(
ResultBase
):
def
__init__
(
self
,
name
):
def
__init__
(
self
,
name
):
ResultBase
.
__init__
(
self
,
role
=
None
,
data
=
[
1000
],
name
=
name
)
ResultBase
.
__init__
(
self
,
role
=
None
,
name
=
name
)
self
.
data
=
[
1000
]
def
__str__
(
self
):
def
__str__
(
self
):
return
self
.
name
return
self
.
name
...
...
gof/result.py
浏览文件 @
b51535d9
...
@@ -57,11 +57,10 @@ class ResultBase(object):
...
@@ -57,11 +57,10 @@ class ResultBase(object):
__slots__
=
[
'_role'
,
'_data'
,
'state'
,
'_name'
]
__slots__
=
[
'_role'
,
'_data'
,
'state'
,
'_name'
]
def
__init__
(
self
,
role
=
None
,
data
=
None
,
name
=
None
):
def
__init__
(
self
,
role
=
None
,
name
=
None
):
self
.
_role
=
role
self
.
_role
=
role
self
.
_data
=
[
None
]
self
.
_data
=
[
None
]
self
.
state
=
Empty
self
.
state
=
Empty
self
.
__set_data
(
data
)
self
.
name
=
name
self
.
name
=
name
...
...
gradient.py
浏览文件 @
b51535d9
import
gof
,
gof
.
result
import
gof
,
gof
.
result
import
numpy
#for numeric_grad
_msg_retNone
=
'op.grad(...) returned None, consider returning [None]'
_msg_retNone
=
'op.grad(...) returned None, consider returning [None]'
_msg_badlen
=
'op.grad(...) returned wrong number of gradients'
_msg_badlen
=
'op.grad(...) returned wrong number of gradients'
...
@@ -105,3 +106,58 @@ def grad(cost, param):
...
@@ -105,3 +106,58 @@ def grad(cost, param):
return
gmap
.
get
(
param
,
None
)
return
gmap
.
get
(
param
,
None
)
class
numeric_grad
:
def
__init__
(
self
,
f
,
pt
,
eps
=
1.0e-7
):
"""Return the gradient of f at pt.
This function computes the gradient by a one-sided finite differences of a
fixed step size (eps).
It is assumed that f(...) will return a scalar.
It is assumed that all f's inputs are numpy.ndarray objects.
"""
gf
=
[
numpy
.
ndarray
(
x
.
shape
)
for
x
in
pt
]
f_pt
=
f
(
*
pt
)
for
idx
in
xrange
(
len
(
gf
)):
if
len
(
pt
[
idx
]
.
shape
)
==
0
:
orig
=
pt
[
idx
]
pt
[
idx
]
=
numpy
.
asarray
(
pt
[
idx
]
+
eps
)
f_eps
=
f
(
*
pt
)
gf
[
idx
]
=
numpy
.
asarray
((
f_eps
-
f_pt
)
/
eps
)
pt
[
idx
]
=
orig
elif
len
(
pt
[
idx
]
.
shape
)
==
1
:
for
i
in
xrange
(
pt
[
idx
]
.
shape
[
0
]):
orig
=
pt
[
idx
][
i
]
pt
[
idx
][
i
]
=
pt
[
idx
][
i
]
+
eps
f_eps
=
f
(
*
pt
)
gf
[
idx
][
i
]
=
numpy
.
asarray
((
f_eps
-
f_pt
)
/
eps
)
pt
[
idx
][
i
]
=
orig
elif
len
(
args
[
idx
]
.
shape
)
==
2
:
for
i
in
xrange
(
pt
[
idx
]
.
shape
[
0
]):
for
j
in
xrange
(
args
[
idx
]
.
shape
[
1
]):
orig
=
pt
[
idx
][
i
,
j
]
pt
[
idx
][
i
,
j
]
=
pt
[
idx
][
i
,
j
]
+
eps
f_eps
=
f
(
*
pt
)
gf
[
idx
][
i
,
j
]
=
numpy
.
asarray
((
f_eps
-
f_pt
)
/
eps
)
pt
[
idx
][
i
,
j
]
=
orig
else
:
raise
NotImplementedError
()
self
.
gf
=
gf
@staticmethod
def
abs_rel_err
(
a
,
b
,
eps
=
1.0e-10
):
"""Return a small number when a and b are close, relative to how big they are"""
return
abs
(
(
a
-
b
)
/
(
a
+
b
+
eps
))
def
max_err
(
self
,
g_pt
):
"""Return the biggest relative error between g_pt and self.gf"""
assert
len
(
g_pt
)
==
len
(
self
.
gf
)
errs
=
[]
for
a
,
b
in
zip
(
g_pt
,
self
.
gf
):
errs
.
append
(
numpy
.
max
(
numeric_grad
.
abs_rel_err
(
a
,
b
)))
return
max
(
errs
)
tensor.py
浏览文件 @
b51535d9
"""A ResultBase to store numpy.ndarray with basic accompanying Ops"""
import
numpy
import
numpy
from
copy
import
copy
from
copy
import
copy
import
inspect
import
inspect
from
gof
import
ResultBase
,
Op
,
utils
from
gof
import
ResultBase
,
Op
,
utils
,
Destroyer
,
Viewer
###########################
# Tensor Class
###########################
class
Tensor
(
ResultBase
):
"""ResultBase to store numpy.ndarray or equivalent via .data
def
tensor
(
data
,
name
=
None
):
Attributes:
data
=
numpy
.
asarray
(
data
)
_dtype - numpy dtype string such as 'int64' or 'float64' (among others)
return
Tensor
(
data
.
dtype
,
[
0
]
*
len
(
data
.
shape
),
data
,
name
)
_broadcastable - tuple of ints in (0,1) saying which dimensions of this
tensor are guaranteed to be 1, and up for broadcasting
def
_broadcastable_pattern
(
pattern
):
Properties:
def
factory
(
data
=
None
,
name
=
None
):
dtype - read-only access to _dtype, which should not be changed
if
data
:
assert
len
(
data
.
shape
)
==
len
(
pattern
)
broadcastable - read-only access to _broadcastable, which should not be changed
return
Tensor
(
data
.
dtype
,
pattern
,
data
,
name
)
return
factory
matrix
=
_broadcastable_pattern
([
0
,
0
])
Operators:
row
=
_broadcastable_pattern
([
1
,
0
])
- most numeric operators are overloaded to return Ops that *would* perform
col
=
_broadcastable_pattern
([
0
,
1
])
the corresponding calculation
class
Tensor
(
ResultBase
):
"""
def
__init__
(
self
,
dtype
=
None
,
broadcastable
=
None
,
data
=
None
,
name
=
None
,
constant
=
False
):
if
dtype
is
None
or
broadcastable
is
None
:
if
data
is
None
:
raise
TypeError
(
"Provide non-None data to complete the dtype and broadcastable flags."
)
data
=
numpy
.
asarray
(
data
)
dtype
=
data
.
dtype
if
constant
:
broadcastable
=
[
1
*
(
x
==
1
)
for
x
in
data
.
shape
]
else
:
broadcastable
=
[
0
]
*
len
(
data
.
shape
)
self
.
broadcastable
=
broadcastable
self
.
dtype
=
str
(
dtype
)
self
.
constant
=
constant
ResultBase
.
__init__
(
self
,
role
=
None
,
data
=
data
,
name
=
name
)
def
__
get_constant
(
self
):
def
__
init__
(
self
,
dtype
,
broadcastable
,
role
=
None
,
name
=
None
):
return
self
.
_constant
"""Initialize a Tensor"""
def
__set_constant
(
self
,
value
):
# data is not given here. This may seem a bit strange, but when data was
if
value
:
# an argument, it made sense to use *either* the given dtype,
self
.
indestructible
=
True
# broadcastable, or override them from the fields of data. This makes
self
.
_constant
=
value
# the function ugly, especially because it isn't obvious how to set
# broadcastable from data.
#
# The only clean option I could think of, when passing a data arg was to
# require the broadcastable field to be given. Since broadcastable is
# the argument that is awkward to construct, I decided to put all this
# into the tensor(data,...) function below, which is like a second
# constructor that works with an ndarray.
ResultBase
.
__init__
(
self
,
role
=
role
,
name
=
name
)
self
.
_dtype
=
str
(
dtype
)
self
.
_broadcastable
=
tuple
(
broadcastable
)
constant
=
property
(
__get_constant
,
__set_constant
)
######################
# ResultBase interface
######################
def
filter
(
self
,
data
):
#
arr
=
numpy
.
asarray
(
data
,
dtype
=
self
.
dtype
)
# filter
#
def
filter
(
self
,
arr
):
if
not
isinstance
(
arr
,
numpy
.
ndarray
):
arr
=
numpy
.
asarray
(
arr
,
dtype
=
self
.
dtype
)
if
len
(
self
.
broadcastable
)
!=
len
(
arr
.
shape
):
raise
ValueError
(
Tensor
.
filter
.
E_rank
)
for
b
,
s
in
zip
(
self
.
broadcastable
,
arr
.
shape
):
for
b
,
s
in
zip
(
self
.
broadcastable
,
arr
.
shape
):
assert
not
b
or
s
==
1
if
b
and
(
s
!=
1
):
raise
ValueError
(
Tensor
.
filter
.
E_shape
)
return
arr
return
arr
# these strings are here so that tests can use them
filter
.
E_rank
=
'wrong rank'
filter
.
E_shape
=
'non-unit size on broadcastable dimension'
#
# type information : Olivier what does this mean?
#
def
dtype_specs
(
self
):
def
dtype_specs
(
self
):
return
{
'float64'
:
(
float
,
'double'
,
'NPY_DOUBLE'
)}[
self
.
dtype
]
return
{
'float64'
:
(
float
,
'double'
,
'NPY_DOUBLE'
)}[
self
.
dtype
]
#
# C codegen stubs
#
def
c_declare
(
self
):
def
c_declare
(
self
):
return
"""
return
"""
PyArrayObject*
%%(name)
s;
PyArrayObject*
%%(name)
s;
...
@@ -117,6 +135,16 @@ class Tensor(ResultBase):
...
@@ -117,6 +135,16 @@ class Tensor(ResultBase):
def
c_libraries
(
self
):
def
c_libraries
(
self
):
return
[]
return
[]
############################
# Tensor specific attributes
#
############################
dtype
=
property
(
lambda
self
:
self
.
_dtype
)
broadcastable
=
property
(
lambda
self
:
self
.
_broadcastable
)
# STDLIB
def
__copy__
(
self
):
def
__copy__
(
self
):
"""
"""
Returns a copy of this Tensor. If there is data stored inside it, it is also copied.
Returns a copy of this Tensor. If there is data stored inside it, it is also copied.
...
@@ -125,4 +153,660 @@ class Tensor(ResultBase):
...
@@ -125,4 +153,660 @@ class Tensor(ResultBase):
cpy
.
data
=
copy
(
self
.
data
)
cpy
.
data
=
copy
(
self
.
data
)
return
cpy
return
cpy
#UNARY
def
__abs__
(
self
):
return
Abs
(
self
)
.
out
def
__neg__
(
self
):
return
Neg
(
self
)
.
out
#CASTS
def
__int__
(
self
):
return
AsInt
(
self
)
.
out
def
__float__
(
self
):
return
AsInt
(
self
)
.
out
def
__complex__
(
self
):
return
AsComplex
(
self
)
.
out
#COMPARISONS
def
__lt__
(
self
,
other
):
return
lt
(
self
,
other
)
def
__le__
(
self
,
other
):
return
le
(
self
,
other
)
def
__gt__
(
self
,
other
):
return
gt
(
self
,
other
)
def
__ge__
(
self
,
other
):
return
ge
(
self
,
other
)
#ARITHMETIC - NORMAL
def
__add__
(
self
,
other
):
return
add
(
self
,
other
)
def
__sub__
(
self
,
other
):
return
sub
(
self
,
other
)
def
__mul__
(
self
,
other
):
return
mul
(
self
,
other
)
def
__div__
(
self
,
other
):
return
div
(
self
,
other
)
def
__pow__
(
self
,
other
):
return
pow
(
self
,
other
)
#ARITHMETIC - INPLACE
def
__iadd__
(
self
,
other
):
return
add_inplace
(
self
,
other
)
def
__isub__
(
self
,
other
):
return
sub_inplace
(
self
,
other
)
def
__imul__
(
self
,
other
):
return
mul_inplace
(
self
,
other
)
def
__idiv__
(
self
,
other
):
return
div_inplace
(
self
,
other
)
def
__ipow__
(
self
,
other
):
return
pow_inplace
(
self
,
other
)
#ARITHMETIC - RIGHT-OPERAND
def
__radd__
(
self
,
other
):
return
add
(
other
,
self
)
def
__rsub__
(
self
,
other
):
return
sub
(
other
,
self
)
def
__rmul__
(
self
,
other
):
return
mul
(
other
,
self
)
def
__rdiv__
(
self
,
other
):
return
div
(
other
,
self
)
def
__rpow__
(
self
,
other
):
return
pow
(
other
,
self
)
#TRANSPOSE
def
__get_T
(
self
):
return
tensor_copy
(
transpose
(
self
))
T
=
property
(
__get_T
)
#SLICING
def
__getitem__
(
self
,
key
):
raise
NotImplementedError
()
def
__getslice__
(
self
,
key
):
raise
NotImplementedError
()
# alternate Tensor constructor
def
tensor
(
data
,
broadcastable
=
None
,
role
=
None
,
name
=
None
):
"""Return a Tensor containing given data"""
data
=
numpy
.
asarray
(
data
)
if
broadcastable
is
None
:
broadcastable
=
[
s
==
1
for
s
in
data
.
shape
]
elif
broadcastable
in
[
0
,
1
]:
broadcastable
=
[
broadcastable
]
*
len
(
data
.
shape
)
rval
=
Tensor
(
data
.
dtype
,
broadcastable
,
role
,
name
)
rval
.
data
=
data
# will raise if broadcastable was mis-specified
return
rval
############################
# Supporting Ops
############################
def
_scalar_switch
(
normal_f
,
scalar_f
,
scalar_f_reverse
=
None
):
"""a decorator for operators before broadcasting works properly"""
def
f
(
x
,
y
):
def
as_tensor
(
obj
):
if
isinstance
(
obj
,
Tensor
):
return
obj
else
:
return
tensor
(
obj
)
x
,
y
=
as_tensor
(
x
),
as_tensor
(
y
)
if
0
not
in
y
.
broadcastable
:
return
scalar_f
(
x
,
y
)
if
0
not
in
x
.
broadcastable
:
if
scalar_f_reverse
:
return
scalar_f_reverse
(
y
,
x
)
else
:
raise
TypeError
(
"You cannot do this operation on a scalar."
)
return
normal_f
(
x
,
y
)
return
f
class
_Op
(
Op
):
"""A convenient base for the ops in this file"""
nin
=
-
1
nout
=
1
def
__init__
(
self
,
*
inputs
):
def
as_tensor
(
obj
):
if
isinstance
(
obj
,
Tensor
):
return
obj
else
:
return
tensor
(
obj
)
inputs
=
map
(
as_tensor
,
inputs
)
if
self
.
nin
>=
0
:
if
len
(
inputs
)
!=
self
.
nin
:
raise
TypeError
(
"Wrong number of inputs for
%
s (got
%
i, expected
%
i)"
)
\
%
(
self
,
len
(
inputs
),
self
.
nin
)
i_broadcastables
=
[
getattr
(
input
,
'broadcastable'
,
None
)
for
input
in
inputs
]
i_dtypes
=
[
getattr
(
input
,
'dtype'
,
None
)
for
input
in
inputs
]
o_broadcastables
=
utils
.
from_return_values
(
self
.
propagate_broadcastable
(
*
i_broadcastables
))
o_dtypes
=
utils
.
from_return_values
(
self
.
propagate_dtype
(
*
i_dtypes
))
self
.
inputs
=
inputs
self
.
outputs
=
[
Tensor
(
dtype
,
broadcastable
)
for
broadcastable
,
dtype
in
zip
(
o_broadcastables
,
o_dtypes
)]
def
propagate_broadcastable
(
self
,
*
inputs
):
raise
AbstractFunctionError
()
def
propagate_dtype
(
self
,
*
i_dtypes
):
def
upcast
(
dtype
,
*
dtypes
):
z
=
numpy
.
zeros
((),
dtype
=
dtype
)
for
dtype
in
dtypes
:
z
=
z
+
numpy
.
zeros
((),
dtype
=
dtype
)
return
str
(
z
.
dtype
)
for
dtype
in
i_dtypes
:
if
dtype
is
None
:
raise
TypeError
(
"Expected a Tensor."
)
rval
=
upcast
(
*
i_dtypes
)
return
rval
def
impl
(
self
,
*
inputs
):
raise
AbstractFunctionError
()
def
perform
(
self
):
self
.
outputs
[
0
]
.
data
=
self
.
impl
(
*
[
input
.
data
for
input
in
self
.
inputs
])
def
c_var_names
(
self
):
(
self
,
inames
,
onames
),
_1
,
_2
,
_3
=
inspect
.
getargspec
(
self
.
c_impl
)
inames
=
utils
.
from_return_values
(
inames
)
onames
=
utils
.
from_return_values
(
onames
)
return
[
inames
,
onames
]
def
c_code
(
self
):
return
self
.
c_impl
(
self
.
inputs
,
self
.
outputs
)
def
c_impl
(
self
,
inputs
,
outputs
):
raise
AbstractFunctionError
()
class
_Unary
:
nin
=
1
class
_Binary
:
nin
=
2
def
_assert_same_shapes
(
x
,
*
rest
):
"""Ensure that all inputs to the function impl have the same size (foils numpy's broadcasting)"""
shape
=
x
.
shape
for
other
in
rest
:
if
other
.
shape
!=
shape
:
raise
_assert_same_shapes
.
E_shape
_assert_same_shapes
.
E_shape
=
ValueError
(
"The dimensions of the inputs do not match."
)
def
_assert_tensor_scalar
(
x
,
a
):
"""ensure that the second input is a scalar"""
if
numpy
.
product
(
a
.
shape
)
!=
1
:
raise
ValueError
(
"The second argument must be a scalar."
)
class
_Elemwise
(
_Op
):
@staticmethod
def
extract_name
(
name
):
if
name
.
endswith
(
"_i"
):
return
name
[:
-
2
]
else
:
return
name
@staticmethod
def
is_loop_var
(
name
):
return
name
.
endswith
(
"_i"
)
def
c_var_names
(
self
):
cls
=
self
.
__class__
(
self
,
inames
,
onames
),
_1
,
_2
,
_3
=
inspect
.
getargspec
(
self
.
c_foreach
)
spec
=
([
cls
.
extract_name
(
name
)
for
name
in
inames
],
[
cls
.
extract_name
(
name
)
for
name
in
onames
])
return
spec
def
loop_variables
(
self
):
cls
=
self
.
__class__
(
self
,
inames
,
onames
),
_1
,
_2
,
_3
=
inspect
.
getargspec
(
cls
.
c_foreach
)
return
([
cls
.
extract_name
(
name
)
for
name
in
inames
if
cls
.
is_loop_var
(
name
)],
[
cls
.
extract_name
(
name
)
for
name
in
onames
if
cls
.
is_loop_var
(
name
)])
def
propagate_broadcastable
(
self
,
*
inputs
):
inames
,
onames
=
self
.
c_var_names
()
iloop
,
oloop
=
self
.
loop_variables
()
if
oloop
!=
onames
:
raise
Exception
(
\
"Cannot infer broadcastable for non-loop variable(s)
%
s"
\
%
set
(
onames
)
.
difference
(
oloop
),
self
.
__class__
)
all_bcast
=
[
broadcastable
for
broadcastable
,
iname
in
zip
(
inputs
,
inames
)
if
iname
in
iloop
]
ret
=
[]
for
arr
in
zip
(
*
all_bcast
):
if
0
in
arr
:
ret
.
append
(
0
)
else
:
ret
.
append
(
1
)
return
[
ret
]
*
self
.
nout
@classmethod
def
inplace_version
(
cls
):
class
Ret
(
cls
,
Destroyer
):
def
destroy_list
(
self
):
return
self
.
inputs
[
0
]
return
Ret
def
c_init
(
self
,
inputs
,
outputs
):
pass
def
c_foreach
(
self
,
inputs
,
outputs
):
pass
def
c_finalize
(
self
,
inputs
,
outputs
):
pass
class
TensorScalarOp
(
_Elemwise
):
def
c_var_names
(
self
):
return
([
'x'
,
'_a'
],
[
'z'
,
])
def
loop_variables
(
self
):
return
([
'x'
,
],
[
'z'
,
])
def
c_init
((
x
,
_a
),
(
z
,
)):
return
"""
if (PyArray_SIZE(_a) != 1) {
PyErr_SetString(PyExc_ValueError,
\"
The size of the scalar argument is not 1.
\"
);
}
_a_dtype a = ((_a_dtype*)PyArray_DATA(_a))[0];
"""
def
_c_foreach
(
self
):
return
"z_i =
%
s;"
%
self
.
c_expr
def
constructor
(
op_cls
):
def
f
(
*
args
,
**
kwargs
):
op
=
op_cls
(
*
args
,
**
kwargs
)
if
len
(
op
.
outputs
)
>
1
:
return
op
.
outputs
else
:
return
op
.
outputs
[
0
]
return
f
##########################
# Unary Operations
##########################
class
Abs
(
_Elemwise
):
def
impl
(
self
,
x
):
return
numpy
.
abs
(
x
)
def
grad
(
self
,
x
,
gz
):
return
gz
*
Sgn
(
x
)
.
out
#TODO: handle the corner case (get it? pun?)
def
c_foreach
(
self
,
(
x_i
,
),
(
z_i
,
)):
return
"z_i = abs(x_i);"
#Constructor not necessary because builtin abs() does this
class Neg(_Elemwise):
    # Element-wise negation.  The tuple parameters of c_foreach are
    # introspected for C variable names (Python 2 syntax) -- do not rename.
    def impl(self, x):
        return -x
    def grad(self, x, gz):
        # d(-x)/dx = -1, so the gradient is simply negated.
        return -gz
    def c_foreach(self, (x_i, ), (z_i, )):
        return "z_i = -x_i;"
#Constructor not necessary because unary '-' does this
class Sgn(_Elemwise):
    # Element-wise sign, computed as |x| / x.
    # NOTE(review): this divides by x, so x == 0 yields a warning/NaN in the
    # Python impl and a division by zero in the C impl -- confirm intended.
    def impl(self, x):
        return numpy.abs(x) / x
    def grad(self, x, gz):
        # The sign function is piecewise constant: no gradient is propagated.
        return [None]
    def c_foreach(self, (x_i, ), (z_i, )):
        return "z_i = x_i/abs(x_i);" # TODO: C use copysign
sgn = constructor(Sgn)
class Sum(_Elemwise):
    # Sum of all elements, producing a 0-d (scalar) tensor.
    def impl(self, x):
        return numpy.sum(x)
    def grad(self, x, gz):
        # Each input element contributes with weight 1, so the gradient is
        # the scalar gz broadcast to x's shape.
        return fill(x, gz)
    def propagate_broadcastable(self, *inputs):
        # The output is 0-dimensional regardless of the input pattern.
        return [()]
    def c_init(self, (x, ), (sum, )):
        return "sum_dtype* sump = ((sum_dtype*)PyArray_DATA(sum)); sump[0] = 0;"
    def c_foreach(self, (x_i, ), (sum, )):
        return "sump[0] += x_i;"
# NOTE(review): this deliberately shadows the builtin `sum` at module level;
# later grad() methods in this file rely on this symbolic version.
sum = constructor(Sum)
class Fill(_Elemwise):
    # Produce a tensor shaped like `model`, every element set to the (scalar)
    # `value`.
    def impl(self, model, value):
        return (model * 0) + value #TODO: we can probably do better than this
    def grad(self, (model, value), gz):
        # `model` only supplies the shape (no gradient); `value` receives the
        # sum of the output gradient (uses the symbolic `sum` defined above).
        return None, sum(gz)
    def c_init(self, (model, value), (z, )):
        # Read the scalar fill value once before the loop.
        return "value_dtype value0 = ((value_dtype*)PyArray_DATA(value))[0];"
    def c_foreach(self, (model_i, value), (z_i, )):
        return "z_i = value0;"
fill = constructor(Fill)
class TensorCopy(_Elemwise):
    # Element-wise identity: produce a fresh copy of the input tensor.
    def impl(self, x):
        # numpy.array(x) copies by default.
        return numpy.array(x)
    def grad(self, x, gz):
        # Copying is the identity map, so the gradient passes through.
        return gz
    def c_foreach(self, (x_i, ), (z_i, )):
        return "z_i = x_i;"
tensor_copy = constructor(TensorCopy)
# NOTE(review): dead code -- the whole region is guarded by `if 0:`.
if 0:
    ##########################
    # View Operations
    ##########################

    class transpose(_Op, Viewer):
        # View-based transpose: the output aliases the input's storage.
        def view_map(self):
            return {self.out: [self.inputs[0]]}
        def impl(self, x):
            return x.T
        def grad(self, x, gz):
            return transpose_copy(gz)
        def propagate_broadcastable(self, x):
            # Transposition reverses the broadcastable pattern.
            rval = list(x)
            rval.reverse()
            return [rval]
        def c_impl(self, x, z):
            return """
            PyArrayObject* transposed = (PyArrayObject*)PyArray_Transpose(%(x)s, NULL);
            if (%(z)s) {
                Py_XDECREF(%(z)s);
            }
            %(z)s = transposed;
            """

    class Subtensor(_Op, Viewer):
        # View-based slicing/indexing of a tensor.
        def view_map(self):
            return {self.out: [self.inputs[0]]}
        def impl(x, item):
            rval = x.__getitem__(item)
            #print 'get_slice running', rval
            return rval
        def grad(x, gz):
            # - option: allocate a potentially large matrix of zeros, and fill in
            #   the appropriate elements from gz
            # - option: return a sparse matrix
            # - option: return gz, but think about how to include a special addition
            #   function that uses a matching view over the original data
            # NOTE(review): should be `raise NotImplementedError` -- raising the
            # NotImplemented singleton is not a valid exception.
            raise NotImplemented
# NOTE(review): dead code -- both regions below are guarded by `if 0:`.
if 0:
    ##########################
    # Arithmetic : Add
    ##########################

    # Elemwise #
    class add_elemwise(_Elemwise):
        def impl(self, x, y):
            _assert_same_shapes(x, y)
            return x + y
        def grad(self, (x, y), gz):
            return gz, gz
        def c_foreach(self, (x_i, y_i), (z_i, )):
            return "z_i = x_i + y_i;"

    class add_elemwise_inplace(add_elemwise.inplace_version()):
        def impl(self, x, y):
            _assert_same_shapes(x, y)
            x += y
            return x

    # Scalar #
    class add_scalar(TensorScalarOp):
        def impl(self, x, a):
            _assert_tensor_scalar(x, a)
            return x + a
        def grad(self, (x, a), gz):
            return gz, sum(gz)
        c_expr = "x_i + a"

    class add_scalar_inplace(add_scalar.inplace_version()):
        def impl(self, x, a):
            _assert_tensor_scalar(x, a)
            x += a
            return x

    add = _scalar_switch(add_elemwise, add_scalar, add_scalar)
    add_inplace = _scalar_switch(add_elemwise_inplace, add_scalar_inplace)

if 0:
    ##########################
    # Arithmetic : Sub
    ##########################

    # Elemwise #
    class SubElemwise(_Elemwise):
        def impl(self, x, y):
            _assert_same_shapes(x, y)
            return x - y
        def grad(self, (x, y), gz):
            return gz, -gz
        def c_foreach(self, (x_i, y_i), (z_i, )):
            return "z_i = x_i - y_i;"

    class SubElemwiseInplace(SubElemwise.inplace_version()):
        def impl(self, x, y):
            _assert_same_shapes(x, y)
            x -= y
            return x

    # Scalar #
    def sub_scalar_r(x, a):
        return add_scalar(x, -a)

    def sub_scalar_l(x, a):
        return add_scalar(-x, a)

    def sub_scalar_rinplace(x, a):
        return add_scalar_inplace(x, -a)

    # NOTE(review): `sub_elemwise` / `sub_elemwise_inplace` are never defined
    # (the classes are SubElemwise / SubElemwiseInplace) -- this would raise
    # NameError if the guard were enabled.
    sub = _scalar_switch(sub_elemwise, sub_scalar_r, sub_scalar_l)
    sub_inplace = _scalar_switch(sub_elemwise_inplace, sub_scalar_rinplace)
if 1:
    ##########################
    # Arithmetic : Mul
    ##########################

    # Elemwise #
    class MulElemwise(_Elemwise):
        # Element-wise product of two same-shaped tensors.
        def impl(self, x, y):
            _assert_same_shapes(x, y)
            return x * y
        def grad(self, (x, y), gz):
            # d(x*y)/dx = y, d(x*y)/dy = x.
            return mul(y, gz), mul(x, gz)
        def c_foreach(self, (x_i, y_i), (z_i, )):
            return "z_i = x_i * y_i;"
    mul_elemwise = constructor(MulElemwise)

    class MulElemwiseInplace(MulElemwise.inplace_version()):
        def impl(self, x, y):
            _assert_same_shapes(x, y)
            x *= y
            return x
    mul_elemwise_inplace = constructor(MulElemwiseInplace)

    # Scalar #
    class Scale(TensorScalarOp):
        # Multiply a tensor by a one-element ('scalar') tensor.
        def impl(self, x, a):
            _assert_tensor_scalar(x, a)
            return x * a
        def grad(self, (x, a), gz):
            # NOTE(review): `scale(a, gz)` passes the scalar first, but
            # scale's impl expects (tensor, scalar) -- confirm argument order.
            return scale(a, gz), sum(mul_elemwise(x, gz))
        c_expr = "x_i * a"
    scale = constructor(Scale)

    class ScaleInplace(Scale.inplace_version()):
        def impl(self, x, a):
            _assert_tensor_scalar(x, a)
            x *= a
            return x
    scale_inplace = constructor(ScaleInplace)

    # Dispatch tensor*tensor to MulElemwise and tensor*scalar to Scale
    # (scalar-on-either-side: multiplication commutes).
    mul = _scalar_switch(mul_elemwise, scale, scale)
    mul_inplace = _scalar_switch(mul_elemwise_inplace, scale_inplace)
# NOTE(review): dead code -- all three regions below are guarded by `if 0:`.
if 0:
    ##########################
    # Arithmetic : Div
    ##########################

    # Elemwise #
    class DivElemwise(_Elemwise):
        def impl(self, x, y):
            _assert_same_shapes(x, y)
            return x / y
        def grad(self, (x, y), gz):
            # d(x/y)/dx = 1/y, d(x/y)/dy = -x/y^2.
            return div(gz, y), -div(mul(x, gz), sqr(y))
        def c_foreach(self, (x_i, y_i), (z_i, )):
            return "z_i = x_i / y_i;"

    class DivElemwiseInplace(DivElemwise.inplace_version()):
        def impl(self, x, y):
            _assert_same_shapes(x, y)
            x /= y
            return x

    # Scalar #
    def div_scalar_r(x, a):
        return scale(x, inv_elemwise(a))

    def div_scalar_l(x, a):
        return scale(inv_elemwise(x), a)

    def div_scalar_rinplace(x, a):
        return scale_inplace(x, inv_elemwise(a))

    # NOTE(review): `div_elemwise` / `div_elemwise_inplace` are never defined
    # (classes are DivElemwise / DivElemwiseInplace) -- NameError if enabled.
    div = _scalar_switch(div_elemwise, div_scalar_r, div_scalar_l)
    div_inplace = _scalar_switch(div_elemwise_inplace, div_scalar_rinplace)

if 0:
    ##########################
    # Arithmetic : Pow
    ##########################

    # Elemwise #
    class PowElemwise(_Elemwise):
        def impl(self, x, y):
            _assert_same_shapes(x, y)
            return x ** y
        def grad(self, (x, s), gz):
            # d(x^s)/dx = s*x^(s-1); d(x^s)/ds = log(x)*x^s.
            gx = gz * s * (pow_elemwise(x, s - 1.0))
            gs = gz * log(x) * pow_elemwise(x, s)
            return gx, gs
        def c_foreach(self, (x_i, s_i), (z_i, )):
            # NOTE(review): missing trailing semicolon in the C statement.
            return "z_i = pow(x_i, s_i)"

    class PowElemwiseInplace(PowElemwise.inplace_version()):
        def impl(self, x, y):
            _assert_same_shapes(x, y)
            x **= y
            return x

    # Scalar #
    class PowScalarL(TensorScalarOp):
        # scalar ** tensor
        def impl(self, x, a):
            _assert_tensor_scalar(x, a)
            return a ** x
        def grad(self, (x, s), gz):
            gx = sum(gz * s * pow_scalar_l(add_scalar(s, -1.0), x))
            gs = scale(mul(gz, pow_scalar_l(s, x)), log(x))
            return gx, gs
        c_expr = "pow(a, x_i)"

    class PowScalarR(TensorScalarOp):
        # tensor ** scalar
        def impl(self, x, a):
            _assert_tensor_scalar(x, a)
            return x ** a
        def grad(self, (x, s), gz):
            gx = scale(mul_elemwise(gz, pow_scalar_r(x, add_scalar(s, -1.0))), s)
            gs = sum(mul_elemwise(mul_elemwise(gz, pow_scalar_r(x, s)), log(x)))
            return gx, gs
        c_expr = "pow(x_i, a)"

    class PowScalarRInplace(PowScalarR.inplace_version()):
        def impl(self, x, a):
            _assert_tensor_scalar(x, a)
            x **= a
            return x

    # NOTE(review): lowercase names (pow_elemwise, pow_scalar_r, ...) are never
    # defined in this region -- NameError if enabled.
    pow = _scalar_switch(pow_elemwise, pow_scalar_r, pow_scalar_l)
    pow_inplace = _scalar_switch(pow_elemwise_inplace, pow_scalar_rinplace)

if 0:
    ##########################
    # Comparisons
    ##########################

    # Less-than
    class lt_elemwise(_Elemwise):
        def __init__(self, *args):
            raise NotImplementedError()
    class lt_scalar_r(_Elemwise):
        def __init__(self, *args):
            raise NotImplementedError()

    # Less-than or equal
    class le_elemwise(_Elemwise):
        def __init__(self, *args):
            raise NotImplementedError()
    class le_scalar_r(_Elemwise):
        def __init__(self, *args):
            raise NotImplementedError()

    # Greater-than
    class gt_elemwise(_Elemwise):
        def __init__(self, *args):
            raise NotImplementedError()
    class gt_scalar_r(_Elemwise):
        def __init__(self, *args):
            raise NotImplementedError()

    # Greater-than or equal
    class ge_elemwise(_Elemwise):
        def __init__(self, *args):
            raise NotImplementedError()
    class ge_scalar_r(_Elemwise):
        def __init__(self, *args):
            raise NotImplementedError()
# NOTE(review): dead code -- all three regions below are guarded by `if 0:`.
if 0:
    def _broadcastable_pattern(pattern):
        # Factory of Tensor factories with a fixed broadcastable pattern.
        def factory(data = None, name = None, dtype = None):
            if data:
                assert len(data.shape) == len(pattern)
                if dtype is not None:
                    assert dtype is data.dtype
                dtype = data.dtype
                rval = Tensor(dtype, pattern, name)
                rval.data = data
            else:
                rval = Tensor(dtype, pattern, name)
            return rval
        return factory

    row = _broadcastable_pattern([1, 0])
    col = _broadcastable_pattern([0, 1])
    matrix = _broadcastable_pattern([0, 0])

if 0:
    #old __init__ code
    # NOTE(review): this is a pasted fragment of a former Tensor.__init__;
    # the names (dtype, data, constant, ...) are unbound here and the exact
    # nesting below is a best-effort reconstruction of dead code.
    """Create a Tensor
    If data is given:
    - constant defaults to True
    - if dtype is given, it must match data.dtype
    - otherwise: default is data.dtype
    - if broadcastable is given, len(broadcastable) must match len(data.shape)
    - otherwise: if it is constant, it defaults to 1 where shape[i]==1
    - if it is not constant, it defaults to 0s
    If data is not given:
    - constant defaults to False
    """
    if dtype is None or broadcastable is None:
        if data is None:
            raise TypeError("Provide non-None data to complete the dtype and broadcastable flags.")
        data = numpy.asarray(data)
        if constant is None:
            constant = True
        dtype = data.dtype
        if constant:
            broadcastable = [1*(x == 1) for x in data.shape]
        else:
            broadcastable = [0] * len(data.shape)

if 0:
    def tensor__new__(cls, *args, **kwargs):
        """__new__ is overloaded to handle the special form Tensor(x) when x is
        a Tensor or an Op whose default output is a Tensor. In these cases, the
        argument x is returned, and a new Tensor is not created.
        """
        # NOTE(review): `a` is assigned but never used; the special form the
        # docstring describes is not actually implemented here.
        if len(args) == 1:
            a = args[0]
        t = super(Tensor, cls).__new__(cls, *args, **kwargs)
        t.__init__(*args, **kwargs)
        return t
tensor_ops.py
浏览文件 @
b51535d9
...
@@ -5,215 +5,205 @@ import gof.op
...
@@ -5,215 +5,205 @@ import gof.op
from
tensor
import
*
from
tensor
import
*
def
_upcast
(
dtype
,
*
dtypes
):
z
=
numpy
.
zeros
((),
dtype
=
dtype
)
for
dtype
in
dtypes
:
z
=
z
+
numpy
.
zeros
((),
dtype
=
dtype
)
return
str
(
z
.
dtype
)
def _wrap_as_tensor(x):
    """Coerce *x* to a Tensor.

    An Op is replaced by (the wrapping of) its default output; a Tensor is
    returned unchanged; anything else becomes a constant Tensor holding x.
    """
    if isinstance(x, Op):
        # An Op stands in for its default output.
        return _wrap_as_tensor(x.out)
    if isinstance(x, Tensor):
        return x
    return Tensor(data=x, constant=True)
# TensorOp is a convenient base class, permitting to factor the code for the
# Ops in this file.
# It is not necessary to inherit from TensorOp to make an Op that manipulates
# Tensors.
class
TensorOp
(
Op
):
nin
=
-
1
nout
=
1
cast_method
=
lambda
self
,
*
args
:
_upcast
(
*
args
)
def
__init__
(
self
,
*
inputs
):
inputs
=
map
(
_wrap_as_tensor
,
inputs
)
# # TensorOp is a convenient base class, permitting to factor the code for the
# # Ops in this file.
# # It is not necessary to inherit from TensorOp to make an Op that manipulates
# # Tensors.
# <<<<<<< /u/breuleuo/hg/new_theano/tensor_ops.py
# class TensorOp(Op):
if
self
.
nin
>=
0
:
# nin = -1
if
len
(
inputs
)
!=
self
.
nin
:
# nout = 1
raise
TypeError
(
"Wrong number of inputs for
%
s (got
%
i, expected
%
i)"
)
\
%
(
self
,
len
(
inputs
),
self
.
nin
)
i_broadcastables
=
[
getattr
(
input
,
'broadcastable'
,
None
)
for
input
in
inputs
]
# cast_method = lambda self, *args: _upcast(*args)
i_dtypes
=
[
getattr
(
input
,
'dtype'
,
None
)
for
input
in
inputs
]
o_broadcastables
=
utils
.
from_return_values
(
self
.
propagate_broadcastable
(
*
i_broadcastables
))
# def __init__(self, *inputs):
o_dtypes
=
utils
.
from_return_values
(
self
.
propagate_dtype
(
*
i_dtypes
))
self
.
inputs
=
inputs
# inputs = map(_wrap_as_tensor, inputs)
self
.
outputs
=
[
Tensor
(
dtype
,
broadcastable
)
for
broadcastable
,
dtype
in
zip
(
o_broadcastables
,
o_dtypes
)]
def
propagate_broadcastable
(
self
,
*
inputs
):
# if self.nin >= 0:
raise
AbstractFunctionError
()
# if len(inputs) != self.nin:
# raise TypeError("Wrong number of inputs for %s (got %i, expected %i)") \
# % (self, len(inputs), self.nin)
def
propagate_dtype
(
self
,
*
i_dtypes
):
# i_broadcastables = [getattr(input, 'broadcastable', None) for input in inputs]
for
dtype
in
i_dtypes
:
# i_dtypes = [getattr(input, 'dtype', None) for input in inputs]
if
dtype
is
None
:
raise
TypeError
(
"Expected a Tensor."
)
return
self
.
cast_method
(
*
i_dtypes
)
def
impl
(
self
,
*
inputs
):
# o_broadcastables = utils.from_return_values(self.propagate_broadcastable(*i_broadcastables))
raise
AbstractFunctionError
(
)
# o_dtypes = utils.from_return_values(self.propagate_dtype(*i_dtypes)
)
def
perform
(
self
):
# self.inputs = inputs
res
=
self
.
impl
(
*
[
input
.
data
for
input
in
self
.
inputs
])
# self.outputs = [Tensor(dtype, broadcastable) for broadcastable, dtype in zip(o_broadcastables, o_dtypes)]
if
self
.
nout
==
1
:
self
.
outputs
[
0
]
.
data
=
res
else
:
for
output
,
value
in
zip
(
self
.
outputs
,
res
):
output
.
data
=
value
def
c_var_names
(
self
):
(
self
,
inames
,
onames
),
_1
,
_2
,
_3
=
inspect
.
getargspec
(
self
.
c_impl
)
inames
=
utils
.
from_return_values
(
inames
)
onames
=
utils
.
from_return_values
(
onames
)
return
[
inames
,
onames
]
def
c_code
(
self
):
return
self
.
c_impl
(
self
.
inputs
,
self
.
outputs
)
def
c_impl
(
self
,
inputs
,
out
puts
):
# def propagate_broadcastable(self, *in
puts):
raise
AbstractFunctionError
()
#
raise AbstractFunctionError()
# def propagate_dtype(self, *i_dtypes):
# for dtype in i_dtypes:
# if dtype is None:
# raise TypeError("Expected a Tensor.")
# return self.cast_method(*i_dtypes)
# def impl(self, *inputs):
# raise AbstractFunctionError()
class
UnaryTensorOp
(
TensorOp
):
# def perform(self):
nin
=
1
# res = self.impl(*[input.data for input in self.inputs])
# if self.nout == 1:
# self.outputs[0].data = res
# else:
# for output, value in zip(self.outputs, res):
# output.data = value
class
BinaryTensorOp
(
TensorOp
):
# def c_var_names(self):
nin
=
2
# (self, inames, onames), _1, _2, _3 = inspect.getargspec(self.c_impl)
# inames = utils.from_return_values(inames)
# onames = utils.from_return_values(onames)
# return [inames, onames]
# def c_code(self):
# return self.c_impl(self.inputs, self.outputs)
# def c_impl(self, inputs, outputs):
# raise AbstractFunctionError()
# class UnaryTensorOp(TensorOp):
# nin = 1
# class BinaryTensorOp(TensorOp):
# nin = 2
# # class Transpose(UnaryTensorOp):
# # def propagate_broadcastable(self, x):
# # x2 = copy(x)
# # x2.reverse()
# # return [x2]
# # def impl(self, x):
# # return x.T
# class Transpose(UnaryTensorOp):
# # def c_impl(self, x, z):
# # return """
# # PyArrayObject* transposed = (PyArrayObject*)PyArray_Transpose(%(x)s, NULL);
# # //if (PyArray_REFCOUNT(transposed) == 1) {
# # // printf("lala\\n");
# # //}
# # //if (%(z)s) {
# # // Py_XDECREF(%(z)s);
# # //}
# # %(z)s = transposed;
# # Py_XINCREF(%(z)s);
# # """
# def scalar_switch(normal_f, scalar_f, scalar_f_reverse = None):
# def f(x, y):
# x, y = _wrap_as_tensor(x), _wrap_as_tensor(y)
# if 0 not in y.broadcastable:
# return scalar_f(x, y)
# if 0 not in x.broadcastable:
# if scalar_f_reverse:
# return scalar_f_reverse(y, x)
# else:
# raise TypeError("You cannot do this operation on a scalar.")
# return normal_f(x, y)
# return f
# def propagate_broadcastable(self, x):
# # Wrapper to ensure that all inputs to the function impl have the same size (foils numpy's broadcasting)
# x2 = copy(x)
# def assert_same_shapes(x, *rest):
# x2.reverse()
# shape = x.shape
# return [x2]
# for other in rest:
# if other.shape != shape:
# def impl(self, x):
# raise ValueError("The dimensions of the inputs do not match.")
# return x.T
# # Wrapper to ensure that the last input to impl is a scalar
# def c_impl(self, x, z):
# def assert_tensor_scalar(x, a):
# if numpy.product(a.shape) != 1:
# raise ValueError("The second argument must be a scalar.")
# class Elemwise(TensorOp):
# @staticmethod
# def extract_name(name):
# if name.endswith("_i"):
# return name[:-2]
# else:
# return name
# @staticmethod
# def is_loop_var(name):
# return name.endswith("_i")
# def c_var_names(self):
# cls = self.__class__
# (self, inames, onames), _1, _2, _3 = inspect.getargspec(self.c_foreach)
# spec = ([cls.extract_name(name) for name in inames],
# [cls.extract_name(name) for name in onames])
# return spec
# def loop_variables(self):
# cls = self.__class__
# (self, inames, onames), _1, _2, _3 = inspect.getargspec(cls.c_foreach)
# return ([cls.extract_name(name) for name in inames if cls.is_loop_var(name)],
# [cls.extract_name(name) for name in onames if cls.is_loop_var(name)])
# def propagate_broadcastable(self, *inputs):
# inames, onames = self.c_var_names()
# iloop, oloop = self.loop_variables()
# if oloop != onames:
# raise Exception("Cannot infer broadcastable for non-loop variable(s) %s" % set(onames).difference(oloop))
# all_bcast = [broadcastable for broadcastable, iname in zip(inputs, inames) if iname in iloop]
# ret = []
# for arr in zip(*all_bcast):
# if 0 in arr:
# ret.append(0)
# else:
# ret.append(1)
# return [ret] * self.nout
# @classmethod
# def inplace_version(cls):
# class Ret(cls, Destroyer):
# def destroy_list(self):
# return self.inputs[0]
# return Ret
# def c_init(self, inputs, outputs):
# pass
# def c_foreach(self, inputs, outputs):
# pass
# def c_finalize(self, inputs, outputs):
# pass
# class TensorScalarOp(Elemwise):
# def c_var_names(self):
# return (['x', '_a'], ['z', ])
# def loop_variables(self):
# return (['x', ], ['z', ])
# def c_init((x, _a), (z, )):
# return """
# return """
# PyArrayObject* transposed = (PyArrayObject*)PyArray_Transpose(%(x)s, NULL);
# if (PyArray_SIZE(_a) != 1) {
# //if (PyArray_REFCOUNT(transposed) == 1) {
# PyErr_SetString(PyExc_ValueError, \"The size of the scalar argument is not 1.\");
# // printf("lala\\n");
# }
# //}
# _a_dtype a = ((_a_dtype*)PyArray_DATA(_a))[0];
# //if (%(z)s) {
# // Py_XDECREF(%(z)s);
# //}
# %(z)s = transposed;
# Py_XINCREF(%(z)s);
# """
# """
# def _c_foreach(self):
# return "z_i = %s;" % self.c_expr
# =======
# >>>>>>> /tmp/tensor_ops.py~other.fNA50a
def scalar_switch(normal_f, scalar_f, scalar_f_reverse = None):
    """Build a binary function dispatching on which operand is a scalar.

    The returned function wraps both operands as Tensors, then calls
    ``scalar_f(x, y)`` when y is a scalar (fully broadcastable),
    ``scalar_f_reverse(y, x)`` when only x is (raising TypeError if no
    reverse form was supplied), and ``normal_f(x, y)`` otherwise.
    """
    def f(x, y):
        x, y = _wrap_as_tensor(x), _wrap_as_tensor(y)
        # A pattern with no 0 entries means every dimension broadcasts,
        # i.e. the operand is scalar-like.
        y_is_scalar = 0 not in y.broadcastable
        if y_is_scalar:
            return scalar_f(x, y)
        x_is_scalar = 0 not in x.broadcastable
        if x_is_scalar:
            if not scalar_f_reverse:
                raise TypeError("You cannot do this operation on a scalar.")
            return scalar_f_reverse(y, x)
        return normal_f(x, y)
    return f
# Wrapper to ensure that all inputs to the function impl have the same size (foils numpy's broadcasting)
def assert_same_shapes(x, *rest):
    """Raise ValueError unless every argument has exactly x's shape.

    Used to foil NumPy's implicit broadcasting in element-wise impls.
    """
    expected = x.shape
    if any(other.shape != expected for other in rest):
        raise ValueError("The dimensions of the inputs do not match.")
# Wrapper to ensure that the last input to impl is a scalar
def assert_tensor_scalar(x, a):
    """Raise ValueError unless *a* holds exactly one element (a 'scalar').

    Used by TensorScalarOp impls to validate their second argument.
    """
    # BUG FIX: numpy.product was a deprecated alias removed in NumPy 2.0;
    # ndarray.size is the element count directly and is equivalent to
    # product(a.shape) (both are 1 for 0-d arrays).
    if a.size != 1:
        raise ValueError("The second argument must be a scalar.")
class
Elemwise
(
TensorOp
):
@staticmethod
def
extract_name
(
name
):
if
name
.
endswith
(
"_i"
):
return
name
[:
-
2
]
else
:
return
name
@staticmethod
def
is_loop_var
(
name
):
return
name
.
endswith
(
"_i"
)
def
c_var_names
(
self
):
cls
=
self
.
__class__
(
self
,
inames
,
onames
),
_1
,
_2
,
_3
=
inspect
.
getargspec
(
self
.
c_foreach
)
spec
=
([
cls
.
extract_name
(
name
)
for
name
in
inames
],
[
cls
.
extract_name
(
name
)
for
name
in
onames
])
return
spec
def
loop_variables
(
self
):
cls
=
self
.
__class__
(
self
,
inames
,
onames
),
_1
,
_2
,
_3
=
inspect
.
getargspec
(
cls
.
c_foreach
)
return
([
cls
.
extract_name
(
name
)
for
name
in
inames
if
cls
.
is_loop_var
(
name
)],
[
cls
.
extract_name
(
name
)
for
name
in
onames
if
cls
.
is_loop_var
(
name
)])
def
propagate_broadcastable
(
self
,
*
inputs
):
inames
,
onames
=
self
.
c_var_names
()
iloop
,
oloop
=
self
.
loop_variables
()
if
oloop
!=
onames
:
raise
Exception
(
"Cannot infer broadcastable for non-loop variable(s)
%
s"
%
set
(
onames
)
.
difference
(
oloop
))
all_bcast
=
[
broadcastable
for
broadcastable
,
iname
in
zip
(
inputs
,
inames
)
if
iname
in
iloop
]
ret
=
[]
for
arr
in
zip
(
*
all_bcast
):
if
0
in
arr
:
ret
.
append
(
0
)
else
:
ret
.
append
(
1
)
return
[
ret
]
*
self
.
nout
@classmethod
def
inplace_version
(
cls
):
class
Ret
(
cls
,
Destroyer
):
def
destroy_list
(
self
):
return
self
.
inputs
[
0
]
return
Ret
def
c_init
(
self
,
inputs
,
outputs
):
pass
def
c_foreach
(
self
,
inputs
,
outputs
):
pass
def
c_finalize
(
self
,
inputs
,
outputs
):
pass
class
TensorScalarOp
(
Elemwise
):
def
c_var_names
(
self
):
return
([
'x'
,
'_a'
],
[
'z'
,
])
def
loop_variables
(
self
):
return
([
'x'
,
],
[
'z'
,
])
def
c_init
((
x
,
_a
),
(
z
,
)):
return
"""
if (PyArray_SIZE(_a) != 1) {
PyErr_SetString(PyExc_ValueError,
\"
The size of the scalar argument is not 1.
\"
);
}
_a_dtype a = ((_a_dtype*)PyArray_DATA(_a))[0];
"""
def
_c_foreach
(
self
):
return
"z_i =
%
s;"
%
self
.
c_expr
###########################
###########################
...
@@ -257,193 +247,7 @@ class Dot(TensorOp):
...
@@ -257,193 +247,7 @@ class Dot(TensorOp):
return
blas
.
gemm_code
(
''
,
'1.0'
,
'0.0'
)
return
blas
.
gemm_code
(
''
,
'1.0'
,
'0.0'
)
#########
## Add ##
#########
# Elemwise #
class
AddElemwise
(
Elemwise
):
def
impl
(
self
,
x
,
y
):
assert_same_shapes
(
x
,
y
)
return
x
+
y
def
grad
(
self
,
(
x
,
y
),
gz
):
return
gz
,
gz
def
c_foreach
(
self
,
(
x_i
,
y_i
),
(
z_i
,
)):
return
"z_i = x_i + y_i;"
class
AddElemwiseInplace
(
AddElemwise
.
inplace_version
()):
def
impl
(
self
,
x
,
y
):
assert_same_shapes
(
x
,
y
)
x
+=
y
return
x
# Scalar #
class
AddScalar
(
TensorScalarOp
):
def
impl
(
self
,
x
,
a
):
assert_tensor_scalar
(
x
,
a
)
return
x
+
a
def
grad
(
self
,
(
x
,
a
),
gz
):
return
gz
,
sum
(
gz
)
c_expr
=
"x_i + a"
class
AddScalarInplace
(
AddScalar
.
inplace_version
()):
def
impl
(
self
,
x
,
a
):
assert_tensor_scalar
(
x
,
a
)
x
+=
a
return
x
#########
## Sub ##
#########
# Elemwise #
class
SubElemwise
(
Elemwise
):
def
impl
(
self
,
x
,
y
):
assert_same_shapes
(
x
,
y
)
return
x
-
y
def
grad
(
self
,
(
x
,
y
),
gz
):
return
gz
,
-
gz
def
c_foreach
(
self
,
(
x_i
,
y_i
),
(
z_i
,
)):
return
"z_i = x_i - y_i;"
class
SubElemwiseInplace
(
SubElemwise
.
inplace_version
()):
def
impl
(
self
,
x
,
y
):
assert_same_shapes
(
x
,
y
)
x
-=
y
return
x
# Scalar #
def
sub_scalar_r
(
x
,
a
):
return
add_scalar
(
x
,
-
a
)
def
sub_scalar_l
(
x
,
a
):
return
add_scalar
(
-
x
,
a
)
def
sub_scalar_rinplace
(
x
,
a
):
return
add_scalar_inplace
(
x
,
-
a
)
#########
## Mul ##
#########
# Elemwise #
class
MulElemwise
(
Elemwise
):
def
impl
(
self
,
x
,
y
):
assert_same_shapes
(
x
,
y
)
return
x
*
y
def
grad
(
self
,
(
x
,
y
),
gz
):
return
mul
(
y
,
gz
),
mul
(
x
,
gz
)
def
c_foreach
(
self
,
(
x_i
,
y_i
),
(
z_i
,
)):
return
"z_i = x_i * y_i;"
class
MulElemwiseInplace
(
MulElemwise
.
inplace_version
()):
def
impl
(
self
,
x
,
y
):
assert_same_shapes
(
x
,
y
)
x
*=
y
return
x
# Scalar #
class
Scale
(
TensorScalarOp
):
def
impl
(
self
,
x
,
a
):
assert_tensor_scalar
(
x
,
a
)
return
x
*
a
def
grad
(
self
,
(
x
,
a
),
gz
):
return
scale
(
a
,
gz
),
sum
(
mul_elemwise
(
x
,
gz
))
c_expr
=
"x_i * a"
class
ScaleInplace
(
Scale
.
inplace_version
()):
def
impl
(
self
,
x
,
a
):
assert_tensor_scalar
(
x
,
a
)
x
*=
a
return
x
#########
## Div ##
#########
# Elemwise #
class
DivElemwise
(
Elemwise
):
def
impl
(
self
,
x
,
y
):
assert_same_shapes
(
x
,
y
)
return
x
/
y
def
grad
(
self
,
(
x
,
y
),
gz
):
return
div
(
gz
,
y
),
-
div
(
mul
(
x
,
gz
),
sqr
(
y
))
def
c_foreach
(
self
,
(
x_i
,
y_i
),
(
z_i
,
)):
return
"z_i = x_i / y_i;"
class
DivElemwiseInplace
(
DivElemwise
.
inplace_version
()):
def
impl
(
self
,
x
,
y
):
assert_same_shapes
(
x
,
y
)
x
/=
y
return
x
# Scalar #
def
div_scalar_r
(
x
,
a
):
return
scale
(
x
,
inv_elemwise
(
a
))
def
div_scalar_l
(
x
,
a
):
return
scale
(
inv_elemwise
(
x
),
a
)
def
div_scalar_rinplace
(
x
,
a
):
return
scale_inplace
(
x
,
inv_elemwise
(
a
))
#########
## Pow ##
#########
# Elemwise #
class
PowElemwise
(
Elemwise
):
def
impl
(
self
,
x
,
y
):
assert_same_shapes
(
x
,
y
)
return
x
**
y
def
grad
(
self
,
(
x
,
s
),
gz
):
gx
=
gz
*
s
*
(
pow_elemwise
(
x
,
s
-
1.0
))
gs
=
gz
*
log
(
x
)
*
pow_elemwise
(
x
,
s
)
return
gx
,
gs
def
c_foreach
(
self
,
(
x_i
,
s_i
),
(
z_i
,
)):
return
"z_i = pow(x_i, s_i)"
class
PowElemwiseInplace
(
PowElemwise
.
inplace_version
()):
def
impl
(
self
,
x
,
y
):
assert_same_shapes
(
x
,
y
)
x
**=
y
return
x
# Scalar #
class
PowScalarL
(
TensorScalarOp
):
def
impl
(
self
,
x
,
a
):
assert_tensor_scalar
(
x
,
a
)
return
a
**
x
def
grad
(
self
,
(
x
,
s
),
gz
):
gx
=
sum
(
gz
*
s
*
pow_scalar_l
(
add_scalar
(
s
,
-
1.0
),
x
))
gs
=
scale
(
mul
(
gz
,
pow_scalar_l
(
s
,
x
)),
log
(
x
))
return
gx
,
gs
c_expr
=
"pow(a, x_i)"
class
PowScalarR
(
TensorScalarOp
):
def
impl
(
self
,
x
,
a
):
assert_tensor_scalar
(
x
,
a
)
return
x
**
a
def
grad
(
self
,
(
x
,
s
),
gz
):
gx
=
scale
(
mul_elemwise
(
gz
,
pow_scalar_r
(
x
,
add_scalar
(
s
,
-
1.0
))),
s
)
gs
=
sum
(
mul_elemwise
(
mul_elemwise
(
gz
,
pow_scalar_r
(
x
,
s
)),
log
(
x
)))
return
gx
,
gs
c_expr
=
"pow(x_i, a)"
class
PowScalarRInplace
(
PowScalarR
.
inplace_version
()):
def
impl
(
self
,
x
,
a
):
assert_tensor_scalar
(
x
,
a
)
x
**=
a
return
x
...
@@ -451,126 +255,7 @@ class PowScalarRInplace(PowScalarR.inplace_version()):
...
@@ -451,126 +255,7 @@ class PowScalarRInplace(PowScalarR.inplace_version()):
## Others ##
## Others ##
############
############
class
Fill
(
Elemwise
):
def
impl
(
self
,
model
,
value
):
return
(
model
*
0
)
+
value
def
grad
(
self
,
(
model
,
value
),
gz
):
return
None
,
sum
(
gz
)
def
c_init
(
self
,
(
model
,
value
),
(
z
,
)):
return
"value_dtype value0 = ((value_dtype*)PyArray_DATA(value))[0];"
def
c_foreach
(
self
,
(
model_i
,
value
),
(
z_i
,
)):
return
"z_i = value0;"
##########################
#### Unary Operations ####
##########################
class
Transpose
(
TensorOp
,
Viewer
):
def
view_map
(
self
):
return
{
self
.
out
:
[
self
.
inputs
[
0
]]}
def
impl
(
self
,
x
):
return
x
.
T
def
grad
(
self
,
x
,
gz
):
return
transpose_copy
(
gz
)
def
propagate_broadcastable
(
self
,
x
):
rval
=
list
(
x
)
rval
.
reverse
()
return
[
rval
]
def
c_impl
(
self
,
x
,
z
):
return
"""
PyArrayObject* transposed = (PyArrayObject*)PyArray_Transpose(
%(x)
s, NULL);
if (
%(z)
s) {
Py_XDECREF(
%(z)
s);
}
%(z)
s = transposed;
"""
# def c_impl(self, (x, ), (xt, )):
# return """
# const int l = x->nd;
# // The user must ensure that all references to
# //xt->data go through xt, or there's going to be trouble..
# int refcheck = 0;
# if (x == xt)
# {
# return -1;
# }
# if (refcheck)
# {
# int refcnt = PyArray_REFCOUNT(xt);
# if ((refcnt > 2) // you might think this should be 1.. but this works
# //|| (xt->base != NULL)
# || (xt->weakreflist != NULL))
# {
# PyErr_SetString(PyExc_ValueError,
# "cannot resize an array that has "\\
# "been referenced or is referencing\\n"\\
# "another array in this way. Use the "\\
# "resize function");
# return -2;
# }
# }
# if (xt->nd != x->nd)
# {
# // this technique comes from PyArray_Resize()
# npy_intp * dimptr = (npy_intp*)PyDimMem_RENEW(xt->dimensions, 2 * x->nd);
# if (!dimptr)
# {
# PyErr_NoMemory();
# return 1;
# }
# xt->nd = x->nd;
# xt->dimensions = dimptr;
# xt->strides = dimptr + x->nd;
# }
# //copy x's dimensions and strides
# for (int i = 0; i < l; ++i)
# {
# xt->dimensions[i] = x->dimensions[l-i-1];
# xt->strides[i] = x->strides[l-i-1];
# }
# // point directly at b's type descriptor
# Py_INCREF(x->descr);
# Py_DECREF(xt->descr);
# xt->descr = x->descr;
# // name x as a base of xt, increment its refcount
# if ( xt->base != (PyObject*)x)
# {
# Py_INCREF(x);
# if ((xt->base) && (xt->base != Py_None))
# {
# Py_DECREF(xt->base);
# }
# xt->base = (PyObject*)x;
# }
# // mark xt as not owning its data
# if (PyArray_CHKFLAGS(xt, NPY_OWNDATA))
# {
# PyDataMem_FREE(xt->data);
# xt->flags &= ~NPY_OWNDATA;
# }
# xt->data = x->data;
# // this function is described in
# // ~/zzz.NOBACKUP/pub/src/numpy-1.0.3.1/numpy/core/src/arrayobject.c:1890
# PyArray_UpdateFlags(xt, NPY_CONTIGUOUS|NPY_FORTRAN|NPY_ALIGNED|NPY_WRITEABLE);
# /*
# TODO
# What should be done with the weakreflist ?
# */
# """
def
transpose_copy
(
x
):
return
array_copy
(
transpose
(
x
))
class
Neg
(
Elemwise
):
class
Neg
(
Elemwise
):
...
@@ -659,27 +344,6 @@ class SqrtInplace(Sqrt.inplace_version()):
...
@@ -659,27 +344,6 @@ class SqrtInplace(Sqrt.inplace_version()):
return
x
return
x
class
Sum
(
Elemwise
):
def
impl
(
self
,
x
):
return
numpy
.
sum
(
x
)
def
grad
(
self
,
x
,
gz
):
return
fill
(
x
,
gz
)
def
propagate_broadcastable
(
self
,
*
inputs
):
return
[()]
def
c_init
(
self
,
(
x
,
),
(
sum
,
)):
return
"sum_dtype* sump = ((sum_dtype*)PyArray_DATA(sum)); sump[0] = 0;"
def
c_foreach
(
self
,
(
x_i
,
),
(
sum
,
)):
return
"sump[0] += x_i;"
class
ArrayCopy
(
Elemwise
):
def
impl
(
self
,
x
):
return
numpy
.
array
(
x
)
def
grad
(
self
,
x
,
gz
):
return
gz
def
c_foreach
(
self
,
(
x_i
,
),
(
z_i
,
)):
return
"z_i = x_i;"
class
OnesLike
(
Elemwise
):
class
OnesLike
(
Elemwise
):
def
impl
(
self
,
x
):
def
impl
(
self
,
x
):
return
numpy
.
ones_like
(
x
)
return
numpy
.
ones_like
(
x
)
...
@@ -693,10 +357,20 @@ class ZerosLike(Elemwise):
...
@@ -693,10 +357,20 @@ class ZerosLike(Elemwise):
return
None
return
None
class
Min
:
pass
# ## Others ##
class
Max
:
pass
# class minmax(elemwise):
class
Argmin
:
pass
class
Argmax
:
pass
class
MinMax
:
pass
# nout = 2
# nout = 2
# def impl(x):
# def impl(x):
# return x.min, x.max
# return x.min, x.max
...
@@ -723,52 +397,31 @@ class ZerosLike(Elemwise):
...
@@ -723,52 +397,31 @@ class ZerosLike(Elemwise):
# """
# """
# ## Array slicing ##
# class get_slice(omega_op):
# def view_map(self): return {self.out: [self.inputs[0]]}
# def impl(x, item):
# rval = x.__getitem__(item)
# #print 'get_slice running', rval
# return rval
# def grad(x, gz): raise NotImplemented
# def refresh_shape(self):
# x,item = self.inputs
# rval = x.data.__getitem__(item.data).shape
# #print 'refresh_shape', rval
# return rval
# def refresh_dtype(self):
# return self.inputs[0].data.dtype
# class Transpose(UnaryTensorOp):
from
gof
import
modes
# def propagate_broadcastable(self, x):
modes
.
make_constructors
(
globals
())
# x2 = copy(x)
# x2.reverse()
# return [x2]
add
=
scalar_switch
(
add_elemwise
,
add_scalar
,
add_scalar
)
add_inplace
=
scalar_switch
(
add_elemwise_inplace
,
add_scalar_inplace
)
sub
=
scalar_switch
(
sub_elemwise
,
sub_scalar_r
,
sub_scalar_l
)
sub_inplace
=
scalar_switch
(
sub_elemwise_inplace
,
sub_scalar_rinplace
)
mul
=
scalar_switch
(
mul_elemwise
,
scale
,
scale
)
# def impl(self, x):
mul_inplace
=
scalar_switch
(
mul_elemwise_inplace
,
scale_inplace
)
# return x.T
div
=
scalar_switch
(
div_elemwise
,
div_scalar_r
,
div_scalar_l
)
# def c_impl(self, x, z):
div_inplace
=
scalar_switch
(
div_elemwise_inplace
,
div_scalar_rinplace
)
# return """
# PyArrayObject* transposed = (PyArrayObject*)PyArray_Transpose(%(x)s, NULL);
# //if (PyArray_REFCOUNT(transposed) == 1) {
# // printf("lala\\n");
# //}
# //if (%(z)s) {
# // Py_XDECREF(%(z)s);
# //}
# %(z)s = transposed;
# Py_XINCREF(%(z)s);
# """
pow
=
scalar_switch
(
pow_elemwise
,
pow_scalar_r
,
pow_scalar_l
)
pow_inplace
=
scalar_switch
(
pow_elemwise_inplace
,
pow_scalar_rinplace
)
Tensor
.
__add__
=
add
Tensor
.
__sub__
=
sub
Tensor
.
__mul__
=
mul
Tensor
.
__iadd__
=
add_inplace
Tensor
.
__isub__
=
sub_inplace
Tensor
.
__imul__
=
mul_inplace
Tensor
.
__pow__
=
pow
Tensor
.
__ipow__
=
pow_inplace
Tensor
.
T
=
property
(
transpose
)
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论