quick port of Elemwise, add/sub/mul and scalar_switch

8c211535 · Olivier Breuleux · 93b4e940 · 8c211535 · 8c211535 · 8c211535
--- a/_test_tensor_ops.py
+++ b/_test_tensor_ops.py
@@ -14,9 +14,12 @@ import sys


 def inputs():
-    x = modes.build(tensor([[1.0, 2.0], [3.0, 4.0]], 'x'))
-    y = None
-    z = None
+    l1 = [[1.0, 2.0], [3.0, 4.0]]
+    l2 = [[3.0, 4.0], [1.0, 2.0]]
+    l3 = numpy.ones((2, 3))
+    x = modes.build(tensor(l1, 'x'))
+    y = modes.build(tensor(l2, 'y'))
+    z = modes.build(tensor(l3, 'z'))
    return x, y, z

 def env(inputs, outputs, validate = True, features = []):
@@ -27,24 +30,34 @@ class _test_TensorOps(unittest.TestCase):

    def test_0(self):
        x, y, z = inputs()
-        e = transpose(x)
-        g = env([x], [e])
-        fn, (i, ), (o, ) = gof.cc.CLinker(g).make_thunk()
-        i.data = [[1.0, 2.0], [3.0, 4.0]]
-#        print sys.getrefcount(i.data)
+#        e = mul(add(x, y), 2)
+        e = (x + y) * 2
+        fn, i, o = gof.PerformLinker(env([x, y], [e])).make_thunk(True)
        fn()
-#        print sys.getrefcount(i.data)
-#        print sys.getrefcount(o.data)
-        print o.data
-#        assert res == numpy.asarray(arr)
+        print e

-#     def test_1(self):
+#     def test_0(self):
 #         x, y, z = inputs()
-#         e = mul(add(x, y), div(x, y))
-#         g = env([x, y], [e])
-#         fn = gof.cc.CLinker(g).make_function()
-#         assert fn(1.0, 2.0) == 1.5
-#         assert e.data == 1.5
+#         e = transpose(x)
+#         g = env([x], [e])
+#         fn, (i, ), (o, ) = gof.cc.CLinker(g).make_thunk()
+#         i.data = [[1.0, 2.0], [3.0, 4.0]]
+# #        print sys.getrefcount(i.data)
+#         fn()
+# #        print sys.getrefcount(i.data)
+# #        print sys.getrefcount(o.data)
+#         print o.data
+# #        assert res == numpy.asarray(arr)
+
+# #     def test_1(self):
+# #         x, y, z = inputs()
+# #         e = mul(add(x, y), div(x, y))
+# #         g = env([x, y], [e])
+# #         fn = gof.cc.CLinker(g).make_function()
+# #         assert fn(1.0, 2.0) == 1.5
+# #         assert e.data == 1.5
+
+


 if __name__ == '__main__':

--- a/tensor.py
+++ b/tensor.py
@@ -27,6 +27,7 @@ class Tensor(ResultBase):
        if dtype is None or broadcastable is None:
            if data is None:
                raise TypeError("Provide non-None data to complete the dtype and broadcastable flags.")
+            data = numpy.asarray(data)
            dtype = data.dtype
            if constant:
                broadcastable = [1*(x == 1) for x in data.shape]
@@ -35,7 +36,7 @@ class Tensor(ResultBase):
        self.broadcastable = broadcastable
        self.dtype = str(dtype)
        self.constant = constant
-        ResultBase.__init__(self, role = None, data = None, name = name)
+        ResultBase.__init__(self, role = None, data = data, name = name)

    def __get_constant(self):
        return self._constant

--- a/tensor_ops.py
+++ b/tensor_ops.py
@@ -67,7 +67,7 @@ class TensorOp(Op):
    def c_impl(self, inputs, outputs):
        raise AbstractFunctionError()

-        
+

 class UnaryTensorOp(TensorOp):
    nin = 1
@@ -100,9 +100,6 @@ class Transpose(UnaryTensorOp):
        """


-from gof import modes
-modes.make_constructors(globals())
-


 def scalar_switch(normal_f, scalar_f, scalar_f_reverse = None):
@@ -132,12 +129,65 @@ def assert_tensor_scalar(x, a):



-class tensor_scalar_op(elemwise):
+class Elemwise(TensorOp):
+
+    @staticmethod
+    def extract_name(name):
+        if name.endswith("_i"):
+            return name[:-2]
+        else:
+            return name
+    
+    @staticmethod
+    def is_loop_var(name):
+        return name.endswith("_i")
+    
+    def c_var_names(self):
+        """Return (input names, output names) from c_foreach's signature, minus any "_i" suffix."""
+        # argspec args are [self, input-names, output-names]; bind the first to a throwaway, not to self.
+        (_self, inames, onames) = inspect.getargspec(self.c_foreach)[0]
+        return ([self.extract_name(name) for name in inames],
+                [self.extract_name(name) for name in onames])
+
+    def loop_variables(self):
+        """Return (inputs, outputs) of c_foreach whose names end in "_i", with that suffix removed."""
+        (_self, inames, onames) = inspect.getargspec(self.__class__.c_foreach)[0]
+        return ([self.extract_name(name) for name in inames if self.is_loop_var(name)],
+                [self.extract_name(name) for name in onames if self.is_loop_var(name)])
+    
+    def propagate_broadcastable(self, *inputs):
+        """Infer each output's broadcastable pattern from the loop inputs' patterns."""
+        inames, onames = self.c_var_names()
+        iloop, oloop = self.loop_variables()
+        if oloop != onames:
+            # Only outputs that are looped over have a pattern derivable from the inputs.
+            raise Exception("Cannot infer broadcastable for non-loop variable(s) %s" % set(onames).difference(oloop))
+        all_bcast = [broadcastable for broadcastable, iname in zip(inputs, inames) if iname in iloop]
+        # A dimension stays broadcastable (1) only if no loop input has a 0 there.
+        ret = [int(0 not in arr) for arr in zip(*all_bcast)]
+        # One independent list per output: [ret] * nout would alias a single list,
+        # so mutating one output's pattern would silently change all the others.
+        return [list(ret) for _ in range(self.nout)]
+
    @classmethod
-    def variable_names(cls):
+    def inplace_version(cls):
+        return cls # placeholder
+
+    def c_init(self, inputs, outputs):
+        pass
+
+    def c_foreach(self, inputs, outputs):
+        pass
+
+    def c_finalize(self, inputs, outputs):
+        pass
+
+
+
+class TensorScalarOp(Elemwise):
+    def c_var_names(self):
        return (['x', '_a'], ['z', ])
-    @classmethod
-    def loop_variables(cls):
+    def loop_variables(self):
        return (['x', ], ['z', ])
    def c_init((x, _a), (z, )):
        return """
@@ -162,7 +212,7 @@ class AddElemwise(Elemwise):
        return x + y
    def grad(self, (x, y), gz):
        return gz, gz
-    def c_foreach((x_i, y_i), (z_i, )):
+    def c_foreach(self, (x_i, y_i), (z_i, )):
        return "z_i = x_i + y_i;"

 class AddElemwiseInplace(AddElemwise.inplace_version()):
@@ -180,33 +230,49 @@ class AddScalar(TensorScalarOp):
        return gz, sum(gz)
    c_expr = "x_i + a"

-AddScalarInplace = add_scalar.inplace_version()
-add_scalar_inplace.set_impl(tensor_scalar_impl(numpy.ndarray.__iadd__))
-
+class AddScalarInplace(AddScalar.inplace_version()):
+    def impl(self, x, a):
+        assert_tensor_scalar(x, a)
+        x += a
+        return x

-class twice(elemwise):
-    def impl(x):
+# shortcuts #
+class Twice(Elemwise):
+    def impl(self, x):
        return 2.0 * x
-    def grad(x, gz):
+    def grad(self, x, gz):
        return scale(gz, 2.0)
-    def c_foreach((x_i, ), (z_i, )):
-        "z_i = x_i + x_i;"
+    def c_foreach(self, (x_i, ), (z_i, )):
+        return "z_i = x_i + x_i;"  # a bare string here is only a docstring; the C snippet must be returned

-twice_inplace = twice.inplace_version()
+class TwiceInplace(Twice.inplace_version()):
+    def impl(self, x):
+        x *= 2.0
+        return x


-## Subtraction ##

-class sub_elemwise(elemwise):
-    impl = assert_same_shapes(numpy.ndarray.__sub__)
-    def grad(x, y, gz):
+#########
+## Sub ##
+#########
+
+# Elemwise #
+class SubElemwise(Elemwise):
+    def impl(self, x, y):
+        assert_same_shapes(x, y)
+        return x - y
+    def grad(self, (x, y), gz):
        return gz, -gz
-    def c_foreach((x_i, y_i), (z_i, )):
+    def c_foreach(self, (x_i, y_i), (z_i, )):
        return "z_i = x_i - y_i;"

-sub_elemwise_inplace = sub_elemwise.inplace_version()
-sub_elemwise_inplace.set_impl(assert_same_shapes(numpy.ndarray.__isub__))
+class SubElemwiseInplace(SubElemwise.inplace_version()):
+    def impl(self, x, y):
+        assert_same_shapes(x, y)
+        x -= y
+        return x

+# Scalar #
 def sub_scalar_r(x, a):
    return add_scalar(x, -a)

@@ -217,408 +283,430 @@ def sub_scalar_r_inplace(x, a):
    return add_scalar_inplace(x, -a)


-## Element-wise multiplication ##

-class mul_elemwise(elemwise):
-    impl = assert_same_shapes(numpy.ndarray.__mul__)
-    def grad(x, y, gz):
+#########
+## Mul ##
+#########
+
+# Elemwise #
+class MulElemwise(Elemwise):
+    def impl(self, x, y):
+        assert_same_shapes(x, y)
+        return x * y
+    def grad(self, (x, y), gz):
        return mul(y, gz), mul(x, gz)
-    def c_foreach((x_i, y_i), (z_i, )):
+    def c_foreach(self, (x_i, y_i), (z_i, )):
        return "z_i = x_i * y_i;"

-mul_elemwise_inplace = mul_elemwise.inplace_version()
-mul_elemwise_inplace.set_impl(assert_same_shapes(numpy.ndarray.__imul__))
-
+class MulElemwiseInplace(MulElemwise.inplace_version()):
+    def impl(self, x, y):
+        assert_same_shapes(x, y)
+        x *= y
+        return x

-class scale(tensor_scalar_op):
-    impl = tensor_scalar_impl(numpy.ndarray.__mul__)
-    def grad(x, a, gz):
+# Scalar #
+class Scale(TensorScalarOp):
+    def impl(self, x, a):
+        assert_tensor_scalar(x, a)
+        return x * a
+    def grad(self, (x, a), gz):
        return scale(a, gz), sum(mul_elemwise(x, gz))
    c_expr = "x_i * a"

-scale_inplace = scale.inplace_version()
-scale_inplace.set_impl(tensor_scalar_impl(numpy.ndarray.__imul__))
-
+class ScaleInplace(Scale.inplace_version()):
+    def impl(self, x, a):
+        assert_tensor_scalar(x, a)
+        x *= a
+        return x

-class sqr(elemwise):
-    def impl(x):
+# shortcuts #
+class Sqr(Elemwise):
+    def impl(self, x):
        return x * x
-    def grad(x, gz):
+    def grad(self, x, gz):
        return scale(mul_elemwise(x, gz), 2.0)
-    def c_foreach((x_i, ), (z_i, )):
+    def c_foreach(self, (x_i, ), (z_i, )):
        return "z_i = x_i * x_i;"

-isqr = sqr.inplace_version()
-isqr.set_impl(lambda x: x.__imul__(x))
-
+class SqrInplace(Sqr.inplace_version()):
+    def impl(self, x):  # self was missing: calling impl(x) on an instance raised TypeError
+        x *= x  # squares the array in place
+        return x


-class sqrt(elemwise):
-    impl = numpy.sqrt
-    def grad(x, gz):
+class Sqrt(Elemwise):
+    def impl(self, x):
+        return numpy.sqrt(x)
+    def grad(self, x, gz):
        return scale(div(gz, sqrt(x)), 0.5)
-    def c_foreach((x_i, ), (z_i, )):
+    def c_foreach(self, (x_i, ), (z_i, )):
        return "z_i = pow(x_i, 0.5);"

-isqrt = sqrt.inplace_version()
-isqrt.set_impl(lambda x: x.__ipow__(0.5))
-
-
-
-## Element-wise division ##
-
-class div_elemwise(elemwise):
-    impl = assert_same_shapes(numpy.ndarray.__div__)
-    def grad(x, y, gz):
-        return div(gz, y), -div(mul(x, gz), sqr(y))
-    def c_foreach((x_i, y_i), (z_i, )):
-        return "z_i = x_i / y_i;"
-
-div_elemwise_inplace = div_elemwise.inplace_version()
-div_elemwise_inplace.set_impl(assert_same_shapes(numpy.ndarray.__idiv__))
-
-def div_scalar_r(x, a):
-    return scale(x, inv_elemwise(a))
-
-def div_scalar_l(x, a):
-    return scale(inv_elemwise(x), a)
-
-def div_scalar_r_inplace(x, a):
-    return scale_inplace(x, inv_elemwise(a))
-
-
-
-## Scaling ##
-
-class scale(tensor_scalar_op):
-    impl = tensor_scalar_impl(numpy.ndarray.__mul__)
-    def grad(x, a, gz):
-        return scale(a, gz), sum(mul_elemwise(x, gz))
-    c_expr = "x_i * a"
-
-scale_inplace = scale.inplace_version()
-scale_inplace.set_impl(tensor_scalar_impl(numpy.ndarray.__imul__))
-
-
-class neg(elemwise):
-    impl = numpy.ndarray.__neg__
-    def grad(x, gz):
-        return -gz
-    def c_foreach((x_i, ), (z_i, )):
-        return "z_i = -x_i;"
-
-neg_inplace = neg.inplace_version()
-neg_inplace.set_impl(lambda x: x.__imul__(-1))
-
-
-class inv_elemwise(elemwise):
-    impl = lambda x: 1 / x
-    def grad(x, gz):
-        return -gz
-    def c_foreach((x_i, ), (z_i, )):
-        return "z_i = 1 / x_i;"
-
-inv_elemwise_inplace = inv_elemwise.inplace_version()
-
-
-## Dot product ##
+class SqrtInplace(Sqrt.inplace_version()):
+    def impl(self, x):
+        x **= 0.5
+        return x

-class dot(omega_op):
-    @staticmethod
-    def _output_shape(xshape, yshape):
-        if len(xshape) == 0: # x is a scalar
-            shape = yshape
-        else:
-            if len(yshape) >= 2: #y is a matrix or tensor
-                assert xshape[-1] == yshape[-2]
-                shape = tuple(xshape[:-1]+ yshape[:-2]+yshape[-1:])
-            elif len(yshape)==1: #y is vector
-                assert xshape[-1] == yshape[-1]
-                shape = tuple(xshape[:-1])
-            else:                #y is a scalar
-                shape = xshape
-        return shape
-
-    impl = numpy.dot
-    def grad(x, y, gz):
-        return dot(gz, transpose(y)), dot(transpose(x), gz)
-    def refresh(self, alloc=False):
-        x,y = self.inputs
-        shape = self._output_shape(x.shape, y.shape)
-        dtype = upcast(x.dtype, y.dtype)
-        if self.out.data is not None \
-                and self.out.shape == shape \
-                and self.out.dtype == dtype:
-                    return  #everything is ok
-        if alloc or self.out.data is not None: #data should be allocated
-            self.out.data = None
-            self.out.shape = shape
-            self.out.dtype = dtype
-            self.out.alloc()
-        else:
-            self.out.shape = shape
-            self.out.dtype = dtype
-    def c_support_code(self):
-        return blas.cblas_header_text()
-    def c_libs(self):
-        return blas.ldflags()
-    def c_impl((_x, _y), (_z, )):
-        return blas.gemm_code('', '1.0', '0.0')
-
-
-
-## Transposition ##
-
-class transpose(omega_op):
-    def view_map(self): return {self.out: [self.inputs[0]]}
-    impl = numpy.transpose
-    def grad(x, gz):
-        return transpose_copy(gz)
-    def refresh_shape(self):
-        rval = list(self.inputs[0].shape)
-        rval.reverse()
-        return rval
-    def refresh_dtype(self):
-        return  self.inputs[0].dtype
-    def c_impl((x, ), (xt, )):
-        return """
-        const int l = x->nd;
-        // The user must ensure that all references to
-        //xt->data go through xt, or there's going to be trouble..
-        int refcheck = 0;
-
-          if (x == xt)
-            {
-              return -1;
-            }
-          if (refcheck)
-            {
-              int refcnt =  PyArray_REFCOUNT(xt);
-                if ((refcnt > 2)  // you might think this should be 1.. but this works
-                    //|| (xt->base != NULL)
-                    || (xt->weakreflist != NULL))
-                  {
-                    PyErr_SetString(PyExc_ValueError,
-                                        "cannot resize an array that has "\\
-                                        "been referenced or is referencing\\n"\\
-                                        "another array in this way.  Use the "\\
-                                        "resize function");
-                    return -2;
-                  }
-            }
-
-        if (xt->nd != x->nd)
-        {
-            // this technique comes from PyArray_Resize()
-            npy_intp * dimptr = (npy_intp*)PyDimMem_RENEW(xt->dimensions, 2 * x->nd);
-            if (!dimptr)
-            {
-                  PyErr_NoMemory();
-                  return 1;
-            }
-            xt->nd = x->nd;
-            xt->dimensions = dimptr;
-            xt->strides = dimptr + x->nd;
-        }
-        //copy x's dimensions and strides
-        for (int i = 0; i < l; ++i)
-        {
-            xt->dimensions[i] = x->dimensions[l-i-1];
-            xt->strides[i] = x->strides[l-i-1];
-        }

-        // point directly at b's type descriptor
-        Py_INCREF(x->descr);
-        Py_DECREF(xt->descr);
-        xt->descr = x->descr;
-
-        // name x as a base of xt, increment its refcount
-        if ( xt->base != (PyObject*)x)
-        {
-          Py_INCREF(x);
-          if ((xt->base) && (xt->base != Py_None)) 
-            {
-              Py_DECREF(xt->base);
-            }
-          xt->base = (PyObject*)x;
-        }
+# ## Element-wise division ##
+
+# class div_elemwise(elemwise):
+#     impl = assert_same_shapes(numpy.ndarray.__div__)
+#     def grad(x, y, gz):
+#         return div(gz, y), -div(mul(x, gz), sqr(y))
+#     def c_foreach((x_i, y_i), (z_i, )):
+#         return "z_i = x_i / y_i;"
+
+# div_elemwise_inplace = div_elemwise.inplace_version()
+# div_elemwise_inplace.set_impl(assert_same_shapes(numpy.ndarray.__idiv__))
+
+# def div_scalar_r(x, a):
+#     return scale(x, inv_elemwise(a))
+
+# def div_scalar_l(x, a):
+#     return scale(inv_elemwise(x), a)
+
+# def div_scalar_r_inplace(x, a):
+#     return scale_inplace(x, inv_elemwise(a))
+
+
+
+# ## Scaling ##
+
+# class scale(tensor_scalar_op):
+#     impl = tensor_scalar_impl(numpy.ndarray.__mul__)
+#     def grad(x, a, gz):
+#         return scale(a, gz), sum(mul_elemwise(x, gz))
+#     c_expr = "x_i * a"
+
+# scale_inplace = scale.inplace_version()
+# scale_inplace.set_impl(tensor_scalar_impl(numpy.ndarray.__imul__))
+
+
+# class neg(elemwise):
+#     impl = numpy.ndarray.__neg__
+#     def grad(x, gz):
+#         return -gz
+#     def c_foreach((x_i, ), (z_i, )):
+#         return "z_i = -x_i;"
+
+# neg_inplace = neg.inplace_version()
+# neg_inplace.set_impl(lambda x: x.__imul__(-1))
+
+
+# class inv_elemwise(elemwise):
+#     impl = lambda x: 1 / x
+#     def grad(x, gz):
+#         return -gz
+#     def c_foreach((x_i, ), (z_i, )):
+#         return "z_i = 1 / x_i;"
+
+# inv_elemwise_inplace = inv_elemwise.inplace_version()
+
+
+# ## Dot product ##
+
+# class dot(omega_op):
+#     @staticmethod
+#     def _output_shape(xshape, yshape):
+#         if len(xshape) == 0: # x is a scalar
+#             shape = yshape
+#         else:
+#             if len(yshape) >= 2: #y is a matrix or tensor
+#                 assert xshape[-1] == yshape[-2]
+#                 shape = tuple(xshape[:-1]+ yshape[:-2]+yshape[-1:])
+#             elif len(yshape)==1: #y is vector
+#                 assert xshape[-1] == yshape[-1]
+#                 shape = tuple(xshape[:-1])
+#             else:                #y is a scalar
+#                 shape = xshape
+#         return shape
+
+#     impl = numpy.dot
+#     def grad(x, y, gz):
+#         return dot(gz, transpose(y)), dot(transpose(x), gz)
+#     def refresh(self, alloc=False):
+#         x,y = self.inputs
+#         shape = self._output_shape(x.shape, y.shape)
+#         dtype = upcast(x.dtype, y.dtype)
+#         if self.out.data is not None \
+#                 and self.out.shape == shape \
+#                 and self.out.dtype == dtype:
+#                     return  #everything is ok
+#         if alloc or self.out.data is not None: #data should be allocated
+#             self.out.data = None
+#             self.out.shape = shape
+#             self.out.dtype = dtype
+#             self.out.alloc()
+#         else:
+#             self.out.shape = shape
+#             self.out.dtype = dtype
+#     def c_support_code(self):
+#         return blas.cblas_header_text()
+#     def c_libs(self):
+#         return blas.ldflags()
+#     def c_impl((_x, _y), (_z, )):
+#         return blas.gemm_code('', '1.0', '0.0')
+
+
+
+# ## Transposition ##
+
+# class transpose(omega_op):
+#     def view_map(self): return {self.out: [self.inputs[0]]}
+#     impl = numpy.transpose
+#     def grad(x, gz):
+#         return transpose_copy(gz)
+#     def refresh_shape(self):
+#         rval = list(self.inputs[0].shape)
+#         rval.reverse()
+#         return rval
+#     def refresh_dtype(self):
+#         return  self.inputs[0].dtype
+#     def c_impl((x, ), (xt, )):
+#         return """
+#         const int l = x->nd;
+#         // The user must ensure that all references to
+#         //xt->data go through xt, or there's going to be trouble..
+#         int refcheck = 0;
+
+#           if (x == xt)
+#             {
+#               return -1;
+#             }
+#           if (refcheck)
+#             {
+#               int refcnt =  PyArray_REFCOUNT(xt);
+#                 if ((refcnt > 2)  // you might think this should be 1.. but this works
+#                     //|| (xt->base != NULL)
+#                     || (xt->weakreflist != NULL))
+#                   {
+#                     PyErr_SetString(PyExc_ValueError,
+#                                         "cannot resize an array that has "\\
+#                                         "been referenced or is referencing\\n"\\
+#                                         "another array in this way.  Use the "\\
+#                                         "resize function");
+#                     return -2;
+#                   }
+#             }
+
+#         if (xt->nd != x->nd)
+#         {
+#             // this technique comes from PyArray_Resize()
+#             npy_intp * dimptr = (npy_intp*)PyDimMem_RENEW(xt->dimensions, 2 * x->nd);
+#             if (!dimptr)
+#             {
+#                   PyErr_NoMemory();
+#                   return 1;
+#             }
+#             xt->nd = x->nd;
+#             xt->dimensions = dimptr;
+#             xt->strides = dimptr + x->nd;
+#         }
+#         //copy x's dimensions and strides
+#         for (int i = 0; i < l; ++i)
+#         {
+#             xt->dimensions[i] = x->dimensions[l-i-1];
+#             xt->strides[i] = x->strides[l-i-1];
+#         }
+
+#         // point directly at b's type descriptor
+#         Py_INCREF(x->descr);
+#         Py_DECREF(xt->descr);
+#         xt->descr = x->descr;
+
+#         // name x as a base of xt, increment its refcount
+#         if ( xt->base != (PyObject*)x)
+#         {
+#           Py_INCREF(x);
+#           if ((xt->base) && (xt->base != Py_None)) 
+#             {
+#               Py_DECREF(xt->base);
+#             }
+#           xt->base = (PyObject*)x;
+#         }
    
-        // mark xt as not owning its data
-        if (PyArray_CHKFLAGS(xt, NPY_OWNDATA))
-          {
-            PyDataMem_FREE(xt->data);
-            xt->flags &= ~NPY_OWNDATA;
-          }
-        xt->data = x->data;
+#         // mark xt as not owning its data
+#         if (PyArray_CHKFLAGS(xt, NPY_OWNDATA))
+#           {
+#             PyDataMem_FREE(xt->data);
+#             xt->flags &= ~NPY_OWNDATA;
+#           }
+#         xt->data = x->data;

-        // this function is described in 
-        // ~/zzz.NOBACKUP/pub/src/numpy-1.0.3.1/numpy/core/src/arrayobject.c:1890
-        PyArray_UpdateFlags(xt, NPY_CONTIGUOUS|NPY_FORTRAN|NPY_ALIGNED|NPY_WRITEABLE); 
+#         // this function is described in 
+#         // ~/zzz.NOBACKUP/pub/src/numpy-1.0.3.1/numpy/core/src/arrayobject.c:1890
+#         PyArray_UpdateFlags(xt, NPY_CONTIGUOUS|NPY_FORTRAN|NPY_ALIGNED|NPY_WRITEABLE); 

-        /*
-          TODO
-          What should be done with the weakreflist ?
-        */
-    """
+#         /*
+#           TODO
+#           What should be done with the weakreflist ?
+#         */
+#     """

-def transpose_copy(x):
-    return array_copy(transpose(x))
+# def transpose_copy(x):
+#     return array_copy(transpose(x))


-## Copy ##
+# ## Copy ##

-class array_copy(elemwise):
-    impl = numpy.array
-    grad = lambda x, gz: gz
-    def c_foreach((x_i, ), (z_i, )):
-        return "z_i = x_i;"
+# class array_copy(elemwise):
+#     impl = numpy.array
+#     grad = lambda x, gz: gz
+#     def c_foreach((x_i, ), (z_i, )):
+#         return "z_i = x_i;"


-## Power ##
+# ## Power ##

-class sqr(elemwise):
-    def impl(x):
-        return x * x
-    def grad(x, gz):
-        return scale(mul_elemwise(x, gz), 2.0)
-    def c_foreach((x_i, ), (z_i, )):
-        return "z_i = x_i * x_i;"
+# class sqr(elemwise):
+#     def impl(x):
+#         return x * x
+#     def grad(x, gz):
+#         return scale(mul_elemwise(x, gz), 2.0)
+#     def c_foreach((x_i, ), (z_i, )):
+#         return "z_i = x_i * x_i;"

-sqr_inplace = sqr.inplace_version()
-sqr_inplace.set_impl(lambda x: x.__imul__(x))
+# sqr_inplace = sqr.inplace_version()
+# sqr_inplace.set_impl(lambda x: x.__imul__(x))




-class sqrt(elemwise):
-    impl = numpy.sqrt
-    def grad(x, gz):
-        return scale(div(gz, sqrt(x)), 0.5)
-    def c_foreach((x_i, ), (z_i, )):
-        return "z_i = pow(x_i, 0.5);"
+# class sqrt(elemwise):
+#     impl = numpy.sqrt
+#     def grad(x, gz):
+#         return scale(div(gz, sqrt(x)), 0.5)
+#     def c_foreach((x_i, ), (z_i, )):
+#         return "z_i = pow(x_i, 0.5);"

-sqrt_inplace = sqrt.inplace_version()
-sqrt_inplace.set_impl(lambda x: x.__ipow__(0.5))
+# sqrt_inplace = sqrt.inplace_version()
+# sqrt_inplace.set_impl(lambda x: x.__ipow__(0.5))


-class exp(elemwise):
-    def impl(x): return numpy.exp(x)
-    def grad(x, gz): return gz * exp(x)
-    def c_foreach((x_i, ), (z_i, )): return "z_i = exp(x_i);"
+# class exp(elemwise):
+#     def impl(x): return numpy.exp(x)
+#     def grad(x, gz): return gz * exp(x)
+#     def c_foreach((x_i, ), (z_i, )): return "z_i = exp(x_i);"
    
-class log(elemwise):
-    def impl(x): return numpy.log(x)
-    def grad(x, gz): return gz / x
-    def c_foreach((x_i, ), (z_i, )): return "z_i = log(x_i);"
-
-class log2(elemwise):
-    def impl(x): return numpy.log2(x)
-    def grad(x, gz): return gz / (x * numpy.log(2))
-    def c_foreach((x_i, ), (z_i, )): return "z_i = log2(x_i);"
-
-class pow_elemwise(elemwise):
-    impl = assert_same_shapes(numpy.ndarray.__pow__)
-    def grad(x, s, gz):
-        raise NotImplemented # no gs
-        return gz * s * (pow_elemwise(x, s-1.0))
-    def c_foreach((x_i, s_i), (z_i, )):
-        return "z_i = pow(x_i, s_i)"
-
-pow_elemwise_inplace = pow_elemwise.inplace_version()
-pow_elemwise_inplace.set_impl(assert_same_shapes(numpy.ndarray.__ipow__))
-
-class pow_scalar_l(tensor_scalar_op):
-    impl = tensor_scalar_impl(lambda x, y: numpy.ndarray.__pow__(y, x))
-    def grad(x, s, gz):
-        raise NotImplemented # no gs
-        return gz * x * (pow_scalar_l(s,x-1.0))
-    c_expr = "pow(a, x_i)"
-
-class pow_scalar_r(tensor_scalar_op):
-    impl = tensor_scalar_impl(numpy.ndarray.__pow__)
-    def grad(x, s, gz):
-        gx = gz * s * (pow_scalar_r(x,s-1.0))
-        gs = sum(gz * pow_scalar_r(x,s) * log(x))
-        return gx, gs
-    c_expr = "pow(x_i, a)"
-
-pow_scalar_r_inplace = pow_scalar_r.inplace_version()
-pow_scalar_r_inplace.set_impl(tensor_scalar_impl(numpy.ndarray.__ipow__))
-
-
-## Others ##
-
-class minmax(elemwise):
-    nout = 2
-    def impl(x):
-        return x.min, x.max
-    def specs(x):
-        return [(numpy.ndarray, x[1], ())] * 2
-#     def alloc((x, ), (_min, _max)):
-#         _min.data = numpy.ndarray((), x.dtype)
-#         _max.data = numpy.ndarray((), x.dtype)
-    def c_init((x, ), (_min, _max)):
-        raise NotImplementedError
-        return """
-        _x_dtype min = _x[0];
-        _x_dtype max = _x[0];
-        """
-    def c_foreach((x, ), (_min, _max)):
-        return """
-        if (x < min) min = x;
-        if (x > max) max = x;
-        """
-    def c_finalize((x, ), (_min, _max)):
-        return """
-        _min[0] = min;
-        _max[0] = max;
-        """
+# class log(elemwise):
+#     def impl(x): return numpy.log(x)
+#     def grad(x, gz): return gz / x
+#     def c_foreach((x_i, ), (z_i, )): return "z_i = log(x_i);"
+
+# class log2(elemwise):
+#     def impl(x): return numpy.log2(x)
+#     def grad(x, gz): return gz / (x * numpy.log(2))
+#     def c_foreach((x_i, ), (z_i, )): return "z_i = log2(x_i);"
+
+# class pow_elemwise(elemwise):
+#     impl = assert_same_shapes(numpy.ndarray.__pow__)
+#     def grad(x, s, gz):
+#         raise NotImplemented # no gs
+#         return gz * s * (pow_elemwise(x, s-1.0))
+#     def c_foreach((x_i, s_i), (z_i, )):
+#         return "z_i = pow(x_i, s_i)"
+
+# pow_elemwise_inplace = pow_elemwise.inplace_version()
+# pow_elemwise_inplace.set_impl(assert_same_shapes(numpy.ndarray.__ipow__))
+
+# class pow_scalar_l(tensor_scalar_op):
+#     impl = tensor_scalar_impl(lambda x, y: numpy.ndarray.__pow__(y, x))
+#     def grad(x, s, gz):
+#         raise NotImplemented # no gs
+#         return gz * x * (pow_scalar_l(s,x-1.0))
+#     c_expr = "pow(a, x_i)"
+
+# class pow_scalar_r(tensor_scalar_op):
+#     impl = tensor_scalar_impl(numpy.ndarray.__pow__)
+#     def grad(x, s, gz):
+#         gx = gz * s * (pow_scalar_r(x,s-1.0))
+#         gs = sum(gz * pow_scalar_r(x,s) * log(x))
+#         return gx, gs
+#     c_expr = "pow(x_i, a)"
+
+# pow_scalar_r_inplace = pow_scalar_r.inplace_version()
+# pow_scalar_r_inplace.set_impl(tensor_scalar_impl(numpy.ndarray.__ipow__))
+
+
+# ## Others ##
+
+# class minmax(elemwise):
+#     nout = 2
+#     def impl(x):
+#         return x.min, x.max
+#     def specs(x):
+#         return [(numpy.ndarray, x[1], ())] * 2
+# #     def alloc((x, ), (_min, _max)):
+# #         _min.data = numpy.ndarray((), x.dtype)
+# #         _max.data = numpy.ndarray((), x.dtype)
+#     def c_init((x, ), (_min, _max)):
+#         raise NotImplementedError
+#         return """
+#         _x_dtype min = _x[0];
+#         _x_dtype max = _x[0];
+#         """
+#     def c_foreach((x, ), (_min, _max)):
+#         return """
+#         if (x < min) min = x;
+#         if (x > max) max = x;
+#         """
+#     def c_finalize((x, ), (_min, _max)):
+#         return """
+#         _min[0] = min;
+#         _max[0] = max;
+#         """
+
+
+# class fill(elemwise):
+#     impl = lambda model, value: (model * 0) + value
+#     def c_init((model, value), (z, )):
+#         return "value_dtype value0 = ((value_dtype*)PyArray_DATA(value))[0];"
+#     def c_foreach((model_i, value), (z_i, )):
+#         return "z_i = value0;"
+
+# fill_inplace = fill.inplace_version()
+
+# class sum(elemwise):
+#     impl = numpy.sum
+#     def grad(x, gz):
+#         return fill(x, gz)
+#     def refresh_shape(self):
+#         return ()
+#     def c_init((x, ), (sum, )):
+#         return "sum_dtype* sump = ((sum_dtype*)PyArray_DATA(sum)); sump[0] = 0;"
+#     def c_foreach((x_i, ), (sum, )):
+#         return "sump[0] += x_i;"
+
+# class ones_like(elemwise):
+#     impl = numpy.ones_like
+#     def grad(x, gz): return Undefined
+
+# class zeros_like(elemwise):
+#     impl = numpy.zeros_like
+#     def grad(x, gz): return Undefined
+
+# ## Array slicing ##
+
+# class get_slice(omega_op):
+#     def view_map(self): return {self.out: [self.inputs[0]]}
+#     def impl(x, item): 
+#         rval = x.__getitem__(item)
+#         #print 'get_slice running', rval
+#         return rval
+#     def grad(x, gz): raise NotImplemented
+#     def refresh_shape(self): 
+#         x,item = self.inputs
+#         rval = x.data.__getitem__(item.data).shape 
+#         #print 'refresh_shape', rval
+#         return rval
+#     def refresh_dtype(self):
+#         return self.inputs[0].data.dtype
+


-class fill(elemwise):
-    impl = lambda model, value: (model * 0) + value
-    def c_init((model, value), (z, )):
-        return "value_dtype value0 = ((value_dtype*)PyArray_DATA(value))[0];"
-    def c_foreach((model_i, value), (z_i, )):
-        return "z_i = value0;"
-
-fill_inplace = fill.inplace_version()
-
-class sum(elemwise):
-    impl = numpy.sum
-    def grad(x, gz):
-        return fill(x, gz)
-    def refresh_shape(self):
-        return ()
-    def c_init((x, ), (sum, )):
-        return "sum_dtype* sump = ((sum_dtype*)PyArray_DATA(sum)); sump[0] = 0;"
-    def c_foreach((x_i, ), (sum, )):
-        return "sump[0] += x_i;"
-
-class ones_like(elemwise):
-    impl = numpy.ones_like
-    def grad(x, gz): return Undefined
-
-class zeros_like(elemwise):
-    impl = numpy.zeros_like
-    def grad(x, gz): return Undefined
-
-## Array slicing ##
-
-class get_slice(omega_op):
-    def view_map(self): return {self.out: [self.inputs[0]]}
-    def impl(x, item): 
-        rval = x.__getitem__(item)
-        #print 'get_slice running', rval
-        return rval
-    def grad(x, gz): raise NotImplemented
-    def refresh_shape(self): 
-        x,item = self.inputs
-        rval = x.data.__getitem__(item.data).shape 
-        #print 'refresh_shape', rval
-        return rval
-    def refresh_dtype(self):
-        return self.inputs[0].data.dtype
+from gof import modes
+modes.make_constructors(globals())


 add = scalar_switch(add_elemwise, add_scalar, add_scalar)
@@ -630,21 +718,18 @@ sub_inplace = scalar_switch(sub_elemwise_inplace, sub_scalar_r_inplace)
 mul = scalar_switch(mul_elemwise, scale, scale)
 mul_inplace = scalar_switch(mul_elemwise_inplace, scale_inplace)

-div = scalar_switch(div_elemwise, div_scalar_r, div_scalar_l)
-div_inplace = scalar_switch(div_elemwise_inplace, div_scalar_r_inplace)
-
-pow = scalar_switch(pow_elemwise, pow_scalar_r, pow_scalar_l)
-pow_inplace = scalar_switch(pow_elemwise_inplace, pow_scalar_r_inplace)
-
-
-
-
-
-
-
-
+# div = scalar_switch(div_elemwise, div_scalar_r, div_scalar_l)
+# div_inplace = scalar_switch(div_elemwise_inplace, div_scalar_r_inplace)

+# pow = scalar_switch(pow_elemwise, pow_scalar_r, pow_scalar_l)
+# pow_inplace = scalar_switch(pow_elemwise_inplace, pow_scalar_r_inplace)

+Tensor.__add__ = add
+Tensor.__sub__ = sub
+Tensor.__mul__ = mul
+Tensor.__iadd__ = add_inplace
+Tensor.__isub__ = sub_inplace
+Tensor.__imul__ = mul_inplace