bla

bc361363 · Olivier Breuleux · a8cfe8f2 · bc361363 · bc361363 · bc361363
--- a/compile.py
+++ b/compile.py
@@ -11,7 +11,7 @@ import core
 def to_func(inputs, outputs):
-    print gof.Env(inputs, outputs)
+#    print gof.Env(inputs, outputs).io_toposort()
    p = prog(inputs, outputs)
    print p.env
    def f(*args):
@@ -42,7 +42,7 @@ class prog(gof.Prog):
                          inputs,
                          outputs,
                          opt.optimizer,
-                          gof.link.thunk_linker,
+                          gof.link.perform_linker,
                          [])
 #        core.pop_mode()

--- a/core.py
+++ b/core.py
@@ -71,7 +71,7 @@ def wrap(x):
    elif isinstance(x, Proxy):
        return wrap(x._obj)
    else:
-        return input(x)
+        return literal(x)
 #     elif isinstance(x, numpy.ndarray):
 #         return NumpyR(x)
 #     elif isinstance(x, (int, float)):
@@ -82,8 +82,10 @@ def wrap(x):
 def literal(x):
    try:
        present = x in gof.literals_db
+        hashable = True
    except TypeError: # x is unhashable
        present = False
+        hashable = False
    if present:
        return gof.literals_db.get(x)
@@ -95,7 +97,10 @@ def literal(x):
        raise TypeError("%s is already a result." % x)
    else:
        return PythonR(x, constant = True)
+    if hashable:
        gof.literals_db[x] = ret
    return ret
@@ -103,9 +108,19 @@ def literal(x):
 inplace = gof.Destroyer
 view = gof.Viewer
+def assert_same_shapes(impl):
+    def ret(x, *rest):
+        shape = x.shape
+        for other in rest:
+            if other.shape != shape:
+                raise TypeError("The dimensions of the inputs do not match.")
+        return impl(x, *rest)
+    return ret
 class omega_op(gof.PythonOp):
+    broadcast_op = False
    @staticmethod
    def __clsinit__(cls, name, bases, dct):
        # make grad a static method
@@ -114,6 +129,10 @@ class omega_op(gof.PythonOp):
            grad = grad.im_func
        cls.grad = staticmethod(grad)
+        # adjust impl: broadcast ops get their impl wrapped in a same-shape check
+        if cls.broadcast_op:
+            cls.impl = assert_same_shapes(cls.impl)
        # make impl a static method
        gof.PythonOp.__clsinit__(cls, name, bases, dct)
@@ -135,6 +154,15 @@ class omega_op(gof.PythonOp):
        return UNDEFINED
+def scalar_switch(x, y, normal_f, scalar_f):
+    x, y = wrap(x), wrap(y)
+    if x.constant and not x.data.shape:
+        return scalar_f(y, x)
+    if y.constant and not y.data.shape:
+        return scalar_f(x, y)
+    return normal_f(x, y)
 class NumpyR(gof.PythonR):
    def set_value(self, value):
@@ -145,29 +173,25 @@ class NumpyR(gof.PythonR):
        else:
            self.data = numpy.array(value)
-    def  __add__(self, y): return  add(self, y)
+    def  __add__(self, y): return scalar_switch(self, y, add, add_scalar)
-    def __radd__(self, x): return  add(x, self)
+    def __radd__(self, x): return scalar_switch(x, self, add, add_scalar)
-    def __iadd__(self, y): return iadd(self, y)
+    def __iadd__(self, y): return scalar_switch(self, y, iadd, iadd_scalar)
-    def  __sub__(self, y): return  sub(self, y)
-    def __rsub__(self, x): return  sub(x, self)
-    def __isub__(self, y): return isub(self, y)
-    def  __mul__(self, y): return  mul(self, y)
+    def  __sub__(self, y): return scalar_switch(self, y, sub, sub_scalar)
-    def __rmul__(self, x): return  mul(x, self)
+    def __rsub__(self, x): return scalar_switch(x, self, sub, sub_scalar)
-    def __imul__(self, y): return imul(self, y)
+    def __isub__(self, y): return scalar_switch(self, y, isub, isub_scalar)
-    def  __div__(self, y): return  div(self, y)
+    def  __mul__(self, y): return scalar_switch(self, y, mul, scale)
-    def __rdiv__(self, x): return  div(x, self)
+    def __rmul__(self, x): return scalar_switch(x, self, mul, scale)
-    def __idiv__(self, y): return idiv(self, y)
+    def __imul__(self, y): return scalar_switch(self, y, imul, iscale)
-    def  __mod__(self, y): return  mod(self, y)
+    def  __div__(self, y): return scalar_switch(self, y, div, inv_scale)
-    def __rmod__(self, x): return  mod(x, self)
+    def __rdiv__(self, x): return scalar_switch(x, self, div, inv_scale)
-    def __imod__(self, y): return imod(self, y)    
+    def __idiv__(self, y): return scalar_switch(self, y, idiv, iinv_scale)
-    def  __pow__(self, y): return  pow(self, y)
+    def  __pow__(self, y): return scalar_switch(self, y, pow_elemwise, pow)
-    def __rpow__(self, x): return  pow(x, self)
+    def __rpow__(self, x): return scalar_switch(x, self, pow_elemwise, pow)
-    def __ipow__(self, y): return ipow(self, y)
+    def __ipow__(self, y): return scalar_switch(self, y, ipow_elemwise, ipow)
    def __neg__(self):     return neg(self)
@@ -194,6 +218,7 @@ ones = wrap_producer(numpy.ones)
 ## Addition ##
 class proto_add(omega_op):
+    broadcast_op = True
    def grad(x, y, gz):
        return gz
@@ -203,6 +228,9 @@ class add(proto_add):
 class iadd(proto_add, inplace):
    impl = numpy.ndarray.__iadd__
+class add_scalar(omega_op):
+    impl = numpy.ndarray.__add__
 class proto_twice(omega_op):
    def grad(x, gz):
@@ -230,6 +258,12 @@ class sub(proto_sub):
 class isub(proto_sub, inplace):
    impl = numpy.ndarray.__isub__
+class sub_scalar(omega_op):
+    impl = numpy.ndarray.__sub__
+class isub_scalar(omega_op, inplace):
+    impl = numpy.ndarray.__isub__
 ## Element-wise multiplication ##
@@ -252,7 +286,7 @@ class sqr(proto_sqr):
    impl = lambda x: numpy.multiply(x, x)
 class isqr(proto_sqr, inplace):
-    impl = lambda x: x.__imul__(x),
+    impl = lambda x: x.__imul__(x)
 class proto_sqrt(omega_op):
@@ -284,17 +318,23 @@ class div(proto_div):
 class idiv(proto_div, inplace):
    impl = numpy.ndarray.__idiv__
+class inv_scale(omega_op):
+    impl = numpy.ndarray.__div__
+class iinv_scale(omega_op, inplace):
+    impl = numpy.ndarray.__idiv__
 ## Scaling ##
-class proto_scal(omega_op):
+class proto_scale(omega_op):
    def grad(x, a, gz):
        return scal(a, gz), sum(mul(x, gz))
-class scal(omega_op):
+class scale(omega_op):
    impl = numpy.ndarray.__mul__
-class iscal(omega_op, inplace):
+class iscale(omega_op, inplace):
    impl = numpy.ndarray.__imul__
@@ -335,6 +375,30 @@ class array_copy(omega_op):
    grad = lambda x, gz: gz
+## Power ##
+class proto_pow(omega_op):
+    def grad(x, y, gz):
+        pass
+class pow(proto_pow):
+    impl = numpy.ndarray.__pow__
+class ipow(proto_pow, inplace):
+    impl = numpy.ndarray.__ipow__
+class proto_pow_elemwise(omega_op):
+    def grad(x, y, gz):
+        pass
+class pow_elemwise(proto_pow_elemwise):
+    impl = numpy.ndarray.__pow__
+class ipow_elemwise(proto_pow_elemwise, inplace):
+    impl = numpy.ndarray.__ipow__
 ## Others ##
 class minmax(omega_op):
@@ -342,6 +406,14 @@ class minmax(omega_op):
    def impl(x):
        return x.min, x.max
+class fill(omega_op):
+    impl = lambda model, value: (model * 0) + value
+class sum(omega_op):
+    impl = numpy.sum
+    def grad(x, gz):
+        return fill(x, gz)
 # array_copy = wrapper("copy",
 #                      numpy.array,

--- a/grad.py
+++ b/grad.py
@@ -31,6 +31,7 @@ def expand_grad(i, o, cost_derivs):
 def grad(cost, wrt, cost_grad = 1.0):
+    assert core.current_mode() != 'eval'
 #    cost, wrt = core.wrap(cost), core.wrap(wrt)
    cost_derivs = expand_grad([wrt], [cost], {cost: core.wrap(cost_grad)})
 #     print wrt

--- a/opt.py
+++ b/opt.py
@@ -44,7 +44,7 @@ opts = [
    ['twice_to_itwice',             op_sub(twice, itwice)],
-    ['mulxx_to_twice',              pattern_opt((mul, 'x', 'x'),
+    ['mulxx_to_sqr',                pattern_opt((mul, 'x', 'x'),
                                                (sqr, 'x'))],
    ['sqr_to_isqr',                 op_sub(sqr, isqr)],
@@ -65,3 +65,16 @@ export_opts(opts) # publish the optimizations performed under individual names
 optimizer = gof.PythonOpt(gof.MergeOptMerge(gof.SeqOptimizer([opt for name, opt in opts])))
+#optimizer = gof.PythonOpt(gof.SeqOptimizer([opt for name, opt in opts]))
+#[isub(1.0, mul(0.1, iadd(transpose(dot(transpose(*2 -> sigmoid(dot(0.0, 1.0))), *4 -> mul(mul(neg(scal(mul(*3 -> sub(0.0, *1 -> sigmoid(dot(*2, transpose(1.0)))), fill(isqr(*3), 1.0)), 2.0)), *1), sub(1, *1)))), dot(transpose(0.0), mul(mul(dot(*4, 1.0), *2), sub(1, *2))))))]
+#[isub(1.0, mul(0.1, iadd(dot(transpose(0.0), mul(mul(dot(*4 -> mul(mul(neg(scal(mul(*1 -> sub(0.0, *2 -> sigmoid(dot(*3 -> sigmoid(dot(0.0, 1.0)), transpose(1.0)))), fill(sqr(*1), 1.0)), 2.0)), *2), sub(1, *2)), 1.0), *3), sub(1, *3))), transpose(dot(transpose(*3), *4)))))]
+#[isub(1.0, mul(0.1, iadd(dot(transpose(0.0), mul(mul(dot(*2 -> mul(mul(neg(scal(mul(*4 -> sub(0.0, *1 -> sigmoid(dot(*3 -> sigmoid(dot(0.0, 1.0)), transpose(1.0)))), fill(sqr(*4), 1.0)), 2.0)), *1), sub(1, *1)), 1.0), *3), sub(1, *3))), transpose(dot(transpose(*3), *2)))))]
+# [ sqr(sub(0.0, sigmoid(dot(sigmoid(dot(0.0, 1.0)), transpose(1.0)))))]
+# [isqr(sub(0.0, sigmoid(dot(sigmoid(dot(0.0, 1.0)), transpose(1.0)))))]
--- a/test.py
+++ b/test.py
@@ -63,32 +63,32 @@ import grad
 ############################
-#core.build_mode()
+# #core.build_mode()
-dim = core.wrap(())
+# dim = core.wrap(())
-dim2 = core.wrap((2, 2))
+# dim2 = core.wrap((2, 2))
-a = core.zeros(dim, dtype='int32') #(core.NumpyR(numpy.ones((3, 3))))
+# a = core.zeros(dim, dtype='int32') #(core.NumpyR(numpy.ones((3, 3))))
-b = core.ones(dim2, 'int32') #(core.NumpyR(numpy.ones((3, 3))))
+# b = core.ones(dim2, 'int32') #(core.NumpyR(numpy.ones((3, 3))))
-c = core.zeros(dim, dtype='int32')
+# c = core.zeros(dim, dtype='int32')
-d = a + (b + b) + c + numpy.ones(())
+# d = a + (b + b) + c + numpy.ones(())
-e = d + (b * c)
+# e = d + (b * c)
-#core.pop_mode()
+# #core.pop_mode()
-print e
+# print e
-#print e
+# #print e
-#print gof.graph.ops([dim], [e])
+# #print gof.graph.ops([dim], [e])
-#1/0
+# #1/0
-#print gof.Env([dim], [e])
+# #print gof.Env([dim], [e])
-#f = compile.to_func([dim], [e])
+# #f = compile.to_func([dim], [e])
-# f = compile.to_func([a, b, c], [e])
+# # f = compile.to_func([a, b, c], [e])
-# print f(1, 2, 3)
+# # print f(1, 2, 3)
-# #print f((2,2))
+# # #print f((2,2))
 ############################
@@ -129,29 +129,88 @@ print e
 # core.print_graph(r)
+############################
+# def dataset_1hot(x, targ, n):
+#     """Return a looping iterator over 1-hot vectors
+#     This function is a generator for the integers range(n) that works by
+#     side-effect on the numpy ndarray x.
+#     On each iteration, x is set (in-place) to the next element of an infinite
+#     sequence of 1-hot vectors.
+#     """
+#     assert targ.size == 1
+#     for i in xrange(n):
+#         idx = i % x.shape[1]
+#         x[:] = 0
+#         x[0,idx] = 1
+#         targ[0] = idx
+#         yield i
+# class sigmoid(core.omega_op):
+#     def impl(x):
+#         return 1.0 / (1.0 + numpy.exp(-x))
+#     def grad(x, gz):
+#         return gz * sigmoid(x) * (1 - sigmoid(x))
+# x = core.zeros((1, 10))
+# w = core.input(numpy.random.rand(10, 15))
+# #print x.data, w.data
+# def autoassociator(w, x):
+#     forward = sigmoid(core.dot(sigmoid(core.dot(x, w)), w.T))
+#     rec_error = core.sum(core.sqr(x - forward))
+#     w -= 0.1 * grad.grad(rec_error, w)
+#     return w, rec_error
+# w2, rec_error = core.build(autoassociator, w, x)
+# f = compile.to_func([w, x], [w2, rec_error])
+# for i in dataset_1hot(x.data, numpy.ndarray((1, )), 10000):
+#     w2, rec_error = f(w.data, x.data)
+#     if not(i % 1000):
+#         print rec_error
-class sigmoid(core.omega_op):
+# print "done!"
-    def impl(x):
+# print w.data
-        return 1.0 / (1.0 + numpy.exp(-x))
-    def grad(x, gz):
-        return gz * sigmoid(x) * (1 - sigmoid(x))
-def autoassociator(w, x):
-    forward = sigmoid(core.dot(w.T, sigmoid(core.dot(w, x))))
-    w -= 0.01 * grad.grad(core.sqr(x - forward(x)), w)
-w = core.input(numpy.random.rand())
+# # 1 = mul(mul(neg(scal(mul(sub(0.736213102665, sigmoid(*3)), 1.0), 2.0)), sigmoid(*3)), sub(1, sigmoid(*3)))
-x = core.input(numpy.random.rand())
+# # 2 = transpose(0.11474051836)
+# # 3 = dot(*2, *5)
+# # 4 = dot(0.11474051836, 0.736213102665)
+# # 5 = sigmoid(*4)
+# # add(transpose(dot(*1, transpose(*5))), dot(mul(mul(dot(transpose(*2), *1), sigmoid(*4)), sub(1, sigmoid(*4))), transpose(0.736213102665)))
+############################
+# def fun():
+#     a = core.NumpyR(numpy.zeros(()) + 200)
+# #    b = numpy.ones(())
+# #    a = a * core.sqrt(core.isqr(a))
+#     a = a * core.isqr(a)
+#     return a
+# f = core.build(fun)
+# g = compile.to_func(gof.graph.inputs([f]), [f])
+############################
-for i in xrange(10000):
+print core.ones((2, 2)) + 1
-    autoassociator(w, dataset.next())
+print numpy.ones((2, 2)) ** numpy.ones((2, 2))
-# 1 = mul(mul(neg(scal(mul(sub(0.736213102665, sigmoid(*3)), 1.0), 2.0)), sigmoid(*3)), sub(1, sigmoid(*3)))
-# 2 = transpose(0.11474051836)
-# 3 = dot(*2, *5)
-# 4 = dot(0.11474051836, 0.736213102665)
-# 5 = sigmoid(*4)
-# add(transpose(dot(*1, transpose(*5))), dot(mul(mul(dot(transpose(*2), *1), sigmoid(*4)), sub(1, sigmoid(*4))), transpose(0.736213102665)))