merge

上级 86cb7339
...@@ -108,18 +108,9 @@ def literal(x): ...@@ -108,18 +108,9 @@ def literal(x):
inplace = gof.Destroyer inplace = gof.Destroyer
view = gof.Viewer view = gof.Viewer
def assert_same_shapes(impl):
def ret(x, *rest):
shape = x.shape
for other in rest:
if other.shape != shape:
raise TypeError("The dimensions of the inputs do not match.")
return impl(x, *rest)
return ret
class omega_op(gof.PythonOp): class omega_op(gof.PythonOp):
broadcast_op = False forbid_broadcast = False
@staticmethod @staticmethod
def __clsinit__(cls, name, bases, dct): def __clsinit__(cls, name, bases, dct):
...@@ -129,9 +120,9 @@ class omega_op(gof.PythonOp): ...@@ -129,9 +120,9 @@ class omega_op(gof.PythonOp):
grad = grad.im_func grad = grad.im_func
cls.grad = staticmethod(grad) cls.grad = staticmethod(grad)
# adjust impl # # adjust impl
if cls.broadcast_op: # if cls.forbid_broadcast:
cls.impl = assert_same_shapes(cls.impl) # cls.impl = assert_same_shapes(cls.impl)
# make impl a static method # make impl a static method
gof.PythonOp.__clsinit__(cls, name, bases, dct) gof.PythonOp.__clsinit__(cls, name, bases, dct)
...@@ -154,13 +145,15 @@ class omega_op(gof.PythonOp): ...@@ -154,13 +145,15 @@ class omega_op(gof.PythonOp):
return UNDEFINED return UNDEFINED
def scalar_switch(x, y, normal_f, scalar_f): def scalar_switch(normal_f, scalar_f, scalar_f_reverse):
x, y = wrap(x), wrap(y) def f(x, y):
if x.constant and not x.data.shape: x, y = wrap(x), wrap(y)
return scalar_f(y, x) if x.constant and not x.data.shape:
if y.constant and not y.data.shape: return scalar_f_reverse(y, x)
return scalar_f(x, y) if y.constant and not y.data.shape:
return normal_f(x, y) return scalar_f(x, y)
return normal_f(x, y)
return f
class NumpyR(gof.PythonR): class NumpyR(gof.PythonR):
...@@ -173,25 +166,25 @@ class NumpyR(gof.PythonR): ...@@ -173,25 +166,25 @@ class NumpyR(gof.PythonR):
else: else:
self.data = numpy.array(value) self.data = numpy.array(value)
def __add__(self, y): return scalar_switch(self, y, add, add_scalar) def __add__(self, y): return add(self, y)
def __radd__(self, x): return scalar_switch(x, self, add, add_scalar) def __radd__(self, x): return add(x, self)
def __iadd__(self, y): return scalar_switch(self, y, iadd, iadd_scalar) def __iadd__(self, y): return iadd(self, y)
def __sub__(self, y): return scalar_switch(self, y, sub, sub_scalar) def __sub__(self, y): return sub(self, y)
def __rsub__(self, x): return scalar_switch(x, self, sub, sub_scalar) def __rsub__(self, x): return sub(x, self)
def __isub__(self, y): return scalar_switch(self, y, isub, isub_scalar) def __isub__(self, y): return isub(self, y)
def __mul__(self, y): return scalar_switch(self, y, mul, scale) def __mul__(self, y): return mul(self, y)
def __rmul__(self, x): return scalar_switch(x, self, mul, scale) def __rmul__(self, x): return mul(x, self)
def __imul__(self, y): return scalar_switch(self, y, imul, iscale) def __imul__(self, y): return imul(self, y)
def __div__(self, y): return scalar_switch(self, y, div, inv_scale) def __div__(self, y): return div(self, y)
def __rdiv__(self, x): return scalar_switch(x, self, div, inv_scale) def __rdiv__(self, x): return div(x, self)
def __idiv__(self, y): return scalar_switch(self, y, idiv, iinv_scale) def __idiv__(self, y): return idiv(self, y)
def __pow__(self, y): return scalar_switch(self, y, pow_elemwise, pow) def __pow__(self, y): return pow(self, y)
def __rpow__(self, x): return scalar_switch(x, self, pow_elemwise, pow) def __rpow__(self, x): return pow(x, self)
def __ipow__(self, y): return scalar_switch(self, y, ipow_elemwise, ipow) def __ipow__(self, y): return ipow(self, y)
def __neg__(self): return neg(self) def __neg__(self): return neg(self)
...@@ -215,29 +208,53 @@ zeros = wrap_producer(numpy.zeros) ...@@ -215,29 +208,53 @@ zeros = wrap_producer(numpy.zeros)
ones = wrap_producer(numpy.ones) ones = wrap_producer(numpy.ones)
# Wrapper to ensure that all inputs to the function impl have the same size (foils numpy's broadcasting)
def assert_same_shapes(impl):
    """Wrap ``impl`` so that it refuses inputs whose shapes differ.

    The returned function is called as ``ret(x, *rest)``.  It compares the
    ``.shape`` of every extra argument with ``x.shape`` and raises
    ``TypeError`` on the first mismatch; otherwise it returns
    ``impl(x, *rest)`` unchanged.
    """
    def ret(x, *rest):
        shape = x.shape
        for other in rest:
            # Strict equality: shapes that numpy could broadcast together
            # are still rejected.
            if other.shape != shape:
                raise TypeError("The dimensions of the inputs do not match.")
        return impl(x, *rest)
    return ret
# Wrapper to ensure that the last input to impl is a scalar
def tensor_scalar_op(impl):
    """Wrap the binary ``impl`` so its second argument must be a scalar.

    The returned function is called as ``ret(x, a)``.  If ``a.shape`` is
    non-empty a ``TypeError`` is raised; otherwise ``impl(x, a)`` is returned.
    """
    def ret(x, a):
        # A 0-d value has shape (), which is falsy; any other shape is truthy.
        if a.shape:
            # The 1-tuple keeps the formatting correct even if impl is a tuple.
            raise TypeError("The second argument to %s must be a scalar." % (impl,))
        return impl(x, a)
    return ret
## Addition ## ## Addition ##
class proto_add(omega_op): class proto_add_elemwise(omega_op):
broadcast_op = True
def grad(x, y, gz): def grad(x, y, gz):
return gz return gz
class add(proto_add): class add_elemwise(proto_add_elemwise):
impl = numpy.ndarray.__add__ impl = assert_same_shapes(numpy.ndarray.__add__)
class iadd_elemwise(proto_add_elemwise, inplace):
    # In-place element-wise addition: ndarray.__iadd__ guarded by
    # assert_same_shapes, so operands of different shapes raise TypeError.
    impl = assert_same_shapes(numpy.ndarray.__iadd__)
class iadd(proto_add, inplace):
impl = numpy.ndarray.__iadd__
class add_scalar(omega_op): class proto_add_scalar(omega_op):
impl = numpy.ndarray.__add__ def grad(x, a, gz):
return gz, sum(gz)
class add_scalar(proto_add_scalar):
    # Tensor + scalar: ndarray.__add__ guarded by tensor_scalar_op, which
    # raises TypeError unless the second operand has an empty shape.
    impl = tensor_scalar_op(numpy.ndarray.__add__)
class iadd_scalar(omega_op): class iadd_scalar(proto_add_scalar, inplace):
impl = numpy.ndarray.__iadd__ impl = tensor_scalar_op(numpy.ndarray.__iadd__)
class proto_twice(omega_op): class proto_twice(omega_op):
def grad(x, gz): def grad(x, gz):
return scal(gz, 2.0) return scale(gz, 2.0)
class twice(proto_twice): class twice(proto_twice):
def impl(x): def impl(x):
...@@ -251,39 +268,56 @@ class itwice(proto_twice, inplace): ...@@ -251,39 +268,56 @@ class itwice(proto_twice, inplace):
## Subtraction ## ## Subtraction ##
class proto_sub(omega_op): class proto_sub_elemwise(omega_op):
def grad(x, y, gz): def grad(x, y, gz):
return gz, -gz return gz, -gz
class sub(proto_sub): class sub_elemwise(proto_sub_elemwise):
impl = numpy.ndarray.__sub__ impl = assert_same_shapes(numpy.ndarray.__sub__)
class isub_elemwise(proto_sub_elemwise, inplace):
    # In-place element-wise subtraction: ndarray.__isub__ guarded by
    # assert_same_shapes, so operands of different shapes raise TypeError.
    impl = assert_same_shapes(numpy.ndarray.__isub__)
class isub(proto_sub, inplace): def sub_scalar_r(x, a):
impl = numpy.ndarray.__isub__ return add_scalar(x, -a)
class sub_scalar(omega_op): def sub_scalar_l(x, a):
impl = numpy.ndarray.__sub__ return add_scalar(-x, a)
class isub_scalar(omega_op, inplace): def isub_scalar_r(x, a):
impl = numpy.ndarray.__isub__ return iadd_scalar(x, -a)
def isub_scalar_l(x, a):
return iadd_scalar(-x, a)
## Element-wise multiplication ## ## Element-wise multiplication ##
class proto_mul(omega_op): class proto_mul_elemwise(omega_op):
def grad(x, y, gz): def grad(x, y, gz):
return mul(y, gz), mul(x, gz) return mul(y, gz), mul(x, gz)
class mul(proto_mul): class mul_elemwise(proto_mul_elemwise):
impl = numpy.ndarray.__mul__ impl = assert_same_shapes(numpy.ndarray.__mul__)
class imul_elemwise(proto_mul_elemwise, inplace):
    # In-place element-wise multiplication: ndarray.__imul__ guarded by
    # assert_same_shapes, so operands of different shapes raise TypeError.
    impl = assert_same_shapes(numpy.ndarray.__imul__)
class proto_scale(omega_op):
    """Shared gradient for the tensor-times-scalar ops (scale, iscale)."""

    def grad(x, a, gz):
        """Return the gradients with respect to tensor ``x`` and scalar ``a``.

        ``gz`` is the gradient flowing in from the output.
        """
        # scale's impl is wrapped in tensor_scalar_op, which raises TypeError
        # unless the *second* argument is the scalar, so the tensor gz must
        # come first here.
        return scale(gz, a), sum(mul_elemwise(x, gz))
class scale(proto_scale):
    # Tensor * scalar: ndarray.__mul__ guarded by tensor_scalar_op, which
    # raises TypeError unless the second operand has an empty shape.
    impl = tensor_scalar_op(numpy.ndarray.__mul__)
class imul(proto_mul, inplace): class iscale(proto_scale, inplace):
impl = numpy.ndarray.__imul__ impl = tensor_scalar_op(numpy.ndarray.__imul__)
class proto_sqr(omega_op): class proto_sqr(omega_op):
def grad(x, gz): def grad(x, gz):
return scale(mul(x, gz), 2.0) return scale(mul_elemwise(x, gz), 2.0)
class sqr(proto_sqr): class sqr(proto_sqr):
impl = lambda x: numpy.multiply(x, x) impl = lambda x: numpy.multiply(x, x)
...@@ -311,47 +345,57 @@ class exp(omega_op): ...@@ -311,47 +345,57 @@ class exp(omega_op):
## Element-wise division ## ## Element-wise division ##
class proto_div(omega_op): class proto_div_elemwise(omega_op):
def grad(x, y, gz): def grad(x, y, gz):
return div(gz, y), -div(mul(x, gz), sqr(y)) return div(gz, y), -div(mul(x, gz), sqr(y))
class div(proto_div): class div_elemwise(proto_div_elemwise):
impl = numpy.ndarray.__div__ impl = assert_same_shapes(numpy.ndarray.__div__)
class idiv(proto_div, inplace): class idiv_elemwise(proto_div_elemwise, inplace):
impl = numpy.ndarray.__idiv__ impl = assert_same_shapes(numpy.ndarray.__idiv__)
class inv_scale(omega_op):
impl = numpy.ndarray.__div__
class iinv_scale(omega_op, inplace): def div_scalar_r(x, a):
impl = numpy.ndarray.__idiv__ return scale(x, inv_elemwise(a))
def div_scalar_l(x, a):
    """Build ``a / x`` as ``scale(inv_elemwise(x), a)``.

    Used as the "scalar on the left" variant of division: ``x`` is the
    tensor operand and ``a`` the scalar one.
    """
    return scale(inv_elemwise(x), a)
## Scaling ## def idiv_scalar_r(x, a):
return iscale(x, inv_elemwise(a))
class proto_scale(omega_op): def idiv_scalar_l(x, a):
def grad(x, a, gz): return iscale(inv_elemwise(x), a)
return scale(a, gz), sum(mul(x, gz))
class scale(omega_op):
impl = numpy.ndarray.__mul__
class iscale(omega_op, inplace):
impl = numpy.ndarray.__imul__ ## Scaling ##
class proto_neg(omega_op): class proto_neg(omega_op):
def grad(x, gz): def grad(x, gz):
return -gz return -gz
class neg(omega_op): class neg(proto_neg):
impl = numpy.ndarray.__neg__ impl = numpy.ndarray.__neg__
class ineg(omega_op, inplace): class ineg(proto_neg, inplace):
impl = lambda x: x.__imul__(-1) impl = lambda x: x.__imul__(-1)
class proto_inv_elemwise(omega_op):
    """Shared base for the element-wise reciprocal ops."""

    def grad(x, gz):
        # NotImplemented is the binary-operator sentinel, not an exception
        # class; raising it yields an unrelated TypeError.
        raise NotImplementedError("grad is not implemented for inv_elemwise")


class inv_elemwise(proto_inv_elemwise):
    # Derives from the proto class (like neg/proto_neg) so the shared grad
    # is actually used.
    impl = lambda x: 1 / x


class iinv_elemwise(proto_inv_elemwise, inplace):
    def impl(x):
        """Overwrite ``x`` with its element-wise reciprocal and return it."""
        x[:] = 1 / x
        # Return the mutated array, as the other in-place impls
        # (ndarray.__imul__, ndarray.__iadd__, ...) do.
        return x
## Dot product ## ## Dot product ##
class dot(omega_op): class dot(omega_op):
...@@ -427,3 +471,21 @@ class sum(omega_op): ...@@ -427,3 +471,21 @@ class sum(omega_op):
# Public arithmetic entry points.  Each one is the dispatcher returned by
# scalar_switch(normal_f, scalar_f, scalar_f_reverse):
#   - left operand is a shapeless constant  -> scalar_f_reverse(right, left)
#   - right operand is a shapeless constant -> scalar_f(left, right)
#   - otherwise                             -> normal_f(left, right)
# Addition and multiplication pass the same op for both scalar positions.
add = scalar_switch(add_elemwise, add_scalar, add_scalar)
iadd = scalar_switch(iadd_elemwise, iadd_scalar, iadd_scalar)
# Subtraction and division need a dedicated helper for the case where the
# scalar is the left operand.
sub = scalar_switch(sub_elemwise, sub_scalar_r, sub_scalar_l)
isub = scalar_switch(isub_elemwise, isub_scalar_r, isub_scalar_l)
mul = scalar_switch(mul_elemwise, scale, scale)
imul = scalar_switch(imul_elemwise, iscale, iscale)
div = scalar_switch(div_elemwise, div_scalar_r, div_scalar_l)
idiv = scalar_switch(idiv_elemwise, idiv_scalar_r, idiv_scalar_l)
# NOTE(review): pow/ipow dispatchers are disabled, yet the NumpyR operator
# methods appear to call pow(...) and ipow(...) -- confirm which names those
# calls resolve to before relying on ** with NumpyR operands.
# pow = scalar_switch(pow_elemwise, pow_scalar_r, pow_scalar_l)
# ipow = scalar_switch(ipow_elemwise, ipow_scalar_r, ipow_scalar_l)
...@@ -36,25 +36,25 @@ def export_opts(opts): ...@@ -36,25 +36,25 @@ def export_opts(opts):
# List of optimizations to perform. They are listed in the order they are applied. # List of optimizations to perform. They are listed in the order they are applied.
opts = [ opts = [
['double_transpose_eliminator', pattern_opt((transpose, (transpose, 'x')), # ['double_transpose_eliminator', pattern_opt((transpose, (transpose, 'x')),
'x')], # 'x')],
['addxx_to_twice', pattern_opt((add, 'x', 'x'), # ['addxx_to_twice', pattern_opt((add_elemwise, 'x', 'x'),
(twice, 'x'))], # (twice, 'x'))],
['twice_to_itwice', op_sub(twice, itwice)], # ['twice_to_itwice', op_sub(twice, itwice)],
['mulxx_to_sqr', pattern_opt((mul, 'x', 'x'), # ['mulxx_to_sqr', pattern_opt((mul_elemwise, 'x', 'x'),
(sqr, 'x'))], # (sqr, 'x'))],
['sqr_to_isqr', op_sub(sqr, isqr)], # ['sqr_to_isqr', op_sub(sqr, isqr)],
['add_to_iadd', op_sub(add, iadd)], # ['add_to_iadd', op_sub(add_elemwise, iadd_elemwise)],
['add_to_iadd_reverse', pattern_opt((add, 'x', 'y'), # ['add_to_iadd_reverse', pattern_opt((add_elemwise, 'x', 'y'),
(iadd, 'y', 'x'))], # (iadd_elemwise, 'y', 'x'))],
['remove_copies', gof.OpRemover(array_copy)], # ['remove_copies', gof.OpRemover(array_copy)],
[None, gof.DummyRemover] # has to be at the end [None, gof.DummyRemover] # has to be at the end
......
...@@ -131,62 +131,69 @@ import grad ...@@ -131,62 +131,69 @@ import grad
############################ ############################
# def dataset_1hot(x, targ, n): def dataset_1hot(x, targ, n):
# """Return an looping iterator over 1-hot vectors """Return an looping iterator over 1-hot vectors
This function is a generator for the integers range(n) that works by
side-effect on the numpy ndarray mat.
On each iteration, mat is set (in-place) to the next element of an infinite
sequence of 1-hot vectors.
"""
assert targ.size == 1
# This function is a generator for the integers range(n) that works by for i in xrange(n):
# side-effect on the numpy ndarray mat. idx = i % x.shape[1]
# On each iteration, mat is set (in-place) to the next element of an infinite x[:] = 0
# sequence of 1-hot vectors. x[0,idx] = 1
targ[0] = idx
yield i
# """
# assert targ.size == 1
# for i in xrange(n): class sigmoid(core.omega_op):
# idx = i % x.shape[1] def impl(x):
# x[:] = 0 return 1.0 / (1.0 + numpy.exp(-x))
# x[0,idx] = 1 def grad(x, gz):
# targ[0] = idx return gz * sigmoid(x) * (1 - sigmoid(x))
# yield i
numpy.random.seed(1)
# class sigmoid(core.omega_op): x = core.zeros((1, 10))
# def impl(x): w = core.input(numpy.random.rand(10, 15))
# return 1.0 / (1.0 + numpy.exp(-x))
# def grad(x, gz):
# return gz * sigmoid(x) * (1 - sigmoid(x))
# x = numpy.zeros((1, 10))
# w = numpy.random.rand(10, 15)
# x = core.zeros((1, 10)) #print x.data, w.data
# w = core.input(numpy.random.rand(10, 15))
# #print x.data, w.data # import inspect
# def autoassociator(w, x): # def omega_compile(f):
# forward = sigmoid(core.dot(sigmoid(core.dot(x, w)), w.T)) # args, varargs, kwargs, defaults = inspect.getargspec(f)
# rec_error = core.sum(core.sqr(x - forward)) # assert not varargs
# w -= 0.1 * grad.grad(rec_error, w) # assert not kwargs
# return w, rec_error # def ret(*args):
# outputs = core.build(f, *args)
# return compile.prog(args, outputs)
# return ret
# w2, rec_error = core.build(autoassociator, w, x) # @omega_compile
# f = compile.to_func([w, x], [w2, rec_error])
# for i in dataset_1hot(x.data, numpy.ndarray((1, )), 10000): def autoassociator(w, x):
# w2, rec_error = f(w.data, x.data) forward = sigmoid(core.dot(sigmoid(core.dot(x, w)), w.T))
# if not(i % 1000): rec_error = core.sum(core.sqr(x - forward))
# print rec_error w -= 0.1 * grad.grad(rec_error, w)
return w, rec_error
# print "done!" w2, rec_error = core.build(autoassociator, w, x)
# print w.data f = compile.to_func([w, x], [w2, rec_error])
#f = compile.single(w2, rec_error)
for i in dataset_1hot(x.data, numpy.ndarray((1, )), 10000):
w2, rec_error = f(w.data, x.data)
if not(i % 1000):
print rec_error
print "done!"
# # 1 = mul(mul(neg(scal(mul(sub(0.736213102665, sigmoid(*3)), 1.0), 2.0)), sigmoid(*3)), sub(1, sigmoid(*3))) print w.data
# # 2 = transpose(0.11474051836)
# # 3 = dot(*2, *5)
# # 4 = dot(0.11474051836, 0.736213102665)
# # 5 = sigmoid(*4)
# # add(transpose(dot(*1, transpose(*5))), dot(mul(mul(dot(transpose(*2), *1), sigmoid(*4)), sub(1, sigmoid(*4))), transpose(0.736213102665)))
...@@ -209,8 +216,31 @@ import grad ...@@ -209,8 +216,31 @@ import grad
############################ ############################
print core.ones((2, 2)) + 1 # print core.ones((2, 2)) + 1
# print numpy.ones((2, 2)) ** numpy.ones((2, 2))
############################
x = core.ones((2, 2))
y = core.zeros((1, 1))
#print "?", gof.graph.ops([], [x + y])
print x
x + x
print "1", gof.eval_env#.ops()
y + y
print "2", gof.eval_env#.ops()
x + x
print "3", gof.eval_env#.ops()
print numpy.ones((2, 2)) ** numpy.ones((2, 2)) x += (x + x)
print x
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论