cthunks work!!

上级 71ada6d8
import time import time
import gof import gof
import cutils
import core import core
import opt import opt
from copy import copy from copy import copy
def experimental_linker(env, target = None): def experimental_linker(env, target = None):
def fetch(op): def fetch(op):
try: try:
thunk = op.c_thunk() thunk = op.c_thunk_creator()
print "yea %s" % op # print "yea %s" % op
return lambda: cutils.run_cthunk(thunk) return lambda: cutils.run_cthunk(thunk())
except NotImplementedError: except NotImplementedError:
print "nope %s" % op # print "nope %s" % op
return op._perform return op._perform
order = env.toposort() order = env.toposort()
for op in order:
op.refresh()
# for op in order:
# print op
# print 'ispecs: ', [input.spec for input in op.inputs]
# print 'ospecs: ', [output.spec for output in op.outputs]
thunks = [fetch(op) for op in order] thunks = [fetch(op) for op in order]
def ret(): def ret():
for thunk in thunks: for thunk, op in zip(thunks, order):
# print op
# print 'in: ', [id(input.data) for input in op.inputs]
# print 'out:', [id(output.data) for output in op.outputs]
thunk() thunk()
# for thunk in thunks:
# thunk()
if not target: if not target:
return ret return ret
else: else:
...@@ -102,7 +111,7 @@ class prog(gof.Prog): ...@@ -102,7 +111,7 @@ class prog(gof.Prog):
TODO: think about whether orphan computation should be in this function, TODO: think about whether orphan computation should be in this function,
or in self.__call__() or in self.__call__()
""" """
# linker = experimental_linker linker = experimental_linker
new_outputs = gof.mark_outputs_as_destroyed(outputs) new_outputs = gof.mark_outputs_as_destroyed(outputs)
gof.Prog.__init__(self, gof.Prog.__init__(self,
inputs, inputs,
......
...@@ -101,6 +101,7 @@ def cgetspecs(names, vals, converters): ...@@ -101,6 +101,7 @@ def cgetspecs(names, vals, converters):
return d, specs return d, specs
def cgen(name, behavior, names, vals, converters = None): def cgen(name, behavior, names, vals, converters = None):
if not converters: if not converters:
converters = type_spec.default converters = type_spec.default
for converter in converters: for converter in converters:
...@@ -158,15 +159,7 @@ def cgen(name, behavior, names, vals, converters = None): ...@@ -158,15 +159,7 @@ def cgen(name, behavior, names, vals, converters = None):
code += "__STRUCT_P->init();\n" code += "__STRUCT_P->init();\n"
code += "return_val = PyCObject_FromVoidPtrAndDesc((void*)(&%(struct_name)s_executor), __STRUCT_P, %(struct_name)s_destructor);\n" % template code += "return_val = PyCObject_FromVoidPtrAndDesc((void*)(&%(struct_name)s_executor), __STRUCT_P, %(struct_name)s_destructor);\n" % template
return d, code, struct + static, converters return d, names, code, struct + static, converters
def make_static(cls, fname):
f = getattr(cls, fname)
if hasattr(f, 'im_func'):
f = f.im_func
setattr(cls, fname, staticmethod(f))
class omega_op(gof.PythonOp): class omega_op(gof.PythonOp):
...@@ -175,8 +168,8 @@ class omega_op(gof.PythonOp): ...@@ -175,8 +168,8 @@ class omega_op(gof.PythonOp):
@staticmethod @staticmethod
def __clsinit__(cls, name, bases, dct): def __clsinit__(cls, name, bases, dct):
for fname in ['grad', 'c_impl', 'alloc']: for fname in ['grad', 'c_impl']:
make_static(cls, fname) gof.make_static(cls, fname)
# make impl a static method # make impl a static method
gof.PythonOp.__clsinit__(cls, name, bases, dct) gof.PythonOp.__clsinit__(cls, name, bases, dct)
...@@ -202,11 +195,11 @@ class omega_op(gof.PythonOp): ...@@ -202,11 +195,11 @@ class omega_op(gof.PythonOp):
(inames, onames), behavior = self._c_impl() (inames, onames), behavior = self._c_impl()
return cgen(self.__class__.__name__, behavior, inames + onames, self.inputs + self.outputs, converters) return cgen(self.__class__.__name__, behavior, inames + onames, self.inputs + self.outputs, converters)
def _alloc(self): # def _alloc(self):
self.alloc(self.inputs, self.outputs) # self.alloc(self.inputs, self.outputs)
def alloc(inputs, outputs): # def alloc(inputs, outputs):
raise NotImplementedError() # raise NotImplementedError()
def _c_impl(self): def _c_impl(self):
(inames, onames), _1, _2, _3 = inspect.getargspec(self.c_impl) (inames, onames), _1, _2, _3 = inspect.getargspec(self.c_impl)
...@@ -215,11 +208,31 @@ class omega_op(gof.PythonOp): ...@@ -215,11 +208,31 @@ class omega_op(gof.PythonOp):
def c_impl(inputs, outputs): def c_impl(inputs, outputs):
raise NotImplementedError() raise NotImplementedError()
def c_thunk_creator(self):
self.refresh()
d, names, code, struct, converters = self.c_code()
cthunk = object()
module_name = md5.md5(code).hexdigest()
mod = weave.ext_tools.ext_module(module_name)
instantiate = weave.ext_tools.ext_function('instantiate',
code,
names,
local_dict = d, global_dict = {}, type_converters = converters)
instantiate.customize.add_support_code(struct)
mod.add_function(instantiate)
mod.compile(location = 'compiled')
module = __import__("compiled.%s" % module_name, fromlist = [module_name])
def creator():
return module.instantiate(*[x.data for x in self.inputs + self.outputs])
# def creator():
# return weave.inline(code, d.keys(), local_dict = d, global_dict = {}, support_code = struct, type_converters = converters)
return creator
def c_thunk(self): def c_thunk(self):
self._alloc() return self.c_thunk_creator()
d, code, struct, converters = self.c_code()
thunk = weave.inline(code, d.keys(), local_dict = d, global_dict = {}, support_code = struct, type_converters = converters)
return thunk
def c_perform(self): def c_perform(self):
thunk = self.c_thunk() thunk = self.c_thunk()
...@@ -341,62 +354,120 @@ class elemwise(omega_op): ...@@ -341,62 +354,120 @@ class elemwise(omega_op):
@staticmethod @staticmethod
def __clsinit__(cls, name, bases, dct): def __clsinit__(cls, name, bases, dct):
for fname in ['c_init', 'c_foreach', 'c_finalize']: for fname in ['c_init', 'c_foreach', 'c_finalize']:
make_static(cls, fname) gof.make_static(cls, fname)
# make impl, grad, etc. static methods # make impl, grad, etc. static methods
omega_op.__clsinit__(cls, name, bases, dct) omega_op.__clsinit__(cls, name, bases, dct)
def _alloc(self): def _specs(self):
if isinstance(self, inplace):
dmap = self.destroy_map()
else:
dmap = {}
try: try:
return self.alloc(self.inputs, self.outputs) return self.specs(*[input.spec for input in self.inputs])
except NotImplementedError: except NotImplementedError:
(inames, onames), _1, _2, _3 = inspect.getargspec(self.c_foreach) (inames, onames), code = self._c_foreach()
for oname in onames: for oname in onames:
if oname.startswith("_"): if oname.startswith("_"):
raise Exception("cannot infer an allocation policy automatically for variable " \ raise Exception("cannot infer a specification automatically for variable " \
"%s because it is not part of the elementwise loop - "\ "%s because it is not part of the elementwise loop - "\
"please override the alloc method" % oname[1:]) "please override the specs method" % oname[1:])
shape, dtype = None, None shape, dtype = None, None
for iname, input in zip(inames, self.inputs): for iname, input in zip(inames, self.inputs):
if not iname.startswith("_"): if not iname.startswith("_"):
shape = input.data if input.spec:
shape = input.spec[2]
if shape is None: if shape is None:
raise Exception("cannot infer an allocation policy automatically for output variables " \ raise Exception("cannot infer a specification automatically for output variables " \
"because there is no input variable in the loop from which to get the shape") "because there is no input variable in the loop from which to get the shape, "\
"or their shape is unknown")
dtype = upcast(*[input.data.dtype try:
dtype = upcast(*[input.spec[1]
for iname, input in zip(inames, self.inputs) for iname, input in zip(inames, self.inputs)
if isinstance(input.data, numpy.ndarray)]) if isinstance(input, NumpyR)])
except IndexError:
raise Exception("not all numpy inputs are specified")
if isinstance(self, inplace):
dmap = self.destroy_map()
else:
dmap = {}
res = []
for output in self.outputs: for output in self.outputs:
inplace_inputs = dmap.get(output, []) inplace_inputs = dmap.get(output, [])
if inplace_inputs: if inplace_inputs:
assert len(inplace_inputs) == 1 assert len(inplace_inputs) == 1
output.data = inplace_inputs[0].data res.append(inplace_inputs[0].spec)
else:
res.append((numpy.ndarray, dtype, shape))
if self.nout == 1:
return res[0]
else:
return res
def alloc(self, except_list = []):
if isinstance(self, inplace):
dmap = self.destroy_map()
else: else:
output.data = numpy.ndarray(shape, dtype) dmap = {}
gof.PythonOp.alloc(self, except_list = except_list + dmap.keys())
for output, (input, ) in dmap.items():
if output not in except_list:
output.set_value(input.data)
# def _alloc(self):
# if isinstance(self, inplace):
# dmap = self.destroy_map()
# else:
# dmap = {}
# try:
# return self.alloc(self.inputs, self.outputs)
# except NotImplementedError:
# (inames, onames), _1, _2, _3 = inspect.getargspec(self.c_foreach)
# for oname in onames:
# if oname.startswith("_"):
# raise Exception("cannot infer an allocation policy automatically for variable " \
# "%s because it is not part of the elementwise loop - "\
# "please override the alloc method" % oname[1:])
# shape, dtype = None, None
# for iname, input in zip(inames, self.inputs):
# if not iname.startswith("_"):
# shape = input.data
# if shape is None:
# raise Exception("cannot infer an allocation policy automatically for output variables " \
# "because there is no input variable in the loop from which to get the shape")
# dtype = upcast(*[input.data.dtype
# for iname, input in zip(inames, self.inputs)
# if isinstance(input.data, numpy.ndarray)])
# for output in self.outputs:
# inplace_inputs = dmap.get(output, [])
# if inplace_inputs:
# assert len(inplace_inputs) == 1
# output.data = inplace_inputs[0].data
# else:
# output.data = numpy.ndarray(shape, dtype)
def _c_init(self): def _c_init(self):
(inames, onames), _1, _2, _3 = inspect.getargspec(self.c_init) (inames, onames), _1, _2, _3 = inspect.getargspec(self.c_init)
return (inames, onames), self.c_init(self.inputs, self.outputs) return [list(inames), list(onames)], self.c_init(self.inputs, self.outputs)
def c_init(inputs, outputs): def c_init(inputs, outputs):
return "" return ""
def _c_foreach(self): def _c_foreach(self):
(inames, onames), _1, _2, _3 = inspect.getargspec(self.c_foreach) (inames, onames), _1, _2, _3 = inspect.getargspec(self.c_foreach)
return (inames, onames), self.c_foreach(self.inputs, self.outputs) return [list(inames), list(onames)], self.c_foreach(self.inputs, self.outputs)
def c_foreach(inputs, outputs): def c_foreach(inputs, outputs):
return "" return ""
def _c_finalize(self): def _c_finalize(self):
(inames, onames), _1, _2, _3 = inspect.getargspec(self.c_finalize) (inames, onames), _1, _2, _3 = inspect.getargspec(self.c_finalize)
return (inames, onames), self.c_finalize(self.inputs, self.outputs) return [list(inames), list(onames)], self.c_finalize(self.inputs, self.outputs)
def c_finalize(inputs, outputs): def c_finalize(inputs, outputs):
return "" return ""
...@@ -514,8 +585,16 @@ class NumpyR(gof.PythonR): ...@@ -514,8 +585,16 @@ class NumpyR(gof.PythonR):
self.set_value(value.data) self.set_value(value.data)
else: else:
self.data = numpy.array(value) self.data = numpy.array(value)
self.refresh()
self.up_to_date = True self.up_to_date = True
def refresh(self):
if self.data is not UNCOMPUTED:
self.spec = (numpy.ndarray, self.data.dtype, self.data.shape)
def alloc(self):
self.data = numpy.ndarray(self.spec[2], self.spec[1])
def __add__(self, y): return add(self, y) def __add__(self, y): return add(self, y)
def __radd__(self, x): return add(x, self) def __radd__(self, x): return add(x, self)
def __iadd__(self, y): return iadd(self, y) def __iadd__(self, y): return iadd(self, y)
...@@ -576,32 +655,12 @@ def tensor_scalar_impl(impl): ...@@ -576,32 +655,12 @@ def tensor_scalar_impl(impl):
return impl(x, a) return impl(x, a)
return ret return ret
# @omega_op
# def add((x, y), (z, )):
# def grad(gz):
# return gz
# def alloc():
# return numpy.ndarray(x.shape, dtype = x.dtype)
# c_impl = """
# for (int i = 0; i < z.ncols; i++) {
# for (int j = 0; j < z.nrows; j++) {
# z(i, j) = x(i, j) + y(i, j);
# }
# }
# """
class tensor_scalar_op(elemwise): class tensor_scalar_op(elemwise):
def c_init((x, _a), (z, )): def c_init((x, _a), (z, )):
return "_a_dtype a = _a[0];" return "_a_dtype a = _a[0];"
def _c_foreach(self): def _c_foreach(self):
return (('x', '_a'), ('z', )), "z = %s;" % self.c_operation return [['x', '_a'], ['z', ]], "z = %s;" % self.c_expr
## Addition ## ## Addition ##
...@@ -614,18 +673,7 @@ class add_elemwise(elemwise): ...@@ -614,18 +673,7 @@ class add_elemwise(elemwise):
return "z = x + y;" return "z = x + y;"
iadd_elemwise = add_elemwise.inplace_version() iadd_elemwise = add_elemwise.inplace_version()
#iadd_elemwise.impl = assert_same_shapes(numpy.ndarray.__iadd__) iadd_elemwise.set_impl(assert_same_shapes(numpy.ndarray.__iadd__))
# class proto_add_elemwise(omega_op):
# def grad(x, y, gz):
# return gz
# class add_elemwise(proto_add_elemwise):
# impl = assert_same_shapes(numpy.ndarray.__add__)
# class iadd_elemwise(proto_add_elemwise, inplace):
# impl = assert_same_shapes(numpy.ndarray.__iadd__)
class add_scalar(tensor_scalar_op): class add_scalar(tensor_scalar_op):
...@@ -635,18 +683,7 @@ class add_scalar(tensor_scalar_op): ...@@ -635,18 +683,7 @@ class add_scalar(tensor_scalar_op):
c_expr = "x + a" c_expr = "x + a"
iadd_scalar = add_scalar.inplace_version() iadd_scalar = add_scalar.inplace_version()
#iadd_scalar.impl = tensor_scalar_impl(numpy.ndarray.__iadd__) iadd_scalar.set_impl(tensor_scalar_impl(numpy.ndarray.__iadd__))
# class proto_add_scalar(omega_op):
# def grad(x, a, gz):
# return gz, sum(gz)
# class add_scalar(proto_add_scalar):
# impl = tensor_scalar_impl(numpy.ndarray.__add__)
# class iadd_scalar(proto_add_scalar, inplace):
# impl = tensor_scalar_impl(numpy.ndarray.__iadd__)
class twice(elemwise): class twice(elemwise):
def grad(x, gz): def grad(x, gz):
...@@ -659,20 +696,6 @@ class twice(elemwise): ...@@ -659,20 +696,6 @@ class twice(elemwise):
itwice = twice.inplace_version() itwice = twice.inplace_version()
# class proto_twice(omega_op):
# def grad(x, gz):
# return scale(gz, 2.0)
# class twice(proto_twice):
# def impl(x):
# return x + x
# class itwice(proto_twice, inplace):
# def impl(x):
# x += x
# return x
## Subtraction ## ## Subtraction ##
class sub_elemwise(elemwise): class sub_elemwise(elemwise):
...@@ -683,18 +706,7 @@ class sub_elemwise(elemwise): ...@@ -683,18 +706,7 @@ class sub_elemwise(elemwise):
return "z = x - y;" return "z = x - y;"
isub_elemwise = sub_elemwise.inplace_version() isub_elemwise = sub_elemwise.inplace_version()
#isub_elemwise.impl = assert_same_shapes(numpy.ndarray.__isub__) isub_elemwise.set_impl(assert_same_shapes(numpy.ndarray.__isub__))
# class proto_sub_elemwise(omega_op):
# def grad(x, y, gz):
# return gz, -gz
# class sub_elemwise(proto_sub_elemwise):
# impl = assert_same_shapes(numpy.ndarray.__sub__)
# class isub_elemwise(proto_sub_elemwise, inplace):
# impl = assert_same_shapes(numpy.ndarray.__isub__)
def sub_scalar_r(x, a): def sub_scalar_r(x, a):
return add_scalar(x, -a) return add_scalar(x, -a)
...@@ -716,18 +728,7 @@ class mul_elemwise(elemwise): ...@@ -716,18 +728,7 @@ class mul_elemwise(elemwise):
return "z = x * y;" return "z = x * y;"
imul_elemwise = mul_elemwise.inplace_version() imul_elemwise = mul_elemwise.inplace_version()
#imul_elemwise.impl = assert_same_shapes(numpy.ndarray.__imul__) imul_elemwise.set_impl(assert_same_shapes(numpy.ndarray.__imul__))
# class proto_mul_elemwise(omega_op):
# def grad(x, y, gz):
# return mul(y, gz), mul(x, gz)
# class mul_elemwise(proto_mul_elemwise):
# impl = assert_same_shapes(numpy.ndarray.__mul__)
# class imul_elemwise(proto_mul_elemwise, inplace):
# impl = assert_same_shapes(numpy.ndarray.__imul__)
class scale(tensor_scalar_op): class scale(tensor_scalar_op):
...@@ -737,18 +738,7 @@ class scale(tensor_scalar_op): ...@@ -737,18 +738,7 @@ class scale(tensor_scalar_op):
c_expr = "x * a" c_expr = "x * a"
iscale = scale.inplace_version() iscale = scale.inplace_version()
#iscale.impl = tensor_scalar_impl(numpy.ndarray.__imul__) iscale.set_impl(tensor_scalar_impl(numpy.ndarray.__imul__))
# class proto_scale(omega_op):
# def grad(x, a, gz):
# return scale(a, gz), sum(mul_elemwise(x, gz))
# class scale(proto_scale):
# impl = tensor_scalar_impl(numpy.ndarray.__mul__)
# class iscale(proto_scale, inplace):
# impl = tensor_scalar_impl(numpy.ndarray.__imul__)
class sqr(elemwise): class sqr(elemwise):
...@@ -757,21 +747,11 @@ class sqr(elemwise): ...@@ -757,21 +747,11 @@ class sqr(elemwise):
def grad(x, gz): def grad(x, gz):
return scale(mul_elemwise(x, gz), 2.0) return scale(mul_elemwise(x, gz), 2.0)
def c_foreach((x, ), (z, )): def c_foreach((x, ), (z, )):
"z = x * x;" return "z = x * x;"
isqr = sqr.inplace_version() isqr = sqr.inplace_version()
isqr.impl = lambda x: x.__imul__(x) isqr.set_impl(lambda x: x.__imul__(x))
# class proto_sqr(omega_op):
# def grad(x, gz):
# return scale(mul_elemwise(x, gz), 2.0)
# class sqr(proto_sqr):
# impl = lambda x: numpy.multiply(x, x)
# class isqr(proto_sqr, inplace):
# impl = lambda x: x.__imul__(x)
class sqrt(elemwise): class sqrt(elemwise):
...@@ -782,19 +762,9 @@ class sqrt(elemwise): ...@@ -782,19 +762,9 @@ class sqrt(elemwise):
"z = pow(x, 0.5);" "z = pow(x, 0.5);"
isqrt = sqrt.inplace_version() isqrt = sqrt.inplace_version()
isqrt.impl = lambda x: x.__ipow__(0.5) isqrt.set_impl(lambda x: x.__ipow__(0.5))
# class proto_sqrt(omega_op):
# def grad(x, gz):
# return scale(div(gz, sqrt(x)), 0.5)
# class sqrt(proto_sqrt):
# impl = numpy.sqrt
# class isqrt(proto_sqrt, inplace):
# impl = lambda x: x.__ipow__(0.5)
## Exponentiation ## ## Exponentiation ##
...@@ -803,9 +773,6 @@ class exp(elemwise): ...@@ -803,9 +773,6 @@ class exp(elemwise):
def c_foreach((x, ), (z, )): def c_foreach((x, ), (z, )):
return "z = exp(x);" return "z = exp(x);"
# class exp(omega_op):
# impl = numpy.exp
## Element-wise division ## ## Element-wise division ##
...@@ -817,18 +784,7 @@ class div_elemwise(elemwise): ...@@ -817,18 +784,7 @@ class div_elemwise(elemwise):
return "z = x / y;" return "z = x / y;"
idiv_elemwise = div_elemwise.inplace_version() idiv_elemwise = div_elemwise.inplace_version()
#idiv_elemwise.impl = assert_same_shapes(numpy.ndarray.__idiv__) idiv_elemwise.set_impl(assert_same_shapes(numpy.ndarray.__idiv__))
# class proto_div_elemwise(omega_op):
# def grad(x, y, gz):
# return div(gz, y), -div(mul(x, gz), sqr(y))
# class div_elemwise(proto_div_elemwise):
# impl = assert_same_shapes(numpy.ndarray.__div__)
# class idiv_elemwise(proto_div_elemwise, inplace):
# impl = assert_same_shapes(numpy.ndarray.__idiv__)
def div_scalar_r(x, a): def div_scalar_r(x, a):
return scale(x, inv_elemwise(a)) return scale(x, inv_elemwise(a))
...@@ -851,18 +807,7 @@ class neg(elemwise): ...@@ -851,18 +807,7 @@ class neg(elemwise):
return "z = -x;" return "z = -x;"
ineg = neg.inplace_version() ineg = neg.inplace_version()
ineg.impl = lambda x: x.__imul__(-1) ineg.set_impl(lambda x: x.__imul__(-1))
# class proto_neg(omega_op):
# def grad(x, gz):
# return -gz
# class neg(proto_neg):
# impl = numpy.ndarray.__neg__
# class ineg(proto_neg, inplace):
# impl = lambda x: x.__imul__(-1)
class inv_elemwise(elemwise): class inv_elemwise(elemwise):
...@@ -875,25 +820,25 @@ class inv_elemwise(elemwise): ...@@ -875,25 +820,25 @@ class inv_elemwise(elemwise):
iinv_elemwise = inv_elemwise.inplace_version() iinv_elemwise = inv_elemwise.inplace_version()
# class proto_inv_elemwise(omega_op):
# def grad(x, gz):
# raise NotImplemented
# class inv_elemwise(omega_op):
# impl = lambda x: 1 / x
# class iinv_elemwise(omega_op, inplace):
# def impl(x):
# x[:] = 1 / x
## Dot product ## ## Dot product ##
class dot(omega_op): class dot(omega_op):
impl = numpy.dot impl = numpy.dot
def grad(x, y, gz): def grad(x, y, gz):
return dot(gz, transpose(y)), dot(transpose(x), gz) return dot(gz, transpose(y)), dot(transpose(x), gz)
def specs(x, y):
# todo: handle non-matrices!
if len(x[2]) == 0:
shape = y[2]
elif len(y[2]) == 0:
shape = x[2]
elif len(x[2]) == 1:
shape = (y[2][1], )
elif len(y[2]) == 1:
shape = (x[2][0], )
else:
shape = (x[2][0], y[2][1])
return (numpy.ndarray, upcast(x[1], y[1]), shape)
## Transposition ## ## Transposition ##
...@@ -901,6 +846,9 @@ class transpose(omega_op, view): ...@@ -901,6 +846,9 @@ class transpose(omega_op, view):
impl = numpy.transpose impl = numpy.transpose
def grad(x, gz): def grad(x, gz):
return transpose_copy(gz) return transpose_copy(gz)
def specs(x):
# todo: handle non-matrices!
return (numpy.ndarray, x[1], (x[2][1], x[2][0]))
def transpose_copy(x): def transpose_copy(x):
return array_copy(transpose(x)) return array_copy(transpose(x))
...@@ -908,9 +856,11 @@ def transpose_copy(x): ...@@ -908,9 +856,11 @@ def transpose_copy(x):
## Copy ## ## Copy ##
class array_copy(omega_op): class array_copy(elemwise):
impl = numpy.array impl = numpy.array
grad = lambda x, gz: gz grad = lambda x, gz: gz
def c_foreach((x, ), (z, )):
return "z = x;"
## Power ## ## Power ##
...@@ -923,18 +873,7 @@ class pow_elemwise(elemwise): ...@@ -923,18 +873,7 @@ class pow_elemwise(elemwise):
return "z = pow(x, s)" return "z = pow(x, s)"
ipow_elemwise = pow_elemwise.inplace_version() ipow_elemwise = pow_elemwise.inplace_version()
#ipow_elemwise.impl = assert_same_shapes(numpy.ndarray.__ipow__) ipow_elemwise.set_impl(assert_same_shapes(numpy.ndarray.__ipow__))
# class proto_pow(omega_op):
# def grad(x, s, gz):
# return gz * s * (pow_elemwise(x, s-1.0))
# class pow_elemwise(proto_pow):
# impl = assert_same_shapes(numpy.ndarray.__pow__)
# class ipow_elemwise(proto_pow, inplace):
# impl = assert_same_shapes(numpy.ndarray.__ipow__)
class pow_scalar_l(tensor_scalar_op): class pow_scalar_l(tensor_scalar_op):
...@@ -950,23 +889,9 @@ class pow_scalar_r(tensor_scalar_op): ...@@ -950,23 +889,9 @@ class pow_scalar_r(tensor_scalar_op):
c_expr = "pow(x, a)" c_expr = "pow(x, a)"
ipow_scalar_r = pow_scalar_r.inplace_version() ipow_scalar_r = pow_scalar_r.inplace_version()
#ipow_scalar_r.impl = tensor_scalar_impl(numpy.ndarray.__ipow__) ipow_scalar_r.set_impl(tensor_scalar_impl(numpy.ndarray.__ipow__))
# class pow_scalar_l(omega_op):
# impl = tensor_scalar_impl(numpy.ndarray.__pow__)
# def grad(x, s, gz):
# return gz * x * (pow_scalar_l(s,x-1.0))
# class pow_scalar_r(omega_op):
# impl = tensor_scalar_impl(numpy.ndarray.__pow__)
# def grad(x, s, gz):
# return gz * s * (pow_scalar_r(x,s-1.0))
# class ipow_scalar_r(omega_op, inplace):
# impl = tensor_scalar_impl(numpy.ndarray.__ipow__)
# def grad(x, s, gz):
# return gz * s * (pow_scalar_r(x,s-1.0))
## Others ## ## Others ##
...@@ -974,9 +899,11 @@ class minmax(elemwise): ...@@ -974,9 +899,11 @@ class minmax(elemwise):
nout = 2 nout = 2
def impl(x): def impl(x):
return x.min, x.max return x.min, x.max
def alloc((x, ), (_min, _max)): def specs(x):
_min.data = numpy.ndarray((), x.dtype) return [(numpy.ndarray, x[1], ())] * 2
_max.data = numpy.ndarray((), x.dtype) # def alloc((x, ), (_min, _max)):
# _min.data = numpy.ndarray((), x.dtype)
# _max.data = numpy.ndarray((), x.dtype)
def c_init((x, ), (_min, _max)): def c_init((x, ), (_min, _max)):
return """ return """
_x_dtype min = _x[0]; _x_dtype min = _x[0];
...@@ -993,11 +920,6 @@ class minmax(elemwise): ...@@ -993,11 +920,6 @@ class minmax(elemwise):
_max[0] = max; _max[0] = max;
""" """
# class minmax(omega_op):
# nout = 2
# def impl(x):
# return x.min, x.max
class fill(elemwise): class fill(elemwise):
impl = lambda model, value: (model * 0) + value impl = lambda model, value: (model * 0) + value
...@@ -1009,27 +931,20 @@ class fill(elemwise): ...@@ -1009,27 +931,20 @@ class fill(elemwise):
ifill = fill.inplace_version() ifill = fill.inplace_version()
# class fill(omega_op):
# impl = lambda model, value: (model * 0) + value
class sum(elemwise): class sum(elemwise):
impl = numpy.sum impl = numpy.sum
def grad(x, gz): def grad(x, gz):
return fill(x, gz) return fill(x, gz)
def alloc((x, ), (_sum, )): def specs(x):
_sum.data = numpy.ndarray((), dtype = x.data.dtype) return (numpy.ndarray, x[1], ())
# def alloc((x, ), (_sum, )):
# _sum.data = numpy.ndarray((), dtype = x.data.dtype)
def c_init((x, ), (_sum, )): def c_init((x, ), (_sum, )):
return "_sum[0] = 0;" return "_sum[0] = 0;"
def c_foreach((x, ), (_sum, )): def c_foreach((x, ), (_sum, )):
return "_sum[0] += x;" return "_sum[0] += x;"
# class sum(omega_op):
# impl = numpy.sum
# def grad(x, gz):
# return fill(x, gz)
## Array slicing ## ## Array slicing ##
class get_slice(omega_op, view): class get_slice(omega_op, view):
......
...@@ -20,6 +20,8 @@ except ImportError: ...@@ -20,6 +20,8 @@ except ImportError:
} }
""" """
cthunk = object() cthunk = object()
mod = weave.ext_tools.ext_module('cutils_ext') mod = weave.ext_tools.ext_module('cutils_ext')
mod.add_function(weave.ext_tools.ext_function('run_cthunk', single_runner, ['cthunk'])) mod.add_function(weave.ext_tools.ext_function('run_cthunk', single_runner, ['cthunk']))
......
...@@ -22,6 +22,7 @@ __all__ = ['UNCOMPUTED', ...@@ -22,6 +22,7 @@ __all__ = ['UNCOMPUTED',
'PythonOp', 'PythonOp',
'PythonOpt', 'PythonOpt',
'COp', 'COp',
'make_static',
'DualImplOp'] 'DualImplOp']
...@@ -29,6 +30,13 @@ UNCOMPUTED = Keyword("UNCOMPUTED", False) ...@@ -29,6 +30,13 @@ UNCOMPUTED = Keyword("UNCOMPUTED", False)
UNDEFINED = Keyword("UNDEFINED", False) UNDEFINED = Keyword("UNDEFINED", False)
def make_static(cls, fname):
f = getattr(cls, fname)
if hasattr(f, 'im_func'):
f = f.im_func
setattr(cls, fname, staticmethod(f))
class ForbidConstantOverwrite(features.Listener, features.Constraint): class ForbidConstantOverwrite(features.Listener, features.Constraint):
def __init__(self, env): def __init__(self, env):
...@@ -75,13 +83,14 @@ class ForbidConstantOverwrite(features.Listener, features.Constraint): ...@@ -75,13 +83,14 @@ class ForbidConstantOverwrite(features.Listener, features.Constraint):
class PythonR(Result): class PythonR(Result):
__slots__ = ['data', 'constant', 'up_to_date'] __slots__ = ['data', 'spec', 'constant', 'up_to_date']
def __init__(self, x = None, constant = False): def __init__(self, x = None, constant = False):
self.constant = False self.constant = False
self.set_value(x) self.set_value(x)
self.constant = constant self.constant = constant
self.up_to_date = True self.up_to_date = True
self.spec = None
def set_value(self, value): def set_value(self, value):
if self.constant: if self.constant:
...@@ -93,6 +102,7 @@ class PythonR(Result): ...@@ -93,6 +102,7 @@ class PythonR(Result):
else: else:
self.data = value self.data = value
self.up_to_date = True self.up_to_date = True
self.refresh()
def __str__(self): def __str__(self):
return str(self.data) return str(self.data)
...@@ -100,6 +110,12 @@ class PythonR(Result): ...@@ -100,6 +110,12 @@ class PythonR(Result):
def __repr__(self): def __repr__(self):
return repr(self.data) return repr(self.data)
def refresh(self):
self.spec = id(self.data)
def alloc(self):
raise TypeError("Cannot allocate following this specification.")
def perform(self): def perform(self):
if self.owner: if self.owner:
self.owner.perform() self.owner.perform()
...@@ -112,22 +128,19 @@ class PythonR(Result): ...@@ -112,22 +128,19 @@ class PythonR(Result):
class PythonOp(Op): class PythonOp(Op):
__metaclass__ = ClsInit __metaclass__ = ClsInit
__mode__ = ['build_eval']
nout = 1 nout = 1
@staticmethod @staticmethod
def __clsinit__(cls, name, bases, dct): def __clsinit__(cls, name, bases, dct):
# make impl a static method # make impl a static method
impl = cls.impl cls.set_impl(cls.impl)
if hasattr(cls.impl, 'im_func'): make_static(cls, 'specs')
impl = impl.im_func
cls.impl = staticmethod(impl)
def __new__(cls, *inputs, **kwargs): def __new__(cls, *inputs, **kwargs):
op = Op.__new__(cls) op = Op.__new__(cls)
op.__init__(*inputs) op.__init__(*inputs)
mode = kwargs.get('mode', None) or cls.current_mode() mode = kwargs.get('mode', None) or current_mode()
if mode == 'eval': if mode == 'eval':
op.perform() op.perform()
if op.nout == 1: if op.nout == 1:
...@@ -148,33 +161,6 @@ class PythonOp(Op): ...@@ -148,33 +161,6 @@ class PythonOp(Op):
for input in self.inputs: for input in self.inputs:
assert isinstance(input, PythonR) assert isinstance(input, PythonR)
@classmethod
def current_mode(cls):
return cls.__mode__[-1]
@classmethod
def set_mode(cls, mode):
cls.__mode__.append(mode)
@classmethod
def build_mode(cls):
cls.set_mode('build')
@classmethod
def eval_mode(cls):
cls.set_mode('eval')
@classmethod
def build_eval_mode(cls):
cls.set_mode('build_eval')
@classmethod
def pop_mode(cls):
if len(cls.__mode__) == 1:
raise Exception("There's only one mode left on the stack.")
else:
cls.__mode__.pop()
def gen_outputs(self): def gen_outputs(self):
return [PythonR() for i in xrange(self.nout)] return [PythonR() for i in xrange(self.nout)]
...@@ -270,9 +256,47 @@ class PythonOp(Op): ...@@ -270,9 +256,47 @@ class PythonOp(Op):
def _impl(self): def _impl(self):
return self.impl(*[input.data for input in self.inputs]) return self.impl(*[input.data for input in self.inputs])
@classmethod
def set_impl(cls, impl):
make_static(cls, 'impl')
# impl = cls.impl
# if hasattr(cls.impl, 'im_func'):
# impl = impl.im_func
# cls.impl = staticmethod(impl)
def impl(*args): def impl(*args):
raise NotImplementedError("This op has no implementation.") raise NotImplementedError("This op has no implementation.")
def _specs(self):
return self.specs(*[input.spec for input in self.inputs])
def specs(*inputs):
raise NotImplementedError("This op cannot infer the specs of its outputs.")
def refresh(self, except_list = []):
for input in self.inputs:
input.refresh()
change = self._propagate_specs()
if change:
self.alloc(except_list)
return change
def _propagate_specs(self):
specs = self._specs()
if self.nout == 1:
specs = [specs]
change = False
for output, spec in zip(self.outputs, specs):
if output.spec != spec:
output.spec = spec
change = True
return change
def alloc(self, except_list = []):
for output in self.outputs:
if output not in except_list:
output.alloc()
__require__ = ForbidConstantOverwrite __require__ = ForbidConstantOverwrite
def __copy__(self): def __copy__(self):
...@@ -297,16 +321,30 @@ class PythonOp(Op): ...@@ -297,16 +321,30 @@ class PythonOp(Op):
return op[0].owner return op[0].owner
return op.owner return op.owner
__mode__ = ['build_eval']
def current_mode():
return __mode__[-1]
def set_mode(mode):
__mode__.append(mode)
def build_mode():
set_mode('build')
def eval_mode():
set_mode('eval')
def build_eval_mode():
set_mode('build_eval')
def pop_mode():
if len(__mode__) == 1:
raise Exception("There's only one mode left on the stack.")
else:
__mode__.pop()
current_mode = PythonOp.current_mode
set_mode = PythonOp.set_mode
build_mode = PythonOp.build_mode
eval_mode = PythonOp.eval_mode
build_eval_mode = PythonOp.build_eval_mode
pop_mode = PythonOp.pop_mode
class PythonOpt(opt.Optimizer): class PythonOpt(opt.Optimizer):
...@@ -315,9 +353,9 @@ class PythonOpt(opt.Optimizer): ...@@ -315,9 +353,9 @@ class PythonOpt(opt.Optimizer):
self.opt = opt self.opt = opt
def optimize(self, env): def optimize(self, env):
PythonOp.build_mode() build_mode()
self.opt.optimize(env) self.opt.optimize(env)
PythonOp.pop_mode() pop_mode()
......
...@@ -153,7 +153,7 @@ class Op(object): ...@@ -153,7 +153,7 @@ class Op(object):
self.set_output(i, previous, False) self.set_output(i, previous, False)
def refresh(self, allow_changes = False): def repair(self, allow_changes = False):
""" """
This function attempts to repair all inputs that are broken This function attempts to repair all inputs that are broken
links by calling set_input on the new Result that replaced links by calling set_input on the new Result that replaced
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论