finalized a decent version of elemwise

上级 c40eadf8
...@@ -3,6 +3,7 @@ import gof ...@@ -3,6 +3,7 @@ import gof
from gof import current_mode, set_mode, build_mode, eval_mode, build_eval_mode, pop_mode, UNCOMPUTED, UNDEFINED, PythonR from gof import current_mode, set_mode, build_mode, eval_mode, build_eval_mode, pop_mode, UNCOMPUTED, UNDEFINED, PythonR
import type_spec import type_spec
import cutils
import numpy import numpy
import weakref import weakref
...@@ -21,37 +22,12 @@ def build(f, *args, **kwargs): ...@@ -21,37 +22,12 @@ def build(f, *args, **kwargs):
pop_mode() pop_mode()
return r return r
class Proxy(object):
__slots__ = ['_obj']
def __init__(self, obj = None):
self._obj = obj
def __getattribute__(self, attr):
if attr in ['__class__', '_obj']:
return object.__getattribute__(self, attr)
else:
return getattr(object.__getattribute__(self, '_obj'), attr)
def __setattr__(self, attr, value):
if attr in ['_obj']:
object.__setattr__(self, attr, value)
else:
setattr(self._obj, attr, value)
def __delattr__(self, attr):
delattr(self._obj, attr)
def as_string(*rs): def as_string(*rs):
s = gof.graph.as_string(gof.graph.inputs(rs), rs) s = gof.graph.as_string(gof.graph.inputs(rs), rs)
if len(rs) == 1: if len(rs) == 1:
return s[1:-1] return s[1:-1]
else: else:
return s return s
# return str(gof.Env(gof.graph.inputs([r]), [r]))[1:-1]
def print_graph(*rs): def print_graph(*rs):
print as_string(*rs) print as_string(*rs)
...@@ -77,8 +53,6 @@ def wrap(x): ...@@ -77,8 +53,6 @@ def wrap(x):
return x return x
elif isinstance(x, omega_op): elif isinstance(x, omega_op):
return x.out return x.out
elif isinstance(x, Proxy):
return wrap(x._obj)
else: else:
return literal(x) return literal(x)
...@@ -126,20 +100,22 @@ def cgetspecs(names, vals, converters): ...@@ -126,20 +100,22 @@ def cgetspecs(names, vals, converters):
specs = weave.ext_tools.assign_variable_types(names, d, type_converters = converters) #, auto_downcast = 0) specs = weave.ext_tools.assign_variable_types(names, d, type_converters = converters) #, auto_downcast = 0)
return d, specs return d, specs
def cgen(name, behavior, inames, ivals, onames, ovals, converters = None): def cgen(name, behavior, names, vals, converters = None):
if not converters: if not converters:
converters = type_spec.default converters = type_spec.default
for converter in converters: for converter in converters:
assert isinstance(converter, type_spec.omega_type_converter_extension) assert isinstance(converter, type_spec.omega_type_converter_extension)
d, specs = cgetspecs(inames + onames, ivals + ovals, converters) d, specs = cgetspecs(names, vals, converters)
template = {} template = {}
template['name'] = name template['name'] = name
template['code'] = behavior template['code'] = behavior
template['members'] = "\n".join([spec.struct_members_code() for spec in specs]) template['members'] = "".join([spec.struct_members_code() for spec in specs])
template['support'] = "\n".join([spec.struct_support_code() for spec in specs]) template['support'] = "".join([spec.struct_support_code() for spec in specs])
template['typedefs'] = "\n".join([spec.struct_typedefs() for spec in specs]) template['typedefs'] = "".join([spec.struct_typedefs() for spec in specs])
template['incref'] = "".join(["Py_INCREF(py_%s);\n" % spec.name for spec in specs if spec.use_ref_count])
template['decref'] = "".join(["Py_DECREF(py_%s);\n" % spec.name for spec in specs if spec.use_ref_count])
template['struct_contents'] = """ template['struct_contents'] = """
%(typedefs)s %(typedefs)s
...@@ -148,23 +124,41 @@ def cgen(name, behavior, inames, ivals, onames, ovals, converters = None): ...@@ -148,23 +124,41 @@ def cgen(name, behavior, inames, ivals, onames, ovals, converters = None):
%(support)s %(support)s
void execute(void) { void init(void) {
%(incref)s
}
void cleanup(void) {
%(decref)s
}
int execute(void) {
%(code)s %(code)s
return 0;
} }
""" % template """ % template
template['md5'] = md5.md5(template['struct_contents']).hexdigest() template['md5'] = md5.md5(template['struct_contents']).hexdigest()
template['struct_name'] = "_omega_%(name)s_%(md5)s" % template template['struct_name'] = "_omega_%(name)s_%(md5)s" % template
struct = "struct %(struct_name)s { %(struct_contents)s\n};" % template struct = "struct %(struct_name)s { %(struct_contents)s\n};" % template
code = "%(struct_name)s* __STRUCT_P = &%(struct_name)s();\n" % template static = """
code += "\n".join([spec.struct_import_code() for spec in specs]) int %(struct_name)s_executor(%(struct_name)s* self) {
code += "\n__STRUCT_P->execute();\n" return self->execute();
code += "return_val = 10;" }
code += "\n//%(md5)s" % template
void %(struct_name)s_destructor(void* executor, void* self) {
((%(struct_name)s*)self)->cleanup();
free(self);
}
""" % template
code = "%(struct_name)s* __STRUCT_P = new %(struct_name)s();\n" % template
code += "".join([spec.struct_import_code() for spec in specs])
code += "__STRUCT_P->init();\n"
code += "return_val = PyCObject_FromVoidPtrAndDesc((void*)(&%(struct_name)s_executor), __STRUCT_P, %(struct_name)s_destructor);\n" % template
return d, code, struct, converters return d, code, struct + static, converters
def make_static(cls, fname): def make_static(cls, fname):
...@@ -205,9 +199,8 @@ class omega_op(gof.PythonOp): ...@@ -205,9 +199,8 @@ class omega_op(gof.PythonOp):
return UNDEFINED return UNDEFINED
def c_code(self, converters = None): def c_code(self, converters = None):
behavior = self.c_impl(self.inputs, self.outputs) (inames, onames), behavior = self._c_impl()
(inames, onames), _1, _2, _3 = inspect.getargspec(self.c_impl) return cgen(self.__class__.__name__, behavior, inames + onames, self.inputs + self.outputs, converters)
return cgen(self.__class__.__name__, behavior, inames, self.inputs, onames, self.outputs, converters)
def _c_alloc(self): def _c_alloc(self):
self.c_alloc(self.inputs, self.outputs) self.c_alloc(self.inputs, self.outputs)
...@@ -215,21 +208,25 @@ class omega_op(gof.PythonOp): ...@@ -215,21 +208,25 @@ class omega_op(gof.PythonOp):
def c_alloc(inputs, outputs): def c_alloc(inputs, outputs):
raise NotImplementedError() raise NotImplementedError()
def _c_impl(self):
(inames, onames), _1, _2, _3 = inspect.getargspec(self.c_impl)
return (inames, onames), self.c_impl(self.inputs, self.outputs)
def c_impl(inputs, outputs): def c_impl(inputs, outputs):
raise NotImplementedError() raise NotImplementedError()
def c_thunk(self): def c_thunk(self):
self._c_alloc() self._c_alloc()
d, code, struct, converters = self.c_code() d, code, struct, converters = self.c_code()
def thunk(): thunk = weave.inline(code, d.keys(), local_dict = d, global_dict = {}, support_code = struct, type_converters = converters)
weave.inline(code, d.keys(), local_dict = d, global_dict = {}, support_code = struct, type_converters = converters)
return thunk return thunk
def c_perform(self): def c_perform(self):
self.c_thunk()() thunk = self.c_thunk()
cutils.run_cthunk(thunk)
def elemwise_wrap(beforeloop, inloop, afterloop, loop_vars, writable_loop_vars): def elemwise_wrap_old(beforeloop, inloop, afterloop, loop_vars, writable_loop_vars):
return """ return """
%(beforeloop)s %(beforeloop)s
for (int i = 0; i < N_%(v1)s[0]; i++) { for (int i = 0; i < N_%(v1)s[0]; i++) {
...@@ -249,6 +246,97 @@ def elemwise_wrap(beforeloop, inloop, afterloop, loop_vars, writable_loop_vars): ...@@ -249,6 +246,97 @@ def elemwise_wrap(beforeloop, inloop, afterloop, loop_vars, writable_loop_vars):
inloop = inloop, inloop = inloop,
afterloop = afterloop) afterloop = afterloop)
def elemwise_loopcode(loopcode, init_template, next_template, acquire_template, cleanup_template, loop_vars, writable_loop_vars, aliases):
all_loop_vars = loop_vars + writable_loop_vars
template = dict(
init = "".join([init_template % dict(loop_var = loop_var) for loop_var in all_loop_vars]),
next = "".join([next_template % dict(loop_var = loop_var) for loop_var in all_loop_vars]),
cleanup = "".join([cleanup_template % dict(loop_var = loop_var) for loop_var in all_loop_vars]),
idefs = "".join([("_%(loop_var)s_dtype %(loop_var)s = " + acquire_template + ";\n")
% dict(loop_var = loop_var) for loop_var in loop_vars]),
odefs = "".join([("_%(loop_var)s_dtype& %(loop_var)s = " + acquire_template + ";\n")
% dict(loop_var = loop_var) for loop_var in writable_loop_vars]),
aliasdefs = "".join(["_%(v1)s_dtype %(v1)s = %(v2)s;\n" % dict(v1=v1, v2=v2)
for v1, v2 in aliases.items()]),
loopcode = loopcode
)
code = """
%(init)s
while (__elemwise_size--) {
%(idefs)s
%(odefs)s
%(aliasdefs)s
%(loopcode)s
%(next)s
}
%(cleanup)s
""" % template
return code
def elemwise_wrap(beforeloop, inloop, afterloop, loop_vars, writable_loop_vars, aliases):
general_init = "PyArrayIterObject* _%(loop_var)s_iter = (PyArrayIterObject*)PyArray_IterNew((PyObject*)_%(loop_var)s_array);\n"
# "if (_%(loop_var)s_iter == NULL) {\n" \
# " PyErr_SetString(PyExc_ValueError, \"Could not make an iterator over variable %(loop_var)s.\");\n" \
# " return 1;\n" \
# "}\n"
general_next = "PyArray_ITER_NEXT(_%(loop_var)s_iter);\n"
general_acquire = "*((_%(loop_var)s_dtype*)_%(loop_var)s_iter->dataptr)";
general_cleanup = "if (_%(loop_var)s_iter) Py_DECREF(_%(loop_var)s_iter);\n";
contiguous_init = "_%(loop_var)s_dtype* _%(loop_var)s_iter = (_%(loop_var)s_dtype*)PyArray_DATA(_%(loop_var)s_array);\n"
contiguous_next = "_%(loop_var)s_iter++;\n"
contiguous_acquire = "*_%(loop_var)s_iter"
contiguous_cleanup = ""
all_loop_vars = loop_vars + writable_loop_vars
template = dict(
v1 = (loop_vars + writable_loop_vars)[0],
beforeloop = beforeloop,
general_loop = elemwise_loopcode(
inloop,
general_init, general_next, general_acquire, general_cleanup,
loop_vars, writable_loop_vars, aliases),
contiguous_loop = elemwise_loopcode(
inloop,
contiguous_init, contiguous_next, contiguous_acquire, contiguous_cleanup,
loop_vars, writable_loop_vars, aliases),
contiguity_check = "".join(["all_c_contiguous &= PyArray_ISCARRAY(_%(loop_var)s_array);\n" \
"all_f_contiguous &= PyArray_ISFARRAY(_%(loop_var)s_array);\n" \
% dict(loop_var = loop_var)
for loop_var in all_loop_vars]),
afterloop = afterloop)
code = """
npy_intp __elemwise_size = PyArray_SIZE(_%(v1)s_array);
%(beforeloop)s
bool all_c_contiguous = 1;
bool all_f_contiguous = 1;
%(contiguity_check)s
if (all_c_contiguous || all_f_contiguous) {
%(contiguous_loop)s
}
else {
%(general_loop)s
}
%(afterloop)s
""" % template
print code
return code
def upcast(dtype, *dtypes):
z = numpy.zeros((), dtype = dtype)
for dtype in dtypes:
z = z + numpy.zeros((), dtype = dtype)
return z.dtype
class elemwise(omega_op): class elemwise(omega_op):
...@@ -274,27 +362,44 @@ class elemwise(omega_op): ...@@ -274,27 +362,44 @@ class elemwise(omega_op):
raise Exception("cannot infer an allocation policy automatically for variable " \ raise Exception("cannot infer an allocation policy automatically for variable " \
"%s because it is not part of the elementwise loop - "\ "%s because it is not part of the elementwise loop - "\
"please override the c_alloc method" % oname[1:]) "please override the c_alloc method" % oname[1:])
model = None shape, dtype = None, None
for iname, input in zip(inames, self.inputs): for iname, input in zip(inames, self.inputs):
if not iname.startswith("_"): if not iname.startswith("_"):
model = input.data shape = input.data
if model is None: if shape is None:
raise Exception("cannot infer an allocation policy automatically for output variables " \ raise Exception("cannot infer an allocation policy automatically for output variables " \
"because there is no input variable in the loop from which to get the shape") "because there is no input variable in the loop from which to get the shape")
dtype = upcast(*[input.data.dtype
for iname, input in zip(inames, self.inputs)
if isinstance(input.data, numpy.ndarray)])
for output in self.outputs: for output in self.outputs:
inplace_inputs = dmap.get(output, []) inplace_inputs = dmap.get(output, [])
if inplace_inputs: if inplace_inputs:
assert len(inplace_inputs) == 1 assert len(inplace_inputs) == 1
output.data = inplace_inputs[0].data output.data = inplace_inputs[0].data
else: else:
output.data = numpy.ndarray(model.shape, model.dtype) output.data = numpy.ndarray(shape, dtype)
def _c_init(self):
(inames, onames), _1, _2, _3 = inspect.getargspec(self.c_init)
return (inames, onames), self.c_init(self.inputs, self.outputs)
def c_init(inputs, outputs): def c_init(inputs, outputs):
return "" return ""
def _c_foreach(self):
(inames, onames), _1, _2, _3 = inspect.getargspec(self.c_foreach)
return (inames, onames), self.c_foreach(self.inputs, self.outputs)
def c_foreach(inputs, outputs): def c_foreach(inputs, outputs):
return "" return ""
def _c_finalize(self):
(inames, onames), _1, _2, _3 = inspect.getargspec(self.c_finalize)
return (inames, onames), self.c_finalize(self.inputs, self.outputs)
def c_finalize(inputs, outputs): def c_finalize(inputs, outputs):
return "" return ""
...@@ -306,19 +411,14 @@ class elemwise(omega_op): ...@@ -306,19 +411,14 @@ class elemwise(omega_op):
return "_" + name return "_" + name
try: try:
self.c_impl(self.inputs, self.outputs) self._c_impl()
raise Exception("c_impl is not used by elemwise ops - define behavior in c_foreach instead") raise Exception("c_impl is not used by elemwise ops - define behavior in c_foreach instead")
except NotImplementedError: except NotImplementedError:
pass pass
before = self.c_init(self.inputs, self.outputs) spec_b, before = self._c_init()
during = self.c_foreach(self.inputs, self.outputs) spec_d, during = self._c_foreach()
after = self.c_finalize(self.inputs, self.outputs) spec_a, after = self._c_finalize()
# Get c_init, etc.'s argument names so we can declare them properly in the C code
spec_b = inspect.getargspec(self.c_init)
spec_d = inspect.getargspec(self.c_foreach)
spec_a = inspect.getargspec(self.c_finalize)
# Sanity check - apart from loop vars, variables are shared in the before/during/after parts # Sanity check - apart from loop vars, variables are shared in the before/during/after parts
if before and spec_b != spec_d: if before and spec_b != spec_d:
...@@ -326,17 +426,66 @@ class elemwise(omega_op): ...@@ -326,17 +426,66 @@ class elemwise(omega_op):
if after and spec_a != spec_d: if after and spec_a != spec_d:
raise Exception("The input signature of c_finalize differs from the input signature of c_foreach.") raise Exception("The input signature of c_finalize differs from the input signature of c_foreach.")
(inames, onames), _1, _2, _3 = spec_d (inames, onames) = spec_d
aliases = {}
if isinstance(self, inplace):
dmap = self.destroy_map()
for oname, output in zip(onames, self.outputs):
if not oname.startswith("_"):
for input in dmap.get(output, []):
aliases[inames[self.inputs.index(input)]] = oname
behavior = elemwise_wrap(before, during, after, behavior = elemwise_wrap(before, during, after,
[iname for iname in inames if not iname.startswith("_")], [iname for iname in inames if not iname.startswith("_") and not iname in aliases],
[oname for oname in onames if not oname.startswith("_")]) [oname for oname in onames if not oname.startswith("_")],
aliases)
inames = [mangle(name) for name in inames] inames = [mangle(name) for name in inames]
onames = [mangle(name) for name in onames] onames = [mangle(name) for name in onames]
return cgen(self.__class__.__name__, behavior, inames, self.inputs, onames, self.outputs, converters) return cgen(self.__class__.__name__, behavior, inames + onames, self.inputs + self.outputs, converters)
@classmethod
def inplace_version(cls, dmap = {0: 0}):
(inames, onames), _1, _2, _3 = inspect.getargspec(cls.c_foreach)
for i, oname in enumerate(onames):
if i in dmap:
assert not oname.startswith("_")
class C(cls, inplace):
def destroy_map(self):
ret = cls.destroy_map()
for output, input in self.dmap.items():
ret[self.outputs.index(output)] = [self.inputs.index(input)]
return ret
def _impl(self):
if self.impl is not cls.impl:
# If the user sets his own inplace operation, we use it
return cls._impl(self)
else:
res = cls._impl(self)
if isinstance(res, gof.Result):
res = [res]
else:
res = copy(res)
for output, input in dmap.items():
# The default implementation returned a copy, so we just
# overwrite the original input with the contents of that copy
# This is not meant to be efficient, only correct.
a = self.inputs[input].data
a[:] = res[output]
res[output] = a
if len(res) == 1:
return res[0]
else:
return res
if dmap == {0:0}:
C.__name__ = cls.__name__ + "_inplace" % dmap
else:
C.__name__ = cls.__name__ + "_inplace%s" % dmap
return C
def scalar_switch(normal_f, scalar_f, scalar_f_reverse = None): def scalar_switch(normal_f, scalar_f, scalar_f_reverse = None):
...@@ -419,7 +568,7 @@ def assert_same_shapes(impl): ...@@ -419,7 +568,7 @@ def assert_same_shapes(impl):
return ret return ret
# Wrapper to ensure that the last input to impl is a scalar # Wrapper to ensure that the last input to impl is a scalar
def tensor_scalar_op(impl): def tensor_scalar_impl(impl):
def ret(x, a): def ret(x, a):
if a.shape: if a.shape:
raise ValueError("The second argument to %s must be a scalar." % impl) raise ValueError("The second argument to %s must be a scalar." % impl)
...@@ -449,83 +598,101 @@ def tensor_scalar_op(impl): ...@@ -449,83 +598,101 @@ def tensor_scalar_op(impl):
## Addition ## ## Addition ##
class add(omega_op): class add_elemwise(elemwise):
impl = assert_same_shapes(numpy.ndarray.__add__) impl = assert_same_shapes(numpy.ndarray.__add__)
def grad(x, y, gz): def grad(x, y, gz):
return gz return gz
def alloc(x, y): def c_foreach((x, y), (z, )):
return numpy.ndarray(x.shape, dtype = x.dtype) return "z = x + y;"
def c_impl(x, y, z):
return """
for (int i = 0; i < z.ncols; i++) {
for (int j = 0; j < z.nrows; j++) {
z(i, j) = x(i, j) + y(i, j);
}
}
"""
class proto_add_elemwise(omega_op): iadd_elemwise = add_elemwise.inplace_version()
def grad(x, y, gz): iadd_elemwise.impl = assert_same_shapes(numpy.ndarray.__iadd__)
return gz
class add_elemwise(proto_add_elemwise):
impl = assert_same_shapes(numpy.ndarray.__add__)
class iadd_elemwise(proto_add_elemwise, inplace): # class proto_add_elemwise(omega_op):
impl = assert_same_shapes(numpy.ndarray.__iadd__) # def grad(x, y, gz):
# return gz
# class add_elemwise(proto_add_elemwise):
# impl = assert_same_shapes(numpy.ndarray.__add__)
# class iadd_elemwise(proto_add_elemwise, inplace):
# impl = assert_same_shapes(numpy.ndarray.__iadd__)
class proto_add_scalar(omega_op): class tensor_scalar_op(elemwise):
def c_init((x, _a), (z, )):
return "_a_dtype a = _a[0];"
def _c_foreach(self):
return (('x', '_a'), ('z', )), "z = %s;" % self.c_operation
class add_scalar(tensor_scalar_op):
impl = tensor_scalar_impl(numpy.ndarray.__add__)
def grad(x, a, gz): def grad(x, a, gz):
return gz, sum(gz) return gz, sum(gz)
c_expr = "x + a"
class add_scalar(proto_add_scalar): iadd_scalar = add_scalar.inplace_version()
impl = tensor_scalar_op(numpy.ndarray.__add__) iadd_scalar.impl = tensor_scalar_impl(numpy.ndarray.__iadd__)
# def c_impl(x, s, z):
# """
# if (*__z == NULL) {
# *__z = new ndarray
# }
# ndarray& z = **__z
# """
# return """ # class proto_add_scalar(omega_op):
# z.resize_like(x); # def grad(x, a, gz):
# for (int i = 0; i < z.size(); i++) { # return gz, sum(gz)
# z[i] = x[i] * s;
# }
# return z;
# """
class iadd_scalar(proto_add_scalar, inplace): # class add_scalar(proto_add_scalar):
impl = tensor_scalar_op(numpy.ndarray.__iadd__) # impl = tensor_scalar_impl(numpy.ndarray.__add__)
# class iadd_scalar(proto_add_scalar, inplace):
# impl = tensor_scalar_impl(numpy.ndarray.__iadd__)
class proto_twice(omega_op): class twice(elemwise):
def grad(x, gz): def grad(x, gz):
return scale(gz, 2.0) return scale(gz, 2.0)
class twice(proto_twice):
def impl(x): def impl(x):
return x + x return x + x
def c_foreach((x, ), (z, )):
"z = x + x;"
class itwice(proto_twice, inplace): itwice = twice.inplace_version()
def impl(x):
x += x
return x # class proto_twice(omega_op):
# def grad(x, gz):
# return scale(gz, 2.0)
# class twice(proto_twice):
# def impl(x):
# return x + x
# class itwice(proto_twice, inplace):
# def impl(x):
# x += x
# return x
## Subtraction ## ## Subtraction ##
class proto_sub_elemwise(omega_op): class sub_elemwise(elemwise):
impl = assert_same_shapes(numpy.ndarray.__sub__)
def grad(x, y, gz): def grad(x, y, gz):
return gz, -gz return gz, -gz
def c_foreach((x, y), (z, )):
return "z = x - y;"
class sub_elemwise(proto_sub_elemwise): isub_elemwise = sub_elemwise.inplace_version()
impl = assert_same_shapes(numpy.ndarray.__sub__) isub_elemwise.impl = assert_same_shapes(numpy.ndarray.__isub__)
# class proto_sub_elemwise(omega_op):
# def grad(x, y, gz):
# return gz, -gz
class isub_elemwise(proto_sub_elemwise, inplace): # class sub_elemwise(proto_sub_elemwise):
impl = assert_same_shapes(numpy.ndarray.__isub__) # impl = assert_same_shapes(numpy.ndarray.__sub__)
# class isub_elemwise(proto_sub_elemwise, inplace):
# impl = assert_same_shapes(numpy.ndarray.__isub__)
def sub_scalar_r(x, a): def sub_scalar_r(x, a):
return add_scalar(x, -a) return add_scalar(x, -a)
...@@ -539,67 +706,127 @@ def isub_scalar_r(x, a): ...@@ -539,67 +706,127 @@ def isub_scalar_r(x, a):
## Element-wise multiplication ## ## Element-wise multiplication ##
class proto_mul_elemwise(omega_op): class mul_elemwise(elemwise):
impl = assert_same_shapes(numpy.ndarray.__mul__)
def grad(x, y, gz): def grad(x, y, gz):
return mul(y, gz), mul(x, gz) return mul(y, gz), mul(x, gz)
def c_foreach((x, y), (z, )):
return "z = x * y;"
class mul_elemwise(proto_mul_elemwise): imul_elemwise = mul_elemwise.inplace_version()
impl = assert_same_shapes(numpy.ndarray.__mul__) imul_elemwise.impl = assert_same_shapes(numpy.ndarray.__imul__)
# class proto_mul_elemwise(omega_op):
# def grad(x, y, gz):
# return mul(y, gz), mul(x, gz)
class imul_elemwise(proto_mul_elemwise, inplace): # class mul_elemwise(proto_mul_elemwise):
impl = assert_same_shapes(numpy.ndarray.__imul__) # impl = assert_same_shapes(numpy.ndarray.__mul__)
# class imul_elemwise(proto_mul_elemwise, inplace):
# impl = assert_same_shapes(numpy.ndarray.__imul__)
class proto_scale(omega_op):
class scale(tensor_scalar_op):
impl = tensor_scalar_impl(numpy.ndarray.__mul__)
def grad(x, a, gz): def grad(x, a, gz):
return scale(a, gz), sum(mul_elemwise(x, gz)) return scale(a, gz), sum(mul_elemwise(x, gz))
c_expr = "x * a"
iscale = scale.inplace_version()
iscale.impl = tensor_scalar_impl(numpy.ndarray.__imul__)
class scale(proto_scale): # class proto_scale(omega_op):
impl = tensor_scalar_op(numpy.ndarray.__mul__) # def grad(x, a, gz):
# return scale(a, gz), sum(mul_elemwise(x, gz))
class iscale(proto_scale, inplace): # class scale(proto_scale):
impl = tensor_scalar_op(numpy.ndarray.__imul__) # impl = tensor_scalar_impl(numpy.ndarray.__mul__)
# class iscale(proto_scale, inplace):
# impl = tensor_scalar_impl(numpy.ndarray.__imul__)
class proto_sqr(omega_op):
class sqr(elemwise):
def grad(x, gz): def grad(x, gz):
return scale(mul_elemwise(x, gz), 2.0) return scale(mul_elemwise(x, gz), 2.0)
def impl(x):
return x * x
def c_foreach((x, ), (z, )):
"z = x * x;"
class sqr(proto_sqr): isqr = sqr.inplace_version()
impl = lambda x: numpy.multiply(x, x) isqr.impl = lambda x: x.__imul__(x)
class isqr(proto_sqr, inplace):
impl = lambda x: x.__imul__(x)
# class proto_sqr(omega_op):
# def grad(x, gz):
# return scale(mul_elemwise(x, gz), 2.0)
class proto_sqrt(omega_op): # class sqr(proto_sqr):
# impl = lambda x: numpy.multiply(x, x)
# class isqr(proto_sqr, inplace):
# impl = lambda x: x.__imul__(x)
class sqrt(elemwise):
def grad(x, gz): def grad(x, gz):
return scale(div(gz, sqrt(x)), 0.5) return scale(div(gz, sqrt(x)), 0.5)
class sqrt(proto_sqrt):
impl = numpy.sqrt impl = numpy.sqrt
def c_foreach((x, ), (z, )):
"z = pow(x, 0.5);"
isqrt = sqrt.inplace_version()
isqrt.impl = lambda x: x.__ipow__(0.5)
class isqrt(proto_sqrt, inplace):
impl = lambda x: x.__ipow__(0.5) # class proto_sqrt(omega_op):
# def grad(x, gz):
# return scale(div(gz, sqrt(x)), 0.5)
# class sqrt(proto_sqrt):
# impl = numpy.sqrt
# class isqrt(proto_sqrt, inplace):
# impl = lambda x: x.__ipow__(0.5)
## Exponentiation ## ## Exponentiation ##
class exp(omega_op): class exp(elemwise):
impl = numpy.exp impl = numpy.exp
def c_foreach((x, ), (z, )):
return "z = exp(x);"
# class exp(omega_op):
# impl = numpy.exp
## Element-wise division ## ## Element-wise division ##
class proto_div_elemwise(omega_op): class div_elemwise(elemwise):
impl = assert_same_shapes(numpy.ndarray.__div__)
def grad(x, y, gz): def grad(x, y, gz):
return div(gz, y), -div(mul(x, gz), sqr(y)) return div(gz, y), -div(mul(x, gz), sqr(y))
def c_foreach((x, y), (z, )):
return "z = x / y;"
class div_elemwise(proto_div_elemwise): idiv_elemwise = div_elemwise.inplace_version()
impl = assert_same_shapes(numpy.ndarray.__div__) idiv_elemwise.impl = assert_same_shapes(numpy.ndarray.__idiv__)
# class proto_div_elemwise(omega_op):
# def grad(x, y, gz):
# return div(gz, y), -div(mul(x, gz), sqr(y))
# class div_elemwise(proto_div_elemwise):
# impl = assert_same_shapes(numpy.ndarray.__div__)
class idiv_elemwise(proto_div_elemwise, inplace): # class idiv_elemwise(proto_div_elemwise, inplace):
impl = assert_same_shapes(numpy.ndarray.__idiv__) # impl = assert_same_shapes(numpy.ndarray.__idiv__)
def div_scalar_r(x, a): def div_scalar_r(x, a):
return scale(x, inv_elemwise(a)) return scale(x, inv_elemwise(a))
...@@ -614,28 +841,48 @@ def idiv_scalar_r(x, a): ...@@ -614,28 +841,48 @@ def idiv_scalar_r(x, a):
## Scaling ## ## Scaling ##
class neg(elemwise):
class proto_neg(omega_op): impl = numpy.ndarray.__neg__
def grad(x, gz): def grad(x, gz):
return -gz return -gz
def c_foreach((x, ), (z, )):
return "z = -x;"
class neg(proto_neg): ineg = neg.inplace_version()
impl = numpy.ndarray.__neg__ ineg.impl = lambda x: x.__imul__(-1)
class ineg(proto_neg, inplace):
impl = lambda x: x.__imul__(-1)
# class proto_neg(omega_op):
# def grad(x, gz):
# return -gz
class proto_inv_elemwise(omega_op): # class neg(proto_neg):
def grad(x, gz): # impl = numpy.ndarray.__neg__
raise NotImplemented
# class ineg(proto_neg, inplace):
# impl = lambda x: x.__imul__(-1)
class inv_elemwise(omega_op):
class inv_elemwise(elemwise):
impl = lambda x: 1 / x impl = lambda x: 1 / x
def grad(x, gz):
return -gz
def c_foreach((x, ), (z, )):
return "z = 1 / x;"
iinv_elemwise = inv_elemwise.inplace_version()
class iinv_elemwise(omega_op, inplace):
def impl(x): # class proto_inv_elemwise(omega_op):
x[:] = 1 / x # def grad(x, gz):
# raise NotImplemented
# class inv_elemwise(omega_op):
# impl = lambda x: 1 / x
# class iinv_elemwise(omega_op, inplace):
# def impl(x):
# x[:] = 1 / x
## Dot product ## ## Dot product ##
...@@ -666,46 +913,116 @@ class array_copy(omega_op): ...@@ -666,46 +913,116 @@ class array_copy(omega_op):
## Power ## ## Power ##
class proto_pow(omega_op): class pow_elemwise(elemwise):
impl = assert_same_shapes(numpy.ndarray.__pow__)
def grad(x, s, gz): def grad(x, s, gz):
return gz * s * (pow_elemwise(x, s-1.0)) return gz * s * (pow_elemwise(x, s-1.0))
def c_foreach((x, s), (z, )):
return "z = pow(x, s)"
class pow_elemwise(proto_pow): ipow_elemwise = pow_elemwise.inplace_version()
impl = assert_same_shapes(numpy.ndarray.__pow__) ipow_elemwise.impl = assert_same_shapes(numpy.ndarray.__ipow__)
class ipow_elemwise(proto_pow, inplace):
impl = assert_same_shapes(numpy.ndarray.__ipow__)
# class proto_pow(omega_op):
# def grad(x, s, gz):
# return gz * s * (pow_elemwise(x, s-1.0))
class pow_scalar_l(omega_op): # class pow_elemwise(proto_pow):
impl = tensor_scalar_op(numpy.ndarray.__pow__) # impl = assert_same_shapes(numpy.ndarray.__pow__)
# class ipow_elemwise(proto_pow, inplace):
# impl = assert_same_shapes(numpy.ndarray.__ipow__)
class pow_scalar_l(tensor_scalar_op):
impl = tensor_scalar_impl(lambda x, y: numpy.ndarray.__pow__(y, x))
def grad(x, s, gz): def grad(x, s, gz):
return gz * x * (pow_scalar_l(s,x-1.0)) return gz * x * (pow_scalar_l(s,x-1.0))
c_expr = "pow(a, x)"
class pow_scalar_r(omega_op): class pow_scalar_r(tensor_scalar_op):
impl = tensor_scalar_op(numpy.ndarray.__pow__) impl = tensor_scalar_impl(numpy.ndarray.__pow__)
def grad(x, s, gz): def grad(x, s, gz):
return gz * s * (pow_scalar_r(x,s-1.0)) return gz * s * (pow_scalar_r(x,s-1.0))
c_expr = "pow(x, a)"
class ipow_scalar_r(omega_op, inplace): ipow_scalar_r = pow_scalar_r.inplace_version()
impl = tensor_scalar_op(numpy.ndarray.__ipow__) ipow_scalar_r.impl = tensor_scalar_impl(numpy.ndarray.__ipow__)
def grad(x, s, gz):
return gz * s * (pow_scalar_r(x,s-1.0))
# class pow_scalar_l(omega_op):
# impl = tensor_scalar_impl(numpy.ndarray.__pow__)
# def grad(x, s, gz):
# return gz * x * (pow_scalar_l(s,x-1.0))
# class pow_scalar_r(omega_op):
# impl = tensor_scalar_impl(numpy.ndarray.__pow__)
# def grad(x, s, gz):
# return gz * s * (pow_scalar_r(x,s-1.0))
# class ipow_scalar_r(omega_op, inplace):
# impl = tensor_scalar_impl(numpy.ndarray.__ipow__)
# def grad(x, s, gz):
# return gz * s * (pow_scalar_r(x,s-1.0))
## Others ## ## Others ##
class minmax(omega_op): class minmax(elemwise):
nout = 2 nout = 2
def impl(x): def impl(x):
return x.min, x.max return x.min, x.max
def c_alloc((x, ), (_min, _max)):
_min.data = numpy.ndarray((), x.dtype)
_max.data = numpy.ndarray((), x.dtype)
def c_init((x, ), (_min, _max)):
return """
_x_dtype min = _x[0];
_x_dtype max = _x[0];
"""
def c_foreach((x, ), (_min, _max)):
return """
if (x < min) min = x;
if (x > max) max = x;
"""
def c_finalize((x, ), (_min, _max)):
return """
_min[0] = min;
_max[0] = max;
"""
# class minmax(omega_op):
# nout = 2
# def impl(x):
# return x.min, x.max
class fill(omega_op): class fill(elemwise):
impl = lambda model, value: (model * 0) + value impl = lambda model, value: (model * 0) + value
def c_init((model, _value), (z, )):
return "_z_dtype value = _value[0];"
def c_foreach((model, _value), (z, )):
return "z = value;"
class sum(omega_op): ifill = fill.inplace_version()
impl = numpy.sum
def grad(x, gz):
return fill(x, gz) # class fill(omega_op):
# impl = lambda model, value: (model * 0) + value
class sum(elemwise):
def c_alloc((x, ), (_sum, )):
_sum.data = numpy.ndarray((), dtype = x.data.dtype)
def c_init((x, ), (_sum, )):
return "_sum[0] = 0;"
def c_foreach((x, ), (_sum, )):
return "_sum[0] += x;"
# class sum(omega_op):
# impl = numpy.sum
# def grad(x, gz):
# return fill(x, gz)
## Array slicing ## ## Array slicing ##
......
try:
from cutils_ext import *
except ImportError:
from scipy import weave
single_runner = """
if (!PyCObject_Check(py_cthunk)) {
PyErr_SetString(PyExc_ValueError,
"Argument to run_cthunk must be a PyCObject returned by the c_thunk method of an omega_op.");
return NULL;
}
int (*fn)(void*) = reinterpret_cast<int (*)(void*)>(PyCObject_AsVoidPtr(py_cthunk));
void* it = PyCObject_GetDesc(py_cthunk);
int failure = fn(it);
if (failure) {
return NULL;
}
"""
cthunk = object()
mod = weave.ext_tools.ext_module('cutils_ext')
mod.add_function(weave.ext_tools.ext_function('run_cthunk', single_runner, ['cthunk']))
mod.compile()
from cutils_ext import *
...@@ -238,7 +238,7 @@ class PythonOp(Op): ...@@ -238,7 +238,7 @@ class PythonOp(Op):
if input not in exc: if input not in exc:
self.check_input(input) self.check_input(input)
try: try:
results = self.impl(*[input.data for input in self.inputs]) results = self._impl()
except Exception, e: except Exception, e:
print "Error in %s: %s" % (self, e) print "Error in %s: %s" % (self, e)
raise raise
...@@ -250,7 +250,7 @@ class PythonOp(Op): ...@@ -250,7 +250,7 @@ class PythonOp(Op):
output.set_value(result) output.set_value(result)
def _perform(self): def _perform(self):
results = self.impl(*[input.data for input in self.inputs]) results = self._impl()
if self.nout == 1: if self.nout == 1:
self.out.set_value(results) self.out.set_value(results)
else: else:
...@@ -267,6 +267,9 @@ class PythonOp(Op): ...@@ -267,6 +267,9 @@ class PythonOp(Op):
raise Exception("Uncomputed input: %s in %s" % (input, self)) raise Exception("Uncomputed input: %s in %s" % (input, self))
self.perform() self.perform()
def _impl(self):
return self.impl(*[input.data for input in self.inputs])
def impl(*args): def impl(*args):
raise NotImplementedError("This op has no implementation.") raise NotImplementedError("This op has no implementation.")
......
...@@ -13,28 +13,33 @@ class omega_type_converter_extension: ...@@ -13,28 +13,33 @@ class omega_type_converter_extension:
return [(tvars['c_type'], tvars['name'], tvars['var_convert'])] return [(tvars['c_type'], tvars['name'], tvars['var_convert'])]
def format_provide(self, x): def format_provide(self, x):
return '%s %s = %s;' % x return '%s %s = %s;\n' % x
def declaration_code(self, templatize = 0, inline = 0): def declaration_code(self, templatize = 0, inline = 0):
tvars = self.template_vars(inline=inline) tvars = self.template_vars(inline=inline)
code = '%(py_var)s = %(var_lookup)s;\n' % tvars code = '%(py_var)s = %(var_lookup)s;\n' % tvars
code += '\n'.join([self.format_provide(export) for export in self.provides()]) code += ''.join([self.format_provide(export) for export in self.provides()])
return code return code
def struct_init_code(self):
return "Py_INCREF(py_%s);" % self.name
def struct_cleanup_code(self):
return "Py_DECREF(py_%s);" % self.name
def struct_members_code(self): def struct_members_code(self):
return '\n'.join(['%s_type %s;' % (name, name) for c_type, name, init in self.provides()]) res = "PyObject* py_%s;\n" % self.name
return res + ''.join(['%s_type %s;\n' % (name, name) for c_type, name, init in self.provides()])
def struct_import_code(self): def struct_import_code(self):
return '\n'.join(['__STRUCT_P->%s = %s;' % (name, name) for c_type, name, init in self.provides()]) res = "__STRUCT_P->py_%s = py_%s;\n" % (self.name, self.name)
return res + ''.join(['__STRUCT_P->%s = %s;\n' % (name, name) for c_type, name, init in self.provides()])
def struct_support_code(self): def struct_support_code(self):
return "" return ""
def struct_typedefs(self): def struct_typedefs(self):
return "\n".join(["typedef %s %s_type;" % (c_type, name) for c_type, name, init in self.provides()]) return ''.join(["typedef %s %s_type;\n" % (c_type, name) for c_type, name, init in self.provides()])
# def struct_template_types(self):
# return [("typename %s_type" % name, ) for c_type, name, init in self.provides()]
class int_converter(omega_type_converter_extension, c_spec.int_converter): class int_converter(omega_type_converter_extension, c_spec.int_converter):
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论