finalized a decent version of elemwise

Parent commit: c40eadf8
......@@ -3,6 +3,7 @@ import gof
from gof import current_mode, set_mode, build_mode, eval_mode, build_eval_mode, pop_mode, UNCOMPUTED, UNDEFINED, PythonR
import type_spec
import cutils
import numpy
import weakref
......@@ -21,37 +22,12 @@ def build(f, *args, **kwargs):
pop_mode()
return r
class Proxy(object):
    """Transparent attribute proxy around another object.

    Every attribute get/set/delete is forwarded to the wrapped target,
    except ``__class__`` and the ``_obj`` slot itself, which must be
    resolved on the proxy to avoid infinite recursion.  Note that special
    methods looked up on the type (``__len__``, ``__add__``, ...) are not
    forwarded by ``__getattribute__`` — only plain attribute access is.
    """
    __slots__ = ['_obj']  # no per-instance __dict__; only the wrapped target
    def __init__(self, obj = None):
        self._obj = obj
    def __getattribute__(self, attr):
        # '_obj' (and '__class__') are read from the proxy itself; everything
        # else is delegated to the wrapped object.
        if attr in ['__class__', '_obj']:
            return object.__getattribute__(self, attr)
        else:
            return getattr(object.__getattribute__(self, '_obj'), attr)
    def __setattr__(self, attr, value):
        # Only '_obj' is stored on the proxy; other writes go to the target.
        if attr in ['_obj']:
            object.__setattr__(self, attr, value)
        else:
            setattr(self._obj, attr, value)
    def __delattr__(self, attr):
        # Deletions are always forwarded to the wrapped object.
        delattr(self._obj, attr)
def as_string(*rs):
    """Render the graph(s) that compute the results *rs* as a string.

    For a single result the enclosing brackets produced by
    gof.graph.as_string are stripped; for several results the raw
    rendering is returned unchanged.
    """
    graph_inputs = gof.graph.inputs(rs)
    rendered = gof.graph.as_string(graph_inputs, rs)
    return rendered[1:-1] if len(rs) == 1 else rendered
# return str(gof.Env(gof.graph.inputs([r]), [r]))[1:-1]
def print_graph(*rs):
    # Convenience wrapper: render the graph(s) for *rs* and write the result
    # to stdout (Python 2 print statement).
    print as_string(*rs)
......@@ -77,8 +53,6 @@ def wrap(x):
return x
elif isinstance(x, omega_op):
return x.out
elif isinstance(x, Proxy):
return wrap(x._obj)
else:
return literal(x)
......@@ -126,20 +100,22 @@ def cgetspecs(names, vals, converters):
specs = weave.ext_tools.assign_variable_types(names, d, type_converters = converters) #, auto_downcast = 0)
return d, specs
def cgen(name, behavior, inames, ivals, onames, ovals, converters = None):
def cgen(name, behavior, names, vals, converters = None):
if not converters:
converters = type_spec.default
for converter in converters:
assert isinstance(converter, type_spec.omega_type_converter_extension)
d, specs = cgetspecs(inames + onames, ivals + ovals, converters)
d, specs = cgetspecs(names, vals, converters)
template = {}
template['name'] = name
template['code'] = behavior
template['members'] = "\n".join([spec.struct_members_code() for spec in specs])
template['support'] = "\n".join([spec.struct_support_code() for spec in specs])
template['typedefs'] = "\n".join([spec.struct_typedefs() for spec in specs])
template['members'] = "".join([spec.struct_members_code() for spec in specs])
template['support'] = "".join([spec.struct_support_code() for spec in specs])
template['typedefs'] = "".join([spec.struct_typedefs() for spec in specs])
template['incref'] = "".join(["Py_INCREF(py_%s);\n" % spec.name for spec in specs if spec.use_ref_count])
template['decref'] = "".join(["Py_DECREF(py_%s);\n" % spec.name for spec in specs if spec.use_ref_count])
template['struct_contents'] = """
%(typedefs)s
......@@ -148,23 +124,41 @@ def cgen(name, behavior, inames, ivals, onames, ovals, converters = None):
%(support)s
void execute(void) {
void init(void) {
%(incref)s
}
void cleanup(void) {
%(decref)s
}
int execute(void) {
%(code)s
return 0;
}
""" % template
template['md5'] = md5.md5(template['struct_contents']).hexdigest()
template['struct_name'] = "_omega_%(name)s_%(md5)s" % template
struct = "struct %(struct_name)s { %(struct_contents)s\n};" % template
code = "%(struct_name)s* __STRUCT_P = &%(struct_name)s();\n" % template
code += "\n".join([spec.struct_import_code() for spec in specs])
code += "\n__STRUCT_P->execute();\n"
code += "return_val = 10;"
code += "\n//%(md5)s" % template
static = """
int %(struct_name)s_executor(%(struct_name)s* self) {
return self->execute();
}
return d, code, struct, converters
void %(struct_name)s_destructor(void* executor, void* self) {
((%(struct_name)s*)self)->cleanup();
free(self);
}
""" % template
code = "%(struct_name)s* __STRUCT_P = new %(struct_name)s();\n" % template
code += "".join([spec.struct_import_code() for spec in specs])
code += "__STRUCT_P->init();\n"
code += "return_val = PyCObject_FromVoidPtrAndDesc((void*)(&%(struct_name)s_executor), __STRUCT_P, %(struct_name)s_destructor);\n" % template
return d, code, struct + static, converters
def make_static(cls, fname):
......@@ -205,15 +199,18 @@ class omega_op(gof.PythonOp):
return UNDEFINED
def c_code(self, converters = None):
behavior = self.c_impl(self.inputs, self.outputs)
(inames, onames), _1, _2, _3 = inspect.getargspec(self.c_impl)
return cgen(self.__class__.__name__, behavior, inames, self.inputs, onames, self.outputs, converters)
(inames, onames), behavior = self._c_impl()
return cgen(self.__class__.__name__, behavior, inames + onames, self.inputs + self.outputs, converters)
def _c_alloc(self):
self.c_alloc(self.inputs, self.outputs)
def c_alloc(inputs, outputs):
raise NotImplementedError()
def _c_impl(self):
(inames, onames), _1, _2, _3 = inspect.getargspec(self.c_impl)
return (inames, onames), self.c_impl(self.inputs, self.outputs)
def c_impl(inputs, outputs):
raise NotImplementedError()
......@@ -221,15 +218,15 @@ class omega_op(gof.PythonOp):
def c_thunk(self):
self._c_alloc()
d, code, struct, converters = self.c_code()
def thunk():
weave.inline(code, d.keys(), local_dict = d, global_dict = {}, support_code = struct, type_converters = converters)
thunk = weave.inline(code, d.keys(), local_dict = d, global_dict = {}, support_code = struct, type_converters = converters)
return thunk
def c_perform(self):
self.c_thunk()()
thunk = self.c_thunk()
cutils.run_cthunk(thunk)
def elemwise_wrap(beforeloop, inloop, afterloop, loop_vars, writable_loop_vars):
def elemwise_wrap_old(beforeloop, inloop, afterloop, loop_vars, writable_loop_vars):
return """
%(beforeloop)s
for (int i = 0; i < N_%(v1)s[0]; i++) {
......@@ -249,6 +246,97 @@ def elemwise_wrap(beforeloop, inloop, afterloop, loop_vars, writable_loop_vars):
inloop = inloop,
afterloop = afterloop)
def elemwise_loopcode(loopcode, init_template, next_template, acquire_template, cleanup_template, loop_vars, writable_loop_vars, aliases):
all_loop_vars = loop_vars + writable_loop_vars
template = dict(
init = "".join([init_template % dict(loop_var = loop_var) for loop_var in all_loop_vars]),
next = "".join([next_template % dict(loop_var = loop_var) for loop_var in all_loop_vars]),
cleanup = "".join([cleanup_template % dict(loop_var = loop_var) for loop_var in all_loop_vars]),
idefs = "".join([("_%(loop_var)s_dtype %(loop_var)s = " + acquire_template + ";\n")
% dict(loop_var = loop_var) for loop_var in loop_vars]),
odefs = "".join([("_%(loop_var)s_dtype& %(loop_var)s = " + acquire_template + ";\n")
% dict(loop_var = loop_var) for loop_var in writable_loop_vars]),
aliasdefs = "".join(["_%(v1)s_dtype %(v1)s = %(v2)s;\n" % dict(v1=v1, v2=v2)
for v1, v2 in aliases.items()]),
loopcode = loopcode
)
code = """
%(init)s
while (__elemwise_size--) {
%(idefs)s
%(odefs)s
%(aliasdefs)s
%(loopcode)s
%(next)s
}
%(cleanup)s
""" % template
return code
def elemwise_wrap(beforeloop, inloop, afterloop, loop_vars, writable_loop_vars, aliases):
    """Wrap the per-element C code `inloop` into a complete traversal.

    Emits two loop bodies via elemwise_loopcode — a fast pointer-walk used
    when every operand is all-C-contiguous or all-F-contiguous, and a
    general version driven by numpy PyArrayIterObject iterators — plus a
    runtime contiguity check that selects between them.  `beforeloop` and
    `afterloop` are spliced around the loop verbatim.

    Fixed: removed a leftover debug `print code` statement (it spammed the
    generated C source to stdout on every compilation) and stale
    commented-out error-handling code.
    """
    # General case: arbitrary strides, walked with numpy's array iterators.
    general_init = "PyArrayIterObject* _%(loop_var)s_iter = (PyArrayIterObject*)PyArray_IterNew((PyObject*)_%(loop_var)s_array);\n"
    general_next = "PyArray_ITER_NEXT(_%(loop_var)s_iter);\n"
    general_acquire = "*((_%(loop_var)s_dtype*)_%(loop_var)s_iter->dataptr)"
    general_cleanup = "if (_%(loop_var)s_iter) Py_DECREF(_%(loop_var)s_iter);\n"
    # Fast path: all operands contiguous in the same order -> raw pointers.
    contiguous_init = "_%(loop_var)s_dtype* _%(loop_var)s_iter = (_%(loop_var)s_dtype*)PyArray_DATA(_%(loop_var)s_array);\n"
    contiguous_next = "_%(loop_var)s_iter++;\n"
    contiguous_acquire = "*_%(loop_var)s_iter"
    contiguous_cleanup = ""
    all_loop_vars = loop_vars + writable_loop_vars
    template = dict(
        # Any loop variable works for sizing: all operands share the loop extent.
        v1 = all_loop_vars[0],
        beforeloop = beforeloop,
        general_loop = elemwise_loopcode(
            inloop,
            general_init, general_next, general_acquire, general_cleanup,
            loop_vars, writable_loop_vars, aliases),
        contiguous_loop = elemwise_loopcode(
            inloop,
            contiguous_init, contiguous_next, contiguous_acquire, contiguous_cleanup,
            loop_vars, writable_loop_vars, aliases),
        contiguity_check = "".join(["all_c_contiguous &= PyArray_ISCARRAY(_%(loop_var)s_array);\n" \
                                    "all_f_contiguous &= PyArray_ISFARRAY(_%(loop_var)s_array);\n" \
                                    % dict(loop_var = loop_var)
                                    for loop_var in all_loop_vars]),
        afterloop = afterloop)
    code = """
npy_intp __elemwise_size = PyArray_SIZE(_%(v1)s_array);
%(beforeloop)s
bool all_c_contiguous = 1;
bool all_f_contiguous = 1;
%(contiguity_check)s
if (all_c_contiguous || all_f_contiguous) {
%(contiguous_loop)s
}
else {
%(general_loop)s
}
%(afterloop)s
""" % template
    return code
def upcast(dtype, *dtypes):
    """Return the dtype numpy yields when values of all given dtypes are added.

    Implemented by actually adding 0-d arrays so the answer follows numpy's
    own promotion rules exactly.
    """
    acc = numpy.zeros((), dtype = dtype)
    for other in dtypes:
        acc = acc + numpy.zeros((), dtype = other)
    return acc.dtype
class elemwise(omega_op):
......@@ -274,27 +362,44 @@ class elemwise(omega_op):
raise Exception("cannot infer an allocation policy automatically for variable " \
"%s because it is not part of the elementwise loop - "\
"please override the c_alloc method" % oname[1:])
model = None
shape, dtype = None, None
for iname, input in zip(inames, self.inputs):
if not iname.startswith("_"):
model = input.data
if model is None:
shape = input.data
if shape is None:
raise Exception("cannot infer an allocation policy automatically for output variables " \
"because there is no input variable in the loop from which to get the shape")
dtype = upcast(*[input.data.dtype
for iname, input in zip(inames, self.inputs)
if isinstance(input.data, numpy.ndarray)])
for output in self.outputs:
inplace_inputs = dmap.get(output, [])
if inplace_inputs:
assert len(inplace_inputs) == 1
output.data = inplace_inputs[0].data
else:
output.data = numpy.ndarray(model.shape, model.dtype)
output.data = numpy.ndarray(shape, dtype)
def _c_init(self):
    # Pair c_init's introspected (input-names, output-names) signature with
    # the C code it generates, so callers can declare/mangle the variables.
    (inames, onames), _1, _2, _3 = inspect.getargspec(self.c_init)
    return (inames, onames), self.c_init(self.inputs, self.outputs)
def c_init(inputs, outputs):
    # Hook: C code emitted once before the elementwise loop.  Deliberately
    # has no `self` — the framework introspects this exact signature.
    return ""
def _c_foreach(self):
    # Pair c_foreach's introspected (input-names, output-names) signature
    # with the per-element C code it generates.
    (inames, onames), _1, _2, _3 = inspect.getargspec(self.c_foreach)
    return (inames, onames), self.c_foreach(self.inputs, self.outputs)
def c_foreach(inputs, outputs):
    # Hook: C code executed once per element inside the loop (no `self`;
    # the framework introspects this signature).
    return ""
def _c_finalize(self):
    # Pair c_finalize's introspected (input-names, output-names) signature
    # with the C code it generates for after the loop.
    (inames, onames), _1, _2, _3 = inspect.getargspec(self.c_finalize)
    return (inames, onames), self.c_finalize(self.inputs, self.outputs)
def c_finalize(inputs, outputs):
    # Hook: C code emitted once after the elementwise loop (no `self`;
    # the framework introspects this signature).
    return ""
......@@ -306,37 +411,81 @@ class elemwise(omega_op):
return "_" + name
try:
self.c_impl(self.inputs, self.outputs)
self._c_impl()
raise Exception("c_impl is not used by elemwise ops - define behavior in c_foreach instead")
except NotImplementedError:
pass
before = self.c_init(self.inputs, self.outputs)
during = self.c_foreach(self.inputs, self.outputs)
after = self.c_finalize(self.inputs, self.outputs)
# Get c_init, etc.'s argument names so we can declare them properly in the C code
spec_b = inspect.getargspec(self.c_init)
spec_d = inspect.getargspec(self.c_foreach)
spec_a = inspect.getargspec(self.c_finalize)
spec_b, before = self._c_init()
spec_d, during = self._c_foreach()
spec_a, after = self._c_finalize()
# Sanity check - apart from loop vars, variables are shared in the before/during/after parts
if before and spec_b != spec_d:
raise Exception("The input signature of c_init differs from the input signature of c_foreach.")
if after and spec_a != spec_d:
raise Exception("The input signature of c_finalize differs from the input signature of c_foreach.")
(inames, onames), _1, _2, _3 = spec_d
(inames, onames) = spec_d
aliases = {}
if isinstance(self, inplace):
dmap = self.destroy_map()
for oname, output in zip(onames, self.outputs):
if not oname.startswith("_"):
for input in dmap.get(output, []):
aliases[inames[self.inputs.index(input)]] = oname
behavior = elemwise_wrap(before, during, after,
[iname for iname in inames if not iname.startswith("_")],
[oname for oname in onames if not oname.startswith("_")])
[iname for iname in inames if not iname.startswith("_") and not iname in aliases],
[oname for oname in onames if not oname.startswith("_")],
aliases)
inames = [mangle(name) for name in inames]
onames = [mangle(name) for name in onames]
return cgen(self.__class__.__name__, behavior, inames, self.inputs, onames, self.outputs, converters)
return cgen(self.__class__.__name__, behavior, inames + onames, self.inputs + self.outputs, converters)
@classmethod
def inplace_version(cls, dmap = {0: 0}):
    """Build a subclass of `cls` that overwrites inputs in place.

    `dmap` maps output index -> input index; output i is written into the
    storage of input dmap[i].  (The mutable default is never mutated here.)
    Returns the new class; callers typically also replace its `impl`.

    Fixed: the {0: 0} branch computed `cls.__name__ + "_inplace" % dmap` —
    the `% dmap` was a no-op (no conversion specifiers with a mapping
    argument) and clearly unintended; it is removed.
    """
    # In-place outputs must participate in the elementwise loop
    # (loop variables are the names not prefixed with '_').
    (inames, onames), _1, _2, _3 = inspect.getargspec(cls.c_foreach)
    for i, oname in enumerate(onames):
        if i in dmap:
            assert not oname.startswith("_")
    class C(cls, inplace):
        def destroy_map(self):
            # NOTE(review): cls.destroy_map() is called without an instance,
            # and self.dmap is read while the enclosing scope binds `dmap` —
            # confirm both resolve as intended upstream.
            ret = cls.destroy_map()
            for output, input in self.dmap.items():
                ret[self.outputs.index(output)] = [self.inputs.index(input)]
            return ret
        def _impl(self):
            if self.impl is not cls.impl:
                # If the user sets his own inplace operation, we use it
                return cls._impl(self)
            else:
                res = cls._impl(self)
                if isinstance(res, gof.Result):
                    res = [res]
                else:
                    res = copy(res)
                for output, input in dmap.items():
                    # The default implementation returned a copy, so we just
                    # overwrite the original input with the contents of that
                    # copy.  This is not meant to be efficient, only correct.
                    a = self.inputs[input].data
                    a[:] = res[output]
                    res[output] = a
                if len(res) == 1:
                    return res[0]
                else:
                    return res
    if dmap == {0: 0}:
        C.__name__ = cls.__name__ + "_inplace"
    else:
        C.__name__ = cls.__name__ + "_inplace%s" % dmap
    return C
def scalar_switch(normal_f, scalar_f, scalar_f_reverse = None):
......@@ -419,7 +568,7 @@ def assert_same_shapes(impl):
return ret
# Wrapper to ensure that the last input to impl is a scalar
def tensor_scalar_op(impl):
def tensor_scalar_impl(impl):
def ret(x, a):
if a.shape:
raise ValueError("The second argument to %s must be a scalar." % impl)
......@@ -449,83 +598,101 @@ def tensor_scalar_op(impl):
## Addition ##
class add(omega_op):
class add_elemwise(elemwise):
impl = assert_same_shapes(numpy.ndarray.__add__)
def grad(x, y, gz):
return gz
def alloc(x, y):
return numpy.ndarray(x.shape, dtype = x.dtype)
def c_impl(x, y, z):
return """
for (int i = 0; i < z.ncols; i++) {
for (int j = 0; j < z.nrows; j++) {
z(i, j) = x(i, j) + y(i, j);
}
}
"""
def c_foreach((x, y), (z, )):
return "z = x + y;"
class proto_add_elemwise(omega_op):
def grad(x, y, gz):
return gz
iadd_elemwise = add_elemwise.inplace_version()
iadd_elemwise.impl = assert_same_shapes(numpy.ndarray.__iadd__)
class add_elemwise(proto_add_elemwise):
impl = assert_same_shapes(numpy.ndarray.__add__)
class iadd_elemwise(proto_add_elemwise, inplace):
impl = assert_same_shapes(numpy.ndarray.__iadd__)
# class proto_add_elemwise(omega_op):
# def grad(x, y, gz):
# return gz
# class add_elemwise(proto_add_elemwise):
# impl = assert_same_shapes(numpy.ndarray.__add__)
# class iadd_elemwise(proto_add_elemwise, inplace):
# impl = assert_same_shapes(numpy.ndarray.__iadd__)
class tensor_scalar_op(elemwise):
def c_init((x, _a), (z, )):
return "_a_dtype a = _a[0];"
def _c_foreach(self):
return (('x', '_a'), ('z', )), "z = %s;" % self.c_operation
class proto_add_scalar(omega_op):
class add_scalar(tensor_scalar_op):
impl = tensor_scalar_impl(numpy.ndarray.__add__)
def grad(x, a, gz):
return gz, sum(gz)
c_expr = "x + a"
class add_scalar(proto_add_scalar):
impl = tensor_scalar_op(numpy.ndarray.__add__)
# def c_impl(x, s, z):
# """
# if (*__z == NULL) {
# *__z = new ndarray
# }
# ndarray& z = **__z
# """
# return """
# z.resize_like(x);
# for (int i = 0; i < z.size(); i++) {
# z[i] = x[i] * s;
# }
# return z;
# """
class iadd_scalar(proto_add_scalar, inplace):
impl = tensor_scalar_op(numpy.ndarray.__iadd__)
class proto_twice(omega_op):
iadd_scalar = add_scalar.inplace_version()
iadd_scalar.impl = tensor_scalar_impl(numpy.ndarray.__iadd__)
# class proto_add_scalar(omega_op):
# def grad(x, a, gz):
# return gz, sum(gz)
# class add_scalar(proto_add_scalar):
# impl = tensor_scalar_impl(numpy.ndarray.__add__)
# class iadd_scalar(proto_add_scalar, inplace):
# impl = tensor_scalar_impl(numpy.ndarray.__iadd__)
class twice(elemwise):
def grad(x, gz):
return scale(gz, 2.0)
class twice(proto_twice):
def impl(x):
return x + x
def c_foreach((x, ), (z, )):
"z = x + x;"
class itwice(proto_twice, inplace):
def impl(x):
x += x
return x
itwice = twice.inplace_version()
# class proto_twice(omega_op):
# def grad(x, gz):
# return scale(gz, 2.0)
# class twice(proto_twice):
# def impl(x):
# return x + x
# class itwice(proto_twice, inplace):
# def impl(x):
# x += x
# return x
## Subtraction ##
class proto_sub_elemwise(omega_op):
class sub_elemwise(elemwise):
    """Elementwise subtraction: z = x - y (shapes must match)."""
    impl = assert_same_shapes(numpy.ndarray.__sub__)
    def grad(x, y, gz):
        # d(x-y)/dx = 1, d(x-y)/dy = -1
        return gz, -gz
    # Python 2 tuple-unpacking signature: inputs (x, y), outputs (z,).
    def c_foreach((x, y), (z, )):
        return "z = x - y;"
class sub_elemwise(proto_sub_elemwise):
impl = assert_same_shapes(numpy.ndarray.__sub__)
isub_elemwise = sub_elemwise.inplace_version()
isub_elemwise.impl = assert_same_shapes(numpy.ndarray.__isub__)
# class proto_sub_elemwise(omega_op):
# def grad(x, y, gz):
# return gz, -gz
# class sub_elemwise(proto_sub_elemwise):
# impl = assert_same_shapes(numpy.ndarray.__sub__)
class isub_elemwise(proto_sub_elemwise, inplace):
impl = assert_same_shapes(numpy.ndarray.__isub__)
# class isub_elemwise(proto_sub_elemwise, inplace):
# impl = assert_same_shapes(numpy.ndarray.__isub__)
def sub_scalar_r(x, a):
return add_scalar(x, -a)
......@@ -539,67 +706,127 @@ def isub_scalar_r(x, a):
## Element-wise multiplication ##
class proto_mul_elemwise(omega_op):
class mul_elemwise(elemwise):
    """Elementwise multiplication: z = x * y (shapes must match)."""
    impl = assert_same_shapes(numpy.ndarray.__mul__)
    def grad(x, y, gz):
        # Product rule: d/dx = y * gz, d/dy = x * gz
        return mul(y, gz), mul(x, gz)
    # Python 2 tuple-unpacking signature: inputs (x, y), outputs (z,).
    def c_foreach((x, y), (z, )):
        return "z = x * y;"
class mul_elemwise(proto_mul_elemwise):
impl = assert_same_shapes(numpy.ndarray.__mul__)
imul_elemwise = mul_elemwise.inplace_version()
imul_elemwise.impl = assert_same_shapes(numpy.ndarray.__imul__)
# class proto_mul_elemwise(omega_op):
# def grad(x, y, gz):
# return mul(y, gz), mul(x, gz)
class imul_elemwise(proto_mul_elemwise, inplace):
impl = assert_same_shapes(numpy.ndarray.__imul__)
# class mul_elemwise(proto_mul_elemwise):
# impl = assert_same_shapes(numpy.ndarray.__mul__)
# class imul_elemwise(proto_mul_elemwise, inplace):
# impl = assert_same_shapes(numpy.ndarray.__imul__)
class proto_scale(omega_op):
class scale(tensor_scalar_op):
    """Multiply a tensor by a scalar: z = x * a."""
    impl = tensor_scalar_impl(numpy.ndarray.__mul__)
    def grad(x, a, gz):
        # NOTE(review): scale(a, gz) passes the scalar first, although scale
        # takes (tensor, scalar) — confirm the intended argument order.
        return scale(a, gz), sum(mul_elemwise(x, gz))
    # C expression substituted into tensor_scalar_op's loop body.
    c_expr = "x * a"
iscale = scale.inplace_version()
iscale.impl = tensor_scalar_impl(numpy.ndarray.__imul__)
class scale(proto_scale):
impl = tensor_scalar_op(numpy.ndarray.__mul__)
# class proto_scale(omega_op):
# def grad(x, a, gz):
# return scale(a, gz), sum(mul_elemwise(x, gz))
class iscale(proto_scale, inplace):
impl = tensor_scalar_op(numpy.ndarray.__imul__)
# class scale(proto_scale):
# impl = tensor_scalar_impl(numpy.ndarray.__mul__)
# class iscale(proto_scale, inplace):
# impl = tensor_scalar_impl(numpy.ndarray.__imul__)
class proto_sqr(omega_op):
class sqr(elemwise):
def grad(x, gz):
return scale(mul_elemwise(x, gz), 2.0)
def impl(x):
return x * x
def c_foreach((x, ), (z, )):
"z = x * x;"
class sqr(proto_sqr):
impl = lambda x: numpy.multiply(x, x)
isqr = sqr.inplace_version()
isqr.impl = lambda x: x.__imul__(x)
class isqr(proto_sqr, inplace):
impl = lambda x: x.__imul__(x)
# class proto_sqr(omega_op):
# def grad(x, gz):
# return scale(mul_elemwise(x, gz), 2.0)
class proto_sqrt(omega_op):
# class sqr(proto_sqr):
# impl = lambda x: numpy.multiply(x, x)
# class isqr(proto_sqr, inplace):
# impl = lambda x: x.__imul__(x)
class sqrt(elemwise):
def grad(x, gz):
return scale(div(gz, sqrt(x)), 0.5)
class sqrt(proto_sqrt):
impl = numpy.sqrt
def c_foreach((x, ), (z, )):
"z = pow(x, 0.5);"
class isqrt(proto_sqrt, inplace):
impl = lambda x: x.__ipow__(0.5)
isqrt = sqrt.inplace_version()
isqrt.impl = lambda x: x.__ipow__(0.5)
# class proto_sqrt(omega_op):
# def grad(x, gz):
# return scale(div(gz, sqrt(x)), 0.5)
# class sqrt(proto_sqrt):
# impl = numpy.sqrt
# class isqrt(proto_sqrt, inplace):
# impl = lambda x: x.__ipow__(0.5)
## Exponentiation ##
class exp(omega_op):
class exp(elemwise):
    """Elementwise exponential: z = exp(x).  (No grad defined here.)"""
    impl = numpy.exp
    # Python 2 tuple-unpacking signature: inputs (x,), outputs (z,).
    def c_foreach((x, ), (z, )):
        return "z = exp(x);"
# class exp(omega_op):
# impl = numpy.exp
## Element-wise division ##
class proto_div_elemwise(omega_op):
class div_elemwise(elemwise):
    """Elementwise division: z = x / y (shapes must match)."""
    impl = assert_same_shapes(numpy.ndarray.__div__)
    def grad(x, y, gz):
        # Quotient rule: d/dx = gz / y, d/dy = -x * gz / y**2
        return div(gz, y), -div(mul(x, gz), sqr(y))
    # Python 2 tuple-unpacking signature: inputs (x, y), outputs (z,).
    def c_foreach((x, y), (z, )):
        return "z = x / y;"
class div_elemwise(proto_div_elemwise):
impl = assert_same_shapes(numpy.ndarray.__div__)
idiv_elemwise = div_elemwise.inplace_version()
idiv_elemwise.impl = assert_same_shapes(numpy.ndarray.__idiv__)
# class proto_div_elemwise(omega_op):
# def grad(x, y, gz):
# return div(gz, y), -div(mul(x, gz), sqr(y))
class idiv_elemwise(proto_div_elemwise, inplace):
impl = assert_same_shapes(numpy.ndarray.__idiv__)
# class div_elemwise(proto_div_elemwise):
# impl = assert_same_shapes(numpy.ndarray.__div__)
# class idiv_elemwise(proto_div_elemwise, inplace):
# impl = assert_same_shapes(numpy.ndarray.__idiv__)
def div_scalar_r(x, a):
return scale(x, inv_elemwise(a))
......@@ -614,28 +841,48 @@ def idiv_scalar_r(x, a):
## Scaling ##
class proto_neg(omega_op):
class neg(elemwise):
    """Elementwise negation: z = -x."""
    impl = numpy.ndarray.__neg__
    def grad(x, gz):
        # d(-x)/dx = -1
        return -gz
    # Python 2 tuple-unpacking signature: inputs (x,), outputs (z,).
    def c_foreach((x, ), (z, )):
        return "z = -x;"
class neg(proto_neg):
impl = numpy.ndarray.__neg__
ineg = neg.inplace_version()
ineg.impl = lambda x: x.__imul__(-1)
class ineg(proto_neg, inplace):
impl = lambda x: x.__imul__(-1)
# class proto_neg(omega_op):
# def grad(x, gz):
# return -gz
# class neg(proto_neg):
# impl = numpy.ndarray.__neg__
# class ineg(proto_neg, inplace):
# impl = lambda x: x.__imul__(-1)
class proto_inv_elemwise(omega_op):
def grad(x, gz):
raise NotImplemented
class inv_elemwise(omega_op):
class inv_elemwise(elemwise):
impl = lambda x: 1 / x
def grad(x, gz):
return -gz
def c_foreach((x, ), (z, )):
return "z = 1 / x;"
class iinv_elemwise(omega_op, inplace):
def impl(x):
x[:] = 1 / x
iinv_elemwise = inv_elemwise.inplace_version()
# class proto_inv_elemwise(omega_op):
# def grad(x, gz):
# raise NotImplemented
# class inv_elemwise(omega_op):
# impl = lambda x: 1 / x
# class iinv_elemwise(omega_op, inplace):
# def impl(x):
# x[:] = 1 / x
## Dot product ##
......@@ -666,46 +913,116 @@ class array_copy(omega_op):
## Power ##
class proto_pow(omega_op):
class pow_elemwise(elemwise):
impl = assert_same_shapes(numpy.ndarray.__pow__)
def grad(x, s, gz):
return gz * s * (pow_elemwise(x, s-1.0))
def c_foreach((x, s), (z, )):
return "z = pow(x, s)"
class pow_elemwise(proto_pow):
impl = assert_same_shapes(numpy.ndarray.__pow__)
ipow_elemwise = pow_elemwise.inplace_version()
ipow_elemwise.impl = assert_same_shapes(numpy.ndarray.__ipow__)
# class proto_pow(omega_op):
# def grad(x, s, gz):
# return gz * s * (pow_elemwise(x, s-1.0))
class ipow_elemwise(proto_pow, inplace):
impl = assert_same_shapes(numpy.ndarray.__ipow__)
# class pow_elemwise(proto_pow):
# impl = assert_same_shapes(numpy.ndarray.__pow__)
# class ipow_elemwise(proto_pow, inplace):
# impl = assert_same_shapes(numpy.ndarray.__ipow__)
class pow_scalar_l(omega_op):
impl = tensor_scalar_op(numpy.ndarray.__pow__)
class pow_scalar_l(tensor_scalar_op):
    """Scalar base raised to a tensor exponent: z = a ** x."""
    # Arguments are swapped so the scalar becomes the base.
    impl = tensor_scalar_impl(lambda x, y: numpy.ndarray.__pow__(y, x))
    def grad(x, s, gz):
        # NOTE(review): d(s**x)/dx = s**x * log(s); the expression below is
        # the power rule for x**s instead — confirm which is intended.
        return gz * x * (pow_scalar_l(s,x-1.0))
    # C expression substituted into tensor_scalar_op's loop body.
    c_expr = "pow(a, x)"
class pow_scalar_r(omega_op):
impl = tensor_scalar_op(numpy.ndarray.__pow__)
class pow_scalar_r(tensor_scalar_op):
    """Tensor raised to a scalar power: z = x ** a."""
    impl = tensor_scalar_impl(numpy.ndarray.__pow__)
    def grad(x, s, gz):
        # Power rule: d(x**s)/dx = s * x**(s-1)
        return gz * s * (pow_scalar_r(x,s-1.0))
    # C expression substituted into tensor_scalar_op's loop body.
    c_expr = "pow(x, a)"
class ipow_scalar_r(omega_op, inplace):
impl = tensor_scalar_op(numpy.ndarray.__ipow__)
def grad(x, s, gz):
return gz * s * (pow_scalar_r(x,s-1.0))
ipow_scalar_r = pow_scalar_r.inplace_version()
ipow_scalar_r.impl = tensor_scalar_impl(numpy.ndarray.__ipow__)
# class pow_scalar_l(omega_op):
# impl = tensor_scalar_impl(numpy.ndarray.__pow__)
# def grad(x, s, gz):
# return gz * x * (pow_scalar_l(s,x-1.0))
# class pow_scalar_r(omega_op):
# impl = tensor_scalar_impl(numpy.ndarray.__pow__)
# def grad(x, s, gz):
# return gz * s * (pow_scalar_r(x,s-1.0))
# class ipow_scalar_r(omega_op, inplace):
# impl = tensor_scalar_impl(numpy.ndarray.__ipow__)
# def grad(x, s, gz):
# return gz * s * (pow_scalar_r(x,s-1.0))
## Others ##
class minmax(omega_op):
class minmax(elemwise):
nout = 2
def impl(x):
return x.min, x.max
def c_alloc((x, ), (_min, _max)):
_min.data = numpy.ndarray((), x.dtype)
_max.data = numpy.ndarray((), x.dtype)
def c_init((x, ), (_min, _max)):
return """
_x_dtype min = _x[0];
_x_dtype max = _x[0];
"""
def c_foreach((x, ), (_min, _max)):
return """
if (x < min) min = x;
if (x > max) max = x;
"""
def c_finalize((x, ), (_min, _max)):
return """
_min[0] = min;
_max[0] = max;
"""
# class minmax(omega_op):
# nout = 2
# def impl(x):
# return x.min, x.max
class fill(omega_op):
class fill(elemwise):
    """Produce an array shaped like `model`, filled with the scalar `value`."""
    impl = lambda model, value: (model * 0) + value
    # '_'-prefixed names (_value) stay outside the elementwise loop.
    def c_init((model, _value), (z, )):
        # Read the scalar once, before the loop, cast to the output dtype.
        return "_z_dtype value = _value[0];"
    def c_foreach((model, _value), (z, )):
        return "z = value;"
class sum(omega_op):
impl = numpy.sum
def grad(x, gz):
return fill(x, gz)
ifill = fill.inplace_version()
# class fill(omega_op):
# impl = lambda model, value: (model * 0) + value
class sum(elemwise):
    """Reduce a tensor to a 0-d array holding the sum of its elements.

    NOTE: shadows the builtin `sum` (pre-existing naming in this module).
    No `impl`/`grad` defined here — presumably supplied elsewhere; verify.
    """
    # '_'-prefixed _sum stays outside the loop and acts as the accumulator.
    def c_alloc((x, ), (_sum, )):
        # 0-d accumulator with the input's dtype.
        _sum.data = numpy.ndarray((), dtype = x.data.dtype)
    def c_init((x, ), (_sum, )):
        return "_sum[0] = 0;"
    def c_foreach((x, ), (_sum, )):
        return "_sum[0] += x;"
# class sum(omega_op):
# impl = numpy.sum
# def grad(x, gz):
# return fill(x, gz)
## Array slicing ##
......
try:
    # Use the already-compiled helper extension if it is importable.
    from cutils_ext import *
except ImportError:
    # First run: build a tiny weave extension exposing run_cthunk(cthunk),
    # which unwraps the PyCObject produced by omega_op.c_thunk and invokes
    # the compiled thunk with its stored descriptor pointer.
    from scipy import weave
    single_runner = """
if (!PyCObject_Check(py_cthunk)) {
PyErr_SetString(PyExc_ValueError,
"Argument to run_cthunk must be a PyCObject returned by the c_thunk method of an omega_op.");
return NULL;
}
int (*fn)(void*) = reinterpret_cast<int (*)(void*)>(PyCObject_AsVoidPtr(py_cthunk));
void* it = PyCObject_GetDesc(py_cthunk);
int failure = fn(it);
if (failure) {
return NULL;
}
"""
    # Sample value for weave's argument type inference — presumably any
    # object works since the C code type-checks py_cthunk itself; verify.
    cthunk = object()
    mod = weave.ext_tools.ext_module('cutils_ext')
    mod.add_function(weave.ext_tools.ext_function('run_cthunk', single_runner, ['cthunk']))
    mod.compile()
    from cutils_ext import *
......@@ -238,7 +238,7 @@ class PythonOp(Op):
if input not in exc:
self.check_input(input)
try:
results = self.impl(*[input.data for input in self.inputs])
results = self._impl()
except Exception, e:
print "Error in %s: %s" % (self, e)
raise
......@@ -250,7 +250,7 @@ class PythonOp(Op):
output.set_value(result)
def _perform(self):
results = self.impl(*[input.data for input in self.inputs])
results = self._impl()
if self.nout == 1:
self.out.set_value(results)
else:
......@@ -267,6 +267,9 @@ class PythonOp(Op):
raise Exception("Uncomputed input: %s in %s" % (input, self))
self.perform()
def _impl(self):
    # Apply this op's implementation to the current values of its inputs.
    return self.impl(*[input.data for input in self.inputs])
def impl(*args):
    # Hook: compute output value(s) from raw input values; subclasses override.
    raise NotImplementedError("This op has no implementation.")
......
......@@ -13,28 +13,33 @@ class omega_type_converter_extension:
return [(tvars['c_type'], tvars['name'], tvars['var_convert'])]
def format_provide(self, x):
return '%s %s = %s;' % x
return '%s %s = %s;\n' % x
def declaration_code(self, templatize = 0, inline = 0):
tvars = self.template_vars(inline=inline)
code = '%(py_var)s = %(var_lookup)s;\n' % tvars
code += '\n'.join([self.format_provide(export) for export in self.provides()])
code += ''.join([self.format_provide(export) for export in self.provides()])
return code
def struct_init_code(self):
    # C statement taking a reference on this variable's stored PyObject*.
    return "Py_INCREF(py_%s);" % self.name
def struct_cleanup_code(self):
    # C statement releasing the reference taken by struct_init_code.
    return "Py_DECREF(py_%s);" % self.name
def struct_members_code(self):
return '\n'.join(['%s_type %s;' % (name, name) for c_type, name, init in self.provides()])
res = "PyObject* py_%s;\n" % self.name
return res + ''.join(['%s_type %s;\n' % (name, name) for c_type, name, init in self.provides()])
def struct_import_code(self):
return '\n'.join(['__STRUCT_P->%s = %s;' % (name, name) for c_type, name, init in self.provides()])
res = "__STRUCT_P->py_%s = py_%s;\n" % (self.name, self.name)
return res + ''.join(['__STRUCT_P->%s = %s;\n' % (name, name) for c_type, name, init in self.provides()])
def struct_support_code(self):
    # Base converter contributes no extra support code.
    return ""
def struct_typedefs(self):
return "\n".join(["typedef %s %s_type;" % (c_type, name) for c_type, name, init in self.provides()])
# def struct_template_types(self):
# return [("typename %s_type" % name, ) for c_type, name, init in self.provides()]
return ''.join(["typedef %s %s_type;\n" % (c_type, name) for c_type, name, init in self.provides()])
class int_converter(omega_type_converter_extension, c_spec.int_converter):
......
Markdown format supported
0%
You are about to add 0 people to the discussion. Please proceed with caution.
Finish editing this comment first!
Register or sign in to comment