提交 430999a9 authored 作者: Olivier Breuleux's avatar Olivier Breuleux

inline c stuff, elemwise!

上级 321230ad
...@@ -2,8 +2,13 @@ ...@@ -2,8 +2,13 @@
import gof import gof
from gof import current_mode, set_mode, build_mode, eval_mode, build_eval_mode, pop_mode, UNCOMPUTED, UNDEFINED, PythonR from gof import current_mode, set_mode, build_mode, eval_mode, build_eval_mode, pop_mode, UNCOMPUTED, UNDEFINED, PythonR
import type_spec
import numpy import numpy
import weakref import weakref
import inspect
import md5
from scipy import weave
from copy import copy as pycopy from copy import copy as pycopy
...@@ -135,6 +140,72 @@ def literal(x): ...@@ -135,6 +140,72 @@ def literal(x):
inplace = gof.Destroyer inplace = gof.Destroyer
view = gof.Viewer view = gof.Viewer
def cgen(name, behavior, inames, ivals, onames, ovals, converters = None):
if not converters:
converters = type_spec.default
for converter in converters:
assert isinstance(converter, type_spec.omega_type_converter_extension)
d = {}
for name, value in zip(inames + onames, ivals + ovals):
d[name] = value.data
# print inames + onames
# print d
# print [x.__class__ for x in converters]
specs = weave.ext_tools.assign_variable_types(inames + onames, d, type_converters = converters) #, auto_downcast = 0)
template = {}
template['name'] = name
template['code'] = behavior
template['members'] = "\n".join([spec.struct_members_code() for spec in specs])
template['decl'] = "\n".join([spec.struct_declaration_code() for spec in specs])
# types = [spec.struct_template_types() for spec in specs]
# template['types'] = ", ".join([", ".join([c_type for c_type, name, init in spec.provides()]) for spec in specs])
# template['typenames'] = ", ".join([spec.struct_template_code() for spec in specs])
template['support'] = "\n".join([spec.struct_support_code() for spec in specs])
template['typedefs'] = "\n".join([spec.struct_typedefs() for spec in specs])
template['struct_contents'] = """
%(typedefs)s
%(members)s
%(support)s
void execute(void) {
%(decl)s
%(code)s
}
""" % template
template['md5'] = md5.md5(template['struct_contents']).hexdigest()
template['struct_name'] = "_omega_%(name)s_%(md5)s" % template
struct = "struct %(struct_name)s { %(struct_contents)s\n};" % template
# code = "_omega_%(name)s<%(types)s>* __STRUCT_P = new _omega_%(name)s();\n" % template
# code = "_omega_%(name)s<%(types)s>* __STRUCT_P = &_omega_%(name)s<%(types)s>();\n" % template
code = "%(struct_name)s* __STRUCT_P = &%(struct_name)s();\n" % template
code += "\n".join([spec.struct_import_code() for spec in specs])
code += "\n__STRUCT_P->execute();\n"
code += "return_val = 10;"
code += "\n//%(md5)s" % template
print struct
print code
for spec in specs:
print spec.declaration_code()
print d
res = weave.inline(code, inames+onames, local_dict = d, global_dict = {}, support_code = struct, type_converters = converters)
return res, None
class omega_op(gof.PythonOp): class omega_op(gof.PythonOp):
forbid_broadcast = False forbid_broadcast = False
...@@ -153,6 +224,12 @@ class omega_op(gof.PythonOp): ...@@ -153,6 +224,12 @@ class omega_op(gof.PythonOp):
c_impl = c_impl.im_func c_impl = c_impl.im_func
cls.c_impl = staticmethod(c_impl) cls.c_impl = staticmethod(c_impl)
# make c_alloc a static method
c_alloc = cls.c_alloc
if hasattr(c_alloc, 'im_func'):
c_alloc = c_alloc.im_func
cls.c_alloc = staticmethod(c_alloc)
# # adjust impl # # adjust impl
# if cls.forbid_broadcast: # if cls.forbid_broadcast:
# cls.impl = assert_same_shapes(cls.impl) # cls.impl = assert_same_shapes(cls.impl)
...@@ -177,42 +254,194 @@ class omega_op(gof.PythonOp): ...@@ -177,42 +254,194 @@ class omega_op(gof.PythonOp):
def grad(*args): def grad(*args):
return UNDEFINED return UNDEFINED
def __create_c_code(self): def create_c_code(self, converters = None):
behavior = self.c_impl(self.inputs, self.outputs) behavior = self.c_impl(self.inputs, self.outputs)
(inames, onames), _1, _2, _3 = inspect.getargspec(self.c_impl) (inames, onames), _1, _2, _3 = inspect.getargspec(self.c_impl)
struct = """ return cgen(self.__class__.__name__, behavior, inames, self.inputs, onames, self.outputs, converters)
struct _omega_%(name)s {
_omega_%(name)s() {}
## return code, struct
void extract(void) {
} # behavior = self.c_impl(self.inputs, self.outputs)
void execute(void) { # (inames, onames), _1, _2, _3 = inspect.getargspec(self.c_impl)
%(code)s # d = {}
} # for name, value in zip(inames + onames, self.inputs + self.outputs):
void sync(void) { # d[name] = value.data
# converters = [omega_array_converter()] + weave.converters.default
} # specs = assign_variable_types(inames + onames, d, type_converters = converters) #, auto_downcast = 0)
};
""" % self.__class__.__name__, behavior # # itypes = [isinstance(input.data, numpy.ndarray) and num_to_c_types[input.data.dtype.char] for input in self.inputs]
# # otypes = [num_to_c_types[output.data.dtype.char] for output in self.outputs]
# tvars_list = [spec.template_vars() for spec in specs]
# template = {}
# template['name'] = self.__class__.__name__
# template['code'] = behavior
# template['members'] = ";\n".join([" %(name)s"])
# template['decl'] = ""
# template['typespecs'] = ", ".join(["%(name)s_type" % spec.name for spec in specs] +
# ["%(name)s_num_type" % tvars['name'] for tvars in tvars_list if 'num_type' in tvars])
# struct = """
# template<%(typespecs)s>
# struct _omega_%(name)s {
# %(members)s
# _omega_%(name)s() {}
# void execute(void) {
# %(decl)s
# %(code)s
# }
# };
# """ % template
def _c_alloc(self):
    """
    Call the class's c_alloc hook with this op's inputs and outputs.

    c_alloc is stored on the class as a static method taking
    (inputs, outputs), so the instance data is passed explicitly here.
    Whatever c_alloc returns is discarded.
    """
    self.c_alloc(self.inputs, self.outputs)
def c_alloc(self): def c_alloc(inputs, outputs):
raise Exception("Cannot allocate output arrays for this Op.") raise NotImplementedError()
def c_impl(inputs, outputs): def c_impl(inputs, outputs):
raise NotImplementedError() raise NotImplementedError()
def c_thunk(self): def c_thunk(self):
self.c_alloc() self._c_alloc()
if self.c_module: if self.c_module:
a a
else: else:
aaaaaa
def c_perform(self): def c_perform(self):
self.c_thunk()() self.c_thunk()()
def elemwise_wrap(beforeloop, inloop, afterloop, inames, onames):
    """
    Build the C code of a two-dimensional elementwise loop.

    beforeloop, inloop, afterloop: C code snippets placed respectively before
        the loops, inside the innermost loop and after the loops.
    inames, onames: names of the input and output variables.  Names that do
        not start with "_" are loop variables: inside the loop, each one is
        declared as the (i, j) element of the corresponding array (inputs by
        value, outputs by reference).  Names starting with "_" are left alone.

    The loop bounds are taken from the dimensions of the first loop variable
    (inputs first, then outputs).

    Raises ValueError if there is no loop variable at all, since there is then
    nothing to take the loop bounds from.

    Returns the generated C code as a string.
    """
    loop_inames = [iname for iname in inames if not iname.startswith("_")]
    loop_onames = [oname for oname in onames if not oname.startswith("_")]

    # Only a loop variable may provide the bounds: the dimensions of variable
    # x are exposed as N_x, whereas an underscore-prefixed name would yield an
    # identifier (N__x) that nothing defines.
    loop_names = loop_inames + loop_onames
    if not loop_names:
        raise ValueError("cannot build an elementwise loop: no input or output "
                         "variable is part of the loop")

    return """
%(beforeloop)s
for (int i = 0; i < N_%(v1)s[0]; i++) {
for (int j = 0; j < N_%(v1)s[1]; j++) {
%(idefs)s
%(odefs)s
%(inloop)s
}
}
%(afterloop)s
""" % dict(v1 = loop_names[0],
           idefs = "\n".join(["_%s_dtype %s = _%s2(i, j);" % (iname, iname, iname.upper()) for iname in loop_inames]),
           odefs = "\n".join(["_%s_dtype& %s = _%s2(i, j);" % (oname, oname, oname.upper()) for oname in loop_onames]),
           beforeloop = beforeloop,
           inloop = inloop,
           afterloop = afterloop)
class elemwise(omega_op):
    """
    Op whose C implementation is an elementwise loop.

    Subclasses describe the loop through three hooks, each taking
    (inputs, outputs) and returning C code: c_init (before the loop),
    c_foreach (loop body) and c_finalize (after the loop).  Argument names of
    c_foreach that start with "_" are not part of the loop.
    """

    @staticmethod
    def __clsinit__(cls, name, bases, dct):
        # Store the three loop hooks as static methods (unwrapping them first
        # if they were picked up as methods), then let omega_op finish the
        # class setup.
        for hook_name in ('c_init', 'c_foreach', 'c_finalize'):
            hook = getattr(cls, hook_name)
            hook = getattr(hook, 'im_func', hook)
            setattr(cls, hook_name, staticmethod(hook))
        omega_op.__clsinit__(cls, name, bases, dct)

    def _c_alloc(self):
        """
        Allocate the outputs, through c_alloc if the op defines it and
        otherwise by modelling them on the last input that is part of the loop.
        """
        if isinstance(self, inplace):
            dmap = self.destroy_map()
        else:
            dmap = {}

        try:
            return self.c_alloc(self.inputs, self.outputs)
        except NotImplementedError:
            pass

        # No explicit allocation policy: infer one from c_foreach's signature.
        (inames, onames), _1, _2, _3 = inspect.getargspec(self.c_foreach)

        for oname in onames:
            if oname.startswith("_"):
                raise Exception(("cannot infer an allocation policy automatically for variable "
                                 "%s because it is not part of the elementwise loop - "
                                 "please override the c_alloc method") % oname[1:])

        # The last input taking part in the loop gives the shape and dtype.
        model = None
        for iname, input in zip(inames, self.inputs):
            if iname.startswith("_"):
                continue
            model = input.data
        if model is None:
            raise Exception("cannot infer an allocation policy automatically for output variables "
                            "because there is no input variable in the loop from which to get the shape")

        for output in self.outputs:
            destroyed = dmap.get(output, [])
            if not destroyed:
                output.data = numpy.ndarray(model.shape, model.dtype)
                continue
            # An output working in place reuses the storage of its input.
            assert len(destroyed) == 1
            output.data = destroyed[0].data

    def c_init(inputs, outputs):
        return ""

    def c_foreach(inputs, outputs):
        return ""

    def c_finalize(inputs, outputs):
        return ""

    def create_c_code(self, converters = None):
        """
        Assemble the loop from c_init/c_foreach/c_finalize and hand it to cgen.
        """
        def mangle(name):
            # Loop variables get a leading underscore; names that already have
            # one are kept unchanged.
            if not name.startswith("_"):
                return "_" + name
            return name#[1:]

        # c_impl must be left unimplemented on elemwise ops.
        try:
            self.c_impl(self.inputs, self.outputs)
        except NotImplementedError:
            pass
        else:
            raise Exception("c_impl is not used by elemwise ops - define behavior in c_foreach instead")

        before = self.c_init(self.inputs, self.outputs)
        during = self.c_foreach(self.inputs, self.outputs)
        after = self.c_finalize(self.inputs, self.outputs)

        spec_b = inspect.getargspec(self.c_init)
        spec_d = inspect.getargspec(self.c_foreach)
        spec_a = inspect.getargspec(self.c_finalize)

        # A hook that produces code has to name its variables like c_foreach.
        if before and spec_b != spec_d:
            raise Exception("The input signature of c_init differs from the input signature of c_foreach.")
        if after and spec_a != spec_d:
            raise Exception("The input signature of c_finalize differs from the input signature of c_foreach.")

        (inames, onames), _1, _2, _3 = spec_d
        behavior = elemwise_wrap(before, during, after, inames, onames)

        mangled_inames = [mangle(iname) for iname in inames]
        mangled_onames = [mangle(oname) for oname in onames]
        return cgen(self.__class__.__name__, behavior, mangled_inames, self.inputs, mangled_onames, self.outputs, converters)
def scalar_switch(normal_f, scalar_f, scalar_f_reverse = None): def scalar_switch(normal_f, scalar_f, scalar_f_reverse = None):
def f(x, y): def f(x, y):
x, y = wrap(x), wrap(y) x, y = wrap(x), wrap(y)
......
from scipy.weave import c_spec, standard_array_spec
class omega_type_converter_extension:
    """
    Mixin for type converters that adds the code-generation hooks needed to
    place converted variables inside a generated C struct.

    The class relies on a template_vars() method supplied by the converter
    class it is combined with.
    """

    def provides(self):
        """
        Returns a list of (c_type, name, init_code) tuples that represent variables
        the type converter provides to the user's code.
        """
        tvars = self.template_vars()
        export = (tvars['c_type'], tvars['name'], tvars['var_convert'])
        return [export]

    def format_provide(self, x):
        # x is one (c_type, name, init_code) tuple as returned by provides().
        return '%s %s = %s;' % x

    def declaration_code(self, templatize = 0, inline = 0):
        # Look up the Python object, then declare every provided variable.
        tvars = self.template_vars(inline=inline)
        lookup = '%(py_var)s = %(var_lookup)s;\n' % tvars
        declarations = [self.format_provide(export) for export in self.provides()]
        return lookup + '\n'.join(declarations)

    def struct_members_code(self):
        # One struct member per provided variable, typed by its typedef.
        members = []
        for c_type, name, init in self.provides():
            members.append('%s_type %s;' % (name, name))
        return '\n'.join(members)

    def struct_import_code(self):
        # Copy each provided variable into the struct instance.
        assignments = []
        for c_type, name, init in self.provides():
            assignments.append('__STRUCT_P->%s = %s;' % (name, name))
        return '\n'.join(assignments)

    def struct_support_code(self):
        return ""

    def struct_declaration_code(self):
        return ""

    def struct_typedefs(self):
        # One "<name>_type" typedef per provided variable.
        typedefs = []
        for c_type, name, init in self.provides():
            typedefs.append("typedef %s %s_type;" % (c_type, name))
        return "\n".join(typedefs)

    # def struct_template_types(self):
    #     return [("typename %s_type" % name, ) for c_type, name, init in self.provides()]
# c_spec.int_converter combined with the struct code-generation hooks of
# omega_type_converter_extension; nothing is overridden.
class int_converter(omega_type_converter_extension, c_spec.int_converter):
    pass
# c_spec.float_converter combined with the struct code-generation hooks of
# omega_type_converter_extension; nothing is overridden.
class float_converter(omega_type_converter_extension, c_spec.float_converter):
    pass
# c_spec.complex_converter combined with the struct code-generation hooks of
# omega_type_converter_extension; nothing is overridden.
class complex_converter(omega_type_converter_extension, c_spec.complex_converter):
    pass
class unicode_converter(omega_type_converter_extension, c_spec.unicode_converter):
    """
    c_spec.unicode_converter combined with the struct code-generation hooks of
    omega_type_converter_extension.  In addition to the converted variable, it
    provides an int N<name> initialized with PyUnicode_GET_SIZE of the Python
    object.
    """

    def provides(self):
        """
        Returns the (c_type, name, init_code) tuples of the base extension,
        followed by the one for the N<name> size variable.
        """
        tvars = self.template_vars()
        # The base implementation is reached through the class, so the
        # instance has to be passed explicitly.
        base = omega_type_converter_extension.provides(self)
        return base + [('int', 'N%(name)s' % tvars, 'PyUnicode_GET_SIZE(%(py_var)s)' % tvars)]
# c_spec.string_converter combined with the struct code-generation hooks of
# omega_type_converter_extension; nothing is overridden.
class string_converter(omega_type_converter_extension, c_spec.string_converter):
    pass
# c_spec.list_converter combined with the struct code-generation hooks of
# omega_type_converter_extension; nothing is overridden.
class list_converter(omega_type_converter_extension, c_spec.list_converter):
    pass
# c_spec.dict_converter combined with the struct code-generation hooks of
# omega_type_converter_extension; nothing is overridden.
class dict_converter(omega_type_converter_extension, c_spec.dict_converter):
    pass
# c_spec.tuple_converter combined with the struct code-generation hooks of
# omega_type_converter_extension; nothing is overridden.
class tuple_converter(omega_type_converter_extension, c_spec.tuple_converter):
    pass
# c_spec.file_converter combined with the struct code-generation hooks of
# omega_type_converter_extension; nothing is overridden.
class file_converter(omega_type_converter_extension, c_spec.file_converter):
    pass
# c_spec.instance_converter combined with the struct code-generation hooks of
# omega_type_converter_extension; nothing is overridden.
class instance_converter(omega_type_converter_extension, c_spec.instance_converter):
    pass
class array_converter(omega_type_converter_extension, standard_array_spec.array_converter):
    """
    Array converter exposing, for an array variable <name>, the array object
    itself plus N<name> (dimensions), S<name> (strides), D<name> (number of
    dimensions) and <name> (typed pointer to the data).
    """

    def provides(self):
        # The array object comes first: declaration_code relies on that to
        # insert the type check right after it.
        tvars = self.template_vars()
        return [
            (tvars['c_type'], tvars['array_name'], tvars['var_convert']),
            ('npy_intp*', 'N%(name)s' % tvars, '%(array_name)s->dimensions' % tvars),
            ('npy_intp*', 'S%(name)s' % tvars, '%(array_name)s->strides' % tvars),
            ('int', 'D%(name)s' % tvars, '%(array_name)s->nd' % tvars),
            ('%(num_type)s*' % tvars, '%(name)s' % tvars, '(%(num_type)s*) %(array_name)s->data' % tvars),
        ]

    def declaration_code(self, templatize = 0, inline = 0):
        tvars = self.template_vars(inline=inline)
        tvars['cap_name'] = self.name.upper()
        prov = self.provides()
        array_export, derived_exports = prov[:1], prov[1:]

        # Lookup, array declaration, type check, then everything derived from
        # the array.
        parts = [
            '%(py_var)s = %(var_lookup)s;\n' % tvars,
            "\n".join([self.format_provide(export) for export in array_export]),
            '\nconversion_numpy_check_type(%(array_name)s,%(num_typecode)s,"%(name)s");\n' % tvars,
            "\n".join([self.format_provide(export) for export in derived_exports]),
        ]
        return "".join(parts)

    def struct_support_code(self, templatize = 0, inline = 0):
        # Element accessors <NAME>1 .. <NAME>4, indexing the data through the
        # strides for one to four indices.
        tvars = self.template_vars(inline=inline)
        tvars['cap_name'] = self.name.upper()
        accessors = [
            'inline %(num_type)s& %(cap_name)s1(int i) { return (*((%(num_type)s*)(%(array_name)s->data + (i)*S%(name)s[0])));}\n',
            'inline %(num_type)s& %(cap_name)s2(int i, int j) { return (*((%(num_type)s*)(%(array_name)s->data + (i)*S%(name)s[0] + (j)*S%(name)s[1])));}\n',
            'inline %(num_type)s& %(cap_name)s3(int i, int j, int k) { return (*((%(num_type)s*)(%(array_name)s->data + (i)*S%(name)s[0] + (j)*S%(name)s[1] + (k)*S%(name)s[2])));}\n',
            'inline %(num_type)s& %(cap_name)s4(int i, int j, int k, int l) { return (*((%(num_type)s*)(%(array_name)s->data + (i)*S%(name)s[0] + (j)*S%(name)s[1] + (k)*S%(name)s[2] + (l)*S%(name)s[3])));}\n',
        ]
        return "".join(accessors) % tvars

    def struct_typedefs(self):
        # The typedefs of the provided variables, plus <name>_dtype for the
        # element type.
        tvars = self.template_vars()
        base_typedefs = omega_type_converter_extension.struct_typedefs(self)
        dtype_typedef = "typedef %(num_type)s %(name)s_dtype;" % tvars
        return base_typedefs + "\n" + dtype_typedef
# Default list of type converters, one instance of each converter class
# defined in this module.
# NOTE(review): the order is presumably the order in which weave tries the
# converters when matching a value - confirm before reordering.
default = [array_converter(),
           int_converter(),
           float_converter(),
           complex_converter(),
           unicode_converter(),
           string_converter(),
           list_converter(),
           dict_converter(),
           tuple_converter(),
           file_converter(),
           instance_converter()]
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论