提交 58a674a3 authored 作者: Olivier Breuleux's avatar Olivier Breuleux

yeaaaaaa

上级 57c452cf
...@@ -5,3 +5,4 @@ syntax: glob ...@@ -5,3 +5,4 @@ syntax: glob
*.so *.so
*~ *~
\#*\# \#*\#
compiled/*.cpp
...@@ -10,9 +10,10 @@ from copy import copy ...@@ -10,9 +10,10 @@ from copy import copy
def experimental_linker(env, target = None): def experimental_linker(env, target = None):
def fetch(op): def fetch(op):
try: try:
thunk = op.c_thunk_creator() factory = op.c_thunk_factory()
# print "yea %s" % op # print "yea %s" % op
return lambda: cutils.run_cthunk(thunk()) thunk = factory()
return lambda: cutils.run_cthunk(thunk)
except NotImplementedError: except NotImplementedError:
# print "nope %s" % op # print "nope %s" % op
return op._perform return op._perform
...@@ -25,19 +26,19 @@ def experimental_linker(env, target = None): ...@@ -25,19 +26,19 @@ def experimental_linker(env, target = None):
# print 'ospecs: ', [output.spec for output in op.outputs] # print 'ospecs: ', [output.spec for output in op.outputs]
thunks = [fetch(op) for op in order] thunks = [fetch(op) for op in order]
def ret(): def ret():
for thunk, op in zip(thunks, order): # print "=================================================="
# for thunk, op in zip(thunks, order):
# print op # print op
# print 'in: ', [id(input.data) for input in op.inputs] # print 'in: ', [id(input.data) for input in op.inputs]
# print 'out:', [id(output.data) for output in op.outputs] # print 'out:', [id(output.data) for output in op.outputs]
thunk()
# for thunk in thunks:
# thunk() # thunk()
for thunk in thunks:
thunk()
if not target: if not target:
return ret return ret
else: else:
raise NotImplementedError("Cannot write thunk representation to a file.") raise NotImplementedError("Cannot write thunk representation to a file.")
class profile_linker: class profile_linker:
def __init__(self, env): def __init__(self, env):
self.order = env.toposort() self.order = env.toposort()
......
...@@ -40,7 +40,9 @@ def input(x): ...@@ -40,7 +40,9 @@ def input(x):
if isinstance(x, numpy.ndarray): if isinstance(x, numpy.ndarray):
return NumpyR(x) return NumpyR(x)
elif isinstance(x, (int, float)): elif isinstance(x, (int, float)):
return NumpyR(numpy.array(x)) z = numpy.zeros((), dtype = 'float32')
z += x
return NumpyR(z)
elif isinstance(x, gof.Result): elif isinstance(x, gof.Result):
raise TypeError("%s is already a result." % x) raise TypeError("%s is already a result." % x)
else: else:
...@@ -192,23 +194,27 @@ class omega_op(gof.PythonOp): ...@@ -192,23 +194,27 @@ class omega_op(gof.PythonOp):
return UNDEFINED return UNDEFINED
def c_code(self, converters = None): def c_code(self, converters = None):
(inames, onames), behavior = self._c_impl() (inames, onames) = self.variable_names()
behavior = self._c_impl()
return cgen(self.__class__.__name__, behavior, inames + onames, self.inputs + self.outputs, converters) return cgen(self.__class__.__name__, behavior, inames + onames, self.inputs + self.outputs, converters)
# def _alloc(self): def c_headers(self):
# self.alloc(self.inputs, self.outputs) return []
# def alloc(inputs, outputs): def c_libs(self):
# raise NotImplementedError() return []
def _c_impl(self): def variable_names(self):
(inames, onames), _1, _2, _3 = inspect.getargspec(self.c_impl) (inames, onames), _1, _2, _3 = inspect.getargspec(self.c_impl)
return (inames, onames), self.c_impl(self.inputs, self.outputs) return (inames, onames)
def _c_impl(self):
return self.c_impl(self.inputs, self.outputs)
def c_impl(inputs, outputs): def c_impl(inputs, outputs):
raise NotImplementedError() raise NotImplementedError()
def c_thunk_creator(self): def c_thunk_factory(self):
self.refresh() self.refresh()
d, names, code, struct, converters = self.c_code() d, names, code, struct, converters = self.c_code()
...@@ -218,12 +224,23 @@ class omega_op(gof.PythonOp): ...@@ -218,12 +224,23 @@ class omega_op(gof.PythonOp):
instantiate = weave.ext_tools.ext_function('instantiate', instantiate = weave.ext_tools.ext_function('instantiate',
code, code,
names, names,
local_dict = d, global_dict = {}, type_converters = converters) local_dict = d,
global_dict = {},
type_converters = converters)
instantiate.customize.add_support_code(struct) instantiate.customize.add_support_code(struct)
instantiate.customize.add_extra_compile_arg("-O3")
instantiate.customize.add_extra_compile_arg("-ffast-math")
instantiate.customize.add_extra_compile_arg("-falign-loops=4")
# instantiate.customize.add_extra_compile_arg("-mfpmath=sse")
for header in self.c_headers():
instantiate.customize.add_header(header)
for lib in self.c_libs():
instantiate.customize.add_library(lib)
mod.add_function(instantiate) mod.add_function(instantiate)
mod.compile(location = 'compiled') mod.compile(location = 'compiled')
module = __import__("compiled.%s" % module_name, fromlist = [module_name]) module = __import__("compiled.%s" % module_name, {}, {}, [module_name])
def creator(): def creator():
return module.instantiate(*[x.data for x in self.inputs + self.outputs]) return module.instantiate(*[x.data for x in self.inputs + self.outputs])
...@@ -239,25 +256,25 @@ class omega_op(gof.PythonOp): ...@@ -239,25 +256,25 @@ class omega_op(gof.PythonOp):
cutils.run_cthunk(thunk) cutils.run_cthunk(thunk)
def elemwise_wrap_old(beforeloop, inloop, afterloop, loop_vars, writable_loop_vars): # def elemwise_wrap_old(beforeloop, inloop, afterloop, loop_vars, writable_loop_vars):
return """ # return """
%(beforeloop)s # %(beforeloop)s
for (int i = 0; i < N_%(v1)s[0]; i++) { # for (int i = 0; i < N_%(v1)s[0]; i++) {
for (int j = 0; j < N_%(v1)s[1]; j++) { # for (int j = 0; j < N_%(v1)s[1]; j++) {
%(idefs)s # %(idefs)s
%(odefs)s # %(odefs)s
%(inloop)s # %(inloop)s
} # }
} # }
%(afterloop)s # %(afterloop)s
""" % dict(v1 = (loop_vars + writable_loop_vars)[0], # """ % dict(v1 = (loop_vars + writable_loop_vars)[0],
idefs = "\n".join(["_%s_dtype %s = _%s2(i, j);" % (loop_var, loop_var, loop_var.upper()) # idefs = "\n".join(["_%s_dtype %s = _%s2(i, j);" % (loop_var, loop_var, loop_var.upper())
for loop_var in loop_vars]), # for loop_var in loop_vars]),
odefs = "\n".join(["_%s_dtype& %s = _%s2(i, j);" % (writable_loop_var, writable_loop_var, writable_loop_var.upper()) # odefs = "\n".join(["_%s_dtype& %s = _%s2(i, j);" % (writable_loop_var, writable_loop_var, writable_loop_var.upper())
for writable_loop_var in writable_loop_vars]), # for writable_loop_var in writable_loop_vars]),
beforeloop = beforeloop, # beforeloop = beforeloop,
inloop = inloop, # inloop = inloop,
afterloop = afterloop) # afterloop = afterloop)
def elemwise_loopcode(loopcode, init_template, next_template, acquire_template, cleanup_template, loop_vars, writable_loop_vars, aliases): def elemwise_loopcode(loopcode, init_template, next_template, acquire_template, cleanup_template, loop_vars, writable_loop_vars, aliases):
all_loop_vars = loop_vars + writable_loop_vars all_loop_vars = loop_vars + writable_loop_vars
...@@ -266,11 +283,11 @@ def elemwise_loopcode(loopcode, init_template, next_template, acquire_template, ...@@ -266,11 +283,11 @@ def elemwise_loopcode(loopcode, init_template, next_template, acquire_template,
init = "".join([init_template % dict(loop_var = loop_var) for loop_var in all_loop_vars]), init = "".join([init_template % dict(loop_var = loop_var) for loop_var in all_loop_vars]),
next = "".join([next_template % dict(loop_var = loop_var) for loop_var in all_loop_vars]), next = "".join([next_template % dict(loop_var = loop_var) for loop_var in all_loop_vars]),
cleanup = "".join([cleanup_template % dict(loop_var = loop_var) for loop_var in all_loop_vars]), cleanup = "".join([cleanup_template % dict(loop_var = loop_var) for loop_var in all_loop_vars]),
idefs = "".join([("_%(loop_var)s_dtype %(loop_var)s = " + acquire_template + ";\n") idefs = "".join([("%(loop_var)s_dtype %(loop_var)s_i = " + acquire_template + ";\n")
% dict(loop_var = loop_var) for loop_var in loop_vars]), % dict(loop_var = loop_var) for loop_var in loop_vars]),
odefs = "".join([("_%(loop_var)s_dtype& %(loop_var)s = " + acquire_template + ";\n") odefs = "".join([("%(loop_var)s_dtype& %(loop_var)s_i = " + acquire_template + ";\n")
% dict(loop_var = loop_var) for loop_var in writable_loop_vars]), % dict(loop_var = loop_var) for loop_var in writable_loop_vars]),
aliasdefs = "".join(["_%(v1)s_dtype %(v1)s = %(v2)s;\n" % dict(v1=v1, v2=v2) aliasdefs = "".join(["%(v1)s_dtype %(v1)s_i = %(v2)s_i;\n" % dict(v1=v1, v2=v2)
for v1, v2 in aliases.items()]), for v1, v2 in aliases.items()]),
loopcode = loopcode loopcode = loopcode
) )
...@@ -291,18 +308,18 @@ def elemwise_loopcode(loopcode, init_template, next_template, acquire_template, ...@@ -291,18 +308,18 @@ def elemwise_loopcode(loopcode, init_template, next_template, acquire_template,
def elemwise_wrap(beforeloop, inloop, afterloop, loop_vars, writable_loop_vars, aliases): def elemwise_wrap(beforeloop, inloop, afterloop, loop_vars, writable_loop_vars, aliases):
general_init = "PyArrayIterObject* _%(loop_var)s_iter = (PyArrayIterObject*)PyArray_IterNew((PyObject*)_%(loop_var)s_array);\n" general_init = "PyArrayIterObject* %(loop_var)s_iter = (PyArrayIterObject*)PyArray_IterNew((PyObject*)%(loop_var)s);\n"
# "if (_%(loop_var)s_iter == NULL) {\n" \ # "if (%(loop_var)s_iter == NULL) {\n" \
# " PyErr_SetString(PyExc_ValueError, \"Could not make an iterator over variable %(loop_var)s.\");\n" \ # " PyErr_SetString(PyExc_ValueError, \"Could not make an iterator over variable %(loop_var)s.\");\n" \
# " return 1;\n" \ # " return 1;\n" \
# "}\n" # "}\n"
general_next = "PyArray_ITER_NEXT(_%(loop_var)s_iter);\n" general_next = "PyArray_ITER_NEXT(%(loop_var)s_iter);\n"
general_acquire = "*((_%(loop_var)s_dtype*)_%(loop_var)s_iter->dataptr)"; general_acquire = "*((%(loop_var)s_dtype*)%(loop_var)s_iter->dataptr)";
general_cleanup = "if (_%(loop_var)s_iter) Py_DECREF(_%(loop_var)s_iter);\n"; general_cleanup = "if (%(loop_var)s_iter) Py_DECREF(%(loop_var)s_iter);\n";
contiguous_init = "_%(loop_var)s_dtype* _%(loop_var)s_iter = (_%(loop_var)s_dtype*)PyArray_DATA(_%(loop_var)s_array);\n" contiguous_init = "%(loop_var)s_dtype* __restrict__ %(loop_var)s_iter = (%(loop_var)s_dtype*)PyArray_DATA(%(loop_var)s);\n"
contiguous_next = "_%(loop_var)s_iter++;\n" contiguous_next = "%(loop_var)s_iter++;\n"
contiguous_acquire = "*_%(loop_var)s_iter" contiguous_acquire = "*%(loop_var)s_iter"
contiguous_cleanup = "" contiguous_cleanup = ""
all_loop_vars = loop_vars + writable_loop_vars all_loop_vars = loop_vars + writable_loop_vars
...@@ -317,14 +334,14 @@ def elemwise_wrap(beforeloop, inloop, afterloop, loop_vars, writable_loop_vars, ...@@ -317,14 +334,14 @@ def elemwise_wrap(beforeloop, inloop, afterloop, loop_vars, writable_loop_vars,
inloop, inloop,
contiguous_init, contiguous_next, contiguous_acquire, contiguous_cleanup, contiguous_init, contiguous_next, contiguous_acquire, contiguous_cleanup,
loop_vars, writable_loop_vars, aliases), loop_vars, writable_loop_vars, aliases),
contiguity_check = "".join(["all_c_contiguous &= PyArray_ISCARRAY(_%(loop_var)s_array);\n" \ contiguity_check = "".join(["all_c_contiguous &= PyArray_ISCARRAY(%(loop_var)s);\n" \
"all_f_contiguous &= PyArray_ISFARRAY(_%(loop_var)s_array);\n" \ "all_f_contiguous &= PyArray_ISFARRAY(%(loop_var)s);\n" \
% dict(loop_var = loop_var) % dict(loop_var = loop_var)
for loop_var in all_loop_vars]), for loop_var in all_loop_vars]),
afterloop = afterloop) afterloop = afterloop)
code = """ code = """
npy_intp __elemwise_size = PyArray_SIZE(_%(v1)s_array); npy_intp __elemwise_size = PyArray_SIZE(%(v1)s);
%(beforeloop)s %(beforeloop)s
bool all_c_contiguous = 1; bool all_c_contiguous = 1;
bool all_f_contiguous = 1; bool all_f_contiguous = 1;
...@@ -363,15 +380,16 @@ class elemwise(omega_op): ...@@ -363,15 +380,16 @@ class elemwise(omega_op):
try: try:
return self.specs(*[input.spec for input in self.inputs]) return self.specs(*[input.spec for input in self.inputs])
except NotImplementedError: except NotImplementedError:
(inames, onames), code = self._c_foreach() inames, onames = self.variable_names()
linames, lonames = self.loop_variables()
for oname in onames: for oname in onames:
if oname.startswith("_"): if oname not in lonames:
raise Exception("cannot infer a specification automatically for variable " \ raise Exception("cannot infer a specification automatically for variable " \
"%s because it is not part of the elementwise loop - "\ "%s because it is not part of the elementwise loop - "\
"please override the specs method" % oname[1:]) "please override the specs method" % oname)
shape, dtype = None, None shape, dtype = None, None
for iname, input in zip(inames, self.inputs): for iname, input in zip(inames, self.inputs):
if not iname.startswith("_"): if iname in linames:
if input.spec: if input.spec:
shape = input.spec[2] shape = input.spec[2]
if shape is None: if shape is None:
...@@ -416,68 +434,60 @@ class elemwise(omega_op): ...@@ -416,68 +434,60 @@ class elemwise(omega_op):
if output not in except_list: if output not in except_list:
output.set_value(input.data) output.set_value(input.data)
@staticmethod
def is_loop_var(name):
return name.endswith("_i")
@staticmethod
def extract_name(name):
if name.endswith("_i"):
return name[:-2]
else:
return name
@classmethod
def variable_names(cls):
(inames, onames), _1, _2, _3 = inspect.getargspec(cls.c_foreach)
spec = ([cls.extract_name(name) for name in inames],
[cls.extract_name(name) for name in onames])
if cls.c_init is not elemwise.c_init:
(inames, onames), _1, _2, _3 = inspect.getargspec(cls.c_init)
assert spec == (list(inames), list(onames))
if cls.c_finalize is not elemwise.c_finalize:
(inames, onames), _1, _2, _3 = inspect.getargspec(cls.c_finalize)
assert spec == (list(inames), list(onames))
return spec
# def _alloc(self): @classmethod
# if isinstance(self, inplace): def loop_variables(cls):
# dmap = self.destroy_map() (inames, onames), _1, _2, _3 = inspect.getargspec(cls.c_foreach)
# else: return ([cls.extract_name(name) for name in inames if cls.is_loop_var(name)],
# dmap = {} [cls.extract_name(name) for name in onames if cls.is_loop_var(name)])
# try:
# return self.alloc(self.inputs, self.outputs)
# except NotImplementedError:
# (inames, onames), _1, _2, _3 = inspect.getargspec(self.c_foreach)
# for oname in onames:
# if oname.startswith("_"):
# raise Exception("cannot infer an allocation policy automatically for variable " \
# "%s because it is not part of the elementwise loop - "\
# "please override the alloc method" % oname[1:])
# shape, dtype = None, None
# for iname, input in zip(inames, self.inputs):
# if not iname.startswith("_"):
# shape = input.data
# if shape is None:
# raise Exception("cannot infer an allocation policy automatically for output variables " \
# "because there is no input variable in the loop from which to get the shape")
# dtype = upcast(*[input.data.dtype
# for iname, input in zip(inames, self.inputs)
# if isinstance(input.data, numpy.ndarray)])
# for output in self.outputs:
# inplace_inputs = dmap.get(output, [])
# if inplace_inputs:
# assert len(inplace_inputs) == 1
# output.data = inplace_inputs[0].data
# else:
# output.data = numpy.ndarray(shape, dtype)
def _c_init(self): def _c_init(self):
(inames, onames), _1, _2, _3 = inspect.getargspec(self.c_init) return self.c_init(self.inputs, self.outputs)
return [list(inames), list(onames)], self.c_init(self.inputs, self.outputs)
def c_init(inputs, outputs): def c_init(inputs, outputs):
return "" return ""
def _c_foreach(self): def _c_foreach(self):
(inames, onames), _1, _2, _3 = inspect.getargspec(self.c_foreach) return self.c_foreach(self.inputs, self.outputs)
return [list(inames), list(onames)], self.c_foreach(self.inputs, self.outputs)
def c_foreach(inputs, outputs): def c_foreach(inputs, outputs):
return "" raise NotImplementedError()
def _c_finalize(self): def _c_finalize(self):
(inames, onames), _1, _2, _3 = inspect.getargspec(self.c_finalize) return self.c_finalize(self.inputs, self.outputs)
return [list(inames), list(onames)], self.c_finalize(self.inputs, self.outputs)
def c_finalize(inputs, outputs): def c_finalize(inputs, outputs):
return "" return ""
def c_code(self, converters = None, elemwise_wrap = elemwise_wrap): def c_code(self, converters = None, elemwise_wrap = elemwise_wrap):
def mangle(name): def mangle(name):
if name.startswith("_"): if name.endswith("_i"):
return name return name[:-2]
else: else:
return "_" + name return name
try: try:
self._c_impl() self._c_impl()
...@@ -485,42 +495,47 @@ class elemwise(omega_op): ...@@ -485,42 +495,47 @@ class elemwise(omega_op):
except NotImplementedError: except NotImplementedError:
pass pass
spec_b, before = self._c_init() before = self._c_init()
spec_d, during = self._c_foreach() during = self._c_foreach()
spec_a, after = self._c_finalize() after = self._c_finalize()
# Sanity check - apart from loop vars, variables are shared in the before/during/after parts # # Sanity check - apart from loop vars, variables are shared in the before/during/after parts
if before and spec_b != spec_d: # if before and spec_b != spec_d:
raise Exception("The input signature of c_init differs from the input signature of c_foreach.") # raise Exception("The input signature of c_init differs from the input signature of c_foreach.")
if after and spec_a != spec_d: # if after and spec_a != spec_d:
raise Exception("The input signature of c_finalize differs from the input signature of c_foreach.") # raise Exception("The input signature of c_finalize differs from the input signature of c_foreach.")
(inames, onames) = spec_d (inames, onames) = self.variable_names()
(linames, lonames) = self.loop_variables()
aliases = {} aliases = {}
if isinstance(self, inplace): if isinstance(self, inplace):
dmap = self.destroy_map() dmap = self.destroy_map()
for oname, output in zip(onames, self.outputs): for oname, output in zip(onames, self.outputs):
if not oname.startswith("_"): if oname in lonames:
for input in dmap.get(output, []): for input in dmap.get(output, []):
aliases[inames[self.inputs.index(input)]] = oname aliases[inames[self.inputs.index(input)]] = oname
behavior = elemwise_wrap(before, during, after, behavior = elemwise_wrap(before, during, after,
[iname for iname in inames if not iname.startswith("_") and not iname in aliases], [name for name in linames if name not in aliases],
[oname for oname in onames if not oname.startswith("_")], lonames,
# [mangle(iname) for iname in inames if iname.endswith("_i") and not iname in aliases],
# [mangle(oname) for oname in onames if oname.endswith("_i")],
aliases) aliases)
inames = [mangle(name) for name in inames] # inames = [mangle(name) for name in inames]
onames = [mangle(name) for name in onames] # onames = [mangle(name) for name in onames]
return cgen(self.__class__.__name__, behavior, inames + onames, self.inputs + self.outputs, converters) return cgen(self.__class__.__name__, behavior, inames + onames, self.inputs + self.outputs, converters)
@classmethod @classmethod
def inplace_version(cls, dmap = {0: 0}): def inplace_version(cls, dmap = {0: 0}):
(inames, onames), _1, _2, _3 = inspect.getargspec(cls.c_foreach) # (inames, onames), _1, _2, _3 = inspect.getargspec(cls._c_foreach)
inames, onames = cls.variable_names()
linames, lonames = cls.loop_variables()
for i, oname in enumerate(onames): for i, oname in enumerate(onames):
if i in dmap: if i in dmap:
assert not oname.startswith("_") assert oname in lonames
class C(cls, inplace): class C(cls, inplace):
def destroy_map(self): def destroy_map(self):
...@@ -577,6 +592,7 @@ def scalar_switch(normal_f, scalar_f, scalar_f_reverse = None): ...@@ -577,6 +592,7 @@ def scalar_switch(normal_f, scalar_f, scalar_f_reverse = None):
class NumpyR(gof.PythonR): class NumpyR(gof.PythonR):
def set_value(self, value): def set_value(self, value):
assert value is not None
if value is None or value is UNCOMPUTED: if value is None or value is UNCOMPUTED:
self.data = UNCOMPUTED self.data = UNCOMPUTED
elif isinstance(value, numpy.ndarray): elif isinstance(value, numpy.ndarray):
...@@ -657,9 +673,15 @@ def tensor_scalar_impl(impl): ...@@ -657,9 +673,15 @@ def tensor_scalar_impl(impl):
class tensor_scalar_op(elemwise): class tensor_scalar_op(elemwise):
def c_init((x, _a), (z, )): def c_init((x, _a), (z, )):
return "_a_dtype a = _a[0];" return "_a_dtype a = ((_a_dtype*)PyArray_DATA(_a))[0];"
@classmethod
def variable_names(cls):
return (['x', '_a'], ['z', ])
@classmethod
def loop_variables(cls):
return (['x', ], ['z', ])
def _c_foreach(self): def _c_foreach(self):
return [['x', '_a'], ['z', ]], "z = %s;" % self.c_expr return "z_i = %s;" % self.c_expr
...@@ -669,8 +691,8 @@ class add_elemwise(elemwise): ...@@ -669,8 +691,8 @@ class add_elemwise(elemwise):
impl = assert_same_shapes(numpy.ndarray.__add__) impl = assert_same_shapes(numpy.ndarray.__add__)
def grad(x, y, gz): def grad(x, y, gz):
return gz return gz
def c_foreach((x, y), (z, )): def c_foreach((x_i, y_i), (z_i, )):
return "z = x + y;" return "z_i = x_i + y_i;"
iadd_elemwise = add_elemwise.inplace_version() iadd_elemwise = add_elemwise.inplace_version()
iadd_elemwise.set_impl(assert_same_shapes(numpy.ndarray.__iadd__)) iadd_elemwise.set_impl(assert_same_shapes(numpy.ndarray.__iadd__))
...@@ -680,7 +702,7 @@ class add_scalar(tensor_scalar_op): ...@@ -680,7 +702,7 @@ class add_scalar(tensor_scalar_op):
impl = tensor_scalar_impl(numpy.ndarray.__add__) impl = tensor_scalar_impl(numpy.ndarray.__add__)
def grad(x, a, gz): def grad(x, a, gz):
return gz, sum(gz) return gz, sum(gz)
c_expr = "x + a" c_expr = "x_i + a"
iadd_scalar = add_scalar.inplace_version() iadd_scalar = add_scalar.inplace_version()
iadd_scalar.set_impl(tensor_scalar_impl(numpy.ndarray.__iadd__)) iadd_scalar.set_impl(tensor_scalar_impl(numpy.ndarray.__iadd__))
...@@ -690,8 +712,8 @@ class twice(elemwise): ...@@ -690,8 +712,8 @@ class twice(elemwise):
return scale(gz, 2.0) return scale(gz, 2.0)
def impl(x): def impl(x):
return x + x return x + x
def c_foreach((x, ), (z, )): def c_foreach((x_i, ), (z_i, )):
"z = x + x;" "z_i = x_i + x_i;"
itwice = twice.inplace_version() itwice = twice.inplace_version()
...@@ -702,8 +724,8 @@ class sub_elemwise(elemwise): ...@@ -702,8 +724,8 @@ class sub_elemwise(elemwise):
impl = assert_same_shapes(numpy.ndarray.__sub__) impl = assert_same_shapes(numpy.ndarray.__sub__)
def grad(x, y, gz): def grad(x, y, gz):
return gz, -gz return gz, -gz
def c_foreach((x, y), (z, )): def c_foreach((x_i, y_i), (z_i, )):
return "z = x - y;" return "z_i = x_i - y_i;"
isub_elemwise = sub_elemwise.inplace_version() isub_elemwise = sub_elemwise.inplace_version()
isub_elemwise.set_impl(assert_same_shapes(numpy.ndarray.__isub__)) isub_elemwise.set_impl(assert_same_shapes(numpy.ndarray.__isub__))
...@@ -724,8 +746,8 @@ class mul_elemwise(elemwise): ...@@ -724,8 +746,8 @@ class mul_elemwise(elemwise):
impl = assert_same_shapes(numpy.ndarray.__mul__) impl = assert_same_shapes(numpy.ndarray.__mul__)
def grad(x, y, gz): def grad(x, y, gz):
return mul(y, gz), mul(x, gz) return mul(y, gz), mul(x, gz)
def c_foreach((x, y), (z, )): def c_foreach((x_i, y_i), (z_i, )):
return "z = x * y;" return "z_i = x_i * y_i;"
imul_elemwise = mul_elemwise.inplace_version() imul_elemwise = mul_elemwise.inplace_version()
imul_elemwise.set_impl(assert_same_shapes(numpy.ndarray.__imul__)) imul_elemwise.set_impl(assert_same_shapes(numpy.ndarray.__imul__))
...@@ -735,7 +757,7 @@ class scale(tensor_scalar_op): ...@@ -735,7 +757,7 @@ class scale(tensor_scalar_op):
impl = tensor_scalar_impl(numpy.ndarray.__mul__) impl = tensor_scalar_impl(numpy.ndarray.__mul__)
def grad(x, a, gz): def grad(x, a, gz):
return scale(a, gz), sum(mul_elemwise(x, gz)) return scale(a, gz), sum(mul_elemwise(x, gz))
c_expr = "x * a" c_expr = "x_i * a"
iscale = scale.inplace_version() iscale = scale.inplace_version()
iscale.set_impl(tensor_scalar_impl(numpy.ndarray.__imul__)) iscale.set_impl(tensor_scalar_impl(numpy.ndarray.__imul__))
...@@ -746,8 +768,8 @@ class sqr(elemwise): ...@@ -746,8 +768,8 @@ class sqr(elemwise):
return x * x return x * x
def grad(x, gz): def grad(x, gz):
return scale(mul_elemwise(x, gz), 2.0) return scale(mul_elemwise(x, gz), 2.0)
def c_foreach((x, ), (z, )): def c_foreach((x_i, ), (z_i, )):
return "z = x * x;" return "z_i = x_i * x_i;"
isqr = sqr.inplace_version() isqr = sqr.inplace_version()
isqr.set_impl(lambda x: x.__imul__(x)) isqr.set_impl(lambda x: x.__imul__(x))
...@@ -758,8 +780,8 @@ class sqrt(elemwise): ...@@ -758,8 +780,8 @@ class sqrt(elemwise):
impl = numpy.sqrt impl = numpy.sqrt
def grad(x, gz): def grad(x, gz):
return scale(div(gz, sqrt(x)), 0.5) return scale(div(gz, sqrt(x)), 0.5)
def c_foreach((x, ), (z, )): def c_foreach((x_i, ), (z_i, )):
"z = pow(x, 0.5);" "z_i = pow(x_i, 0.5);"
isqrt = sqrt.inplace_version() isqrt = sqrt.inplace_version()
isqrt.set_impl(lambda x: x.__ipow__(0.5)) isqrt.set_impl(lambda x: x.__ipow__(0.5))
...@@ -770,8 +792,8 @@ isqrt.set_impl(lambda x: x.__ipow__(0.5)) ...@@ -770,8 +792,8 @@ isqrt.set_impl(lambda x: x.__ipow__(0.5))
class exp(elemwise): class exp(elemwise):
impl = numpy.exp impl = numpy.exp
def c_foreach((x, ), (z, )): def c_foreach((x_i, ), (z_i, )):
return "z = exp(x);" return "z_i = exp(x_i);"
## Element-wise division ## ## Element-wise division ##
...@@ -780,8 +802,8 @@ class div_elemwise(elemwise): ...@@ -780,8 +802,8 @@ class div_elemwise(elemwise):
impl = assert_same_shapes(numpy.ndarray.__div__) impl = assert_same_shapes(numpy.ndarray.__div__)
def grad(x, y, gz): def grad(x, y, gz):
return div(gz, y), -div(mul(x, gz), sqr(y)) return div(gz, y), -div(mul(x, gz), sqr(y))
def c_foreach((x, y), (z, )): def c_foreach((x_i, y_i), (z_i, )):
return "z = x / y;" return "z_i = x_i / y_i;"
idiv_elemwise = div_elemwise.inplace_version() idiv_elemwise = div_elemwise.inplace_version()
idiv_elemwise.set_impl(assert_same_shapes(numpy.ndarray.__idiv__)) idiv_elemwise.set_impl(assert_same_shapes(numpy.ndarray.__idiv__))
...@@ -803,8 +825,8 @@ class neg(elemwise): ...@@ -803,8 +825,8 @@ class neg(elemwise):
impl = numpy.ndarray.__neg__ impl = numpy.ndarray.__neg__
def grad(x, gz): def grad(x, gz):
return -gz return -gz
def c_foreach((x, ), (z, )): def c_foreach((x_i, ), (z_i, )):
return "z = -x;" return "z_i = -x_i;"
ineg = neg.inplace_version() ineg = neg.inplace_version()
ineg.set_impl(lambda x: x.__imul__(-1)) ineg.set_impl(lambda x: x.__imul__(-1))
...@@ -814,8 +836,8 @@ class inv_elemwise(elemwise): ...@@ -814,8 +836,8 @@ class inv_elemwise(elemwise):
impl = lambda x: 1 / x impl = lambda x: 1 / x
def grad(x, gz): def grad(x, gz):
return -gz return -gz
def c_foreach((x, ), (z, )): def c_foreach((x_i, ), (z_i, )):
return "z = 1 / x;" return "z_i = 1 / x_i;"
iinv_elemwise = inv_elemwise.inplace_version() iinv_elemwise = inv_elemwise.inplace_version()
...@@ -827,18 +849,92 @@ class dot(omega_op): ...@@ -827,18 +849,92 @@ class dot(omega_op):
def grad(x, y, gz): def grad(x, y, gz):
return dot(gz, transpose(y)), dot(transpose(x), gz) return dot(gz, transpose(y)), dot(transpose(x), gz)
def specs(x, y): def specs(x, y):
# todo: handle non-matrices! # todo: handle all tensors!
if len(x[2]) == 0:
shape = y[2]
elif len(y[2]) == 0:
shape = x[2]
elif len(x[2]) == 1:
shape = (y[2][1], )
elif len(y[2]) == 1:
shape = (x[2][0], )
else:
shape = (x[2][0], y[2][1]) shape = (x[2][0], y[2][1])
return (numpy.ndarray, upcast(x[1], y[1]), shape) return (numpy.ndarray, upcast(x[1], y[1]), shape)
def c_headers(self):
return ["<gsl/gsl_cblas.h>"]
def c_libs(self):
return ["cblas", "atlas", "g2c"]
def c_impl((_x, _y), (_z, )):
dtype = _x.spec[1]
if dtype.char == 'f':
gemm = 'cblas_sgemm'
elif dtype.char == 'd':
gemm = 'cblas_dgemm'
else:
raise NotImplementedError
return """
%(dtype)s a = 1.0;
%(dtype)s b = 0.0;
%(dtype)s* x = (%(dtype)s*)PyArray_DATA(_x);
%(dtype)s* y = (%(dtype)s*)PyArray_DATA(_y);
%(dtype)s* z = (%(dtype)s*)PyArray_DATA(_z);
npy_intp* Nx = _x->dimensions;
npy_intp* Ny = _y->dimensions;
npy_intp* Nz = _z->dimensions;
npy_intp* Sx = _x->strides;
npy_intp* Sy = _y->strides;
npy_intp* Sz = _z->strides;
if ((Nx[0] != Nz[0]) || (Nx[1] != Ny[0]) || (Ny[1] != Nz[1]))
{
PyErr_SetString(PyExc_ValueError, "mat_gemm input array size mismatch");
fprintf(stderr, "Should be calling mat_gemm_general, but quitting instead\\n");
return 1;
}
if ((Sx[0] < 1) || (Sx[1] < 1)
|| (Sy[0] < 1) || (Sy[1] < 1)
|| (Sz[0] < 1) || (Sz[1] < 1))
{
fprintf(stderr, "Should be calling mat_gemm_general, but quitting instead\\n");
return 1;
//return mat_gemm_general(a, A, B, b, C);
}
//TODO: OPTIMIZE for many special cases:
//- gemv
//- ger
//- ddot
//- others?
int unit = 0;
unit |= ((Sx[1] == sizeof(%(dtype)s)) ? 0x0 : (Sx[0] == sizeof(%(dtype)s)) ? 0x1 : 0x2) << 0;
unit |= ((Sy[1] == sizeof(%(dtype)s)) ? 0x0 : (Sy[0] == sizeof(%(dtype)s)) ? 0x1 : 0x2) << 4;
unit |= ((Sz[1] == sizeof(%(dtype)s)) ? 0x0 : (Sz[0] == sizeof(%(dtype)s)) ? 0x1 : 0x2) << 8;
/* create appropriate strides for malformed matrices that are row or column
* vectors
*/
size_t sx_0 = (Nx[0] > 1) ? Sx[0]/sizeof(%(dtype)s) : Nx[1];
size_t sx_1 = (Nx[1] > 1) ? Sx[1]/sizeof(%(dtype)s) : Nx[0];
size_t sy_0 = (Ny[0] > 1) ? Sy[0]/sizeof(%(dtype)s) : Ny[1];
size_t sy_1 = (Ny[1] > 1) ? Sy[1]/sizeof(%(dtype)s) : Ny[0];
size_t sz_0 = (Nz[0] > 1) ? Sz[0]/sizeof(%(dtype)s) : Nz[1];
size_t sz_1 = (Nz[1] > 1) ? Sz[1]/sizeof(%(dtype)s) : Nz[0];
switch(unit)
{
case 0x000: %(gemm)s(CblasRowMajor, CblasNoTrans, CblasNoTrans, Nz[0], Nz[1], Nx[1], a, x, sx_0, y, sy_0, b, z, sz_0); break;
case 0x001: %(gemm)s(CblasRowMajor, CblasTrans, CblasNoTrans, Nz[0], Nz[1], Nx[1], a, x, sx_1, y, sy_0, b, z, sz_0); break;
case 0x010: %(gemm)s(CblasRowMajor, CblasNoTrans, CblasTrans, Nz[0], Nz[1], Nx[1], a, x, sx_0, y, sy_1, b, z, sz_0); break;
case 0x011: %(gemm)s(CblasRowMajor, CblasTrans, CblasTrans, Nz[0], Nz[1], Nx[1], a, x, sx_1, y, sy_1, b, z, sz_0); break;
case 0x100: %(gemm)s(CblasColMajor, CblasTrans, CblasTrans, Nz[0], Nz[1], Nx[1], a, x, sx_0, y, sy_0, b, z, sz_1); break;
case 0x101: %(gemm)s(CblasColMajor, CblasNoTrans, CblasTrans, Nz[0], Nz[1], Nx[1], a, x, sx_1, y, sy_0, b, z, sz_1); break;
case 0x110: %(gemm)s(CblasColMajor, CblasTrans, CblasNoTrans, Nz[0], Nz[1], Nx[1], a, x, sx_0, y, sy_1, b, z, sz_1); break;
case 0x111: %(gemm)s(CblasColMajor, CblasNoTrans, CblasNoTrans, Nz[0], Nz[1], Nx[1], a, x, sx_1, y, sy_1, b, z, sz_1); break;
default:
fprintf(stderr, "Should be calling mat_gemm_general, but quitting instead\\n");
return 1;
};
/* v 1 */
""" % dict(dtype = '_x_dtype', gemm = gemm)
## Transposition ## ## Transposition ##
...@@ -847,8 +943,51 @@ class transpose(omega_op, view): ...@@ -847,8 +943,51 @@ class transpose(omega_op, view):
def grad(x, gz): def grad(x, gz):
return transpose_copy(gz) return transpose_copy(gz)
def specs(x): def specs(x):
# todo: handle non-matrices! # todo: handle all tensors!
return (numpy.ndarray, x[1], (x[2][1], x[2][0])) return (numpy.ndarray, x[1], (x[2][1], x[2][0]))
def c_impl((x, ), (xt, )):
return """
const int l = x->nd;
if (xt->nd != x->nd)
{
// this technique comes from PyArray_Resize()
npy_intp * dimptr = (npy_intp*)PyDimMem_RENEW(xt->dimensions, 2 * x->nd);
if (!dimptr)
{
fprintf(stderr, "%i: %p\\n", __LINE__, dimptr);
assert(!"dammit");
}
xt->nd = x->nd;
xt->dimensions = dimptr;
xt->strides = dimptr + x->nd;
//fprintf(stderr, "transpose: %i %i %i %i\\n", x->dimensions[0], x->dimensions[1], x->strides[0], x->strides[1]);
}
//fprintf(stderr, "%s %i %p\\n", __FILE__, __LINE__, xt->base);
if ( xt->base != (PyObject*)x)
{
//fprintf(stderr, "%i: %p\\n", __LINE__, xt->base);
if ((xt->base) and (xt->base != Py_None)) Py_DECREF(xt->base);
Py_INCREF(x);
xt->base = (PyObject*)x;
}
xt->data = x->data;
for (int i = 0; i < l; ++i)
{
xt->dimensions[i] = x->dimensions[l-i-1];
xt->strides[i] = x->strides[l-i-1];
//fprintf(stderr, "%li\t", x->dimensions[i]);
}
//fprintf(stderr, "\\n");
xt->flags &= ~NPY_OWNDATA;
PyArray_UpdateFlags(xt, NPY_CONTIGUOUS|NPY_FORTRAN|NPY_ALIGNED|NPY_WRITEABLE);
//this function is described in
// ~/zzz.NOBACKUP/pub/src/numpy-1.0.3.1/numpy/core/src/arrayobject.c:1890
return 0;
"""
def transpose_copy(x): def transpose_copy(x):
return array_copy(transpose(x)) return array_copy(transpose(x))
...@@ -859,8 +998,8 @@ def transpose_copy(x): ...@@ -859,8 +998,8 @@ def transpose_copy(x):
class array_copy(elemwise): class array_copy(elemwise):
impl = numpy.array impl = numpy.array
grad = lambda x, gz: gz grad = lambda x, gz: gz
def c_foreach((x, ), (z, )): def c_foreach((x_i, ), (z_i, )):
return "z = x;" return "z_i = x_i;"
## Power ## ## Power ##
...@@ -869,8 +1008,8 @@ class pow_elemwise(elemwise): ...@@ -869,8 +1008,8 @@ class pow_elemwise(elemwise):
impl = assert_same_shapes(numpy.ndarray.__pow__) impl = assert_same_shapes(numpy.ndarray.__pow__)
def grad(x, s, gz): def grad(x, s, gz):
return gz * s * (pow_elemwise(x, s-1.0)) return gz * s * (pow_elemwise(x, s-1.0))
def c_foreach((x, s), (z, )): def c_foreach((x_i, s_i), (z_i, )):
return "z = pow(x, s)" return "z_i = pow(x_i, s_i)"
ipow_elemwise = pow_elemwise.inplace_version() ipow_elemwise = pow_elemwise.inplace_version()
ipow_elemwise.set_impl(assert_same_shapes(numpy.ndarray.__ipow__)) ipow_elemwise.set_impl(assert_same_shapes(numpy.ndarray.__ipow__))
...@@ -880,13 +1019,13 @@ class pow_scalar_l(tensor_scalar_op): ...@@ -880,13 +1019,13 @@ class pow_scalar_l(tensor_scalar_op):
impl = tensor_scalar_impl(lambda x, y: numpy.ndarray.__pow__(y, x)) impl = tensor_scalar_impl(lambda x, y: numpy.ndarray.__pow__(y, x))
def grad(x, s, gz): def grad(x, s, gz):
return gz * x * (pow_scalar_l(s,x-1.0)) return gz * x * (pow_scalar_l(s,x-1.0))
c_expr = "pow(a, x)" c_expr = "pow(a, x_i)"
class pow_scalar_r(tensor_scalar_op): class pow_scalar_r(tensor_scalar_op):
impl = tensor_scalar_impl(numpy.ndarray.__pow__) impl = tensor_scalar_impl(numpy.ndarray.__pow__)
def grad(x, s, gz): def grad(x, s, gz):
return gz * s * (pow_scalar_r(x,s-1.0)) return gz * s * (pow_scalar_r(x,s-1.0))
c_expr = "pow(x, a)" c_expr = "pow(x_i, a)"
ipow_scalar_r = pow_scalar_r.inplace_version() ipow_scalar_r = pow_scalar_r.inplace_version()
ipow_scalar_r.set_impl(tensor_scalar_impl(numpy.ndarray.__ipow__)) ipow_scalar_r.set_impl(tensor_scalar_impl(numpy.ndarray.__ipow__))
...@@ -905,6 +1044,7 @@ class minmax(elemwise): ...@@ -905,6 +1044,7 @@ class minmax(elemwise):
# _min.data = numpy.ndarray((), x.dtype) # _min.data = numpy.ndarray((), x.dtype)
# _max.data = numpy.ndarray((), x.dtype) # _max.data = numpy.ndarray((), x.dtype)
def c_init((x, ), (_min, _max)): def c_init((x, ), (_min, _max)):
raise NotImplementedError
return """ return """
_x_dtype min = _x[0]; _x_dtype min = _x[0];
_x_dtype max = _x[0]; _x_dtype max = _x[0];
...@@ -923,10 +1063,10 @@ class minmax(elemwise): ...@@ -923,10 +1063,10 @@ class minmax(elemwise):
class fill(elemwise): class fill(elemwise):
impl = lambda model, value: (model * 0) + value impl = lambda model, value: (model * 0) + value
def c_init((model, _value), (z, )): def c_init((model, value), (z, )):
return "_z_dtype value = _value[0];" return "value_dtype value0 = ((value_dtype*)PyArray_DATA(value))[0];"
def c_foreach((model, _value), (z, )): def c_foreach((model_i, value), (z_i, )):
return "z = value;" return "z_i = value0;"
ifill = fill.inplace_version() ifill = fill.inplace_version()
...@@ -937,12 +1077,10 @@ class sum(elemwise): ...@@ -937,12 +1077,10 @@ class sum(elemwise):
return fill(x, gz) return fill(x, gz)
def specs(x): def specs(x):
return (numpy.ndarray, x[1], ()) return (numpy.ndarray, x[1], ())
# def alloc((x, ), (_sum, )): def c_init((x, ), (sum, )):
# _sum.data = numpy.ndarray((), dtype = x.data.dtype) return "sum_dtype* sump = ((sum_dtype*)PyArray_DATA(sum)); sump[0] = 0;"
def c_init((x, ), (_sum, )): def c_foreach((x_i, ), (sum, )):
return "_sum[0] = 0;" return "sump[0] += x_i;"
def c_foreach((x, ), (_sum, )):
return "_sum[0] += x;"
## Array slicing ## ## Array slicing ##
......
...@@ -85,7 +85,7 @@ class PythonR(Result): ...@@ -85,7 +85,7 @@ class PythonR(Result):
__slots__ = ['data', 'spec', 'constant', 'up_to_date'] __slots__ = ['data', 'spec', 'constant', 'up_to_date']
def __init__(self, x = None, constant = False): def __init__(self, x = UNCOMPUTED, constant = False):
self.constant = False self.constant = False
self.set_value(x) self.set_value(x)
self.constant = constant self.constant = constant
......
...@@ -4,21 +4,21 @@ from scipy.weave import c_spec, standard_array_spec ...@@ -4,21 +4,21 @@ from scipy.weave import c_spec, standard_array_spec
class omega_type_converter_extension: class omega_type_converter_extension:
def provides(self): # def provides(self):
""" # """
Returns a list of (c_type, name, init_code) tuples that represent variables # Returns a list of (c_type, name, init_code) tuples that represent variables
the type converter provides to the user's code. # the type converter provides to the user's code.
""" # """
tvars = self.template_vars() # return []
return [(tvars['c_type'], tvars['name'], tvars['var_convert'])]
# def format_provide(self, x):
# return '%s %s = %s;\n' % x
def format_provide(self, x):
return '%s %s = %s;\n' % x
def declaration_code(self, templatize = 0, inline = 0): def declaration_code(self, templatize = 0, inline = 0):
tvars = self.template_vars(inline=inline) tvars = self.template_vars(inline=inline)
code = '%(py_var)s = %(var_lookup)s;\n' % tvars code = '%(py_var)s = %(var_lookup)s;\n' % tvars
code += ''.join([self.format_provide(export) for export in self.provides()]) code += '%(c_type)s %(name)s = %(var_convert)s;\n' % tvars
return code return code
def struct_init_code(self): def struct_init_code(self):
...@@ -28,18 +28,21 @@ class omega_type_converter_extension: ...@@ -28,18 +28,21 @@ class omega_type_converter_extension:
return "Py_DECREF(py_%s);" % self.name return "Py_DECREF(py_%s);" % self.name
def struct_members_code(self): def struct_members_code(self):
tvars = self.template_vars()
res = "PyObject* py_%s;\n" % self.name res = "PyObject* py_%s;\n" % self.name
return res + ''.join(['%s_type %s;\n' % (name, name) for c_type, name, init in self.provides()]) res += "%(c_type)s %(name)s;\n" % tvars
return res
def struct_import_code(self): def struct_import_code(self):
res = "__STRUCT_P->py_%s = py_%s;\n" % (self.name, self.name) res = "__STRUCT_P->py_%s = py_%s;\n" % (self.name, self.name)
return res + ''.join(['__STRUCT_P->%s = %s;\n' % (name, name) for c_type, name, init in self.provides()]) res += "__STRUCT_P->%s = %s;\n" % (self.name, self.name)
return res
def struct_support_code(self): def struct_support_code(self):
return "" return ""
def struct_typedefs(self): def struct_typedefs(self):
return ''.join(["typedef %s %s_type;\n" % (c_type, name) for c_type, name, init in self.provides()]) return ""
class int_converter(omega_type_converter_extension, c_spec.int_converter): class int_converter(omega_type_converter_extension, c_spec.int_converter):
...@@ -52,9 +55,10 @@ class complex_converter(omega_type_converter_extension, c_spec.complex_converter ...@@ -52,9 +55,10 @@ class complex_converter(omega_type_converter_extension, c_spec.complex_converter
pass pass
class unicode_converter(omega_type_converter_extension, c_spec.unicode_converter): class unicode_converter(omega_type_converter_extension, c_spec.unicode_converter):
def provides(self): pass
tvars = self.template_vars() # def provides(self):
return omega_type_converter_extension.provides() + [('int', 'N%(name)s' % tvars, 'PyUnicode_GET_SIZE(%(py_var)s)' % tvars)] # tvars = self.template_vars()
# return omega_type_converter_extension.provides() + [('int', 'N%(name)s' % tvars, 'PyUnicode_GET_SIZE(%(py_var)s)' % tvars)]
class string_converter(omega_type_converter_extension, c_spec.string_converter): class string_converter(omega_type_converter_extension, c_spec.string_converter):
pass pass
...@@ -75,39 +79,39 @@ class instance_converter(omega_type_converter_extension, c_spec.instance_convert ...@@ -75,39 +79,39 @@ class instance_converter(omega_type_converter_extension, c_spec.instance_convert
pass pass
class array_converter(omega_type_converter_extension, standard_array_spec.array_converter): class array_converter(omega_type_converter_extension, standard_array_spec.array_converter):
def provides(self): # def provides(self):
tvars = self.template_vars() # tvars = self.template_vars()
ret = [] # ret = []
ret.append((tvars['c_type'], tvars['array_name'], tvars['var_convert'])) # ret.append((tvars['c_type'], tvars['array_name'], tvars['var_convert']))
ret.append(('npy_intp*', 'N%(name)s' % tvars, '%(array_name)s->dimensions' % tvars)) # ret.append(('npy_intp*', 'N%(name)s' % tvars, '%(array_name)s->dimensions' % tvars))
ret.append(('npy_intp*', 'S%(name)s' % tvars, '%(array_name)s->strides' % tvars)) # ret.append(('npy_intp*', 'S%(name)s' % tvars, '%(array_name)s->strides' % tvars))
ret.append(('int', 'D%(name)s' % tvars, '%(array_name)s->nd' % tvars)) # ret.append(('int', 'D%(name)s' % tvars, '%(array_name)s->nd' % tvars))
ret.append(('%(num_type)s*' % tvars, '%(name)s' % tvars, '(%(num_type)s*) %(array_name)s->data' % tvars)) # ret.append(('%(num_type)s*' % tvars, '%(name)s' % tvars, '(%(num_type)s*) %(array_name)s->data' % tvars))
return ret # return ret
def declaration_code(self, templatize = 0, inline = 0): # def declaration_code(self, templatize = 0, inline = 0):
tvars = self.template_vars(inline=inline) # tvars = self.template_vars(inline=inline)
tvars['cap_name'] = self.name.upper() # tvars['cap_name'] = self.name.upper()
prov = self.provides() # prov = self.provides()
code = '%(py_var)s = %(var_lookup)s;\n' % tvars # code = '%(py_var)s = %(var_lookup)s;\n' % tvars
code += "\n".join(self.format_provide(export) for export in prov[:1]) # code += "\n".join(self.format_provide(export) for export in prov[:1])
code += '\nconversion_numpy_check_type(%(array_name)s,%(num_typecode)s,"%(name)s");\n' % tvars # code += '\nconversion_numpy_check_type(%(array_name)s,%(num_typecode)s,"%(name)s");\n' % tvars
code += "\n".join(self.format_provide(export) for export in prov[1:]) # code += "\n".join(self.format_provide(export) for export in prov[1:])
return code # return code
def struct_support_code(self, templatize = 0, inline = 0): # def struct_support_code(self, templatize = 0, inline = 0):
tvars = self.template_vars(inline=inline) # tvars = self.template_vars(inline=inline)
cap_name = self.name.upper() # cap_name = self.name.upper()
tvars['cap_name'] = cap_name # tvars['cap_name'] = cap_name
code = 'inline %(num_type)s& %(cap_name)s1(int i) { return (*((%(num_type)s*)(%(array_name)s->data + (i)*S%(name)s[0])));}\n' \ # code = 'inline %(num_type)s& %(cap_name)s1(int i) { return (*((%(num_type)s*)(%(array_name)s->data + (i)*S%(name)s[0])));}\n' \
'inline %(num_type)s& %(cap_name)s2(int i, int j) { return (*((%(num_type)s*)(%(array_name)s->data + (i)*S%(name)s[0] + (j)*S%(name)s[1])));}\n' \ # 'inline %(num_type)s& %(cap_name)s2(int i, int j) { return (*((%(num_type)s*)(%(array_name)s->data + (i)*S%(name)s[0] + (j)*S%(name)s[1])));}\n' \
'inline %(num_type)s& %(cap_name)s3(int i, int j, int k) { return (*((%(num_type)s*)(%(array_name)s->data + (i)*S%(name)s[0] + (j)*S%(name)s[1] + (k)*S%(name)s[2])));}\n' \ # 'inline %(num_type)s& %(cap_name)s3(int i, int j, int k) { return (*((%(num_type)s*)(%(array_name)s->data + (i)*S%(name)s[0] + (j)*S%(name)s[1] + (k)*S%(name)s[2])));}\n' \
'inline %(num_type)s& %(cap_name)s4(int i, int j, int k, int l) { return (*((%(num_type)s*)(%(array_name)s->data + (i)*S%(name)s[0] + (j)*S%(name)s[1] + (k)*S%(name)s[2] + (l)*S%(name)s[3])));}\n' # 'inline %(num_type)s& %(cap_name)s4(int i, int j, int k, int l) { return (*((%(num_type)s*)(%(array_name)s->data + (i)*S%(name)s[0] + (j)*S%(name)s[1] + (k)*S%(name)s[2] + (l)*S%(name)s[3])));}\n'
return code % tvars # return code % tvars
def struct_typedefs(self): def struct_typedefs(self):
tvars = self.template_vars() tvars = self.template_vars()
return omega_type_converter_extension.struct_typedefs(self) + "\n" + "typedef %(num_type)s %(name)s_dtype;" % tvars return "typedef %(num_type)s %(name)s_dtype;\n" % tvars
# return "\n".join(["typedef %s %s_type;" % (c_type, name)]) # return "\n".join(["typedef %s %s_type;" % (c_type, name)])
# def struct_template_types(self): # def struct_template_types(self):
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论