merge

f9b8b7ab · bergstrj@iro.umontreal.ca · 5c5a3d60 · 76cc864a · f9b8b7ab · f9b8b7ab
--- a/blas.py
+++ b/blas.py
@@ -87,10 +87,10 @@ _gemm_code = { 'f': _gemm_code_template % { 'gemm':'cblas_sgemm', 'dtype':'float
                'd': _gemm_code_template % { 'gemm':'cblas_dgemm', 'dtype':'double'}}
 def _gemm_rank2(a, x, y, b, z):
-    weave.inline(_gemm_code[z.dtype.char], 
+    weave.inline(_gemm_code[z.dtype.char],
-            ['a', 'x', 'y', 'b', 'z'], 
+            ['a', 'x', 'y', 'b', 'z'],
-            headers=['<gsl/gsl_cblas.h>'],
+            headers=['"/home/bergstra/cvs/lgcm/omega/cblas.h"'],
-            libraries=['cblas','goto', 'g2c'])
+            libraries=['mkl', 'm'])
 def _gemm(a, x, y, b, z):
    if len(x.shape) == 2 and len(y.shape) == 2:

--- a/core.py
+++ b/core.py
+import os # for building the location of the .omega/omega_compiled cache directory
+import sys # for adding the inline code cache to the include path
 import os
 import sys
@@ -40,8 +42,22 @@ literals_db = {}
 literals_id_db = weakref.WeakValueDictionary()
 #input floating point scalars will be cast to arrays of this type
+# see TRAC(#31)
 default_input_scalar_dtype = 'float64'
+# BLAS Support
+# These should be used by dependent modules to link blas functions.
+# - used by dot(), gemm()
+_blas_headers = ['"/home/bergstra/cvs/lgcm/omega/cblas.h"']
+_blas_libs = ['mkl', 'm']
+# WEAVE CACHE
+#_home_omega = os.path.join(os.getenv('HOME'), '.omega')  # TODO: restore this portable form; the path below is hard-coded to one user's home
+_home_omega = os.path.join('/home/bergstra/.omega')
+_compiled = 'omega_compiled'
+_home_omega_compiled = os.path.join(_home_omega, _compiled)
+sys.path.append(_home_omega)  # so omega_compiled.<module> can be imported in c_thunk_factory; J - is mutating sys.path at import time a good idea??
 def input(x):
    #NB:
    # - automatically casting int to float seems wrong.
@@ -225,14 +241,14 @@ class omega_op(gof.PythonOp):
    def _c_impl(self):
        return self.c_impl(self.inputs, self.outputs)
    def c_impl(inputs, outputs):
        raise NotImplementedError()
    def c_thunk_factory(self):
        self.refresh()
        d, names, code, struct, converters = self.c_code()
        cthunk = object()
        module_name = md5.md5(code).hexdigest()
        mod = weave.ext_tools.ext_module(module_name)
@@ -244,29 +260,25 @@ class omega_op(gof.PythonOp):
                                                   type_converters = converters)
        instantiate.customize.add_support_code(self.c_support_code() + struct)
        instantiate.customize.add_extra_compile_arg("-O3")
-        instantiate.customize.add_extra_compile_arg("-ffast-math")
+        instantiate.customize.add_extra_compile_arg("-ffast-math") #TODO: make this optional, say by passing args to c_thunk_factory?
        instantiate.customize.add_extra_compile_arg("-falign-loops=4")
 #        instantiate.customize.add_extra_compile_arg("-mfpmath=sse")
        for header in self.c_headers():
            instantiate.customize.add_header(header)
        for lib in self.c_libs():
            instantiate.customize.add_library(lib)
        mod.add_function(instantiate)
-        module_dir = os.path.expanduser('~/.omega/compiled')
+        mod.compile(location = _home_omega_compiled)
-        sys.path.insert(0, module_dir)
+        module = __import__("%s.%s" % (_compiled, module_name), {}, {}, [module_name])
-        mod.compile(location = module_dir)
-        module = __import__("%s" % module_name) #, {}, {}, [module_name])
-        sys.path = sys.path[1:]
        def creator():
            return module.instantiate(*[x.data for x in self.inputs + self.outputs])
        return creator
    def c_thunk(self):
        return self.c_thunk_creator()
    def c_perform(self):
        thunk = self.c_thunk()
        cutils.run_cthunk(thunk)
@@ -287,7 +299,7 @@ def elemwise_loopcode(loopcode, init_template, next_template, acquire_template,
                             for v1, v2 in aliases.items()]),
        loopcode = loopcode
        )
    code = """
    %(init)s
    while (__elemwise_size--) {
@@ -381,8 +393,8 @@ class elemwise(omega_op):
            for oname in onames:
                if oname not in lonames:
                    raise Exception("cannot infer a specification automatically for variable " \
-                                    "%s because it is not part of the elementwise loop - "\
+                                    "%s.%s because it is not part of the elementwise loop - "\
-                                    "please override the specs method" % oname)
+                                    "please override the specs method" % (self.__class__.__name__, oname))
            shape, dtype = None, None
            for iname, input in zip(inames, self.inputs):
                if iname in linames:
@@ -855,7 +867,7 @@ class blas_code :
            npy_intp* Nx = _x->dimensions;
            npy_intp* Ny = _y->dimensions;
            npy_intp* Nz = _z->dimensions;
            npy_intp* Sx = _x->strides;
            npy_intp* Sy = _y->strides;
            npy_intp* Sz = _z->strides;
@@ -867,7 +879,7 @@ class blas_code :
            if (_x->nd != 2) goto _dot_execute_fallback;
            if (_y->nd != 2) goto _dot_execute_fallback;
            if (_z->nd != 2) goto _dot_execute_fallback;
            if ((_x->descr->type_num != PyArray_DOUBLE) 
                && (_x->descr->type_num != PyArray_FLOAT))
                goto _dot_execute_fallback;
@@ -884,7 +896,7 @@ class blas_code :
                ||(_x->descr->type_num != _z->descr->type_num))
                goto _dot_execute_fallback;
            if ((Nx[0] != Nz[0]) || (Nx[1] != Ny[0]) || (Ny[1] != Nz[1]))
            {
                error_string = "Input dimensions do not agree";
@@ -905,7 +917,7 @@ class blas_code :
            unit |= ((Sz[1] == type_size) ? 0x0 : (Sz[0] == type_size) ? 0x1 : 0x2) << 8;
            /* create appropriate strides for malformed matrices that are row or column
-             * vectors 
+             * vectors
             */
            sx_0 = (Nx[0] > 1) ? Sx[0]/type_size : Nx[1];
            sx_1 = (Nx[1] > 1) ? Sx[1]/type_size : Nx[0];
@@ -1054,11 +1066,13 @@ class gemm(omega_op, inplace):
    def alloc(self, except_list):
        self.outputs[0].data = self.inputs[0].data
    def c_headers(self):
-        return ["<gsl/gsl_cblas.h>"]
+        return _blas_headers
    def c_libs(self):
-        return ["cblas", "atlas", "g2c"]
+        return _blas_libs
    def c_impl((_zin, _a, _x, _y, _b), (_z,)):
-        return blas_code.gemm_xyz(str(_a), str(_b))
+        return blas_code.gemm_xyz(
+                '((_a->descr->type_num == PyArray_FLOAT) ? (float*)_a->data : (double*)_a->data)[0]',
+                '((_b->descr->type_num == PyArray_FLOAT) ? (float*)_b->data : (double*)_b->data)[0]')
 ## Transposition ##

--- a/opt.py
+++ b/opt.py
@@ -9,14 +9,13 @@ This variable is used in compile.prog as the optimizer for all programs built
 using either compile.single, compile.to_func, and compile.prog.
 """
 def optimizer(lst):
    begin = gof.SeqOptimizer([])
    end   = gof.SeqOptimizer([gof.DummyRemover])
    seq_opt = gof.SeqOptimizer(begin + lst + end)
    return gof.PythonOpt(gof.MergeOptMerge(seq_opt))
 if 0:
    optimizer_begin = gof.SeqOptimizer([opt for name, opt in [
             ['double_transpose_eliminator', pattern_opt((transpose, (transpose, 'x')), 'x')],
@@ -28,7 +27,7 @@ if 0:
             ['mulxx_to_sqr',                pattern_opt((mul_elemwise, 'x', 'x'), (sqr, 'x'))],
             ['sqr_to_isqr',                 op_sub(sqr, isqr)],
             ['add_to_iadd',                 op_sub(add_elemwise, iadd_elemwise)],
             ['add_to_iadd_reverse',         pattern_opt((add_elemwise, 'x', 'y'),