Merge pull request #1271 from lamblin/arch_in_cache

Always put arch bitwidth in cache

Merge pull request #1271 from lamblin/arch_in_cache
816a83e3 · nouiz · 3805afa3 · 1d3ba7ec · 816a83e3 · 816a83e3
--- a/doc/library/config.txt
+++ b/doc/library/config.txt
@@ -364,7 +364,7 @@ import theano and print the config variable, as in:
 .. attribute:: compiledir_format
-    Default: "compiledir_%(platform)s-%(processor)s-%(python_version)s"
+    Default: "compiledir_%(platform)s-%(processor)s-%(python_version)s-%(python_bitwidth)s"
    This is a Python format string that specifies the subdirectory
    of ``config.base_compiledir`` in which to store platform-dependent

--- a/theano/compile/tests/test_pfunc.py
+++ b/theano/compile/tests/test_pfunc.py
@@ -548,7 +548,7 @@ class Test_pfunc(unittest.TestCase):
    def test_default_updates_input(self):
        x = shared(0)
        y = shared(1)
-        if theano.gof.cmodule.python_int_bitwidth() == 32:
+        if theano.gof.python_int_bitwidth() == 32:
            a = iscalar('a')
        else:
            a = lscalar('a')

--- a/theano/compile/tests/test_shared.py
+++ b/theano/compile/tests/test_shared.py
@@ -18,7 +18,7 @@ class Test_SharedVariable(unittest.TestCase):
            assert shared(7, dtype='float64').type == Scalar('float64')
        else:
-            if theano.gof.cmodule.python_int_bitwidth() == 32:
+            if theano.gof.python_int_bitwidth() == 32:
                assert shared(7).type == theano.tensor.iscalar, shared(7).type
            else:
                assert shared(7).type == theano.tensor.lscalar, shared(7).type

--- a/theano/gof/__init__.py
+++ b/theano/gof/__init__.py
@@ -38,7 +38,9 @@ e-mail thread "What is gof?"
 from theano.gof.cc import \
    CLinker, OpWiseCLinker, DualLinker
-import theano.gof.compiledir # adds config vars
+# Also adds config vars
+from theano.gof.compiledir import \
+    local_bitwidth, python_int_bitwidth
 from theano.gof.fg import \
    InconsistencyError, MissingInputError, FunctionGraph
@@ -77,4 +79,3 @@ from theano.gof.type import \
 from theano.gof.utils import \
    object2, MethodNotDefined
--- a/theano/gof/cmodule.py
+++ b/theano/gof/cmodule.py
@@ -8,7 +8,6 @@ import os
 import shutil
 import stat
 import StringIO
-import struct
 import subprocess
 import sys
 import tempfile
@@ -27,7 +26,7 @@ from theano.misc.windows import call_subprocess_Popen
 # we will abuse the lockfile mechanism when reading and writing the registry
 from theano.gof import compilelock
-from theano.gof.compiledir import gcc_version_str
+from theano.gof.compiledir import gcc_version_str, local_bitwidth
 from theano.configparser import AddConfigVar, BoolParam
@@ -55,29 +54,6 @@ AddConfigVar('cmodule.compilation_warning',
             BoolParam(False))
-def local_bitwidth():
-    """
-    Return 32 for 32bit arch, 64 for 64bit arch
-    By "architecture", we mean the size of memory pointers (size_t in C),
-    *not* the size of long int, as it can be different.
-    """
-    # Note that according to Python documentation, `platform.architecture()` is
-    # not reliable on OS X with universal binaries.
-    # Also, sys.maxsize does not exist in Python < 2.6.
-    # 'P' denotes a void*, and the size is expressed in bytes.
-    return struct.calcsize('P') * 8
-def python_int_bitwidth():
-    """
-    Return the bit width of Python int (C long int).
-    Note that it can be different from the size of a memory pointer.
-    """
-    # 'l' denotes a C long int, and the size is expressed in bytes.
-    return struct.calcsize('l') * 8
 _logger = logging.getLogger("theano.gof.cmodule")
 _logger.setLevel(logging.WARNING)
@@ -176,14 +152,14 @@ static struct PyModuleDef moduledef = {{
 }};
 """.format(name=self.name)
            print >> stream, "PyMODINIT_FUNC PyInit_%s(void) {" % self.name
-            for b in self.init_blocks:
+            for block in self.init_blocks:
-                print >> stream, '  ', b
+                print >> stream, '  ', block
            print >> stream, "    PyObject *m = PyModule_Create(&moduledef);"
            print >> stream, "    return m;"
        else:
            print >> stream, "PyMODINIT_FUNC init%s(void){" % self.name
-            for b in self.init_blocks:
+            for block in self.init_blocks:
-                print >> stream, '  ', b
+                print >> stream, '  ', block
            print >> stream, '  ', ('(void) Py_InitModule("%s", MyMethods);'
                                    % self.name)
        print >> stream, "}"
@@ -1564,7 +1540,8 @@ class GCC_compiler(object):
                    lines = stdout + stderr
                return lines
-            # The '-' at the end is needed. Otherwise, g++ do not output enough information.
+            # The '-' at the end is needed. Otherwise, g++ does not output
+            # enough information.
            native_lines = get_lines("g++ -march=native -E -v -")
            _logger.info("g++ -march=native selected lines: %s", native_lines)
            if len(native_lines) != 1:
@@ -1619,6 +1596,39 @@ class GCC_compiler(object):
            cxxflags.append("-D NPY_ARRAY_UPDATE_ALL=NPY_UPDATE_ALL")
            cxxflags.append("-D NPY_ARRAY_C_CONTIGUOUS=NPY_C_CONTIGUOUS")
            cxxflags.append("-D NPY_ARRAY_F_CONTIGUOUS=NPY_F_CONTIGUOUS")
+        # Platform-specific flags.
+        # We put them here, rather than in compile_str(), so they end up
+        # in the key of the compiled module, avoiding potential conflicts.
+        # Figure out whether the current Python executable is 32
+        # or 64 bit and compile accordingly.
+        n_bits = local_bitwidth()
+        cxxflags.append('-m%d' % n_bits)
+        _logger.debug("Compiling for %s bit architecture", n_bits)
+        if sys.platform != 'win32':
+            # Under Windows it looks like fPIC is useless. Compiler warning:
+            # '-fPIC ignored for target (all code is position independent)'
+            cxxflags.append('-fPIC')
+        if sys.platform == 'win32' and local_bitwidth() == 64:
+            # Under 64-bit Windows installation, sys.platform is 'win32'.
+            # We need to define MS_WIN64 for the preprocessor to be able to
+            # link with libpython.
+            cxxflags.append('-DMS_WIN64')
+        #DSE Patch 1 for supporting OSX frameworks; add -framework Python
+        if sys.platform == 'darwin':
+            cxxflags.extend(['-undefined', 'dynamic_lookup'])
+            python_inc = distutils.sysconfig.get_python_inc()
+            # link with the framework library *if specifically requested*
+            # config.mac_framework_link is by default False, since on some mac
+            # installs linking with -framework causes a Bus Error
+            if (python_inc.count('Python.framework') > 0 and
+                config.cmodule.mac_framework_link):
+                cxxflags.extend(['-framework', 'Python'])
        return cxxflags
    @staticmethod
@@ -1744,40 +1754,10 @@ class GCC_compiler(object):
        else:
            preargs = list(preargs)
-        if sys.platform != 'win32':
-            # Under Windows it looks like fPIC is useless. Compiler warning:
-            # '-fPIC ignored for target (all code is position independent)'
-            preargs.append('-fPIC')
-        if sys.platform == 'win32' and local_bitwidth() == 64:
-            # Under 64-bit Windows installation, sys.platform is 'win32'.
-            # We need to define MS_WIN64 for the preprocessor to be able to
-            # link with libpython.
-            preargs.append('-DMS_WIN64')
-            # We also add "-m64", in case the installed gcc is 32-bit
-            preargs.append('-m64')
        include_dirs = include_dirs + std_include_dirs()
        libs = std_libs() + libs
        lib_dirs = std_lib_dirs() + lib_dirs
-        #DSE Patch 1 for supporting OSX frameworks; add -framework Python
-        if sys.platform == 'darwin':
-            preargs.extend(['-undefined', 'dynamic_lookup'])
-            python_inc = distutils.sysconfig.get_python_inc()
-            # link with the framework library *if specifically requested*
-            # config.mac_framework_link is by default False, since on some mac
-            # installs linking with -framework causes a Bus Error
-            if (python_inc.count('Python.framework') > 0 and
-                config.cmodule.mac_framework_link):
-                preargs.extend(['-framework', 'Python'])
-            # Figure out whether the current Python executable is 32
-            # or 64 bit and compile accordingly.
-            n_bits = local_bitwidth()
-            preargs.extend(['-m%s' % n_bits])
-            _logger.debug("OS X: compiling for %s bit architecture", n_bits)
        # sometimes, the linker cannot find -lpython so we need to tell it
        # explicitly where it is located
        # this returns somepath/lib/python2.x

--- a/theano/gof/compiledir.py
+++ b/theano/gof/compiledir.py
@@ -4,6 +4,7 @@ import os
 import platform
 import re
 import shutil
+import struct
 import subprocess
 import sys
 import textwrap
@@ -32,16 +33,44 @@ except OSError:
 del p
 del dummy_err
-compiledir_format_dict = {"platform": platform.platform(),
-                          "processor": platform.processor(),
+def local_bitwidth():
-                          "python_version": platform.python_version(),
+    """
-                          "theano_version": theano.__version__,
+    Return 32 for 32bit arch, 64 for 64bit arch
-                          "numpy_version": numpy.__version__,
-                          "gxx_version": gcc_version_str.replace(" ", "_"),
+    By "architecture", we mean the size of memory pointers (size_t in C),
-                         }
+    *not* the size of long int, as it can be different.
+    """
+    # Note that according to Python documentation, `platform.architecture()` is
+    # not reliable on OS X with universal binaries.
+    # Also, sys.maxsize does not exist in Python < 2.6.
+    # 'P' denotes a void*, and the size is expressed in bytes.
+    return struct.calcsize('P') * 8
+def python_int_bitwidth():
+    """
+    Return the bit width of Python int (C long int).
+    Note that it can be different from the size of a memory pointer.
+    """
+    # 'l' denotes a C long int, and the size is expressed in bytes.
+    return struct.calcsize('l') * 8
+compiledir_format_dict = {
+        "platform": platform.platform(),
+        "processor": platform.processor(),
+        "python_version": platform.python_version(),
+        "python_bitwidth": local_bitwidth(),
+        "python_int_bitwidth": python_int_bitwidth(),
+        "theano_version": theano.__version__,
+        "numpy_version": numpy.__version__,
+        "gxx_version": gcc_version_str.replace(" ", "_"),
+        }
 compiledir_format_keys = ", ".join(sorted(compiledir_format_dict.keys()))
-default_compiledir_format =\
+default_compiledir_format = ("compiledir_%(platform)s-%(processor)s-"
-                    "compiledir_%(platform)s-%(processor)s-%(python_version)s"
+                             "%(python_version)s-%(python_bitwidth)s")
 AddConfigVar("compiledir_format",
             textwrap.fill(textwrap.dedent("""\

--- a/theano/gof/utils.py
+++ b/theano/gof/utils.py
--- a/theano/sandbox/cuda/nvcc_compiler.py
+++ b/theano/sandbox/cuda/nvcc_compiler.py
@@ -9,11 +9,11 @@ import warnings
 import numpy
-import theano
+from theano.gof import local_bitwidth
 from theano.gof.cc import hash_from_file
 from theano.gof.cmodule import (std_libs, std_lib_dirs,
                                std_include_dirs, dlimport,
-                                get_lib_extension, local_bitwidth)
+                                get_lib_extension)
 from theano.gof.python25 import any
 from theano.misc.windows import call_subprocess_Popen
@@ -245,8 +245,6 @@ class NVCC_compiler(object):
        cppfile = file(cppfilename, 'w')
        _logger.debug('Writing module C++ code to %s', cppfilename)
-        ofiles = []
-        rval = None
        cppfile.write(src_code)
        cppfile.close()

--- a/theano/tensor/extra_ops.py
+++ b/theano/tensor/extra_ops.py
@@ -114,12 +114,12 @@ class BinCountOp(theano.Op):
        # Some dtypes are not supported by numpy's implementation of bincount.
        # Until another one is available, we should fail at graph construction
        # time, not wait for execution.
-        int_bitwidth = theano.gof.cmodule.python_int_bitwidth()
+        int_bitwidth = theano.gof.python_int_bitwidth()
        if int_bitwidth == 64:
            numpy_unsupported_dtypes = ('uint64',)
        if int_bitwidth == 32:
            numpy_unsupported_dtypes = ('uint32', 'int64', 'uint64')
-        intp_bitwidth = theano.gof.cmodule.local_bitwidth()
+        intp_bitwidth = theano.gof.local_bitwidth()
        if intp_bitwidth == 32:
            out_type = basic.ivector()
        elif intp_bitwidth == 64:
@@ -246,7 +246,7 @@ class RepeatOp(theano.Op):
        # Some dtypes are not supported by numpy's implementation of repeat.
        # Until another one is available, we should fail at graph construction
        # time, not wait for execution.
-        int_bitwidth = theano.gof.cmodule.python_int_bitwidth()
+        int_bitwidth = theano.gof.python_int_bitwidth()
        if int_bitwidth == 64:
            numpy_unsupported_dtypes = ('uint64',)
        if int_bitwidth == 32:
@@ -259,7 +259,7 @@ class RepeatOp(theano.Op):
                     % numpy_unsupported_dtypes), repeats.dtype)
        if self.axis is None:
-            broadcastable=[False]
+            broadcastable = [False]
        else:
            try:
                const_reps = basic.get_scalar_constant_value(repeats)

--- a/theano/tensor/tests/test_extra_ops.py
+++ b/theano/tensor/tests/test_extra_ops.py
@@ -13,6 +13,7 @@ from theano import config, tensor, function
 numpy_ver = [int(n) for n in numpy.__version__.split('.')[:2]]
 numpy_16 = bool(numpy_ver >= [1, 6])
 class TestBinCountOp(utt.InferShapeTester):
    def setUp(self):
        super(TestBinCountOp, self).setUp()
@@ -25,7 +26,7 @@ class TestBinCountOp(utt.InferShapeTester):
                      'uint8', 'uint16', 'uint32', 'uint64'):
            # uint64 always fails
            # int64 and uint32 also fail if python int are 32-bit
-            int_bitwidth = theano.gof.cmodule.python_int_bitwidth()
+            int_bitwidth = theano.gof.python_int_bitwidth()
            if int_bitwidth == 64:
                numpy_unsupported_dtypes = ('uint64',)
            if int_bitwidth == 32:
@@ -57,7 +58,7 @@ class TestBinCountOp(utt.InferShapeTester):
        for dtype in tensor.discrete_dtypes:
            # uint64 always fails
            # int64 and uint32 also fail if python int are 32-bit
-            int_bitwidth = theano.gof.cmodule.python_int_bitwidth()
+            int_bitwidth = theano.gof.python_int_bitwidth()
            if int_bitwidth == 64:
                numpy_unsupported_dtypes = ('uint64',)
            if int_bitwidth == 32:
@@ -188,7 +189,6 @@ class SqueezeTester(utt.InferShapeTester):
    def test_grad(self):
        for shape, broadcast in zip(self.shape_list, self.broadcast_list):
            data = numpy.random.random(size=shape).astype(theano.config.floatX)
-            variable = tensor.TensorType(theano.config.floatX, broadcast)()
            utt.verify_grad(self.op, [data])
@@ -203,7 +203,7 @@ class TestRepeatOp(utt.InferShapeTester):
        self.op = RepeatOp()
        # uint64 always fails
        # int64 and uint32 also fail if python int are 32-bit
-        int_bitwidth = theano.gof.cmodule.python_int_bitwidth()
+        int_bitwidth = theano.gof.python_int_bitwidth()
        if int_bitwidth == 64:
            self.numpy_unsupported_dtypes = ('uint64',)
        if int_bitwidth == 32:
@@ -287,11 +287,12 @@ class TestRepeatOp(utt.InferShapeTester):
        x = T.TensorType(config.floatX, [False, True, False])()
        r = RepeatOp(axis=1)(x, 2)
        self.assertEqual(r.broadcastable, (False, False, False))
-        r =  RepeatOp(axis=1)(x, 1)
+        r = RepeatOp(axis=1)(x, 1)
        self.assertEqual(r.broadcastable, (False, True, False))
-        r =  RepeatOp(axis=0)(x, 2)
+        r = RepeatOp(axis=0)(x, 2)
        self.assertEqual(r.broadcastable, (False, True, False))
 class TestBartlett(utt.InferShapeTester):
    def setUp(self):