make {nvcc,gcc}_module_compile_str a class with another function compile_args…

Make each of {nvcc,gcc}_module_compile_str a class with an additional function, compile_args, whose returned arguments get added to the keys.

make {nvcc,gcc}_module_compile_str a class with another function compile_args…
cff27c13 · Frederic · 2f2b424a · cff27c13 · cff27c13 · cff27c13
--- a/theano/gof/cc.py
+++ b/theano/gof/cc.py
@@ -622,6 +622,10 @@ class CLinker(link.Linker):
        for x in [y.type for y in self.variables] + [y.op for y in self.node_order]:
            try: ret += x.c_compile_args()
            except utils.MethodNotDefined: pass
+
+        c_compiler = self.c_compiler()
+        ret += c_compiler.compile_args()
+
        ret=list(set(ret))#to remove duplicate
        for x in [y.type for y in self.variables] + [y.op for y in self.node_order]:
            try:
@@ -661,7 +665,7 @@ class CLinker(link.Linker):
                    raise Exception('Nodes have requested specific different compilers',
                            (c_compiler, x_compiler))
        if (c_compiler is None):
-            return cmodule.gcc_module_compile_str
+            return cmodule.GCC_compiler
        else: return c_compiler

    def header_dirs(self):
@@ -1007,7 +1011,7 @@ class CLinker(link.Linker):
        libs = self.libraries()
        preargs = self.compile_args()
        compiler_name = c_compiler.__name__
-        if compiler_name == 'nvcc_module_compile_str' and config.lib.amdlibm:
+        if compiler_name == 'NVCC_compiler' and config.lib.amdlibm:
            # This lib does not work correctly with nvcc in device code.
            # and newer version of g++ as 4.5.1.
            # example of errors: "/usr/lib/gcc/x86_64-redhat-linux/4.5.1/include/mmintrin.h(49): error: identifier "__builtin_ia32_emms" is undefined"
@@ -1024,7 +1028,7 @@ class CLinker(link.Linker):
        try:
            _logger.debug("LOCATION %s", str(location))
            try:
-                module = c_compiler(
+                module = c_compiler.compile_str(
                    module_name=mod.name,
                    src_code=src_code,
                    location=location,

--- a/theano/gof/cmodule.py
+++ b/theano/gof/cmodule.py
@@ -1312,140 +1312,147 @@ def gcc_version():
    return gcc_version_str


-def gcc_module_compile_str(module_name, src_code, location=None,
-                           include_dirs=[], lib_dirs=[], libs=[], preargs=[]):
-    """
-    :param module_name: string (this has been embedded in the src_code
-
-    :param src_code: a complete c or c++ source listing for the module
-
-    :param location: a pre-existing filesystem directory where the cpp file and
-    .so will be written
+class GCC_compiler():
+    @staticmethod
+    def compile_args():
+        return []
+
+    @staticmethod
+    def compile_str(module_name, src_code, location=None,
+                    include_dirs=[], lib_dirs=[], libs=[], preargs=[]):
+        """
+        :param module_name: string (this has been embedded in the src_code

-    :param include_dirs: a list of include directory names (each gets prefixed
-    with -I)
+        :param src_code: a complete c or c++ source listing for the module

-    :param lib_dirs: a list of library search path directory names (each gets
-    prefixed with -L)
+        :param location: a pre-existing filesystem directory where the
+        cpp file and .so will be written

-    :param libs: a list of libraries to link with (each gets prefixed with -l)
+        :param include_dirs: a list of include directory names (each
+        gets prefixed with -I)

-    :param preargs: a list of extra compiler arguments
+        :param lib_dirs: a list of library search path directory names
+        (each gets prefixed with -L)

-    :returns: dynamically-imported python module of the compiled code.
-    """
-    #TODO: Do not do the dlimport in this function
+        :param libs: a list of libraries to link with (each gets
+        prefixed with -l)

-    if preargs is None:
-        preargs = []
-    else:
-        preargs = list(preargs)
+        :param preargs: a list of extra compiler arguments

-    if sys.platform != 'win32':
-        # Under Windows it looks like fPIC is useless. Compiler warning:
-        # '-fPIC ignored for target (all code is position independent)'
-        preargs.append('-fPIC')
-    no_opt = False
+        :returns: dynamically-imported python module of the compiled code.
+        """
+        #TODO: Do not do the dlimport in this function

-    include_dirs = include_dirs + std_include_dirs()
-    libs = std_libs() + libs
-    lib_dirs = std_lib_dirs() + lib_dirs
+        if preargs is None:
+            preargs = []
+        else:
+            preargs = list(preargs)
+
+        if sys.platform != 'win32':
+            # Under Windows it looks like fPIC is useless. Compiler warning:
+            # '-fPIC ignored for target (all code is position independent)'
+            preargs.append('-fPIC')
+        no_opt = False
+
+        include_dirs = include_dirs + std_include_dirs()
+        libs = std_libs() + libs
+        lib_dirs = std_lib_dirs() + lib_dirs
+
+        #DSE Patch 1 for supporting OSX frameworks; add -framework Python
+        if sys.platform == 'darwin':
+            preargs.extend(['-undefined', 'dynamic_lookup'])
+            python_inc = distutils.sysconfig.get_python_inc()
+            # link with the framework library *if specifically requested*
+            # config.mac_framework_link is by default False, since on some mac
+            # installs linking with -framework causes a Bus Error
+            if (python_inc.count('Python.framework') > 0 and
+                config.cmodule.mac_framework_link):
+                preargs.extend(['-framework', 'Python'])
+
+            # Figure out whether the current Python executable is 32
+            # or 64 bit and compile accordingly.
+            n_bits = local_bitwidth()
+            preargs.extend(['-m%s' % n_bits])
+            _logger.debug("OS X: compiling for %s bit architecture", n_bits)
+
+        # sometimes, the linker cannot find -lpython so we need to tell it
+        # explicitly where it is located
+        # this returns somepath/lib/python2.x
+        python_lib = distutils.sysconfig.get_python_lib(plat_specific=1, \
+                        standard_lib=1)
+        python_lib = os.path.dirname(python_lib)
+        if python_lib not in lib_dirs:
+            lib_dirs.append(python_lib)
+
+        workdir = location
+
+        cppfilename = os.path.join(location, 'mod.cpp')
+        cppfile = file(cppfilename, 'w')
+
+        _logger.debug('Writing module C++ code to %s', cppfilename)
+        ofiles = []
+        rval = None

-    #DSE Patch 1 for supporting OSX frameworks; add -framework Python
-    if sys.platform == 'darwin':
-        preargs.extend(['-undefined', 'dynamic_lookup'])
-        python_inc = distutils.sysconfig.get_python_inc()
-        # link with the framework library *if specifically requested*
-        # config.mac_framework_link is by default False, since on some mac
-        # installs linking with -framework causes a Bus Error
-        if (python_inc.count('Python.framework') > 0 and
-            config.cmodule.mac_framework_link):
-            preargs.extend(['-framework', 'Python'])
-
-        # Figure out whether the current Python executable is 32 or 64 bit and
-        # compile accordingly.
-        n_bits = local_bitwidth()
-        preargs.extend(['-m%s' % n_bits])
-        _logger.debug("OS X: compiling for %s bit architecture", n_bits)
-
-    # sometimes, the linker cannot find -lpython so we need to tell it
-    # explicitly where it is located
-    # this returns somepath/lib/python2.x
-    python_lib = distutils.sysconfig.get_python_lib(plat_specific=1, \
-                    standard_lib=1)
-    python_lib = os.path.dirname(python_lib)
-    if python_lib not in lib_dirs:
-        lib_dirs.append(python_lib)
-
-    workdir = location
-
-    cppfilename = os.path.join(location, 'mod.cpp')
-    cppfile = file(cppfilename, 'w')
-
-    _logger.debug('Writing module C++ code to %s', cppfilename)
-    ofiles = []
-    rval = None
+        cppfile.write(src_code)
+        # Avoid gcc warning "no newline at end of file".
+        if not src_code.endswith('\n'):
+            cppfile.write('\n')
+        cppfile.close()

-    cppfile.write(src_code)
-    # Avoid gcc warning "no newline at end of file".
-    if not src_code.endswith('\n'):
-        cppfile.write('\n')
-    cppfile.close()
+        lib_filename = os.path.join(location, '%s.%s' %
+                (module_name, get_lib_extension()))

-    lib_filename = os.path.join(location, '%s.%s' %
-            (module_name, get_lib_extension()))
+        _logger.debug('Generating shared lib %s', lib_filename)
+        cmd = ['g++', get_gcc_shared_library_arg(), '-g']
+        if no_opt:
+            cmd.extend(p for p in preargs if not p.startswith('-O'))
+        else:
+            cmd.extend(preargs)
+        cxxflags = [flag for flag in config.gcc.cxxflags.split(' ') if flag]
+        #print >> sys.stderr, config.gcc.cxxflags.split(' ')
+        cmd.extend(cxxflags)
+        cmd.extend('-I%s' % idir for idir in include_dirs)
+        cmd.extend(['-o', lib_filename])
+        cmd.append(cppfilename)
+        cmd.extend(['-L%s' % ldir for ldir in lib_dirs])
+        cmd.extend(['-l%s' % l for l in libs])
+        #print >> sys.stderr, 'COMPILING W CMD', cmd
+        _logger.debug('Running cmd: %s', ' '.join(cmd))
+
+        def print_command_line_error():
+            # Print command line when a problem occurred.
+            print >> sys.stderr, ("Problem occurred during compilation with the "
+                                  "command line below:")
+            print >> sys.stderr, ' '.join(cmd)

-    _logger.debug('Generating shared lib %s', lib_filename)
-    cmd = ['g++', get_gcc_shared_library_arg(), '-g']
-    if no_opt:
-        cmd.extend(p for p in preargs if not p.startswith('-O'))
-    else:
-        cmd.extend(preargs)
-    cxxflags = [flag for flag in config.gcc.cxxflags.split(' ') if flag]
-    #print >> sys.stderr, config.gcc.cxxflags.split(' ')
-    cmd.extend(cxxflags)
-    cmd.extend('-I%s' % idir for idir in include_dirs)
-    cmd.extend(['-o', lib_filename])
-    cmd.append(cppfilename)
-    cmd.extend(['-L%s' % ldir for ldir in lib_dirs])
-    cmd.extend(['-l%s' % l for l in libs])
-    #print >> sys.stderr, 'COMPILING W CMD', cmd
-    _logger.debug('Running cmd: %s', ' '.join(cmd))
-
-    def print_command_line_error():
-        # Print command line when a problem occurred.
-        print >> sys.stderr, ("Problem occurred during compilation with the "
-                              "command line below:")
-        print >> sys.stderr, ' '.join(cmd)
+        try:
+            p = subprocess.Popen(cmd, stderr=subprocess.PIPE)
+            compile_stderr = p.communicate()[1]
+        except Exception:
+            # An exception can occur e.g. if `g++` is not found.
+            print_command_line_error()
+            raise

-    try:
-        p = subprocess.Popen(cmd, stderr=subprocess.PIPE)
-        compile_stderr = p.communicate()[1]
-    except Exception:
-        # An exception can occur e.g. if `g++` is not found.
-        print_command_line_error()
-        raise
-
-    status = p.returncode
-
-    if status:
-        print '==============================='
-        for i, l in enumerate(src_code.split('\n')):
-            #gcc put its messages to stderr, so we add ours now
-            print >> sys.stderr, '%05i\t%s' % (i + 1, l)
-        print '==============================='
-        print_command_line_error()
-        # Print errors just below the command line.
-        print compile_stderr
-        # We replace '\n' by '. ' in the error message because when Python
-        # prints the exception, having '\n' in the text makes it more difficult
-        # to read.
-        raise Exception('Compilation failed (return status=%s): %s' %
-                        (status, compile_stderr.replace('\n', '. ')))
-
-    #touch the __init__ file
-    file(os.path.join(location, "__init__.py"), 'w').close()
-    return dlimport(lib_filename)
+        status = p.returncode
+
+        if status:
+            print '==============================='
+            for i, l in enumerate(src_code.split('\n')):
+                #gcc put its messages to stderr, so we add ours now
+                print >> sys.stderr, '%05i\t%s' % (i + 1, l)
+            print '==============================='
+            print_command_line_error()
+            # Print errors just below the command line.
+            print compile_stderr
+            # We replace '\n' by '. ' in the error message because when Python
+            # prints the exception, having '\n' in the text makes it more difficult
+            # to read.
+            raise Exception('Compilation failed (return status=%s): %s' %
+                            (status, compile_stderr.replace('\n', '. ')))
+
+        #touch the __init__ file
+        file(os.path.join(location, "__init__.py"), 'w').close()
+        return dlimport(lib_filename)


 def icc_module_compile_str(*args):

--- a/theano/gof/cutils.py
+++ b/theano/gof/cutils.py
@@ -70,7 +70,7 @@ except ImportError:
            if not os.path.exists(loc):
                os.mkdir(loc)

-            cmodule.gcc_module_compile_str('cutils_ext', code, location=loc)
+            cmodule.GCC_compiler.compile_str('cutils_ext', code, location=loc)
            from cutils_ext.cutils_ext import *

    finally:

--- a/theano/gof/lazylinker_c.py
+++ b/theano/gof/lazylinker_c.py
@@ -53,7 +53,7 @@ except ImportError:
            loc = os.path.join(config.compiledir, dirname)
            if not os.path.exists(loc):
                os.mkdir(loc)
-            cmodule.gcc_module_compile_str(dirname, code, location=loc)
+            cmodule.GCC_compiler.compile_str(dirname, code, location=loc)
            # Save version into the __init__.py file.
            init_py = os.path.join(loc, '__init__.py')
            open(init_py, 'w').write('_version = %s\n' % version)

--- a/theano/sandbox/cuda/__init__.py
+++ b/theano/sandbox/cuda/__init__.py
@@ -87,7 +87,7 @@ libcuda_ndarray_so = os.path.join(cuda_ndarray_loc,
 # Add the theano cache directory's cuda_ndarray subdirectory to the
 # list of places that are hard-coded into compiled modules' runtime
 # library search list.  This works in conjunction with
-# nvcc_compiler.nvcc_module_compile_str which adds this folder during
+# nvcc_compiler.NVCC_compiler.compile_str which adds this folder during
 # compilation with -L and also adds -lcuda_ndarray when compiling
 # modules.
 nvcc_compiler.add_standard_rpath(cuda_ndarray_loc)
@@ -117,7 +117,8 @@ try:
            if not os.path.exists(cuda_ndarray_loc):
                os.makedirs(cuda_ndarray_loc)

-            nvcc_compiler.nvcc_module_compile_str(
+            compiler = nvcc_compiler.NVCC_compiler()
+            compiler.compile_str(
                    'cuda_ndarray',
                    code,
                    location=cuda_ndarray_loc,
@@ -130,7 +131,7 @@ except Exception, e:
 if cuda_available:
    # If necessary,
    # create a symlink called libcuda_ndarray.so
-    # which nvcc_module_compile_str uses when linking
+    # which nvcc_compiler.NVCC_compiler uses when linking
    # any module except "cuda_ndarray" itself.
    try:
        open(libcuda_ndarray_so).close()

--- a/theano/sandbox/cuda/nvcc_compiler.py
+++ b/theano/sandbox/cuda/nvcc_compiler.py
@@ -72,210 +72,226 @@ rpath_defaults = []
 def add_standard_rpath(rpath):
    rpath_defaults.append(rpath)

-def nvcc_module_compile_str(
-        module_name, src_code,
-        location=None, include_dirs=[], lib_dirs=[], libs=[], preargs=[],
-        rpaths=rpath_defaults):
-    """
-    :param module_name: string (this has been embedded in the src_code
-    :param src_code: a complete c or c++ source listing for the module
-    :param location: a pre-existing filesystem directory where the cpp file and .so will be written
-    :param include_dirs: a list of include directory names (each gets prefixed with -I)
-    :param lib_dirs: a list of library search path directory names (each gets prefixed with -L)
-    :param libs: a list of libraries to link with (each gets prefixed with -l)
-    :param preargs: a list of extra compiler arguments
-    :param rpaths: list of rpaths to use with Xlinker. Defaults to `rpath_defaults`.
-
-    :returns: dynamically-imported python module of the compiled code.
-
-    :note 1: On Windows 7 with nvcc 3.1 we need to compile in the real directory
-             Otherwise nvcc never finish.
-    """

-    rpaths = list(rpaths)
-
-    if sys.platform=="win32":
-        # Remove some compilation args that cl.exe does not understand.
-        # cl.exe is the compiler used by nvcc on Windows.
-        for a in ["-Wno-write-strings","-Wno-unused-label",
-                  "-Wno-unused-variable", "-fno-math-errno"]:
-            if a in preargs:
-                preargs.remove(a)
-    if preargs is None:
-        preargs= []
-    else: preargs = list(preargs)
-    if sys.platform!='win32':
-        preargs.append('-fPIC')
-    no_opt = False
-    cuda_root = config.cuda.root
-
-    #The include dirs gived by the user should have precedence over
-    #the standards ones.
-    include_dirs = include_dirs + std_include_dirs()
-    if os.path.abspath(os.path.split(__file__)[0]) not in include_dirs:
-        include_dirs.append(os.path.abspath(os.path.split(__file__)[0]))
-
-    libs = std_libs() + libs
-    if 'cudart' not in libs:
-        libs.append('cudart')
-
-    lib_dirs = std_lib_dirs() + lib_dirs
-    if cuda_root:
-        lib_dirs.append(os.path.join(cuda_root, 'lib'))
-
-        # from Benjamin Schrauwen April 14 2010
-        if sys.platform != 'darwin':
-            # No 64 bit CUDA libraries available on the mac, yet..
-            lib_dirs.append(os.path.join(cuda_root, 'lib64'))
-
-
-    if sys.platform == 'darwin':
-        # On the mac, nvcc is not able to link using -framework Python, so we have
-        # manually add the correct library and paths
-        darwin_python_lib = commands.getoutput('python-config --ldflags')
-    else:
-        # sometimes, the linker cannot find -lpython so we need to tell it
-        # explicitly where it is located
-        # this returns somepath/lib/python2.x
-        python_lib = distutils.sysconfig.get_python_lib(plat_specific=1, \
-                        standard_lib=1)
-        python_lib = os.path.dirname(python_lib)
-        if python_lib not in lib_dirs:
-            lib_dirs.append(python_lib)
-
-    cppfilename = os.path.join(location, 'mod.cu')
-    cppfile = file(cppfilename, 'w')
-
-    _logger.debug('Writing module C++ code to %s', cppfilename)
-    ofiles = []
-    rval = None
-
-    cppfile.write(src_code)
-    cppfile.close()
-    lib_filename = os.path.join(location, '%s.%s' %
-            (module_name, get_lib_extension()))
-
-    _logger.debug('Generating shared lib %s', lib_filename)
-    # TODO: Why do these args cause failure on gtx285 that has 1.3 compute capability? '--gpu-architecture=compute_13', '--gpu-code=compute_13',
-    preargs1=[pa for pa in preargs if pa.startswith('-O') or pa.startswith('--maxrregcount=')]#nvcc argument
-    preargs2=[pa for pa in preargs if pa not in preargs1]#other arguments
-
-    cmd = [nvcc_path, '-shared', '-g'] + preargs1
-    if config.nvcc.compiler_bindir:
-        cmd.extend(['--compiler-bindir', config.nvcc.compiler_bindir])
-
-    if sys.platform == 'win32':
-        # add flags for Microsoft compiler to create .pdb files
-        preargs2.append('/Zi')
-        cmd.extend(['-Xlinker', '/DEBUG'])
-
-    if sys.platform != 'win32':
-        if local_bitwidth() == 64:
-            cmd.append('-m64')
-            preargs2.append('-m64')
+class NVCC_compiler():
+    @staticmethod
+    def compile_args():
+        """
+        These args will be received by compile_str() in the preargs parameter.
+        They will also be included in the "hard" part of the module key.
+        """
+        return []
+#        flags = [flag for flag in config.nvcc.flags.split(' ') if flag]
+#        cuda_ndarray_cuh_hash = hash_from_file(
+#            os.path.join(os.path.split(__file__)[0], 'cuda_ndarray.cuh'))
+#        cuda_macro = '-DCUDA_NDARRAY_CUH=' + cuda_ndarray_cuh_hash
+#        return [cuda_macro]
+
+    @staticmethod
+    def compile_str(
+            module_name, src_code,
+            location=None, include_dirs=[], lib_dirs=[], libs=[], preargs=[],
+            rpaths=rpath_defaults):
+        """
+        :param module_name: string (this has been embedded in the src_code
+        :param src_code: a complete c or c++ source listing for the module
+        :param location: a pre-existing filesystem directory where the cpp file and .so will be written
+        :param include_dirs: a list of include directory names (each gets prefixed with -I)
+        :param lib_dirs: a list of library search path directory names (each gets prefixed with -L)
+        :param libs: a list of libraries to link with (each gets prefixed with -l)
+        :param preargs: a list of extra compiler arguments
+        :param rpaths: list of rpaths to use with Xlinker. Defaults to `rpath_defaults`.
+
+        :returns: dynamically-imported python module of the compiled code.
+
+        :note 1: On Windows 7 with nvcc 3.1 we need to compile in the real directory
+                 Otherwise nvcc never finish.
+        """
+
+        rpaths = list(rpaths)
+
+        if sys.platform=="win32":
+            # Remove some compilation args that cl.exe does not understand.
+            # cl.exe is the compiler used by nvcc on Windows.
+            for a in ["-Wno-write-strings","-Wno-unused-label",
+                      "-Wno-unused-variable", "-fno-math-errno"]:
+                if a in preargs:
+                    preargs.remove(a)
+        if preargs is None:
+            preargs= []
+        else: preargs = list(preargs)
+        if sys.platform!='win32':
+            preargs.append('-fPIC')
+        no_opt = False
+        cuda_root = config.cuda.root
+
+        #The include dirs gived by the user should have precedence over
+        #the standards ones.
+        include_dirs = include_dirs + std_include_dirs()
+        if os.path.abspath(os.path.split(__file__)[0]) not in include_dirs:
+            include_dirs.append(os.path.abspath(os.path.split(__file__)[0]))
+
+        libs = std_libs() + libs
+        if 'cudart' not in libs:
+            libs.append('cudart')
+
+        lib_dirs = std_lib_dirs() + lib_dirs
+        if cuda_root:
+            lib_dirs.append(os.path.join(cuda_root, 'lib'))
+
+            # from Benjamin Schrauwen April 14 2010
+            if sys.platform != 'darwin':
+                # No 64 bit CUDA libraries available on the mac, yet..
+                lib_dirs.append(os.path.join(cuda_root, 'lib64'))
+
+
+        if sys.platform == 'darwin':
+            # On the mac, nvcc is not able to link using -framework Python, so we have
+            # manually add the correct library and paths
+            darwin_python_lib = commands.getoutput('python-config --ldflags')
        else:
-            cmd.append('-m32')
-            preargs2.append('-m32')
-
-    if len(preargs2)>0:
-        cmd.extend(['-Xcompiler', ','.join(preargs2)])
-
-    if config.cuda.root and os.path.exists(os.path.join(config.cuda.root,'lib')):
-        rpaths.append(os.path.join(config.cuda.root,'lib'))
-        if sys.platform != 'darwin':
-            # the 64bit CUDA libs are in the same files as are named by the function above
-            rpaths.append(os.path.join(config.cuda.root,'lib64'))
-    if sys.platform != 'win32':
-        # the -rpath option is not understood by the Microsoft linker
-        for rpath in rpaths:
-            cmd.extend(['-Xlinker',','.join(['-rpath',rpath])])
-    cmd.extend([flag for flag in config.nvcc.flags.split(' ') if flag])
-    cmd.extend('-I%s'%idir for idir in include_dirs)
-    cmd.extend(['-o',lib_filename])
-    cmd.append(os.path.split(cppfilename)[-1])
-    cmd.extend(['-L%s'%ldir for ldir in lib_dirs])
-    cmd.extend(['-l%s'%l for l in libs])
-    if module_name != 'cuda_ndarray':
-        cmd.append("-lcuda_ndarray")
-    if sys.platform == 'darwin':
-        cmd.extend(darwin_python_lib.split())
-
-    if sys.platform == 'darwin':
+            # sometimes, the linker cannot find -lpython so we need to tell it
+            # explicitly where it is located
+            # this returns somepath/lib/python2.x
+            python_lib = distutils.sysconfig.get_python_lib(plat_specific=1, \
+                            standard_lib=1)
+            python_lib = os.path.dirname(python_lib)
+            if python_lib not in lib_dirs:
+                lib_dirs.append(python_lib)
+
+        cppfilename = os.path.join(location, 'mod.cu')
+        cppfile = file(cppfilename, 'w')
+
+        _logger.debug('Writing module C++ code to %s', cppfilename)
+        ofiles = []
+        rval = None
+
+        cppfile.write(src_code)
+        cppfile.close()
+        lib_filename = os.path.join(location, '%s.%s' %
+                (module_name, get_lib_extension()))
+
+        _logger.debug('Generating shared lib %s', lib_filename)
+        # TODO: Why do these args cause failure on gtx285 that has 1.3 compute capability? '--gpu-architecture=compute_13', '--gpu-code=compute_13',
+        preargs1=[pa for pa in preargs if pa.startswith('-O') or pa.startswith('--maxrregcount=')]#nvcc argument
+        preargs2=[pa for pa in preargs if pa not in preargs1]#other arguments
+
+        cmd = [nvcc_path, '-shared', '-g'] + preargs1
+        if config.nvcc.compiler_bindir:
+            cmd.extend(['--compiler-bindir', config.nvcc.compiler_bindir])
+
+        if sys.platform == 'win32':
+            # add flags for Microsoft compiler to create .pdb files
+            preargs2.append('/Zi')
+            cmd.extend(['-Xlinker', '/DEBUG'])
+
+        if sys.platform != 'win32':
+            if local_bitwidth() == 64:
+                cmd.append('-m64')
+                preargs2.append('-m64')
+            else:
+                cmd.append('-m32')
+                preargs2.append('-m32')
+
+        if len(preargs2)>0:
+            cmd.extend(['-Xcompiler', ','.join(preargs2)])
+
+        if config.cuda.root and os.path.exists(os.path.join(config.cuda.root,'lib')):
+            rpaths.append(os.path.join(config.cuda.root,'lib'))
+            if sys.platform != 'darwin':
+                # the 64bit CUDA libs are in the same files as are named by the function above
+                rpaths.append(os.path.join(config.cuda.root,'lib64'))
+        if sys.platform != 'win32':
+            # the -rpath option is not understood by the Microsoft linker
+            for rpath in rpaths:
+                cmd.extend(['-Xlinker',','.join(['-rpath',rpath])])
+        cmd.extend([flag for flag in config.nvcc.flags.split(' ') if flag])
+        cmd.extend('-I%s'%idir for idir in include_dirs)
+        cmd.extend(['-o',lib_filename])
+        cmd.append(os.path.split(cppfilename)[-1])
+        cmd.extend(['-L%s'%ldir for ldir in lib_dirs])
+        cmd.extend(['-l%s'%l for l in libs])
+        if module_name != 'cuda_ndarray':
+            cmd.append("-lcuda_ndarray")
+        if sys.platform == 'darwin':
+            cmd.extend(darwin_python_lib.split())
+
+        if sys.platform == 'darwin':
+            done = False
+            while not done:
+                try:
+                    indexof = cmd.index('-framework')
+                    newarg = '-Xcompiler', ','.join(cmd[indexof:(indexof + 2)])
+                    cmd.pop(indexof) # Remove -framework
+                    cmd.pop(indexof) # Remove argument to -framework
+                    cmd.extend(newarg)
+                except ValueError, e:
+                    done = True
+
+        # Remove "-u Symbol" arguments, since they are usually not relevant
+        # for the new compilation, even if they were used for compiling python.
+        # If they are necessary, the nvcc syntax is "-U Symbol" with a capital U.
        done = False
        while not done:
            try:
-                indexof = cmd.index('-framework')
-                newarg = '-Xcompiler', ','.join(cmd[indexof:(indexof + 2)])
-                cmd.pop(indexof) # Remove -framework
-                cmd.pop(indexof) # Remove argument to -framework
-                cmd.extend(newarg)
+                indexof = cmd.index('-u')
+                cmd.pop(indexof) # Remove -u
+                cmd.pop(indexof) # Remove argument to -u
            except ValueError, e:
                done = True

-    # Remove "-u Symbol" arguments, since they are usually not relevant
-    # for the new compilation, even if they were used for compiling python.
-    # If they are necessary, the nvcc syntax is "-U Symbol" with a capital U.
-    done = False
-    while not done:
+        # Fix for MacOS X.
+        cmd = remove_python_framework_dir(cmd)
+
+        #cmd.append("--ptxas-options=-v")  #uncomment this to see register and shared-mem requirements
+        _logger.debug('Running cmd %s', ' '.join(cmd))
+        orig_dir = os.getcwd()
        try:
-            indexof = cmd.index('-u')
-            cmd.pop(indexof) # Remove -u
-            cmd.pop(indexof) # Remove argument to -u
-        except ValueError, e:
-            done = True
-
-    # Fix for MacOS X.
-    cmd = remove_python_framework_dir(cmd)
-
-    #cmd.append("--ptxas-options=-v")  #uncomment this to see register and shared-mem requirements
-    _logger.debug('Running cmd %s', ' '.join(cmd))
-    orig_dir = os.getcwd()
-    try:
-        os.chdir(location)
-        p = subprocess.Popen(
-                cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
-        nvcc_stdout, nvcc_stderr = p.communicate()[:2]
-    finally:
-        os.chdir(orig_dir)
-
-    if nvcc_stdout:
-        # this doesn't happen to my knowledge
-        print >> sys.stderr, "DEBUG: nvcc STDOUT", nvcc_stdout
-
-    for eline in nvcc_stderr.split('\n'):
-        if not eline:
-            continue
-        if 'skipping incompatible' in eline: #ld is skipping an incompatible library
-            continue
-        if 'declared but never referenced' in eline:
-            continue
-        if 'statement is unreachable' in eline:
-            continue
-        _logger.info("NVCC: %s", eline)
-
-    if p.returncode:
-        # filter the output from the compiler
-        for l in nvcc_stderr.split('\n'):
-            if not l:
+            os.chdir(location)
+            p = subprocess.Popen(
+                    cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+            nvcc_stdout, nvcc_stderr = p.communicate()[:2]
+        finally:
+            os.chdir(orig_dir)
+
+        if nvcc_stdout:
+            # this doesn't happen to my knowledge
+            print >> sys.stderr, "DEBUG: nvcc STDOUT", nvcc_stdout
+
+        for eline in nvcc_stderr.split('\n'):
+            if not eline:
+                continue
+            if 'skipping incompatible' in eline: #ld is skipping an incompatible library
                continue
-            # filter out the annoying declaration warnings
+            if 'declared but never referenced' in eline:
+                continue
+            if 'statement is unreachable' in eline:
+                continue
+            _logger.info("NVCC: %s", eline)

-            try:
-                if l[l.index(':'):].startswith(': warning: variable'):
-                    continue
-                if l[l.index(':'):].startswith(': warning: label'):
+        if p.returncode:
+            # filter the output from the compiler
+            for l in nvcc_stderr.split('\n'):
+                if not l:
                    continue
-            except Exception:
-                pass
-            print >> sys.stderr, l
-        print >> sys.stderr, '==============================='
-        for i, l in enumerate(src_code.split('\n')):
-            print >> sys.stderr,  i+1, l
-        raise Exception('nvcc return status', p.returncode, 'for cmd', ' '.join(cmd))
-
-    #touch the __init__ file
-    file(os.path.join(location, "__init__.py"),'w').close()
-    return dlimport(lib_filename)
+                # filter out the annoying declaration warnings
+
+                try:
+                    if l[l.index(':'):].startswith(': warning: variable'):
+                        continue
+                    if l[l.index(':'):].startswith(': warning: label'):
+                        continue
+                except Exception:
+                    pass
+                print >> sys.stderr, l
+            print >> sys.stderr, '==============================='
+            for i, l in enumerate(src_code.split('\n')):
+                print >> sys.stderr,  i+1, l
+            raise Exception('nvcc return status', p.returncode, 'for cmd', ' '.join(cmd))
+
+        #touch the __init__ file
+        file(os.path.join(location, "__init__.py"),'w').close()
+        return dlimport(lib_filename)


 def remove_python_framework_dir(cmd):

--- a/theano/sandbox/cuda/type.py
+++ b/theano/sandbox/cuda/type.py
@@ -12,7 +12,7 @@ try:
    # We must do those import to be able to create the full doc when nvcc
    # is not available
    import cuda_ndarray.cuda_ndarray as cuda
-    from theano.sandbox.cuda.nvcc_compiler import nvcc_module_compile_str
+    from theano.sandbox.cuda.nvcc_compiler import NVCC_compiler
    import cuda_ndarray
 except ImportError:
    pass
@@ -370,7 +370,7 @@ class CudaNdarrayType(Type):
        return (2,) # with assertion about refcounts

    def c_compiler(self):
-        return nvcc_module_compile_str
+        return NVCC_compiler

    def c_compile_args(self):
        ret = []

--- a/theano/scan_module/scan_perform_ext.py
+++ b/theano/scan_module/scan_perform_ext.py
@@ -50,8 +50,8 @@ except ImportError:
            loc = os.path.join(config.compiledir, dirname)
            if not os.path.exists(loc):
                os.mkdir(loc)
-            cmodule.gcc_module_compile_str(dirname, code, location=loc,
-                                           preargs = ['-pthread','-fwrapv',
+            cmodule.GCC_compiler.compile_str(dirname, code, location=loc,
+                                             preargs=['-pthread', '-fwrapv',
                                                      '-O2',
                                                      '-fno-strict-aliasing'])
            # Save version into the __init__.py file.