Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
P
pytensor
项目
项目
详情
活动
周期分析
仓库
仓库
文件
提交
分支
标签
贡献者
图表
比较
统计图
议题
0
议题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
日程
统计图
Wiki
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
testgroup
pytensor
Commits
cff27c13
提交
cff27c13
authored
2月 21, 2012
作者:
Frederic
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
make {nvcc,gcc}_module_compile_str a class with another function compile_args…
make {nvcc,gcc}_module_compile_str a class with another function compile_args that get added in the keys.
上级
2f2b424a
隐藏空白字符变更
内嵌
并排
正在显示
8 个修改的文件
包含
353 行增加
和
325 行删除
+353
-325
cc.py
theano/gof/cc.py
+7
-3
cmodule.py
theano/gof/cmodule.py
+128
-121
cutils.py
theano/gof/cutils.py
+1
-1
lazylinker_c.py
theano/gof/lazylinker_c.py
+1
-1
__init__.py
theano/sandbox/cuda/__init__.py
+4
-3
nvcc_compiler.py
theano/sandbox/cuda/nvcc_compiler.py
+208
-192
type.py
theano/sandbox/cuda/type.py
+2
-2
scan_perform_ext.py
theano/scan_module/scan_perform_ext.py
+2
-2
没有找到文件。
theano/gof/cc.py
浏览文件 @
cff27c13
...
...
@@ -622,6 +622,10 @@ class CLinker(link.Linker):
for
x
in
[
y
.
type
for
y
in
self
.
variables
]
+
[
y
.
op
for
y
in
self
.
node_order
]:
try
:
ret
+=
x
.
c_compile_args
()
except
utils
.
MethodNotDefined
:
pass
c_compiler
=
self
.
c_compiler
()
ret
+=
c_compiler
.
compile_args
()
ret
=
list
(
set
(
ret
))
#to remove duplicate
for
x
in
[
y
.
type
for
y
in
self
.
variables
]
+
[
y
.
op
for
y
in
self
.
node_order
]:
try
:
...
...
@@ -661,7 +665,7 @@ class CLinker(link.Linker):
raise
Exception
(
'Nodes have requested specific different compilers'
,
(
c_compiler
,
x_compiler
))
if
(
c_compiler
is
None
):
return
cmodule
.
gcc_module_compile_st
r
return
cmodule
.
GCC_compile
r
else
:
return
c_compiler
def
header_dirs
(
self
):
...
...
@@ -1007,7 +1011,7 @@ class CLinker(link.Linker):
libs
=
self
.
libraries
()
preargs
=
self
.
compile_args
()
compiler_name
=
c_compiler
.
__name__
if
compiler_name
==
'
nvcc_module_compile_st
r'
and
config
.
lib
.
amdlibm
:
if
compiler_name
==
'
NVCC_compile
r'
and
config
.
lib
.
amdlibm
:
# This lib does not work correctly with nvcc in device code.
# and newer version of g++ as 4.5.1.
# example of errors: "/usr/lib/gcc/x86_64-redhat-linux/4.5.1/include/mmintrin.h(49): error: identifier "__builtin_ia32_emms" is undefined"
...
...
@@ -1024,7 +1028,7 @@ class CLinker(link.Linker):
try
:
_logger
.
debug
(
"LOCATION
%
s"
,
str
(
location
))
try
:
module
=
c_compiler
(
module
=
c_compiler
.
compile_str
(
module_name
=
mod
.
name
,
src_code
=
src_code
,
location
=
location
,
...
...
theano/gof/cmodule.py
浏览文件 @
cff27c13
...
...
@@ -1312,140 +1312,147 @@ def gcc_version():
return
gcc_version_str
def
gcc_module_compile_str
(
module_name
,
src_code
,
location
=
None
,
include_dirs
=
[],
lib_dirs
=
[],
libs
=
[],
preargs
=
[]):
"""
:param module_name: string (this has been embedded in the src_code
:param src_code: a complete c or c++ source listing for the module
:param location: a pre-existing filesystem directory where the cpp file and
.so will be written
class
GCC_compiler
():
@staticmethod
def
compile_args
():
return
[]
@staticmethod
def
compile_str
(
module_name
,
src_code
,
location
=
None
,
include_dirs
=
[],
lib_dirs
=
[],
libs
=
[],
preargs
=
[]):
"""
:param module_name: string (this has been embedded in the src_code
:param include_dirs: a list of include directory names (each gets prefixed
with -I)
:param src_code: a complete c or c++ source listing for the module
:param lib_dirs: a list of library search path directory names (each gets
prefixed with -L)
:param location: a pre-existing filesystem directory where the
cpp file and .so will be written
:param libs: a list of libraries to link with (each gets prefixed with -l)
:param include_dirs: a list of include directory names (each
gets prefixed with -I)
:param preargs: a list of extra compiler arguments
:param lib_dirs: a list of library search path directory names
(each gets prefixed with -L)
:returns: dynamically-imported python module of the compiled code.
"""
#TODO: Do not do the dlimport in this function
:param libs: a list of libraries to link with (each gets
prefixed with -l)
if
preargs
is
None
:
preargs
=
[]
else
:
preargs
=
list
(
preargs
)
:param preargs: a list of extra compiler arguments
if
sys
.
platform
!=
'win32'
:
# Under Windows it looks like fPIC is useless. Compiler warning:
# '-fPIC ignored for target (all code is position independent)'
preargs
.
append
(
'-fPIC'
)
no_opt
=
False
:returns: dynamically-imported python module of the compiled code.
"""
#TODO: Do not do the dlimport in this function
include_dirs
=
include_dirs
+
std_include_dirs
()
libs
=
std_libs
()
+
libs
lib_dirs
=
std_lib_dirs
()
+
lib_dirs
if
preargs
is
None
:
preargs
=
[]
else
:
preargs
=
list
(
preargs
)
if
sys
.
platform
!=
'win32'
:
# Under Windows it looks like fPIC is useless. Compiler warning:
# '-fPIC ignored for target (all code is position independent)'
preargs
.
append
(
'-fPIC'
)
no_opt
=
False
include_dirs
=
include_dirs
+
std_include_dirs
()
libs
=
std_libs
()
+
libs
lib_dirs
=
std_lib_dirs
()
+
lib_dirs
#DSE Patch 1 for supporting OSX frameworks; add -framework Python
if
sys
.
platform
==
'darwin'
:
preargs
.
extend
([
'-undefined'
,
'dynamic_lookup'
])
python_inc
=
distutils
.
sysconfig
.
get_python_inc
()
# link with the framework library *if specifically requested*
# config.mac_framework_link is by default False, since on some mac
# installs linking with -framework causes a Bus Error
if
(
python_inc
.
count
(
'Python.framework'
)
>
0
and
config
.
cmodule
.
mac_framework_link
):
preargs
.
extend
([
'-framework'
,
'Python'
])
# Figure out whether the current Python executable is 32
# or 64 bit and compile accordingly.
n_bits
=
local_bitwidth
()
preargs
.
extend
([
'-m
%
s'
%
n_bits
])
_logger
.
debug
(
"OS X: compiling for
%
s bit architecture"
,
n_bits
)
# sometimes, the linker cannot find -lpython so we need to tell it
# explicitly where it is located
# this returns somepath/lib/python2.x
python_lib
=
distutils
.
sysconfig
.
get_python_lib
(
plat_specific
=
1
,
\
standard_lib
=
1
)
python_lib
=
os
.
path
.
dirname
(
python_lib
)
if
python_lib
not
in
lib_dirs
:
lib_dirs
.
append
(
python_lib
)
workdir
=
location
cppfilename
=
os
.
path
.
join
(
location
,
'mod.cpp'
)
cppfile
=
file
(
cppfilename
,
'w'
)
_logger
.
debug
(
'Writing module C++ code to
%
s'
,
cppfilename
)
ofiles
=
[]
rval
=
None
#DSE Patch 1 for supporting OSX frameworks; add -framework Python
if
sys
.
platform
==
'darwin'
:
preargs
.
extend
([
'-undefined'
,
'dynamic_lookup'
])
python_inc
=
distutils
.
sysconfig
.
get_python_inc
()
# link with the framework library *if specifically requested*
# config.mac_framework_link is by default False, since on some mac
# installs linking with -framework causes a Bus Error
if
(
python_inc
.
count
(
'Python.framework'
)
>
0
and
config
.
cmodule
.
mac_framework_link
):
preargs
.
extend
([
'-framework'
,
'Python'
])
# Figure out whether the current Python executable is 32 or 64 bit and
# compile accordingly.
n_bits
=
local_bitwidth
()
preargs
.
extend
([
'-m
%
s'
%
n_bits
])
_logger
.
debug
(
"OS X: compiling for
%
s bit architecture"
,
n_bits
)
# sometimes, the linker cannot find -lpython so we need to tell it
# explicitly where it is located
# this returns somepath/lib/python2.x
python_lib
=
distutils
.
sysconfig
.
get_python_lib
(
plat_specific
=
1
,
\
standard_lib
=
1
)
python_lib
=
os
.
path
.
dirname
(
python_lib
)
if
python_lib
not
in
lib_dirs
:
lib_dirs
.
append
(
python_lib
)
workdir
=
location
cppfilename
=
os
.
path
.
join
(
location
,
'mod.cpp'
)
cppfile
=
file
(
cppfilename
,
'w'
)
_logger
.
debug
(
'Writing module C++ code to
%
s'
,
cppfilename
)
ofiles
=
[]
rval
=
None
cppfile
.
write
(
src_code
)
# Avoid gcc warning "no newline at end of file".
if
not
src_code
.
endswith
(
'
\n
'
):
cppfile
.
write
(
'
\n
'
)
cppfile
.
close
()
cppfile
.
write
(
src_code
)
# Avoid gcc warning "no newline at end of file".
if
not
src_code
.
endswith
(
'
\n
'
):
cppfile
.
write
(
'
\n
'
)
cppfile
.
close
()
lib_filename
=
os
.
path
.
join
(
location
,
'
%
s.
%
s'
%
(
module_name
,
get_lib_extension
()))
lib_filename
=
os
.
path
.
join
(
location
,
'
%
s.
%
s'
%
(
module_name
,
get_lib_extension
()))
_logger
.
debug
(
'Generating shared lib
%
s'
,
lib_filename
)
cmd
=
[
'g++'
,
get_gcc_shared_library_arg
(),
'-g'
]
if
no_opt
:
cmd
.
extend
(
p
for
p
in
preargs
if
not
p
.
startswith
(
'-O'
))
else
:
cmd
.
extend
(
preargs
)
cxxflags
=
[
flag
for
flag
in
config
.
gcc
.
cxxflags
.
split
(
' '
)
if
flag
]
#print >> sys.stderr, config.gcc.cxxflags.split(' ')
cmd
.
extend
(
cxxflags
)
cmd
.
extend
(
'-I
%
s'
%
idir
for
idir
in
include_dirs
)
cmd
.
extend
([
'-o'
,
lib_filename
])
cmd
.
append
(
cppfilename
)
cmd
.
extend
([
'-L
%
s'
%
ldir
for
ldir
in
lib_dirs
])
cmd
.
extend
([
'-l
%
s'
%
l
for
l
in
libs
])
#print >> sys.stderr, 'COMPILING W CMD', cmd
_logger
.
debug
(
'Running cmd:
%
s'
,
' '
.
join
(
cmd
))
def
print_command_line_error
():
# Print command line when a problem occurred.
print
>>
sys
.
stderr
,
(
"Problem occurred during compilation with the "
"command line below:"
)
print
>>
sys
.
stderr
,
' '
.
join
(
cmd
)
_logger
.
debug
(
'Generating shared lib
%
s'
,
lib_filename
)
cmd
=
[
'g++'
,
get_gcc_shared_library_arg
(),
'-g'
]
if
no_opt
:
cmd
.
extend
(
p
for
p
in
preargs
if
not
p
.
startswith
(
'-O'
))
else
:
cmd
.
extend
(
preargs
)
cxxflags
=
[
flag
for
flag
in
config
.
gcc
.
cxxflags
.
split
(
' '
)
if
flag
]
#print >> sys.stderr, config.gcc.cxxflags.split(' ')
cmd
.
extend
(
cxxflags
)
cmd
.
extend
(
'-I
%
s'
%
idir
for
idir
in
include_dirs
)
cmd
.
extend
([
'-o'
,
lib_filename
])
cmd
.
append
(
cppfilename
)
cmd
.
extend
([
'-L
%
s'
%
ldir
for
ldir
in
lib_dirs
])
cmd
.
extend
([
'-l
%
s'
%
l
for
l
in
libs
])
#print >> sys.stderr, 'COMPILING W CMD', cmd
_logger
.
debug
(
'Running cmd:
%
s'
,
' '
.
join
(
cmd
))
def
print_command_line_error
():
# Print command line when a problem occurred.
print
>>
sys
.
stderr
,
(
"Problem occurred during compilation with the "
"command line below:"
)
print
>>
sys
.
stderr
,
' '
.
join
(
cmd
)
try
:
p
=
subprocess
.
Popen
(
cmd
,
stderr
=
subprocess
.
PIPE
)
compile_stderr
=
p
.
communicate
()[
1
]
except
Exception
:
# An exception can occur e.g. if `g++` is not found.
print_command_line_error
()
raise
try
:
p
=
subprocess
.
Popen
(
cmd
,
stderr
=
subprocess
.
PIPE
)
compile_stderr
=
p
.
communicate
()[
1
]
except
Exception
:
# An exception can occur e.g. if `g++` is not found.
print_command_line_error
()
raise
status
=
p
.
returncode
if
status
:
print
'==============================='
for
i
,
l
in
enumerate
(
src_code
.
split
(
'
\n
'
)):
#gcc put its messages to stderr, so we add ours now
print
>>
sys
.
stderr
,
'
%05
i
\t
%
s'
%
(
i
+
1
,
l
)
print
'==============================='
print_command_line_error
()
# Print errors just below the command line.
print
compile_stderr
# We replace '\n' by '. ' in the error message because when Python
# prints the exception, having '\n' in the text makes it more difficult
# to read.
raise
Exception
(
'Compilation failed (return status=
%
s):
%
s'
%
(
status
,
compile_stderr
.
replace
(
'
\n
'
,
'. '
)))
#touch the __init__ file
file
(
os
.
path
.
join
(
location
,
"__init__.py"
),
'w'
)
.
close
()
return
dlimport
(
lib_filename
)
status
=
p
.
returncode
if
status
:
print
'==============================='
for
i
,
l
in
enumerate
(
src_code
.
split
(
'
\n
'
)):
#gcc put its messages to stderr, so we add ours now
print
>>
sys
.
stderr
,
'
%05
i
\t
%
s'
%
(
i
+
1
,
l
)
print
'==============================='
print_command_line_error
()
# Print errors just below the command line.
print
compile_stderr
# We replace '\n' by '. ' in the error message because when Python
# prints the exception, having '\n' in the text makes it more difficult
# to read.
raise
Exception
(
'Compilation failed (return status=
%
s):
%
s'
%
(
status
,
compile_stderr
.
replace
(
'
\n
'
,
'. '
)))
#touch the __init__ file
file
(
os
.
path
.
join
(
location
,
"__init__.py"
),
'w'
)
.
close
()
return
dlimport
(
lib_filename
)
def
icc_module_compile_str
(
*
args
):
...
...
theano/gof/cutils.py
浏览文件 @
cff27c13
...
...
@@ -70,7 +70,7 @@ except ImportError:
if
not
os
.
path
.
exists
(
loc
):
os
.
mkdir
(
loc
)
cmodule
.
gcc_module_
compile_str
(
'cutils_ext'
,
code
,
location
=
loc
)
cmodule
.
GCC_compiler
.
compile_str
(
'cutils_ext'
,
code
,
location
=
loc
)
from
cutils_ext.cutils_ext
import
*
finally
:
...
...
theano/gof/lazylinker_c.py
浏览文件 @
cff27c13
...
...
@@ -53,7 +53,7 @@ except ImportError:
loc
=
os
.
path
.
join
(
config
.
compiledir
,
dirname
)
if
not
os
.
path
.
exists
(
loc
):
os
.
mkdir
(
loc
)
cmodule
.
gcc_module_
compile_str
(
dirname
,
code
,
location
=
loc
)
cmodule
.
GCC_compiler
.
compile_str
(
dirname
,
code
,
location
=
loc
)
# Save version into the __init__.py file.
init_py
=
os
.
path
.
join
(
loc
,
'__init__.py'
)
open
(
init_py
,
'w'
)
.
write
(
'_version =
%
s
\n
'
%
version
)
...
...
theano/sandbox/cuda/__init__.py
浏览文件 @
cff27c13
...
...
@@ -87,7 +87,7 @@ libcuda_ndarray_so = os.path.join(cuda_ndarray_loc,
# Add the theano cache directory's cuda_ndarray subdirectory to the
# list of places that are hard-coded into compiled modules' runtime
# library search list. This works in conjunction with
# nvcc_compiler.
nvcc_module_
compile_str which adds this folder during
# nvcc_compiler.
NVCC_compiler.
compile_str which adds this folder during
# compilation with -L and also adds -lcuda_ndarray when compiling
# modules.
nvcc_compiler
.
add_standard_rpath
(
cuda_ndarray_loc
)
...
...
@@ -117,7 +117,8 @@ try:
if
not
os
.
path
.
exists
(
cuda_ndarray_loc
):
os
.
makedirs
(
cuda_ndarray_loc
)
nvcc_compiler
.
nvcc_module_compile_str
(
compiler
=
nvcc_compiler
.
NVCC_compiler
()
compiler
.
compile_str
(
'cuda_ndarray'
,
code
,
location
=
cuda_ndarray_loc
,
...
...
@@ -130,7 +131,7 @@ except Exception, e:
if
cuda_available
:
# If necessary,
# create a symlink called libcuda_ndarray.so
# which nvcc_
module_compile_st
r uses when linking
# which nvcc_
compiler.NVCC_compile
r uses when linking
# any module except "cuda_ndarray" itself.
try
:
open
(
libcuda_ndarray_so
)
.
close
()
...
...
theano/sandbox/cuda/nvcc_compiler.py
浏览文件 @
cff27c13
...
...
@@ -72,210 +72,226 @@ rpath_defaults = []
def
add_standard_rpath
(
rpath
):
rpath_defaults
.
append
(
rpath
)
def
nvcc_module_compile_str
(
module_name
,
src_code
,
location
=
None
,
include_dirs
=
[],
lib_dirs
=
[],
libs
=
[],
preargs
=
[],
rpaths
=
rpath_defaults
):
"""
:param module_name: string (this has been embedded in the src_code
:param src_code: a complete c or c++ source listing for the module
:param location: a pre-existing filesystem directory where the cpp file and .so will be written
:param include_dirs: a list of include directory names (each gets prefixed with -I)
:param lib_dirs: a list of library search path directory names (each gets prefixed with -L)
:param libs: a list of libraries to link with (each gets prefixed with -l)
:param preargs: a list of extra compiler arguments
:param rpaths: list of rpaths to use with Xlinker. Defaults to `rpath_defaults`.
:returns: dynamically-imported python module of the compiled code.
:note 1: On Windows 7 with nvcc 3.1 we need to compile in the real directory
Otherwise nvcc never finish.
"""
rpaths
=
list
(
rpaths
)
if
sys
.
platform
==
"win32"
:
# Remove some compilation args that cl.exe does not understand.
# cl.exe is the compiler used by nvcc on Windows.
for
a
in
[
"-Wno-write-strings"
,
"-Wno-unused-label"
,
"-Wno-unused-variable"
,
"-fno-math-errno"
]:
if
a
in
preargs
:
preargs
.
remove
(
a
)
if
preargs
is
None
:
preargs
=
[]
else
:
preargs
=
list
(
preargs
)
if
sys
.
platform
!=
'win32'
:
preargs
.
append
(
'-fPIC'
)
no_opt
=
False
cuda_root
=
config
.
cuda
.
root
#The include dirs gived by the user should have precedence over
#the standards ones.
include_dirs
=
include_dirs
+
std_include_dirs
()
if
os
.
path
.
abspath
(
os
.
path
.
split
(
__file__
)[
0
])
not
in
include_dirs
:
include_dirs
.
append
(
os
.
path
.
abspath
(
os
.
path
.
split
(
__file__
)[
0
]))
libs
=
std_libs
()
+
libs
if
'cudart'
not
in
libs
:
libs
.
append
(
'cudart'
)
lib_dirs
=
std_lib_dirs
()
+
lib_dirs
if
cuda_root
:
lib_dirs
.
append
(
os
.
path
.
join
(
cuda_root
,
'lib'
))
# from Benjamin Schrauwen April 14 2010
if
sys
.
platform
!=
'darwin'
:
# No 64 bit CUDA libraries available on the mac, yet..
lib_dirs
.
append
(
os
.
path
.
join
(
cuda_root
,
'lib64'
))
if
sys
.
platform
==
'darwin'
:
# On the mac, nvcc is not able to link using -framework Python, so we have
# manually add the correct library and paths
darwin_python_lib
=
commands
.
getoutput
(
'python-config --ldflags'
)
else
:
# sometimes, the linker cannot find -lpython so we need to tell it
# explicitly where it is located
# this returns somepath/lib/python2.x
python_lib
=
distutils
.
sysconfig
.
get_python_lib
(
plat_specific
=
1
,
\
standard_lib
=
1
)
python_lib
=
os
.
path
.
dirname
(
python_lib
)
if
python_lib
not
in
lib_dirs
:
lib_dirs
.
append
(
python_lib
)
cppfilename
=
os
.
path
.
join
(
location
,
'mod.cu'
)
cppfile
=
file
(
cppfilename
,
'w'
)
_logger
.
debug
(
'Writing module C++ code to
%
s'
,
cppfilename
)
ofiles
=
[]
rval
=
None
cppfile
.
write
(
src_code
)
cppfile
.
close
()
lib_filename
=
os
.
path
.
join
(
location
,
'
%
s.
%
s'
%
(
module_name
,
get_lib_extension
()))
_logger
.
debug
(
'Generating shared lib
%
s'
,
lib_filename
)
# TODO: Why do these args cause failure on gtx285 that has 1.3 compute capability? '--gpu-architecture=compute_13', '--gpu-code=compute_13',
preargs1
=
[
pa
for
pa
in
preargs
if
pa
.
startswith
(
'-O'
)
or
pa
.
startswith
(
'--maxrregcount='
)]
#nvcc argument
preargs2
=
[
pa
for
pa
in
preargs
if
pa
not
in
preargs1
]
#other arguments
cmd
=
[
nvcc_path
,
'-shared'
,
'-g'
]
+
preargs1
if
config
.
nvcc
.
compiler_bindir
:
cmd
.
extend
([
'--compiler-bindir'
,
config
.
nvcc
.
compiler_bindir
])
if
sys
.
platform
==
'win32'
:
# add flags for Microsoft compiler to create .pdb files
preargs2
.
append
(
'/Zi'
)
cmd
.
extend
([
'-Xlinker'
,
'/DEBUG'
])
if
sys
.
platform
!=
'win32'
:
if
local_bitwidth
()
==
64
:
cmd
.
append
(
'-m64'
)
preargs2
.
append
(
'-m64'
)
class
NVCC_compiler
():
@staticmethod
def
compile_args
():
"""
This args will be received by compile_str() in the preargs paramter.
They will also be included in the "hard" part of the key module.
"""
return
[]
# flags = [flag for flag in config.nvcc.flags.split(' ') if flag]
# cuda_ndarray_cuh_hash = hash_from_file(
# os.path.join(os.path.split(__file__)[0], 'cuda_ndarray.cuh'))
# cuda_macro = '-DCUDA_NDARRAY_CUH=' + cuda_ndarray_cuh_hash
# return [cuda_macro]
@staticmethod
def
compile_str
(
module_name
,
src_code
,
location
=
None
,
include_dirs
=
[],
lib_dirs
=
[],
libs
=
[],
preargs
=
[],
rpaths
=
rpath_defaults
):
"""
:param module_name: string (this has been embedded in the src_code
:param src_code: a complete c or c++ source listing for the module
:param location: a pre-existing filesystem directory where the cpp file and .so will be written
:param include_dirs: a list of include directory names (each gets prefixed with -I)
:param lib_dirs: a list of library search path directory names (each gets prefixed with -L)
:param libs: a list of libraries to link with (each gets prefixed with -l)
:param preargs: a list of extra compiler arguments
:param rpaths: list of rpaths to use with Xlinker. Defaults to `rpath_defaults`.
:returns: dynamically-imported python module of the compiled code.
:note 1: On Windows 7 with nvcc 3.1 we need to compile in the real directory
Otherwise nvcc never finish.
"""
rpaths
=
list
(
rpaths
)
if
sys
.
platform
==
"win32"
:
# Remove some compilation args that cl.exe does not understand.
# cl.exe is the compiler used by nvcc on Windows.
for
a
in
[
"-Wno-write-strings"
,
"-Wno-unused-label"
,
"-Wno-unused-variable"
,
"-fno-math-errno"
]:
if
a
in
preargs
:
preargs
.
remove
(
a
)
if
preargs
is
None
:
preargs
=
[]
else
:
preargs
=
list
(
preargs
)
if
sys
.
platform
!=
'win32'
:
preargs
.
append
(
'-fPIC'
)
no_opt
=
False
cuda_root
=
config
.
cuda
.
root
#The include dirs gived by the user should have precedence over
#the standards ones.
include_dirs
=
include_dirs
+
std_include_dirs
()
if
os
.
path
.
abspath
(
os
.
path
.
split
(
__file__
)[
0
])
not
in
include_dirs
:
include_dirs
.
append
(
os
.
path
.
abspath
(
os
.
path
.
split
(
__file__
)[
0
]))
libs
=
std_libs
()
+
libs
if
'cudart'
not
in
libs
:
libs
.
append
(
'cudart'
)
lib_dirs
=
std_lib_dirs
()
+
lib_dirs
if
cuda_root
:
lib_dirs
.
append
(
os
.
path
.
join
(
cuda_root
,
'lib'
))
# from Benjamin Schrauwen April 14 2010
if
sys
.
platform
!=
'darwin'
:
# No 64 bit CUDA libraries available on the mac, yet..
lib_dirs
.
append
(
os
.
path
.
join
(
cuda_root
,
'lib64'
))
if
sys
.
platform
==
'darwin'
:
# On the mac, nvcc is not able to link using -framework Python, so we have
# manually add the correct library and paths
darwin_python_lib
=
commands
.
getoutput
(
'python-config --ldflags'
)
else
:
cmd
.
append
(
'-m32'
)
preargs2
.
append
(
'-m32'
)
if
len
(
preargs2
)
>
0
:
cmd
.
extend
([
'-Xcompiler'
,
','
.
join
(
preargs2
)])
if
config
.
cuda
.
root
and
os
.
path
.
exists
(
os
.
path
.
join
(
config
.
cuda
.
root
,
'lib'
)):
rpaths
.
append
(
os
.
path
.
join
(
config
.
cuda
.
root
,
'lib'
))
if
sys
.
platform
!=
'darwin'
:
# the 64bit CUDA libs are in the same files as are named by the function above
rpaths
.
append
(
os
.
path
.
join
(
config
.
cuda
.
root
,
'lib64'
))
if
sys
.
platform
!=
'win32'
:
# the -rpath option is not understood by the Microsoft linker
for
rpath
in
rpaths
:
cmd
.
extend
([
'-Xlinker'
,
','
.
join
([
'-rpath'
,
rpath
])])
cmd
.
extend
([
flag
for
flag
in
config
.
nvcc
.
flags
.
split
(
' '
)
if
flag
])
cmd
.
extend
(
'-I
%
s'
%
idir
for
idir
in
include_dirs
)
cmd
.
extend
([
'-o'
,
lib_filename
])
cmd
.
append
(
os
.
path
.
split
(
cppfilename
)[
-
1
])
cmd
.
extend
([
'-L
%
s'
%
ldir
for
ldir
in
lib_dirs
])
cmd
.
extend
([
'-l
%
s'
%
l
for
l
in
libs
])
if
module_name
!=
'cuda_ndarray'
:
cmd
.
append
(
"-lcuda_ndarray"
)
if
sys
.
platform
==
'darwin'
:
cmd
.
extend
(
darwin_python_lib
.
split
())
if
sys
.
platform
==
'darwin'
:
# sometimes, the linker cannot find -lpython so we need to tell it
# explicitly where it is located
# this returns somepath/lib/python2.x
python_lib
=
distutils
.
sysconfig
.
get_python_lib
(
plat_specific
=
1
,
\
standard_lib
=
1
)
python_lib
=
os
.
path
.
dirname
(
python_lib
)
if
python_lib
not
in
lib_dirs
:
lib_dirs
.
append
(
python_lib
)
cppfilename
=
os
.
path
.
join
(
location
,
'mod.cu'
)
cppfile
=
file
(
cppfilename
,
'w'
)
_logger
.
debug
(
'Writing module C++ code to
%
s'
,
cppfilename
)
ofiles
=
[]
rval
=
None
cppfile
.
write
(
src_code
)
cppfile
.
close
()
lib_filename
=
os
.
path
.
join
(
location
,
'
%
s.
%
s'
%
(
module_name
,
get_lib_extension
()))
_logger
.
debug
(
'Generating shared lib
%
s'
,
lib_filename
)
# TODO: Why do these args cause failure on gtx285 that has 1.3 compute capability? '--gpu-architecture=compute_13', '--gpu-code=compute_13',
preargs1
=
[
pa
for
pa
in
preargs
if
pa
.
startswith
(
'-O'
)
or
pa
.
startswith
(
'--maxrregcount='
)]
#nvcc argument
preargs2
=
[
pa
for
pa
in
preargs
if
pa
not
in
preargs1
]
#other arguments
cmd
=
[
nvcc_path
,
'-shared'
,
'-g'
]
+
preargs1
if
config
.
nvcc
.
compiler_bindir
:
cmd
.
extend
([
'--compiler-bindir'
,
config
.
nvcc
.
compiler_bindir
])
if
sys
.
platform
==
'win32'
:
# add flags for Microsoft compiler to create .pdb files
preargs2
.
append
(
'/Zi'
)
cmd
.
extend
([
'-Xlinker'
,
'/DEBUG'
])
if
sys
.
platform
!=
'win32'
:
if
local_bitwidth
()
==
64
:
cmd
.
append
(
'-m64'
)
preargs2
.
append
(
'-m64'
)
else
:
cmd
.
append
(
'-m32'
)
preargs2
.
append
(
'-m32'
)
if
len
(
preargs2
)
>
0
:
cmd
.
extend
([
'-Xcompiler'
,
','
.
join
(
preargs2
)])
if
config
.
cuda
.
root
and
os
.
path
.
exists
(
os
.
path
.
join
(
config
.
cuda
.
root
,
'lib'
)):
rpaths
.
append
(
os
.
path
.
join
(
config
.
cuda
.
root
,
'lib'
))
if
sys
.
platform
!=
'darwin'
:
# the 64bit CUDA libs are in the same files as are named by the function above
rpaths
.
append
(
os
.
path
.
join
(
config
.
cuda
.
root
,
'lib64'
))
if
sys
.
platform
!=
'win32'
:
# the -rpath option is not understood by the Microsoft linker
for
rpath
in
rpaths
:
cmd
.
extend
([
'-Xlinker'
,
','
.
join
([
'-rpath'
,
rpath
])])
cmd
.
extend
([
flag
for
flag
in
config
.
nvcc
.
flags
.
split
(
' '
)
if
flag
])
cmd
.
extend
(
'-I
%
s'
%
idir
for
idir
in
include_dirs
)
cmd
.
extend
([
'-o'
,
lib_filename
])
cmd
.
append
(
os
.
path
.
split
(
cppfilename
)[
-
1
])
cmd
.
extend
([
'-L
%
s'
%
ldir
for
ldir
in
lib_dirs
])
cmd
.
extend
([
'-l
%
s'
%
l
for
l
in
libs
])
if
module_name
!=
'cuda_ndarray'
:
cmd
.
append
(
"-lcuda_ndarray"
)
if
sys
.
platform
==
'darwin'
:
cmd
.
extend
(
darwin_python_lib
.
split
())
if
sys
.
platform
==
'darwin'
:
done
=
False
while
not
done
:
try
:
indexof
=
cmd
.
index
(
'-framework'
)
newarg
=
'-Xcompiler'
,
','
.
join
(
cmd
[
indexof
:(
indexof
+
2
)])
cmd
.
pop
(
indexof
)
# Remove -framework
cmd
.
pop
(
indexof
)
# Remove argument to -framework
cmd
.
extend
(
newarg
)
except
ValueError
,
e
:
done
=
True
# Remove "-u Symbol" arguments, since they are usually not relevant
# for the new compilation, even if they were used for compiling python.
# If they are necessary, the nvcc syntax is "-U Symbol" with a capital U.
done
=
False
while
not
done
:
try
:
indexof
=
cmd
.
index
(
'-framework'
)
newarg
=
'-Xcompiler'
,
','
.
join
(
cmd
[
indexof
:(
indexof
+
2
)])
cmd
.
pop
(
indexof
)
# Remove -framework
cmd
.
pop
(
indexof
)
# Remove argument to -framework
cmd
.
extend
(
newarg
)
indexof
=
cmd
.
index
(
'-u'
)
cmd
.
pop
(
indexof
)
# Remove -u
cmd
.
pop
(
indexof
)
# Remove argument to -u
except
ValueError
,
e
:
done
=
True
# Remove "-u Symbol" arguments, since they are usually not relevant
# for the new compilation, even if they were used for compiling python.
# If they are necessary, the nvcc syntax is "-U Symbol" with a capital U.
done
=
False
while
not
done
:
# Fix for MacOS X.
cmd
=
remove_python_framework_dir
(
cmd
)
#cmd.append("--ptxas-options=-v") #uncomment this to see register and shared-mem requirements
_logger
.
debug
(
'Running cmd
%
s'
,
' '
.
join
(
cmd
))
orig_dir
=
os
.
getcwd
()
try
:
indexof
=
cmd
.
index
(
'-u'
)
cmd
.
pop
(
indexof
)
# Remove -u
cmd
.
pop
(
indexof
)
# Remove argument to -u
except
ValueError
,
e
:
done
=
True
# Fix for MacOS X.
cmd
=
remove_python_framework_dir
(
cmd
)
#cmd.append("--ptxas-options=-v") #uncomment this to see register and shared-mem requirements
_logger
.
debug
(
'Running cmd
%
s'
,
' '
.
join
(
cmd
))
orig_dir
=
os
.
getcwd
()
try
:
os
.
chdir
(
location
)
p
=
subprocess
.
Popen
(
cmd
,
stdout
=
subprocess
.
PIPE
,
stderr
=
subprocess
.
PIPE
)
nvcc_stdout
,
nvcc_stderr
=
p
.
communicate
()[:
2
]
finally
:
os
.
chdir
(
orig_dir
)
if
nvcc_stdout
:
# this doesn't happen to my knowledge
print
>>
sys
.
stderr
,
"DEBUG: nvcc STDOUT"
,
nvcc_stdout
for
eline
in
nvcc_stderr
.
split
(
'
\n
'
):
if
not
eline
:
continue
if
'skipping incompatible'
in
eline
:
#ld is skipping an incompatible library
continue
if
'declared but never referenced'
in
eline
:
continue
if
'statement is unreachable'
in
eline
:
continue
_logger
.
info
(
"NVCC:
%
s"
,
eline
)
if
p
.
returncode
:
# filter the output from the compiler
for
l
in
nvcc_stderr
.
split
(
'
\n
'
):
if
not
l
:
os
.
chdir
(
location
)
p
=
subprocess
.
Popen
(
cmd
,
stdout
=
subprocess
.
PIPE
,
stderr
=
subprocess
.
PIPE
)
nvcc_stdout
,
nvcc_stderr
=
p
.
communicate
()[:
2
]
finally
:
os
.
chdir
(
orig_dir
)
if
nvcc_stdout
:
# this doesn't happen to my knowledge
print
>>
sys
.
stderr
,
"DEBUG: nvcc STDOUT"
,
nvcc_stdout
for
eline
in
nvcc_stderr
.
split
(
'
\n
'
):
if
not
eline
:
continue
if
'skipping incompatible'
in
eline
:
#ld is skipping an incompatible library
continue
# filter out the annoying declaration warnings
if
'declared but never referenced'
in
eline
:
continue
if
'statement is unreachable'
in
eline
:
continue
_logger
.
info
(
"NVCC:
%
s"
,
eline
)
try
:
if
l
[
l
.
index
(
':'
):]
.
startswith
(
': warning: variable'
):
continue
if
l
[
l
.
index
(
':'
):]
.
startswith
(
': warning: label'
)
:
if
p
.
returncode
:
# filter the output from the compiler
for
l
in
nvcc_stderr
.
split
(
'
\n
'
):
if
not
l
:
continue
except
Exception
:
pass
print
>>
sys
.
stderr
,
l
print
>>
sys
.
stderr
,
'==============================='
for
i
,
l
in
enumerate
(
src_code
.
split
(
'
\n
'
)):
print
>>
sys
.
stderr
,
i
+
1
,
l
raise
Exception
(
'nvcc return status'
,
p
.
returncode
,
'for cmd'
,
' '
.
join
(
cmd
))
#touch the __init__ file
file
(
os
.
path
.
join
(
location
,
"__init__.py"
),
'w'
)
.
close
()
return
dlimport
(
lib_filename
)
# filter out the annoying declaration warnings
try
:
if
l
[
l
.
index
(
':'
):]
.
startswith
(
': warning: variable'
):
continue
if
l
[
l
.
index
(
':'
):]
.
startswith
(
': warning: label'
):
continue
except
Exception
:
pass
print
>>
sys
.
stderr
,
l
print
>>
sys
.
stderr
,
'==============================='
for
i
,
l
in
enumerate
(
src_code
.
split
(
'
\n
'
)):
print
>>
sys
.
stderr
,
i
+
1
,
l
raise
Exception
(
'nvcc return status'
,
p
.
returncode
,
'for cmd'
,
' '
.
join
(
cmd
))
#touch the __init__ file
file
(
os
.
path
.
join
(
location
,
"__init__.py"
),
'w'
)
.
close
()
return
dlimport
(
lib_filename
)
def
remove_python_framework_dir
(
cmd
):
...
...
theano/sandbox/cuda/type.py
浏览文件 @
cff27c13
...
...
@@ -12,7 +12,7 @@ try:
# We must do those import to be able to create the full doc when nvcc
# is not available
import
cuda_ndarray.cuda_ndarray
as
cuda
from
theano.sandbox.cuda.nvcc_compiler
import
nvcc_module_compile_st
r
from
theano.sandbox.cuda.nvcc_compiler
import
NVCC_compile
r
import
cuda_ndarray
except
ImportError
:
pass
...
...
@@ -370,7 +370,7 @@ class CudaNdarrayType(Type):
return
(
2
,)
# with assertion about refcounts
def
c_compiler
(
self
):
return
nvcc_module_compile_st
r
return
NVCC_compile
r
def
c_compile_args
(
self
):
ret
=
[]
...
...
theano/scan_module/scan_perform_ext.py
浏览文件 @
cff27c13
...
...
@@ -50,8 +50,8 @@ except ImportError:
loc
=
os
.
path
.
join
(
config
.
compiledir
,
dirname
)
if
not
os
.
path
.
exists
(
loc
):
os
.
mkdir
(
loc
)
cmodule
.
gcc_module_
compile_str
(
dirname
,
code
,
location
=
loc
,
preargs
=
[
'-pthread'
,
'-fwrapv'
,
cmodule
.
GCC_compiler
.
compile_str
(
dirname
,
code
,
location
=
loc
,
preargs
=
[
'-pthread'
,
'-fwrapv'
,
'-O2'
,
'-fno-strict-aliasing'
])
# Save version into the __init__.py file.
...
...
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论