Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
P
pytensor
项目
项目
详情
活动
周期分析
仓库
仓库
文件
提交
分支
标签
贡献者
图表
比较
统计图
议题
0
议题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
日程
统计图
Wiki
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
testgroup
pytensor
Commits
5d427eab
提交
5d427eab
authored
5月 11, 2012
作者:
Frederic
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
pep8
上级
8e99e640
隐藏空白字符变更
内嵌
并排
正在显示
1 个修改的文件
包含
77 行增加
和
52 行删除
+77
-52
nvcc_compiler.py
theano/sandbox/cuda/nvcc_compiler.py
+77
-52
没有找到文件。
theano/sandbox/cuda/nvcc_compiler.py
浏览文件 @
5d427eab
...
@@ -8,16 +8,18 @@ import sys
...
@@ -8,16 +8,18 @@ import sys
import
warnings
import
warnings
from
theano.gof.cc
import
hash_from_file
from
theano.gof.cc
import
hash_from_file
from
theano.gof.cmodule
import
(
std_libs
,
std_lib_dirs
,
std_include_dirs
,
dlimport
,
from
theano.gof.cmodule
import
(
std_libs
,
std_lib_dirs
,
get_lib_extension
,
local_bitwidth
)
std_include_dirs
,
dlimport
,
get_lib_extension
,
local_bitwidth
)
_logger
=
logging
.
getLogger
(
"theano.sandbox.cuda.nvcc_compiler"
)
_logger
=
logging
.
getLogger
(
"theano.sandbox.cuda.nvcc_compiler"
)
_logger
.
setLevel
(
logging
.
WARN
)
_logger
.
setLevel
(
logging
.
WARN
)
from
theano.configparser
import
config
,
AddConfigVar
,
StrParam
,
BoolParam
from
theano.configparser
import
config
,
AddConfigVar
,
StrParam
,
BoolParam
AddConfigVar
(
'nvcc.compiler_bindir'
,
AddConfigVar
(
'nvcc.compiler_bindir'
,
"If defined, nvcc compiler driver will seek g++ and gcc in this directory"
,
"If defined, nvcc compiler driver will seek g++ and gcc"
" in this directory"
,
StrParam
(
""
))
StrParam
(
""
))
AddConfigVar
(
'cuda.nvccflags'
,
AddConfigVar
(
'cuda.nvccflags'
,
...
@@ -40,6 +42,8 @@ AddConfigVar('nvcc.fastmath',
...
@@ -40,6 +42,8 @@ AddConfigVar('nvcc.fastmath',
nvcc_path
=
'nvcc'
nvcc_path
=
'nvcc'
nvcc_version
=
None
nvcc_version
=
None
def
is_nvcc_available
():
def
is_nvcc_available
():
"""Return True iff the nvcc compiler is found."""
"""Return True iff the nvcc compiler is found."""
try
:
try
:
...
@@ -47,29 +51,33 @@ def is_nvcc_available():
...
@@ -47,29 +51,33 @@ def is_nvcc_available():
stderr
=
subprocess
.
PIPE
)
stderr
=
subprocess
.
PIPE
)
p
.
wait
()
p
.
wait
()
s
=
p
.
stdout
.
readlines
()[
-
1
]
.
split
(
','
)[
1
]
.
strip
()
.
split
()
s
=
p
.
stdout
.
readlines
()[
-
1
]
.
split
(
','
)[
1
]
.
strip
()
.
split
()
assert
s
[
0
]
==
'release'
assert
s
[
0
]
==
'release'
global
nvcc_version
global
nvcc_version
nvcc_version
=
s
[
1
]
nvcc_version
=
s
[
1
]
return
True
return
True
except
Exception
:
except
Exception
:
#try to find nvcc into cuda.root
#try to find nvcc into cuda.root
p
=
os
.
path
.
join
(
config
.
cuda
.
root
,
'bin'
,
'nvcc'
)
p
=
os
.
path
.
join
(
config
.
cuda
.
root
,
'bin'
,
'nvcc'
)
if
os
.
path
.
exists
(
p
):
if
os
.
path
.
exists
(
p
):
global
nvcc_path
global
nvcc_path
nvcc_path
=
p
nvcc_path
=
p
return
True
return
True
else
:
return
False
else
:
return
False
def
set_cuda_root
():
def
set_cuda_root
():
s
=
os
.
getenv
(
"PATH"
)
s
=
os
.
getenv
(
"PATH"
)
if
not
s
:
if
not
s
:
return
return
for
dir
in
s
.
split
(
os
.
path
.
pathsep
):
for
dir
in
s
.
split
(
os
.
path
.
pathsep
):
if
os
.
path
.
exists
(
os
.
path
.
join
(
dir
,
"nvcc"
)):
if
os
.
path
.
exists
(
os
.
path
.
join
(
dir
,
"nvcc"
)):
config
.
cuda
.
root
=
os
.
path
.
split
(
dir
)[
0
]
config
.
cuda
.
root
=
os
.
path
.
split
(
dir
)[
0
]
return
return
rpath_defaults
=
[]
rpath_defaults
=
[]
def
add_standard_rpath
(
rpath
):
def
add_standard_rpath
(
rpath
):
rpath_defaults
.
append
(
rpath
)
rpath_defaults
.
append
(
rpath
)
...
@@ -98,35 +106,41 @@ class NVCC_compiler(object):
...
@@ -98,35 +106,41 @@ class NVCC_compiler(object):
module_name
,
src_code
,
module_name
,
src_code
,
location
=
None
,
include_dirs
=
[],
lib_dirs
=
[],
libs
=
[],
preargs
=
[],
location
=
None
,
include_dirs
=
[],
lib_dirs
=
[],
libs
=
[],
preargs
=
[],
rpaths
=
rpath_defaults
):
rpaths
=
rpath_defaults
):
"""
""":param module_name: string (this has been embedded in the src_code
:param module_name: string (this has been embedded in the src_code
:param src_code: a complete c or c++ source listing for the module
:param src_code: a complete c or c++ source listing for the module
:param location: a pre-existing filesystem directory where the cpp file and .so will be written
:param location: a pre-existing filesystem directory where the
:param include_dirs: a list of include directory names (each gets prefixed with -I)
cpp file and .so will be written
:param lib_dirs: a list of library search path directory names (each gets prefixed with -L)
:param include_dirs: a list of include directory names
:param libs: a list of libraries to link with (each gets prefixed with -l)
(each gets prefixed with -I)
:param lib_dirs: a list of library search path directory names
(each gets prefixed with -L)
:param libs: a list of libraries to link with
(each gets prefixed with -l)
:param preargs: a list of extra compiler arguments
:param preargs: a list of extra compiler arguments
:param rpaths: list of rpaths to use with Xlinker. Defaults to `rpath_defaults`.
:param rpaths: list of rpaths to use with Xlinker.
Defaults to `rpath_defaults`.
:returns: dynamically-imported python module of the compiled code.
:returns: dynamically-imported python module of the compiled code.
:note 1: On Windows 7 with nvcc 3.1 we need to compile in the real directory
:note 1: On Windows 7 with nvcc 3.1 we need to compile in the
Otherwise nvcc never finish.
real directory Otherwise nvcc never finish.
"""
"""
rpaths
=
list
(
rpaths
)
rpaths
=
list
(
rpaths
)
if
sys
.
platform
==
"win32"
:
if
sys
.
platform
==
"win32"
:
# Remove some compilation args that cl.exe does not understand.
# Remove some compilation args that cl.exe does not understand.
# cl.exe is the compiler used by nvcc on Windows.
# cl.exe is the compiler used by nvcc on Windows.
for
a
in
[
"-Wno-write-strings"
,
"-Wno-unused-label"
,
for
a
in
[
"-Wno-write-strings"
,
"-Wno-unused-label"
,
"-Wno-unused-variable"
,
"-fno-math-errno"
]:
"-Wno-unused-variable"
,
"-fno-math-errno"
]:
if
a
in
preargs
:
if
a
in
preargs
:
preargs
.
remove
(
a
)
preargs
.
remove
(
a
)
if
preargs
is
None
:
if
preargs
is
None
:
preargs
=
[]
preargs
=
[]
else
:
preargs
=
list
(
preargs
)
else
:
if
sys
.
platform
!=
'win32'
:
preargs
=
list
(
preargs
)
if
sys
.
platform
!=
'win32'
:
preargs
.
append
(
'-fPIC'
)
preargs
.
append
(
'-fPIC'
)
no_opt
=
False
no_opt
=
False
cuda_root
=
config
.
cuda
.
root
cuda_root
=
config
.
cuda
.
root
...
@@ -150,10 +164,10 @@ class NVCC_compiler(object):
...
@@ -150,10 +164,10 @@ class NVCC_compiler(object):
# No 64 bit CUDA libraries available on the mac, yet..
# No 64 bit CUDA libraries available on the mac, yet..
lib_dirs
.
append
(
os
.
path
.
join
(
cuda_root
,
'lib64'
))
lib_dirs
.
append
(
os
.
path
.
join
(
cuda_root
,
'lib64'
))
if
sys
.
platform
==
'darwin'
:
if
sys
.
platform
==
'darwin'
:
# On the mac, nvcc is not able to link using -framework Python, so we have
# On the mac, nvcc is not able to link using -framework
# manually add the correct library and paths
# Python, so we have manually add the correct library and
# paths
darwin_python_lib
=
commands
.
getoutput
(
'python-config --ldflags'
)
darwin_python_lib
=
commands
.
getoutput
(
'python-config --ldflags'
)
else
:
else
:
# sometimes, the linker cannot find -lpython so we need to tell it
# sometimes, the linker cannot find -lpython so we need to tell it
...
@@ -178,9 +192,14 @@ class NVCC_compiler(object):
...
@@ -178,9 +192,14 @@ class NVCC_compiler(object):
(
module_name
,
get_lib_extension
()))
(
module_name
,
get_lib_extension
()))
_logger
.
debug
(
'Generating shared lib
%
s'
,
lib_filename
)
_logger
.
debug
(
'Generating shared lib
%
s'
,
lib_filename
)
# TODO: Why do these args cause failure on gtx285 that has 1.3 compute capability? '--gpu-architecture=compute_13', '--gpu-code=compute_13',
# TODO: Why do these args cause failure on gtx285 that has 1.3
preargs1
=
[
pa
for
pa
in
preargs
if
pa
.
startswith
(
'-O'
)
or
pa
.
startswith
(
'--maxrregcount='
)]
#nvcc argument
# compute capability? '--gpu-architecture=compute_13',
preargs2
=
[
pa
for
pa
in
preargs
if
pa
not
in
preargs1
]
#other arguments
# '--gpu-code=compute_13',
#nvcc argument
preargs1
=
[
pa
for
pa
in
preargs
if
pa
.
startswith
(
'-O'
)
or
pa
.
startswith
(
'--maxrregcount='
)]
preargs2
=
[
pa
for
pa
in
preargs
if
pa
not
in
preargs1
]
# other arguments
cmd
=
[
nvcc_path
,
'-shared'
,
'-g'
]
+
preargs1
cmd
=
[
nvcc_path
,
'-shared'
,
'-g'
]
+
preargs1
if
config
.
nvcc
.
compiler_bindir
:
if
config
.
nvcc
.
compiler_bindir
:
...
@@ -199,23 +218,25 @@ class NVCC_compiler(object):
...
@@ -199,23 +218,25 @@ class NVCC_compiler(object):
cmd
.
append
(
'-m32'
)
cmd
.
append
(
'-m32'
)
preargs2
.
append
(
'-m32'
)
preargs2
.
append
(
'-m32'
)
if
len
(
preargs2
)
>
0
:
if
len
(
preargs2
)
>
0
:
cmd
.
extend
([
'-Xcompiler'
,
','
.
join
(
preargs2
)])
cmd
.
extend
([
'-Xcompiler'
,
','
.
join
(
preargs2
)])
if
config
.
cuda
.
root
and
os
.
path
.
exists
(
os
.
path
.
join
(
config
.
cuda
.
root
,
'lib'
)):
if
config
.
cuda
.
root
and
os
.
path
.
exists
(
os
.
path
.
join
(
config
.
cuda
.
root
,
rpaths
.
append
(
os
.
path
.
join
(
config
.
cuda
.
root
,
'lib'
))
'lib'
)):
rpaths
.
append
(
os
.
path
.
join
(
config
.
cuda
.
root
,
'lib'
))
if
sys
.
platform
!=
'darwin'
:
if
sys
.
platform
!=
'darwin'
:
# the 64bit CUDA libs are in the same files as are named by the function above
# the 64bit CUDA libs are in the same files as are
rpaths
.
append
(
os
.
path
.
join
(
config
.
cuda
.
root
,
'lib64'
))
# named by the function above
rpaths
.
append
(
os
.
path
.
join
(
config
.
cuda
.
root
,
'lib64'
))
if
sys
.
platform
!=
'win32'
:
if
sys
.
platform
!=
'win32'
:
# the -rpath option is not understood by the Microsoft linker
# the -rpath option is not understood by the Microsoft linker
for
rpath
in
rpaths
:
for
rpath
in
rpaths
:
cmd
.
extend
([
'-Xlinker'
,
','
.
join
([
'-rpath'
,
rpath
])])
cmd
.
extend
([
'-Xlinker'
,
','
.
join
([
'-rpath'
,
rpath
])])
cmd
.
extend
(
'-I
%
s'
%
idir
for
idir
in
include_dirs
)
cmd
.
extend
(
'-I
%
s'
%
idir
for
idir
in
include_dirs
)
cmd
.
extend
([
'-o'
,
lib_filename
])
cmd
.
extend
([
'-o'
,
lib_filename
])
cmd
.
append
(
os
.
path
.
split
(
cppfilename
)[
-
1
])
cmd
.
append
(
os
.
path
.
split
(
cppfilename
)[
-
1
])
cmd
.
extend
([
'-L
%
s'
%
ldir
for
ldir
in
lib_dirs
])
cmd
.
extend
([
'-L
%
s'
%
ldir
for
ldir
in
lib_dirs
])
cmd
.
extend
([
'-l
%
s'
%
l
for
l
in
libs
])
cmd
.
extend
([
'-l
%
s'
%
l
for
l
in
libs
])
if
module_name
!=
'cuda_ndarray'
:
if
module_name
!=
'cuda_ndarray'
:
cmd
.
append
(
"-lcuda_ndarray"
)
cmd
.
append
(
"-lcuda_ndarray"
)
if
sys
.
platform
==
'darwin'
:
if
sys
.
platform
==
'darwin'
:
...
@@ -227,35 +248,37 @@ class NVCC_compiler(object):
...
@@ -227,35 +248,37 @@ class NVCC_compiler(object):
try
:
try
:
indexof
=
cmd
.
index
(
'-framework'
)
indexof
=
cmd
.
index
(
'-framework'
)
newarg
=
'-Xcompiler'
,
','
.
join
(
cmd
[
indexof
:(
indexof
+
2
)])
newarg
=
'-Xcompiler'
,
','
.
join
(
cmd
[
indexof
:(
indexof
+
2
)])
cmd
.
pop
(
indexof
)
# Remove -framework
cmd
.
pop
(
indexof
)
# Remove -framework
cmd
.
pop
(
indexof
)
# Remove argument to -framework
cmd
.
pop
(
indexof
)
# Remove argument to -framework
cmd
.
extend
(
newarg
)
cmd
.
extend
(
newarg
)
except
ValueError
,
e
:
except
ValueError
,
e
:
done
=
True
done
=
True
# Remove "-u Symbol" arguments, since they are usually not relevant
# Remove "-u Symbol" arguments, since they are usually not
# for the new compilation, even if they were used for compiling python.
# relevant for the new compilation, even if they were used for
# If they are necessary, the nvcc syntax is "-U Symbol" with a capital U.
# compiling python. If they are necessary, the nvcc syntax is
# "-U Symbol" with a capital U.
done
=
False
done
=
False
while
not
done
:
while
not
done
:
try
:
try
:
indexof
=
cmd
.
index
(
'-u'
)
indexof
=
cmd
.
index
(
'-u'
)
cmd
.
pop
(
indexof
)
# Remove -u
cmd
.
pop
(
indexof
)
# Remove -u
cmd
.
pop
(
indexof
)
# Remove argument to -u
cmd
.
pop
(
indexof
)
# Remove argument to -u
except
ValueError
,
e
:
except
ValueError
,
e
:
done
=
True
done
=
True
# Fix for MacOS X.
# Fix for MacOS X.
cmd
=
remove_python_framework_dir
(
cmd
)
cmd
=
remove_python_framework_dir
(
cmd
)
# CUDA Toolkit v4.1 Known Issues:
# CUDA Toolkit v4.1 Known Issues:
# Host linker on Mac OS 10.7 (and 10.6 for me) passes -no_pie option
to nvcc
# Host linker on Mac OS 10.7 (and 10.6 for me) passes -no_pie option
# this option is not recognized and generates an error
# t
o nvcc t
his option is not recognized and generates an error
# http://stackoverflow.com/questions/9327265/nvcc-unknown-option-no-pie
# http://stackoverflow.com/questions/9327265/nvcc-unknown-option-no-pie
# Passing -Xlinker -pie stops -no_pie from getting passed
# Passing -Xlinker -pie stops -no_pie from getting passed
if
sys
.
platform
==
'darwin'
and
nvcc_version
>=
'4.1'
:
if
sys
.
platform
==
'darwin'
and
nvcc_version
>=
'4.1'
:
cmd
.
extend
([
'-Xlinker'
,
'-pie'
])
cmd
.
extend
([
'-Xlinker'
,
'-pie'
])
#cmd.append("--ptxas-options=-v") #uncomment this to see register and shared-mem requirements
#cmd.append("--ptxas-options=-v") #uncomment this to see
#register and shared-mem requirements
_logger
.
debug
(
'Running cmd
%
s'
,
' '
.
join
(
cmd
))
_logger
.
debug
(
'Running cmd
%
s'
,
' '
.
join
(
cmd
))
orig_dir
=
os
.
getcwd
()
orig_dir
=
os
.
getcwd
()
try
:
try
:
...
@@ -273,7 +296,8 @@ class NVCC_compiler(object):
...
@@ -273,7 +296,8 @@ class NVCC_compiler(object):
for
eline
in
nvcc_stderr
.
split
(
'
\n
'
):
for
eline
in
nvcc_stderr
.
split
(
'
\n
'
):
if
not
eline
:
if
not
eline
:
continue
continue
if
'skipping incompatible'
in
eline
:
#ld is skipping an incompatible library
if
'skipping incompatible'
in
eline
:
#ld is skipping an incompatible library
continue
continue
if
'declared but never referenced'
in
eline
:
if
'declared but never referenced'
in
eline
:
continue
continue
...
@@ -298,11 +322,12 @@ class NVCC_compiler(object):
...
@@ -298,11 +322,12 @@ class NVCC_compiler(object):
print
>>
sys
.
stderr
,
l
print
>>
sys
.
stderr
,
l
print
>>
sys
.
stderr
,
'==============================='
print
>>
sys
.
stderr
,
'==============================='
for
i
,
l
in
enumerate
(
src_code
.
split
(
'
\n
'
)):
for
i
,
l
in
enumerate
(
src_code
.
split
(
'
\n
'
)):
print
>>
sys
.
stderr
,
i
+
1
,
l
print
>>
sys
.
stderr
,
i
+
1
,
l
raise
Exception
(
'nvcc return status'
,
p
.
returncode
,
'for cmd'
,
' '
.
join
(
cmd
))
raise
Exception
(
'nvcc return status'
,
p
.
returncode
,
'for cmd'
,
' '
.
join
(
cmd
))
#touch the __init__ file
#touch the __init__ file
file
(
os
.
path
.
join
(
location
,
"__init__.py"
),
'w'
)
.
close
()
file
(
os
.
path
.
join
(
location
,
"__init__.py"
),
'w'
)
.
close
()
return
dlimport
(
lib_filename
)
return
dlimport
(
lib_filename
)
...
...
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论