Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
P
pytensor
项目
项目
详情
活动
周期分析
仓库
仓库
文件
提交
分支
标签
贡献者
图表
比较
统计图
议题
0
议题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
日程
统计图
Wiki
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
testgroup
pytensor
Commits
cff27c13
提交
cff27c13
authored
2月 21, 2012
作者:
Frederic
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
make {nvcc,gcc}_module_compile_str a class with another function compile_args…
make {nvcc,gcc}_module_compile_str a class with another function compile_args that get added in the keys.
上级
2f2b424a
隐藏空白字符变更
内嵌
并排
正在显示
8 个修改的文件
包含
353 行增加
和
325 行删除
+353
-325
cc.py
theano/gof/cc.py
+7
-3
cmodule.py
theano/gof/cmodule.py
+128
-121
cutils.py
theano/gof/cutils.py
+1
-1
lazylinker_c.py
theano/gof/lazylinker_c.py
+1
-1
__init__.py
theano/sandbox/cuda/__init__.py
+4
-3
nvcc_compiler.py
theano/sandbox/cuda/nvcc_compiler.py
+208
-192
type.py
theano/sandbox/cuda/type.py
+2
-2
scan_perform_ext.py
theano/scan_module/scan_perform_ext.py
+2
-2
没有找到文件。
theano/gof/cc.py
浏览文件 @
cff27c13
...
@@ -622,6 +622,10 @@ class CLinker(link.Linker):
...
@@ -622,6 +622,10 @@ class CLinker(link.Linker):
for
x
in
[
y
.
type
for
y
in
self
.
variables
]
+
[
y
.
op
for
y
in
self
.
node_order
]:
for
x
in
[
y
.
type
for
y
in
self
.
variables
]
+
[
y
.
op
for
y
in
self
.
node_order
]:
try
:
ret
+=
x
.
c_compile_args
()
try
:
ret
+=
x
.
c_compile_args
()
except
utils
.
MethodNotDefined
:
pass
except
utils
.
MethodNotDefined
:
pass
c_compiler
=
self
.
c_compiler
()
ret
+=
c_compiler
.
compile_args
()
ret
=
list
(
set
(
ret
))
#to remove duplicate
ret
=
list
(
set
(
ret
))
#to remove duplicate
for
x
in
[
y
.
type
for
y
in
self
.
variables
]
+
[
y
.
op
for
y
in
self
.
node_order
]:
for
x
in
[
y
.
type
for
y
in
self
.
variables
]
+
[
y
.
op
for
y
in
self
.
node_order
]:
try
:
try
:
...
@@ -661,7 +665,7 @@ class CLinker(link.Linker):
...
@@ -661,7 +665,7 @@ class CLinker(link.Linker):
raise
Exception
(
'Nodes have requested specific different compilers'
,
raise
Exception
(
'Nodes have requested specific different compilers'
,
(
c_compiler
,
x_compiler
))
(
c_compiler
,
x_compiler
))
if
(
c_compiler
is
None
):
if
(
c_compiler
is
None
):
return
cmodule
.
gcc_module_compile_st
r
return
cmodule
.
GCC_compile
r
else
:
return
c_compiler
else
:
return
c_compiler
def
header_dirs
(
self
):
def
header_dirs
(
self
):
...
@@ -1007,7 +1011,7 @@ class CLinker(link.Linker):
...
@@ -1007,7 +1011,7 @@ class CLinker(link.Linker):
libs
=
self
.
libraries
()
libs
=
self
.
libraries
()
preargs
=
self
.
compile_args
()
preargs
=
self
.
compile_args
()
compiler_name
=
c_compiler
.
__name__
compiler_name
=
c_compiler
.
__name__
if
compiler_name
==
'
nvcc_module_compile_st
r'
and
config
.
lib
.
amdlibm
:
if
compiler_name
==
'
NVCC_compile
r'
and
config
.
lib
.
amdlibm
:
# This lib does not work correctly with nvcc in device code.
# This lib does not work correctly with nvcc in device code.
# and newer version of g++ as 4.5.1.
# and newer version of g++ as 4.5.1.
# example of errors: "/usr/lib/gcc/x86_64-redhat-linux/4.5.1/include/mmintrin.h(49): error: identifier "__builtin_ia32_emms" is undefined"
# example of errors: "/usr/lib/gcc/x86_64-redhat-linux/4.5.1/include/mmintrin.h(49): error: identifier "__builtin_ia32_emms" is undefined"
...
@@ -1024,7 +1028,7 @@ class CLinker(link.Linker):
...
@@ -1024,7 +1028,7 @@ class CLinker(link.Linker):
try
:
try
:
_logger
.
debug
(
"LOCATION
%
s"
,
str
(
location
))
_logger
.
debug
(
"LOCATION
%
s"
,
str
(
location
))
try
:
try
:
module
=
c_compiler
(
module
=
c_compiler
.
compile_str
(
module_name
=
mod
.
name
,
module_name
=
mod
.
name
,
src_code
=
src_code
,
src_code
=
src_code
,
location
=
location
,
location
=
location
,
...
...
theano/gof/cmodule.py
浏览文件 @
cff27c13
...
@@ -1312,140 +1312,147 @@ def gcc_version():
...
@@ -1312,140 +1312,147 @@ def gcc_version():
return
gcc_version_str
return
gcc_version_str
def
gcc_module_compile_str
(
module_name
,
src_code
,
location
=
None
,
class
GCC_compiler
():
include_dirs
=
[],
lib_dirs
=
[],
libs
=
[],
preargs
=
[]):
@staticmethod
"""
def
compile_args
():
:param module_name: string (this has been embedded in the src_code
return
[]
:param src_code: a complete c or c++ source listing for the module
@staticmethod
def
compile_str
(
module_name
,
src_code
,
location
=
None
,
:param location: a pre-existing filesystem directory where the cpp file and
include_dirs
=
[],
lib_dirs
=
[],
libs
=
[],
preargs
=
[]):
.so will be written
"""
:param module_name: string (this has been embedded in the src_code
:param include_dirs: a list of include directory names (each gets prefixed
:param src_code: a complete c or c++ source listing for the module
with -I)
:param lib_dirs: a list of library search path directory names (each gets
:param location: a pre-existing filesystem directory where the
prefixed with -L)
cpp file and .so will be written
:param libs: a list of libraries to link with (each gets prefixed with -l)
:param include_dirs: a list of include directory names (each
gets prefixed with -I)
:param preargs: a list of extra compiler arguments
:param lib_dirs: a list of library search path directory names
(each gets prefixed with -L)
:returns: dynamically-imported python module of the compiled code.
:param libs: a list of libraries to link with (each gets
"""
prefixed with -l)
#TODO: Do not do the dlimport in this function
if
preargs
is
None
:
:param preargs: a list of extra compiler arguments
preargs
=
[]
else
:
preargs
=
list
(
preargs
)
if
sys
.
platform
!=
'win32'
:
:returns: dynamically-imported python module of the compiled code.
# Under Windows it looks like fPIC is useless. Compiler warning:
"""
# '-fPIC ignored for target (all code is position independent)'
#TODO: Do not do the dlimport in this function
preargs
.
append
(
'-fPIC'
)
no_opt
=
False
include_dirs
=
include_dirs
+
std_include_dirs
()
if
preargs
is
None
:
libs
=
std_libs
()
+
libs
preargs
=
[]
lib_dirs
=
std_lib_dirs
()
+
lib_dirs
else
:
preargs
=
list
(
preargs
)
if
sys
.
platform
!=
'win32'
:
# Under Windows it looks like fPIC is useless. Compiler warning:
# '-fPIC ignored for target (all code is position independent)'
preargs
.
append
(
'-fPIC'
)
no_opt
=
False
include_dirs
=
include_dirs
+
std_include_dirs
()
libs
=
std_libs
()
+
libs
lib_dirs
=
std_lib_dirs
()
+
lib_dirs
#DSE Patch 1 for supporting OSX frameworks; add -framework Python
if
sys
.
platform
==
'darwin'
:
preargs
.
extend
([
'-undefined'
,
'dynamic_lookup'
])
python_inc
=
distutils
.
sysconfig
.
get_python_inc
()
# link with the framework library *if specifically requested*
# config.mac_framework_link is by default False, since on some mac
# installs linking with -framework causes a Bus Error
if
(
python_inc
.
count
(
'Python.framework'
)
>
0
and
config
.
cmodule
.
mac_framework_link
):
preargs
.
extend
([
'-framework'
,
'Python'
])
# Figure out whether the current Python executable is 32
# or 64 bit and compile accordingly.
n_bits
=
local_bitwidth
()
preargs
.
extend
([
'-m
%
s'
%
n_bits
])
_logger
.
debug
(
"OS X: compiling for
%
s bit architecture"
,
n_bits
)
# sometimes, the linker cannot find -lpython so we need to tell it
# explicitly where it is located
# this returns somepath/lib/python2.x
python_lib
=
distutils
.
sysconfig
.
get_python_lib
(
plat_specific
=
1
,
\
standard_lib
=
1
)
python_lib
=
os
.
path
.
dirname
(
python_lib
)
if
python_lib
not
in
lib_dirs
:
lib_dirs
.
append
(
python_lib
)
workdir
=
location
cppfilename
=
os
.
path
.
join
(
location
,
'mod.cpp'
)
cppfile
=
file
(
cppfilename
,
'w'
)
_logger
.
debug
(
'Writing module C++ code to
%
s'
,
cppfilename
)
ofiles
=
[]
rval
=
None
#DSE Patch 1 for supporting OSX frameworks; add -framework Python
cppfile
.
write
(
src_code
)
if
sys
.
platform
==
'darwin'
:
# Avoid gcc warning "no newline at end of file".
preargs
.
extend
([
'-undefined'
,
'dynamic_lookup'
])
if
not
src_code
.
endswith
(
'
\n
'
):
python_inc
=
distutils
.
sysconfig
.
get_python_inc
()
cppfile
.
write
(
'
\n
'
)
# link with the framework library *if specifically requested*
cppfile
.
close
()
# config.mac_framework_link is by default False, since on some mac
# installs linking with -framework causes a Bus Error
if
(
python_inc
.
count
(
'Python.framework'
)
>
0
and
config
.
cmodule
.
mac_framework_link
):
preargs
.
extend
([
'-framework'
,
'Python'
])
# Figure out whether the current Python executable is 32 or 64 bit and
# compile accordingly.
n_bits
=
local_bitwidth
()
preargs
.
extend
([
'-m
%
s'
%
n_bits
])
_logger
.
debug
(
"OS X: compiling for
%
s bit architecture"
,
n_bits
)
# sometimes, the linker cannot find -lpython so we need to tell it
# explicitly where it is located
# this returns somepath/lib/python2.x
python_lib
=
distutils
.
sysconfig
.
get_python_lib
(
plat_specific
=
1
,
\
standard_lib
=
1
)
python_lib
=
os
.
path
.
dirname
(
python_lib
)
if
python_lib
not
in
lib_dirs
:
lib_dirs
.
append
(
python_lib
)
workdir
=
location
cppfilename
=
os
.
path
.
join
(
location
,
'mod.cpp'
)
cppfile
=
file
(
cppfilename
,
'w'
)
_logger
.
debug
(
'Writing module C++ code to
%
s'
,
cppfilename
)
ofiles
=
[]
rval
=
None
cppfile
.
write
(
src_code
)
lib_filename
=
os
.
path
.
join
(
location
,
'
%
s.
%
s'
%
# Avoid gcc warning "no newline at end of file".
(
module_name
,
get_lib_extension
()))
if
not
src_code
.
endswith
(
'
\n
'
):
cppfile
.
write
(
'
\n
'
)
cppfile
.
close
()
lib_filename
=
os
.
path
.
join
(
location
,
'
%
s.
%
s'
%
_logger
.
debug
(
'Generating shared lib
%
s'
,
lib_filename
)
(
module_name
,
get_lib_extension
()))
cmd
=
[
'g++'
,
get_gcc_shared_library_arg
(),
'-g'
]
if
no_opt
:
cmd
.
extend
(
p
for
p
in
preargs
if
not
p
.
startswith
(
'-O'
))
else
:
cmd
.
extend
(
preargs
)
cxxflags
=
[
flag
for
flag
in
config
.
gcc
.
cxxflags
.
split
(
' '
)
if
flag
]
#print >> sys.stderr, config.gcc.cxxflags.split(' ')
cmd
.
extend
(
cxxflags
)
cmd
.
extend
(
'-I
%
s'
%
idir
for
idir
in
include_dirs
)
cmd
.
extend
([
'-o'
,
lib_filename
])
cmd
.
append
(
cppfilename
)
cmd
.
extend
([
'-L
%
s'
%
ldir
for
ldir
in
lib_dirs
])
cmd
.
extend
([
'-l
%
s'
%
l
for
l
in
libs
])
#print >> sys.stderr, 'COMPILING W CMD', cmd
_logger
.
debug
(
'Running cmd:
%
s'
,
' '
.
join
(
cmd
))
def
print_command_line_error
():
# Print command line when a problem occurred.
print
>>
sys
.
stderr
,
(
"Problem occurred during compilation with the "
"command line below:"
)
print
>>
sys
.
stderr
,
' '
.
join
(
cmd
)
_logger
.
debug
(
'Generating shared lib
%
s'
,
lib_filename
)
try
:
cmd
=
[
'g++'
,
get_gcc_shared_library_arg
(),
'-g'
]
p
=
subprocess
.
Popen
(
cmd
,
stderr
=
subprocess
.
PIPE
)
if
no_opt
:
compile_stderr
=
p
.
communicate
()[
1
]
cmd
.
extend
(
p
for
p
in
preargs
if
not
p
.
startswith
(
'-O'
))
except
Exception
:
else
:
# An exception can occur e.g. if `g++` is not found.
cmd
.
extend
(
preargs
)
print_command_line_error
()
cxxflags
=
[
flag
for
flag
in
config
.
gcc
.
cxxflags
.
split
(
' '
)
if
flag
]
raise
#print >> sys.stderr, config.gcc.cxxflags.split(' ')
cmd
.
extend
(
cxxflags
)
cmd
.
extend
(
'-I
%
s'
%
idir
for
idir
in
include_dirs
)
cmd
.
extend
([
'-o'
,
lib_filename
])
cmd
.
append
(
cppfilename
)
cmd
.
extend
([
'-L
%
s'
%
ldir
for
ldir
in
lib_dirs
])
cmd
.
extend
([
'-l
%
s'
%
l
for
l
in
libs
])
#print >> sys.stderr, 'COMPILING W CMD', cmd
_logger
.
debug
(
'Running cmd:
%
s'
,
' '
.
join
(
cmd
))
def
print_command_line_error
():
# Print command line when a problem occurred.
print
>>
sys
.
stderr
,
(
"Problem occurred during compilation with the "
"command line below:"
)
print
>>
sys
.
stderr
,
' '
.
join
(
cmd
)
try
:
status
=
p
.
returncode
p
=
subprocess
.
Popen
(
cmd
,
stderr
=
subprocess
.
PIPE
)
compile_stderr
=
p
.
communicate
()[
1
]
if
status
:
except
Exception
:
print
'==============================='
# An exception can occur e.g. if `g++` is not found.
for
i
,
l
in
enumerate
(
src_code
.
split
(
'
\n
'
)):
print_command_line_error
()
#gcc put its messages to stderr, so we add ours now
raise
print
>>
sys
.
stderr
,
'
%05
i
\t
%
s'
%
(
i
+
1
,
l
)
print
'==============================='
status
=
p
.
returncode
print_command_line_error
()
# Print errors just below the command line.
if
status
:
print
compile_stderr
print
'==============================='
# We replace '\n' by '. ' in the error message because when Python
for
i
,
l
in
enumerate
(
src_code
.
split
(
'
\n
'
)):
# prints the exception, having '\n' in the text makes it more difficult
#gcc put its messages to stderr, so we add ours now
# to read.
print
>>
sys
.
stderr
,
'
%05
i
\t
%
s'
%
(
i
+
1
,
l
)
raise
Exception
(
'Compilation failed (return status=
%
s):
%
s'
%
print
'==============================='
(
status
,
compile_stderr
.
replace
(
'
\n
'
,
'. '
)))
print_command_line_error
()
# Print errors just below the command line.
#touch the __init__ file
print
compile_stderr
file
(
os
.
path
.
join
(
location
,
"__init__.py"
),
'w'
)
.
close
()
# We replace '\n' by '. ' in the error message because when Python
return
dlimport
(
lib_filename
)
# prints the exception, having '\n' in the text makes it more difficult
# to read.
raise
Exception
(
'Compilation failed (return status=
%
s):
%
s'
%
(
status
,
compile_stderr
.
replace
(
'
\n
'
,
'. '
)))
#touch the __init__ file
file
(
os
.
path
.
join
(
location
,
"__init__.py"
),
'w'
)
.
close
()
return
dlimport
(
lib_filename
)
def
icc_module_compile_str
(
*
args
):
def
icc_module_compile_str
(
*
args
):
...
...
theano/gof/cutils.py
浏览文件 @
cff27c13
...
@@ -70,7 +70,7 @@ except ImportError:
...
@@ -70,7 +70,7 @@ except ImportError:
if
not
os
.
path
.
exists
(
loc
):
if
not
os
.
path
.
exists
(
loc
):
os
.
mkdir
(
loc
)
os
.
mkdir
(
loc
)
cmodule
.
gcc_module_
compile_str
(
'cutils_ext'
,
code
,
location
=
loc
)
cmodule
.
GCC_compiler
.
compile_str
(
'cutils_ext'
,
code
,
location
=
loc
)
from
cutils_ext.cutils_ext
import
*
from
cutils_ext.cutils_ext
import
*
finally
:
finally
:
...
...
theano/gof/lazylinker_c.py
浏览文件 @
cff27c13
...
@@ -53,7 +53,7 @@ except ImportError:
...
@@ -53,7 +53,7 @@ except ImportError:
loc
=
os
.
path
.
join
(
config
.
compiledir
,
dirname
)
loc
=
os
.
path
.
join
(
config
.
compiledir
,
dirname
)
if
not
os
.
path
.
exists
(
loc
):
if
not
os
.
path
.
exists
(
loc
):
os
.
mkdir
(
loc
)
os
.
mkdir
(
loc
)
cmodule
.
gcc_module_
compile_str
(
dirname
,
code
,
location
=
loc
)
cmodule
.
GCC_compiler
.
compile_str
(
dirname
,
code
,
location
=
loc
)
# Save version into the __init__.py file.
# Save version into the __init__.py file.
init_py
=
os
.
path
.
join
(
loc
,
'__init__.py'
)
init_py
=
os
.
path
.
join
(
loc
,
'__init__.py'
)
open
(
init_py
,
'w'
)
.
write
(
'_version =
%
s
\n
'
%
version
)
open
(
init_py
,
'w'
)
.
write
(
'_version =
%
s
\n
'
%
version
)
...
...
theano/sandbox/cuda/__init__.py
浏览文件 @
cff27c13
...
@@ -87,7 +87,7 @@ libcuda_ndarray_so = os.path.join(cuda_ndarray_loc,
...
@@ -87,7 +87,7 @@ libcuda_ndarray_so = os.path.join(cuda_ndarray_loc,
# Add the theano cache directory's cuda_ndarray subdirectory to the
# Add the theano cache directory's cuda_ndarray subdirectory to the
# list of places that are hard-coded into compiled modules' runtime
# list of places that are hard-coded into compiled modules' runtime
# library search list. This works in conjunction with
# library search list. This works in conjunction with
# nvcc_compiler.
nvcc_module_
compile_str which adds this folder during
# nvcc_compiler.
NVCC_compiler.
compile_str which adds this folder during
# compilation with -L and also adds -lcuda_ndarray when compiling
# compilation with -L and also adds -lcuda_ndarray when compiling
# modules.
# modules.
nvcc_compiler
.
add_standard_rpath
(
cuda_ndarray_loc
)
nvcc_compiler
.
add_standard_rpath
(
cuda_ndarray_loc
)
...
@@ -117,7 +117,8 @@ try:
...
@@ -117,7 +117,8 @@ try:
if
not
os
.
path
.
exists
(
cuda_ndarray_loc
):
if
not
os
.
path
.
exists
(
cuda_ndarray_loc
):
os
.
makedirs
(
cuda_ndarray_loc
)
os
.
makedirs
(
cuda_ndarray_loc
)
nvcc_compiler
.
nvcc_module_compile_str
(
compiler
=
nvcc_compiler
.
NVCC_compiler
()
compiler
.
compile_str
(
'cuda_ndarray'
,
'cuda_ndarray'
,
code
,
code
,
location
=
cuda_ndarray_loc
,
location
=
cuda_ndarray_loc
,
...
@@ -130,7 +131,7 @@ except Exception, e:
...
@@ -130,7 +131,7 @@ except Exception, e:
if
cuda_available
:
if
cuda_available
:
# If necessary,
# If necessary,
# create a symlink called libcuda_ndarray.so
# create a symlink called libcuda_ndarray.so
# which nvcc_
module_compile_st
r uses when linking
# which nvcc_
compiler.NVCC_compile
r uses when linking
# any module except "cuda_ndarray" itself.
# any module except "cuda_ndarray" itself.
try
:
try
:
open
(
libcuda_ndarray_so
)
.
close
()
open
(
libcuda_ndarray_so
)
.
close
()
...
...
theano/sandbox/cuda/nvcc_compiler.py
浏览文件 @
cff27c13
...
@@ -72,210 +72,226 @@ rpath_defaults = []
...
@@ -72,210 +72,226 @@ rpath_defaults = []
def
add_standard_rpath
(
rpath
):
def
add_standard_rpath
(
rpath
):
rpath_defaults
.
append
(
rpath
)
rpath_defaults
.
append
(
rpath
)
def
nvcc_module_compile_str
(
module_name
,
src_code
,
location
=
None
,
include_dirs
=
[],
lib_dirs
=
[],
libs
=
[],
preargs
=
[],
rpaths
=
rpath_defaults
):
"""
:param module_name: string (this has been embedded in the src_code
:param src_code: a complete c or c++ source listing for the module
:param location: a pre-existing filesystem directory where the cpp file and .so will be written
:param include_dirs: a list of include directory names (each gets prefixed with -I)
:param lib_dirs: a list of library search path directory names (each gets prefixed with -L)
:param libs: a list of libraries to link with (each gets prefixed with -l)
:param preargs: a list of extra compiler arguments
:param rpaths: list of rpaths to use with Xlinker. Defaults to `rpath_defaults`.
:returns: dynamically-imported python module of the compiled code.
:note 1: On Windows 7 with nvcc 3.1 we need to compile in the real directory
Otherwise nvcc never finish.
"""
rpaths
=
list
(
rpaths
)
class
NVCC_compiler
():
@staticmethod
if
sys
.
platform
==
"win32"
:
def
compile_args
():
# Remove some compilation args that cl.exe does not understand.
"""
# cl.exe is the compiler used by nvcc on Windows.
This args will be received by compile_str() in the preargs paramter.
for
a
in
[
"-Wno-write-strings"
,
"-Wno-unused-label"
,
They will also be included in the "hard" part of the key module.
"-Wno-unused-variable"
,
"-fno-math-errno"
]:
"""
if
a
in
preargs
:
return
[]
preargs
.
remove
(
a
)
# flags = [flag for flag in config.nvcc.flags.split(' ') if flag]
if
preargs
is
None
:
# cuda_ndarray_cuh_hash = hash_from_file(
preargs
=
[]
# os.path.join(os.path.split(__file__)[0], 'cuda_ndarray.cuh'))
else
:
preargs
=
list
(
preargs
)
# cuda_macro = '-DCUDA_NDARRAY_CUH=' + cuda_ndarray_cuh_hash
if
sys
.
platform
!=
'win32'
:
# return [cuda_macro]
preargs
.
append
(
'-fPIC'
)
no_opt
=
False
@staticmethod
cuda_root
=
config
.
cuda
.
root
def
compile_str
(
module_name
,
src_code
,
#The include dirs gived by the user should have precedence over
location
=
None
,
include_dirs
=
[],
lib_dirs
=
[],
libs
=
[],
preargs
=
[],
#the standards ones.
rpaths
=
rpath_defaults
):
include_dirs
=
include_dirs
+
std_include_dirs
()
"""
if
os
.
path
.
abspath
(
os
.
path
.
split
(
__file__
)[
0
])
not
in
include_dirs
:
:param module_name: string (this has been embedded in the src_code
include_dirs
.
append
(
os
.
path
.
abspath
(
os
.
path
.
split
(
__file__
)[
0
]))
:param src_code: a complete c or c++ source listing for the module
:param location: a pre-existing filesystem directory where the cpp file and .so will be written
libs
=
std_libs
()
+
libs
:param include_dirs: a list of include directory names (each gets prefixed with -I)
if
'cudart'
not
in
libs
:
:param lib_dirs: a list of library search path directory names (each gets prefixed with -L)
libs
.
append
(
'cudart'
)
:param libs: a list of libraries to link with (each gets prefixed with -l)
:param preargs: a list of extra compiler arguments
lib_dirs
=
std_lib_dirs
()
+
lib_dirs
:param rpaths: list of rpaths to use with Xlinker. Defaults to `rpath_defaults`.
if
cuda_root
:
lib_dirs
.
append
(
os
.
path
.
join
(
cuda_root
,
'lib'
))
:returns: dynamically-imported python module of the compiled code.
# from Benjamin Schrauwen April 14 2010
:note 1: On Windows 7 with nvcc 3.1 we need to compile in the real directory
if
sys
.
platform
!=
'darwin'
:
Otherwise nvcc never finish.
# No 64 bit CUDA libraries available on the mac, yet..
"""
lib_dirs
.
append
(
os
.
path
.
join
(
cuda_root
,
'lib64'
))
rpaths
=
list
(
rpaths
)
if
sys
.
platform
==
'darwin'
:
if
sys
.
platform
==
"win32"
:
# On the mac, nvcc is not able to link using -framework Python, so we have
# Remove some compilation args that cl.exe does not understand.
# manually add the correct library and paths
# cl.exe is the compiler used by nvcc on Windows.
darwin_python_lib
=
commands
.
getoutput
(
'python-config --ldflags'
)
for
a
in
[
"-Wno-write-strings"
,
"-Wno-unused-label"
,
else
:
"-Wno-unused-variable"
,
"-fno-math-errno"
]:
# sometimes, the linker cannot find -lpython so we need to tell it
if
a
in
preargs
:
# explicitly where it is located
preargs
.
remove
(
a
)
# this returns somepath/lib/python2.x
if
preargs
is
None
:
python_lib
=
distutils
.
sysconfig
.
get_python_lib
(
plat_specific
=
1
,
\
preargs
=
[]
standard_lib
=
1
)
else
:
preargs
=
list
(
preargs
)
python_lib
=
os
.
path
.
dirname
(
python_lib
)
if
sys
.
platform
!=
'win32'
:
if
python_lib
not
in
lib_dirs
:
preargs
.
append
(
'-fPIC'
)
lib_dirs
.
append
(
python_lib
)
no_opt
=
False
cuda_root
=
config
.
cuda
.
root
cppfilename
=
os
.
path
.
join
(
location
,
'mod.cu'
)
cppfile
=
file
(
cppfilename
,
'w'
)
#The include dirs gived by the user should have precedence over
#the standards ones.
_logger
.
debug
(
'Writing module C++ code to
%
s'
,
cppfilename
)
include_dirs
=
include_dirs
+
std_include_dirs
()
ofiles
=
[]
if
os
.
path
.
abspath
(
os
.
path
.
split
(
__file__
)[
0
])
not
in
include_dirs
:
rval
=
None
include_dirs
.
append
(
os
.
path
.
abspath
(
os
.
path
.
split
(
__file__
)[
0
]))
cppfile
.
write
(
src_code
)
libs
=
std_libs
()
+
libs
cppfile
.
close
()
if
'cudart'
not
in
libs
:
lib_filename
=
os
.
path
.
join
(
location
,
'
%
s.
%
s'
%
libs
.
append
(
'cudart'
)
(
module_name
,
get_lib_extension
()))
lib_dirs
=
std_lib_dirs
()
+
lib_dirs
_logger
.
debug
(
'Generating shared lib
%
s'
,
lib_filename
)
if
cuda_root
:
# TODO: Why do these args cause failure on gtx285 that has 1.3 compute capability? '--gpu-architecture=compute_13', '--gpu-code=compute_13',
lib_dirs
.
append
(
os
.
path
.
join
(
cuda_root
,
'lib'
))
preargs1
=
[
pa
for
pa
in
preargs
if
pa
.
startswith
(
'-O'
)
or
pa
.
startswith
(
'--maxrregcount='
)]
#nvcc argument
preargs2
=
[
pa
for
pa
in
preargs
if
pa
not
in
preargs1
]
#other arguments
# from Benjamin Schrauwen April 14 2010
if
sys
.
platform
!=
'darwin'
:
cmd
=
[
nvcc_path
,
'-shared'
,
'-g'
]
+
preargs1
# No 64 bit CUDA libraries available on the mac, yet..
if
config
.
nvcc
.
compiler_bindir
:
lib_dirs
.
append
(
os
.
path
.
join
(
cuda_root
,
'lib64'
))
cmd
.
extend
([
'--compiler-bindir'
,
config
.
nvcc
.
compiler_bindir
])
if
sys
.
platform
==
'win32'
:
if
sys
.
platform
==
'darwin'
:
# add flags for Microsoft compiler to create .pdb files
# On the mac, nvcc is not able to link using -framework Python, so we have
preargs2
.
append
(
'/Zi'
)
# manually add the correct library and paths
cmd
.
extend
([
'-Xlinker'
,
'/DEBUG'
])
darwin_python_lib
=
commands
.
getoutput
(
'python-config --ldflags'
)
if
sys
.
platform
!=
'win32'
:
if
local_bitwidth
()
==
64
:
cmd
.
append
(
'-m64'
)
preargs2
.
append
(
'-m64'
)
else
:
else
:
cmd
.
append
(
'-m32'
)
# sometimes, the linker cannot find -lpython so we need to tell it
preargs2
.
append
(
'-m32'
)
# explicitly where it is located
# this returns somepath/lib/python2.x
if
len
(
preargs2
)
>
0
:
python_lib
=
distutils
.
sysconfig
.
get_python_lib
(
plat_specific
=
1
,
\
cmd
.
extend
([
'-Xcompiler'
,
','
.
join
(
preargs2
)])
standard_lib
=
1
)
python_lib
=
os
.
path
.
dirname
(
python_lib
)
if
config
.
cuda
.
root
and
os
.
path
.
exists
(
os
.
path
.
join
(
config
.
cuda
.
root
,
'lib'
)):
if
python_lib
not
in
lib_dirs
:
rpaths
.
append
(
os
.
path
.
join
(
config
.
cuda
.
root
,
'lib'
))
lib_dirs
.
append
(
python_lib
)
if
sys
.
platform
!=
'darwin'
:
# the 64bit CUDA libs are in the same files as are named by the function above
cppfilename
=
os
.
path
.
join
(
location
,
'mod.cu'
)
rpaths
.
append
(
os
.
path
.
join
(
config
.
cuda
.
root
,
'lib64'
))
cppfile
=
file
(
cppfilename
,
'w'
)
if
sys
.
platform
!=
'win32'
:
# the -rpath option is not understood by the Microsoft linker
_logger
.
debug
(
'Writing module C++ code to
%
s'
,
cppfilename
)
for
rpath
in
rpaths
:
ofiles
=
[]
cmd
.
extend
([
'-Xlinker'
,
','
.
join
([
'-rpath'
,
rpath
])])
rval
=
None
cmd
.
extend
([
flag
for
flag
in
config
.
nvcc
.
flags
.
split
(
' '
)
if
flag
])
cmd
.
extend
(
'-I
%
s'
%
idir
for
idir
in
include_dirs
)
cppfile
.
write
(
src_code
)
cmd
.
extend
([
'-o'
,
lib_filename
])
cppfile
.
close
()
cmd
.
append
(
os
.
path
.
split
(
cppfilename
)[
-
1
])
lib_filename
=
os
.
path
.
join
(
location
,
'
%
s.
%
s'
%
cmd
.
extend
([
'-L
%
s'
%
ldir
for
ldir
in
lib_dirs
])
(
module_name
,
get_lib_extension
()))
cmd
.
extend
([
'-l
%
s'
%
l
for
l
in
libs
])
if
module_name
!=
'cuda_ndarray'
:
_logger
.
debug
(
'Generating shared lib
%
s'
,
lib_filename
)
cmd
.
append
(
"-lcuda_ndarray"
)
# TODO: Why do these args cause failure on gtx285 that has 1.3 compute capability? '--gpu-architecture=compute_13', '--gpu-code=compute_13',
if
sys
.
platform
==
'darwin'
:
preargs1
=
[
pa
for
pa
in
preargs
if
pa
.
startswith
(
'-O'
)
or
pa
.
startswith
(
'--maxrregcount='
)]
#nvcc argument
cmd
.
extend
(
darwin_python_lib
.
split
())
preargs2
=
[
pa
for
pa
in
preargs
if
pa
not
in
preargs1
]
#other arguments
if
sys
.
platform
==
'darwin'
:
cmd
=
[
nvcc_path
,
'-shared'
,
'-g'
]
+
preargs1
if
config
.
nvcc
.
compiler_bindir
:
cmd
.
extend
([
'--compiler-bindir'
,
config
.
nvcc
.
compiler_bindir
])
if
sys
.
platform
==
'win32'
:
# add flags for Microsoft compiler to create .pdb files
preargs2
.
append
(
'/Zi'
)
cmd
.
extend
([
'-Xlinker'
,
'/DEBUG'
])
if
sys
.
platform
!=
'win32'
:
if
local_bitwidth
()
==
64
:
cmd
.
append
(
'-m64'
)
preargs2
.
append
(
'-m64'
)
else
:
cmd
.
append
(
'-m32'
)
preargs2
.
append
(
'-m32'
)
if
len
(
preargs2
)
>
0
:
cmd
.
extend
([
'-Xcompiler'
,
','
.
join
(
preargs2
)])
if
config
.
cuda
.
root
and
os
.
path
.
exists
(
os
.
path
.
join
(
config
.
cuda
.
root
,
'lib'
)):
rpaths
.
append
(
os
.
path
.
join
(
config
.
cuda
.
root
,
'lib'
))
if
sys
.
platform
!=
'darwin'
:
# the 64bit CUDA libs are in the same files as are named by the function above
rpaths
.
append
(
os
.
path
.
join
(
config
.
cuda
.
root
,
'lib64'
))
if
sys
.
platform
!=
'win32'
:
# the -rpath option is not understood by the Microsoft linker
for
rpath
in
rpaths
:
cmd
.
extend
([
'-Xlinker'
,
','
.
join
([
'-rpath'
,
rpath
])])
cmd
.
extend
([
flag
for
flag
in
config
.
nvcc
.
flags
.
split
(
' '
)
if
flag
])
cmd
.
extend
(
'-I
%
s'
%
idir
for
idir
in
include_dirs
)
cmd
.
extend
([
'-o'
,
lib_filename
])
cmd
.
append
(
os
.
path
.
split
(
cppfilename
)[
-
1
])
cmd
.
extend
([
'-L
%
s'
%
ldir
for
ldir
in
lib_dirs
])
cmd
.
extend
([
'-l
%
s'
%
l
for
l
in
libs
])
if
module_name
!=
'cuda_ndarray'
:
cmd
.
append
(
"-lcuda_ndarray"
)
if
sys
.
platform
==
'darwin'
:
cmd
.
extend
(
darwin_python_lib
.
split
())
if
sys
.
platform
==
'darwin'
:
done
=
False
while
not
done
:
try
:
indexof
=
cmd
.
index
(
'-framework'
)
newarg
=
'-Xcompiler'
,
','
.
join
(
cmd
[
indexof
:(
indexof
+
2
)])
cmd
.
pop
(
indexof
)
# Remove -framework
cmd
.
pop
(
indexof
)
# Remove argument to -framework
cmd
.
extend
(
newarg
)
except
ValueError
,
e
:
done
=
True
# Remove "-u Symbol" arguments, since they are usually not relevant
# for the new compilation, even if they were used for compiling python.
# If they are necessary, the nvcc syntax is "-U Symbol" with a capital U.
done
=
False
done
=
False
while
not
done
:
while
not
done
:
try
:
try
:
indexof
=
cmd
.
index
(
'-framework'
)
indexof
=
cmd
.
index
(
'-u'
)
newarg
=
'-Xcompiler'
,
','
.
join
(
cmd
[
indexof
:(
indexof
+
2
)])
cmd
.
pop
(
indexof
)
# Remove -u
cmd
.
pop
(
indexof
)
# Remove -framework
cmd
.
pop
(
indexof
)
# Remove argument to -u
cmd
.
pop
(
indexof
)
# Remove argument to -framework
cmd
.
extend
(
newarg
)
except
ValueError
,
e
:
except
ValueError
,
e
:
done
=
True
done
=
True
# Remove "-u Symbol" arguments, since they are usually not relevant
# Fix for MacOS X.
# for the new compilation, even if they were used for compiling python.
cmd
=
remove_python_framework_dir
(
cmd
)
# If they are necessary, the nvcc syntax is "-U Symbol" with a capital U.
done
=
False
#cmd.append("--ptxas-options=-v") #uncomment this to see register and shared-mem requirements
while
not
done
:
_logger
.
debug
(
'Running cmd
%
s'
,
' '
.
join
(
cmd
))
orig_dir
=
os
.
getcwd
()
try
:
try
:
indexof
=
cmd
.
index
(
'-u'
)
os
.
chdir
(
location
)
cmd
.
pop
(
indexof
)
# Remove -u
p
=
subprocess
.
Popen
(
cmd
.
pop
(
indexof
)
# Remove argument to -u
cmd
,
stdout
=
subprocess
.
PIPE
,
stderr
=
subprocess
.
PIPE
)
except
ValueError
,
e
:
nvcc_stdout
,
nvcc_stderr
=
p
.
communicate
()[:
2
]
done
=
True
finally
:
os
.
chdir
(
orig_dir
)
# Fix for MacOS X.
cmd
=
remove_python_framework_dir
(
cmd
)
if
nvcc_stdout
:
# this doesn't happen to my knowledge
#cmd.append("--ptxas-options=-v") #uncomment this to see register and shared-mem requirements
print
>>
sys
.
stderr
,
"DEBUG: nvcc STDOUT"
,
nvcc_stdout
_logger
.
debug
(
'Running cmd
%
s'
,
' '
.
join
(
cmd
))
orig_dir
=
os
.
getcwd
()
for
eline
in
nvcc_stderr
.
split
(
'
\n
'
):
try
:
if
not
eline
:
os
.
chdir
(
location
)
continue
p
=
subprocess
.
Popen
(
if
'skipping incompatible'
in
eline
:
#ld is skipping an incompatible library
cmd
,
stdout
=
subprocess
.
PIPE
,
stderr
=
subprocess
.
PIPE
)
nvcc_stdout
,
nvcc_stderr
=
p
.
communicate
()[:
2
]
finally
:
os
.
chdir
(
orig_dir
)
if
nvcc_stdout
:
# this doesn't happen to my knowledge
print
>>
sys
.
stderr
,
"DEBUG: nvcc STDOUT"
,
nvcc_stdout
for
eline
in
nvcc_stderr
.
split
(
'
\n
'
):
if
not
eline
:
continue
if
'skipping incompatible'
in
eline
:
#ld is skipping an incompatible library
continue
if
'declared but never referenced'
in
eline
:
continue
if
'statement is unreachable'
in
eline
:
continue
_logger
.
info
(
"NVCC:
%
s"
,
eline
)
if
p
.
returncode
:
# filter the output from the compiler
for
l
in
nvcc_stderr
.
split
(
'
\n
'
):
if
not
l
:
continue
continue
# filter out the annoying declaration warnings
if
'declared but never referenced'
in
eline
:
continue
if
'statement is unreachable'
in
eline
:
continue
_logger
.
info
(
"NVCC:
%
s"
,
eline
)
try
:
if
p
.
returncode
:
if
l
[
l
.
index
(
':'
):]
.
startswith
(
': warning: variable'
):
# filter the output from the compiler
continue
for
l
in
nvcc_stderr
.
split
(
'
\n
'
):
if
l
[
l
.
index
(
':'
):]
.
startswith
(
': warning: label'
)
:
if
not
l
:
continue
continue
except
Exception
:
# filter out the annoying declaration warnings
pass
print
>>
sys
.
stderr
,
l
try
:
print
>>
sys
.
stderr
,
'==============================='
if
l
[
l
.
index
(
':'
):]
.
startswith
(
': warning: variable'
):
for
i
,
l
in
enumerate
(
src_code
.
split
(
'
\n
'
)):
continue
print
>>
sys
.
stderr
,
i
+
1
,
l
if
l
[
l
.
index
(
':'
):]
.
startswith
(
': warning: label'
):
raise
Exception
(
'nvcc return status'
,
p
.
returncode
,
'for cmd'
,
' '
.
join
(
cmd
))
continue
except
Exception
:
#touch the __init__ file
pass
file
(
os
.
path
.
join
(
location
,
"__init__.py"
),
'w'
)
.
close
()
print
>>
sys
.
stderr
,
l
return
dlimport
(
lib_filename
)
print
>>
sys
.
stderr
,
'==============================='
for
i
,
l
in
enumerate
(
src_code
.
split
(
'
\n
'
)):
print
>>
sys
.
stderr
,
i
+
1
,
l
raise
Exception
(
'nvcc return status'
,
p
.
returncode
,
'for cmd'
,
' '
.
join
(
cmd
))
#touch the __init__ file
file
(
os
.
path
.
join
(
location
,
"__init__.py"
),
'w'
)
.
close
()
return
dlimport
(
lib_filename
)
def
remove_python_framework_dir
(
cmd
):
def
remove_python_framework_dir
(
cmd
):
...
...
theano/sandbox/cuda/type.py
浏览文件 @
cff27c13
...
@@ -12,7 +12,7 @@ try:
...
@@ -12,7 +12,7 @@ try:
# We must do those import to be able to create the full doc when nvcc
# We must do those import to be able to create the full doc when nvcc
# is not available
# is not available
import
cuda_ndarray.cuda_ndarray
as
cuda
import
cuda_ndarray.cuda_ndarray
as
cuda
from
theano.sandbox.cuda.nvcc_compiler
import
nvcc_module_compile_st
r
from
theano.sandbox.cuda.nvcc_compiler
import
NVCC_compile
r
import
cuda_ndarray
import
cuda_ndarray
except
ImportError
:
except
ImportError
:
pass
pass
...
@@ -370,7 +370,7 @@ class CudaNdarrayType(Type):
...
@@ -370,7 +370,7 @@ class CudaNdarrayType(Type):
return
(
2
,)
# with assertion about refcounts
return
(
2
,)
# with assertion about refcounts
def
c_compiler
(
self
):
def
c_compiler
(
self
):
return
nvcc_module_compile_st
r
return
NVCC_compile
r
def
c_compile_args
(
self
):
def
c_compile_args
(
self
):
ret
=
[]
ret
=
[]
...
...
theano/scan_module/scan_perform_ext.py
浏览文件 @
cff27c13
...
@@ -50,8 +50,8 @@ except ImportError:
...
@@ -50,8 +50,8 @@ except ImportError:
loc
=
os
.
path
.
join
(
config
.
compiledir
,
dirname
)
loc
=
os
.
path
.
join
(
config
.
compiledir
,
dirname
)
if
not
os
.
path
.
exists
(
loc
):
if
not
os
.
path
.
exists
(
loc
):
os
.
mkdir
(
loc
)
os
.
mkdir
(
loc
)
cmodule
.
gcc_module_
compile_str
(
dirname
,
code
,
location
=
loc
,
cmodule
.
GCC_compiler
.
compile_str
(
dirname
,
code
,
location
=
loc
,
preargs
=
[
'-pthread'
,
'-fwrapv'
,
preargs
=
[
'-pthread'
,
'-fwrapv'
,
'-O2'
,
'-O2'
,
'-fno-strict-aliasing'
])
'-fno-strict-aliasing'
])
# Save version into the __init__.py file.
# Save version into the __init__.py file.
...
...
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论