Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
P
pytensor
项目
项目
详情
活动
周期分析
仓库
仓库
文件
提交
分支
标签
贡献者
图表
比较
统计图
议题
0
议题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
日程
统计图
Wiki
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
testgroup
pytensor
Commits
f374e21e
提交
f374e21e
authored
2月 21, 2012
作者:
lamblin
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #465 from nouiz/compiler
Compiler
上级
1c1d7642
ca895ef3
全部展开
隐藏空白字符变更
内嵌
并排
正在显示
11 个修改的文件
包含
63 行增加
和
36 行删除
+63
-36
NEWS.txt
NEWS.txt
+9
-1
install.txt
doc/install.txt
+1
-1
configdefaults.py
theano/configdefaults.py
+4
-4
cc.py
theano/gof/cc.py
+30
-14
cmodule.py
theano/gof/cmodule.py
+0
-0
cutils.py
theano/gof/cutils.py
+3
-1
lazylinker_c.py
theano/gof/lazylinker_c.py
+3
-1
__init__.py
theano/sandbox/cuda/__init__.py
+6
-4
nvcc_compiler.py
theano/sandbox/cuda/nvcc_compiler.py
+0
-0
type.py
theano/sandbox/cuda/type.py
+3
-6
scan_perform_ext.py
theano/scan_module/scan_perform_ext.py
+4
-4
没有找到文件。
NEWS.txt
浏览文件 @
f374e21e
...
@@ -11,6 +11,14 @@ Since 0.5rc2
...
@@ -11,6 +11,14 @@ Since 0.5rc2
* Fix a bug with Gemv and Ger on CPU, when used on vectors with negative
* Fix a bug with Gemv and Ger on CPU, when used on vectors with negative
strides. Data was read from incorrect (and possibly uninitialized)
strides. Data was read from incorrect (and possibly uninitialized)
memory space. This bug was probably introduced in 0.5rc1.
memory space. This bug was probably introduced in 0.5rc1.
* The Theano flag "nvcc.flags" is now included in the hard part of the key.
This means that we now recompile all modules for each value of "nvcc.flags".
This does not change the default, but if you used this flag, it was ignored
for modules already compiled.
* The Theano flag "nvcc.fastmath" is now also used for the cuda_ndarray.cu file.
* Add the header_dirs to the hard part of the compilation key. This is
currently used only by cuda, but if we use libraries that are only headers,
this can be useful.
=============
=============
Release Notes
Release Notes
...
@@ -189,7 +197,7 @@ Crashes fixed:
...
@@ -189,7 +197,7 @@ Crashes fixed:
* "Interactive debugger" crash fix. (Ian, Frederic)
* "Interactive debugger" crash fix. (Ian, Frederic)
* Do not call gemm with strides 0, some blas refuse it. (Pascal Lamblin)
* Do not call gemm with strides 0, some blas refuse it. (Pascal Lamblin)
* Optimization crash with gemm and complex. (Frederic)
* Optimization crash with gemm and complex. (Frederic)
* GPU crash with elemwise. (Frederic)
* GPU crash with elemwise. (Frederic
, some reported by Chris Currivan
)
* Compilation crash with amdlibm and the GPU. (Frederic)
* Compilation crash with amdlibm and the GPU. (Frederic)
* IfElse crash. (Frederic)
* IfElse crash. (Frederic)
* Execution crash fix in AdvancedSubtensor1 on 32 bit computers. (Pascal)
* Execution crash fix in AdvancedSubtensor1 on 32 bit computers. (Pascal)
...
...
doc/install.txt
浏览文件 @
f374e21e
...
@@ -367,7 +367,7 @@ correctly (for example, for MKL this might be ``-lmkl -lguide -lpthread`` or
...
@@ -367,7 +367,7 @@ correctly (for example, for MKL this might be ``-lmkl -lguide -lpthread`` or
a .dll, and on OS-X it might be either a .dylib or a .so.)
a .dll, and on OS-X it might be either a .dylib or a .so.)
This might be just a problem with the way Theano passes compilation
This might be just a problem with the way Theano passes compilation
arguments to g
cc
, but the problem is not fixed yet.
arguments to g
++
, but the problem is not fixed yet.
.. _gpu_linux:
.. _gpu_linux:
...
...
theano/configdefaults.py
浏览文件 @
f374e21e
...
@@ -84,12 +84,12 @@ AddConfigVar('mode',
...
@@ -84,12 +84,12 @@ AddConfigVar('mode',
'FAST_COMPILE'
,
'PROFILE_MODE'
,
'DEBUG_MODE'
),
'FAST_COMPILE'
,
'PROFILE_MODE'
,
'DEBUG_MODE'
),
in_c_key
=
False
)
in_c_key
=
False
)
# Test whether or not g
cc
is present: disable C code if it is not.
# Test whether or not g
++
is present: disable C code if it is not.
# Using the dummy file descriptor below is a workaround for a crash experienced
# Using the dummy file descriptor below is a workaround for a crash experienced
# in an unusual Python 2.4.4 Windows environment with the default stdin=None.
# in an unusual Python 2.4.4 Windows environment with the default stdin=None.
dummy_stdin
=
open
(
os
.
devnull
)
dummy_stdin
=
open
(
os
.
devnull
)
try
:
try
:
subprocess
.
Popen
(
'g
cc
'
,
stdout
=
subprocess
.
PIPE
,
stderr
=
subprocess
.
PIPE
,
subprocess
.
Popen
(
'g
++
'
,
stdout
=
subprocess
.
PIPE
,
stderr
=
subprocess
.
PIPE
,
stdin
=
dummy_stdin
.
fileno
())
stdin
=
dummy_stdin
.
fileno
())
# Keep the default linker the same as the one for the mode FAST_RUN
# Keep the default linker the same as the one for the mode FAST_RUN
AddConfigVar
(
'linker'
,
AddConfigVar
(
'linker'
,
...
@@ -98,13 +98,13 @@ try:
...
@@ -98,13 +98,13 @@ try:
'vm'
,
'cvm'
,
'vm_nogc'
,
'cvm_nogc'
),
'vm'
,
'cvm'
,
'vm_nogc'
,
'cvm_nogc'
),
in_c_key
=
False
)
in_c_key
=
False
)
except
OSError
:
except
OSError
:
# g
cc
is not present, linker should default to python only
# g
++
is not present, linker should default to python only
AddConfigVar
(
'linker'
,
AddConfigVar
(
'linker'
,
"Default linker used if the theano flags mode is Mode or ProfileMode"
,
"Default linker used if the theano flags mode is Mode or ProfileMode"
,
EnumStr
(
'py'
,
'c|py'
,
'c'
,
'c|py_nogc'
,
'c&py'
,
EnumStr
(
'py'
,
'c|py'
,
'c'
,
'c|py_nogc'
,
'c&py'
,
'vm'
,
'cvm'
,
'vm_nogc'
,
'cvm_nogc'
),
'vm'
,
'cvm'
,
'vm_nogc'
,
'cvm_nogc'
),
in_c_key
=
False
)
in_c_key
=
False
)
_logger
.
warning
(
'
GCC
not detected ! Theano will be unable to execute '
_logger
.
warning
(
'
g++
not detected ! Theano will be unable to execute '
'optimized C-implementations (for both CPU and GPU) and will '
'optimized C-implementations (for both CPU and GPU) and will '
'default to Python implementations. Performance will be severely '
'default to Python implementations. Performance will be severely '
'degraded.'
)
'degraded.'
)
...
...
theano/gof/cc.py
浏览文件 @
f374e21e
...
@@ -622,6 +622,10 @@ class CLinker(link.Linker):
...
@@ -622,6 +622,10 @@ class CLinker(link.Linker):
for
x
in
[
y
.
type
for
y
in
self
.
variables
]
+
[
y
.
op
for
y
in
self
.
node_order
]:
for
x
in
[
y
.
type
for
y
in
self
.
variables
]
+
[
y
.
op
for
y
in
self
.
node_order
]:
try
:
ret
+=
x
.
c_compile_args
()
try
:
ret
+=
x
.
c_compile_args
()
except
utils
.
MethodNotDefined
:
pass
except
utils
.
MethodNotDefined
:
pass
c_compiler
=
self
.
c_compiler
()
ret
+=
c_compiler
.
compile_args
()
ret
=
list
(
set
(
ret
))
#to remove duplicate
ret
=
list
(
set
(
ret
))
#to remove duplicate
for
x
in
[
y
.
type
for
y
in
self
.
variables
]
+
[
y
.
op
for
y
in
self
.
node_order
]:
for
x
in
[
y
.
type
for
y
in
self
.
variables
]
+
[
y
.
op
for
y
in
self
.
node_order
]:
try
:
try
:
...
@@ -661,7 +665,7 @@ class CLinker(link.Linker):
...
@@ -661,7 +665,7 @@ class CLinker(link.Linker):
raise
Exception
(
'Nodes have requested specific different compilers'
,
raise
Exception
(
'Nodes have requested specific different compilers'
,
(
c_compiler
,
x_compiler
))
(
c_compiler
,
x_compiler
))
if
(
c_compiler
is
None
):
if
(
c_compiler
is
None
):
return
cmodule
.
gcc_module_compile_st
r
return
cmodule
.
GCC_compile
r
else
:
return
c_compiler
else
:
return
c_compiler
def
header_dirs
(
self
):
def
header_dirs
(
self
):
...
@@ -797,7 +801,8 @@ class CLinker(link.Linker):
...
@@ -797,7 +801,8 @@ class CLinker(link.Linker):
The key returned by this function is of the form (version, signature)
The key returned by this function is of the form (version, signature)
The signature has the following form:
The signature has the following form:
{{{
{{{
'CLinker.cmodule_key', compilation args, libraries, config md5,
'CLinker.cmodule_key', compilation args, libraries,
header_dirs, config md5,
(op0, input_signature0, output_signature0),
(op0, input_signature0, output_signature0),
(op1, input_signature1, output_signature1),
(op1, input_signature1, output_signature1),
...
...
...
@@ -857,11 +862,12 @@ class CLinker(link.Linker):
...
@@ -857,11 +862,12 @@ class CLinker(link.Linker):
"""
"""
return
self
.
cmodule_key_
(
self
.
env
,
self
.
no_recycling
,
return
self
.
cmodule_key_
(
self
.
env
,
self
.
no_recycling
,
compile_args
=
self
.
compile_args
(),
compile_args
=
self
.
compile_args
(),
libraries
=
self
.
libraries
()
libraries
=
self
.
libraries
(),
header_dirs
=
self
.
header_dirs
(),
)
)
@staticmethod
@staticmethod
def
cmodule_key_
(
env
,
no_recycling
,
compile_args
=
[],
libraries
=
[],
def
cmodule_key_
(
env
,
no_recycling
,
compile_args
=
[],
libraries
=
[],
insert_config_md5
=
True
):
header_dirs
=
[],
insert_config_md5
=
True
):
"""
"""
Do the actual computation of cmodule_key in a static method
Do the actual computation of cmodule_key in a static method
to allow it to be reused in scalar.Composite.__eq__
to allow it to be reused in scalar.Composite.__eq__
...
@@ -877,8 +883,24 @@ class CLinker(link.Linker):
...
@@ -877,8 +883,24 @@ class CLinker(link.Linker):
# First we put the header, compile_args, library names and config md5
# First we put the header, compile_args, library names and config md5
# into the signature.
# into the signature.
sig
=
[
'CLinker.cmodule_key'
]
# will be cast to tuple on return
sig
=
[
'CLinker.cmodule_key'
]
# will be cast to tuple on return
if
compile_args
is
not
None
:
sig
.
append
(
tuple
(
compile_args
))
if
compile_args
is
not
None
:
if
libraries
is
not
None
:
sig
.
append
(
tuple
(
libraries
))
# We must sort it, as the order of elements in a set is not guaranteed.
# In particular, 2 sets with the same content can give a different
# order depending on the order in which the data was put into them.
# Sets are used to remove duplicate elements.
args
=
sorted
(
compile_args
)
args
=
tuple
(
args
)
sig
.
append
(
args
)
if
libraries
is
not
None
:
# see comments for compile_args
args
=
sorted
(
libraries
)
args
=
tuple
(
args
)
sig
.
append
(
args
)
if
header_dirs
is
not
None
:
args
=
sorted
(
header_dirs
)
args
=
tuple
(
args
)
sig
.
append
(
args
)
# IMPORTANT: The 'md5' prefix is used to isolate the compilation
# IMPORTANT: The 'md5' prefix is used to isolate the compilation
# parameters from the rest of the key. If you want to add more key
# parameters from the rest of the key. If you want to add more key
...
@@ -889,12 +911,6 @@ class CLinker(link.Linker):
...
@@ -889,12 +911,6 @@ class CLinker(link.Linker):
else
:
else
:
sig
.
append
(
'md5: <omitted>'
)
sig
.
append
(
'md5: <omitted>'
)
# technically this should only be appended for gcc-compiled Ops
# and the flags of other compilers should be inserted here... but it's not clear how to
# do this.
if
config
.
gcc
.
cxxflags
:
sig
.
append
(
config
.
gcc
.
cxxflags
)
error_on_play
=
[
False
]
error_on_play
=
[
False
]
def
in_sig
(
i
,
topological_pos
,
i_idx
):
def
in_sig
(
i
,
topological_pos
,
i_idx
):
# assert that every input to every node is one of'
# assert that every input to every node is one of'
...
@@ -1007,7 +1023,7 @@ class CLinker(link.Linker):
...
@@ -1007,7 +1023,7 @@ class CLinker(link.Linker):
libs
=
self
.
libraries
()
libs
=
self
.
libraries
()
preargs
=
self
.
compile_args
()
preargs
=
self
.
compile_args
()
compiler_name
=
c_compiler
.
__name__
compiler_name
=
c_compiler
.
__name__
if
compiler_name
==
'
nvcc_module_compile_st
r'
and
config
.
lib
.
amdlibm
:
if
compiler_name
==
'
NVCC_compile
r'
and
config
.
lib
.
amdlibm
:
# This lib does not work correctly with nvcc in device code.
# This lib does not work correctly with nvcc in device code.
# and newer version of g++ as 4.5.1.
# and newer version of g++ as 4.5.1.
# example of errors: "/usr/lib/gcc/x86_64-redhat-linux/4.5.1/include/mmintrin.h(49): error: identifier "__builtin_ia32_emms" is undefined"
# example of errors: "/usr/lib/gcc/x86_64-redhat-linux/4.5.1/include/mmintrin.h(49): error: identifier "__builtin_ia32_emms" is undefined"
...
@@ -1024,7 +1040,7 @@ class CLinker(link.Linker):
...
@@ -1024,7 +1040,7 @@ class CLinker(link.Linker):
try
:
try
:
_logger
.
debug
(
"LOCATION
%
s"
,
str
(
location
))
_logger
.
debug
(
"LOCATION
%
s"
,
str
(
location
))
try
:
try
:
module
=
c_compiler
(
module
=
c_compiler
.
compile_str
(
module_name
=
mod
.
name
,
module_name
=
mod
.
name
,
src_code
=
src_code
,
src_code
=
src_code
,
location
=
location
,
location
=
location
,
...
...
theano/gof/cmodule.py
浏览文件 @
f374e21e
差异被折叠。
点击展开。
theano/gof/cutils.py
浏览文件 @
f374e21e
...
@@ -70,7 +70,9 @@ except ImportError:
...
@@ -70,7 +70,9 @@ except ImportError:
if
not
os
.
path
.
exists
(
loc
):
if
not
os
.
path
.
exists
(
loc
):
os
.
mkdir
(
loc
)
os
.
mkdir
(
loc
)
cmodule
.
gcc_module_compile_str
(
'cutils_ext'
,
code
,
location
=
loc
)
args
=
cmodule
.
GCC_compiler
.
compile_args
()
cmodule
.
GCC_compiler
.
compile_str
(
'cutils_ext'
,
code
,
location
=
loc
,
preargs
=
args
)
from
cutils_ext.cutils_ext
import
*
from
cutils_ext.cutils_ext
import
*
finally
:
finally
:
...
...
theano/gof/lazylinker_c.py
浏览文件 @
f374e21e
...
@@ -53,7 +53,9 @@ except ImportError:
...
@@ -53,7 +53,9 @@ except ImportError:
loc
=
os
.
path
.
join
(
config
.
compiledir
,
dirname
)
loc
=
os
.
path
.
join
(
config
.
compiledir
,
dirname
)
if
not
os
.
path
.
exists
(
loc
):
if
not
os
.
path
.
exists
(
loc
):
os
.
mkdir
(
loc
)
os
.
mkdir
(
loc
)
cmodule
.
gcc_module_compile_str
(
dirname
,
code
,
location
=
loc
)
args
=
cmodule
.
GCC_compiler
.
compile_args
()
cmodule
.
GCC_compiler
.
compile_str
(
dirname
,
code
,
location
=
loc
,
preargs
=
args
)
# Save version into the __init__.py file.
# Save version into the __init__.py file.
init_py
=
os
.
path
.
join
(
loc
,
'__init__.py'
)
init_py
=
os
.
path
.
join
(
loc
,
'__init__.py'
)
open
(
init_py
,
'w'
)
.
write
(
'_version =
%
s
\n
'
%
version
)
open
(
init_py
,
'w'
)
.
write
(
'_version =
%
s
\n
'
%
version
)
...
...
theano/sandbox/cuda/__init__.py
浏览文件 @
f374e21e
...
@@ -87,7 +87,7 @@ libcuda_ndarray_so = os.path.join(cuda_ndarray_loc,
...
@@ -87,7 +87,7 @@ libcuda_ndarray_so = os.path.join(cuda_ndarray_loc,
# Add the theano cache directory's cuda_ndarray subdirectory to the
# Add the theano cache directory's cuda_ndarray subdirectory to the
# list of places that are hard-coded into compiled modules' runtime
# list of places that are hard-coded into compiled modules' runtime
# library search list. This works in conjunction with
# library search list. This works in conjunction with
# nvcc_compiler.
nvcc_module_
compile_str which adds this folder during
# nvcc_compiler.
NVCC_compiler.
compile_str which adds this folder during
# compilation with -L and also adds -lcuda_ndarray when compiling
# compilation with -L and also adds -lcuda_ndarray when compiling
# modules.
# modules.
nvcc_compiler
.
add_standard_rpath
(
cuda_ndarray_loc
)
nvcc_compiler
.
add_standard_rpath
(
cuda_ndarray_loc
)
...
@@ -117,11 +117,13 @@ try:
...
@@ -117,11 +117,13 @@ try:
if
not
os
.
path
.
exists
(
cuda_ndarray_loc
):
if
not
os
.
path
.
exists
(
cuda_ndarray_loc
):
os
.
makedirs
(
cuda_ndarray_loc
)
os
.
makedirs
(
cuda_ndarray_loc
)
nvcc_compiler
.
nvcc_module_compile_str
(
compiler
=
nvcc_compiler
.
NVCC_compiler
()
compiler
.
compile_str
(
'cuda_ndarray'
,
'cuda_ndarray'
,
code
,
code
,
location
=
cuda_ndarray_loc
,
location
=
cuda_ndarray_loc
,
include_dirs
=
[
cuda_path
],
libs
=
[
'cublas'
])
include_dirs
=
[
cuda_path
],
libs
=
[
'cublas'
],
preargs
=
compiler
.
compile_args
())
from
cuda_ndarray.cuda_ndarray
import
*
from
cuda_ndarray.cuda_ndarray
import
*
except
Exception
,
e
:
except
Exception
,
e
:
_logger
.
error
(
"Failed to compile cuda_ndarray.cu:
%
s"
,
str
(
e
))
_logger
.
error
(
"Failed to compile cuda_ndarray.cu:
%
s"
,
str
(
e
))
...
@@ -130,7 +132,7 @@ except Exception, e:
...
@@ -130,7 +132,7 @@ except Exception, e:
if
cuda_available
:
if
cuda_available
:
# If necessary,
# If necessary,
# create a symlink called libcuda_ndarray.so
# create a symlink called libcuda_ndarray.so
# which nvcc_
module_compile_st
r uses when linking
# which nvcc_
compiler.NVCC_compile
r uses when linking
# any module except "cuda_ndarray" itself.
# any module except "cuda_ndarray" itself.
try
:
try
:
open
(
libcuda_ndarray_so
)
.
close
()
open
(
libcuda_ndarray_so
)
.
close
()
...
...
theano/sandbox/cuda/nvcc_compiler.py
浏览文件 @
f374e21e
差异被折叠。
点击展开。
theano/sandbox/cuda/type.py
浏览文件 @
f374e21e
...
@@ -12,7 +12,7 @@ try:
...
@@ -12,7 +12,7 @@ try:
# We must do those import to be able to create the full doc when nvcc
# We must do those import to be able to create the full doc when nvcc
# is not available
# is not available
import
cuda_ndarray.cuda_ndarray
as
cuda
import
cuda_ndarray.cuda_ndarray
as
cuda
from
theano.sandbox.cuda.nvcc_compiler
import
nvcc_module_compile_st
r
from
theano.sandbox.cuda.nvcc_compiler
import
NVCC_compile
r
import
cuda_ndarray
import
cuda_ndarray
except
ImportError
:
except
ImportError
:
pass
pass
...
@@ -370,13 +370,10 @@ class CudaNdarrayType(Type):
...
@@ -370,13 +370,10 @@ class CudaNdarrayType(Type):
return
(
2
,)
# with assertion about refcounts
return
(
2
,)
# with assertion about refcounts
def
c_compiler
(
self
):
def
c_compiler
(
self
):
return
nvcc_module_compile_st
r
return
NVCC_compile
r
def
c_compile_args
(
self
):
def
c_compile_args
(
self
):
ret
=
[]
return
[]
if
config
.
nvcc
.
fastmath
:
ret
.
append
(
'-use_fast_math'
)
return
ret
# Register CudaNdarrayType to the OutputGuard list of known types
# Register CudaNdarrayType to the OutputGuard list of known types
...
...
theano/scan_module/scan_perform_ext.py
浏览文件 @
f374e21e
...
@@ -50,10 +50,10 @@ except ImportError:
...
@@ -50,10 +50,10 @@ except ImportError:
loc
=
os
.
path
.
join
(
config
.
compiledir
,
dirname
)
loc
=
os
.
path
.
join
(
config
.
compiledir
,
dirname
)
if
not
os
.
path
.
exists
(
loc
):
if
not
os
.
path
.
exists
(
loc
):
os
.
mkdir
(
loc
)
os
.
mkdir
(
loc
)
cmodule
.
gcc_module_compile_str
(
dirname
,
code
,
location
=
loc
,
preargs
=
[
'-pthread'
,
'-fwrapv'
,
'-O2'
,
'-fno-strict-aliasing'
]
preargs
=
[
'-pthread'
,
'-fwrapv'
,
preargs
+=
cmodule
.
GCC_compiler
.
compile_args
()
'-O2'
,
cmodule
.
GCC_compiler
.
compile_str
(
dirname
,
code
,
location
=
loc
,
'-fno-strict-aliasing'
]
)
preargs
=
preargs
)
# Save version into the __init__.py file.
# Save version into the __init__.py file.
init_py
=
os
.
path
.
join
(
loc
,
'__init__.py'
)
init_py
=
os
.
path
.
join
(
loc
,
'__init__.py'
)
open
(
init_py
,
'w'
)
.
write
(
'_version =
%
s
\n
'
%
version
)
open
(
init_py
,
'w'
)
.
write
(
'_version =
%
s
\n
'
%
version
)
...
...
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论