Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
P
pytensor
项目
项目
详情
活动
周期分析
仓库
仓库
文件
提交
分支
标签
贡献者
图表
比较
统计图
议题
0
议题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
日程
统计图
Wiki
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
testgroup
pytensor
Commits
602e87d8
提交
602e87d8
authored
7月 22, 2009
作者:
James Bergstra
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
added files that i forgot to add before
上级
fbbeb192
隐藏空白字符变更
内嵌
并排
正在显示
4 个修改的文件
包含
225 行增加
和
0 行删除
+225
-0
Makefile
Makefile
+7
-0
nvcc_compiler.py
nvcc_compiler.py
+90
-0
opt.py
opt.py
+43
-0
var.py
var.py
+85
-0
没有找到文件。
Makefile
0 → 100644
浏览文件 @
602e87d8
# Build the type_support CUDA extension module with nvcc.
# Include paths: the cuda_ndarray sources under $(HOME), the CUDA toolkit
# headers, and the Python 2.6 headers.  Links against the CUDA runtime in
# $(CUDA_ROOT)/lib and directly against the prebuilt cuda_ndarray.so.
# -Xcompiler -fPIC forwards the position-independent-code flag to the host
# compiler so the result can be loaded as a shared library.
type_support.so: type_support.cu
	nvcc -O3 -shared -I$(HOME)/cvs/lgcm/cuda_ndarray -I$(CUDA_ROOT)/include -I/usr/include/python2.6 -o type_support.so -Xcompiler -fPIC type_support.cu -L$(CUDA_ROOT)/lib $(HOME)/cvs/lgcm/cuda_ndarray/cuda_ndarray.so

# Remove the built extension.
clean:
	rm type_support.so
nvcc_compiler.py
0 → 100644
浏览文件 @
602e87d8
import sys, os, subprocess, logging

# Helpers from theano's generic C-module machinery: standard library /
# include search paths, dynamic import of a built extension, and the
# platform-specific shared-library extension.
from theano.gof.cmodule import (std_libs, std_lib_dirs, std_include_dirs, dlimport,
        get_lib_extension)

# Module-level logger; default threshold WARN so info()/debug() below
# stay quiet unless the caller lowers the level.
_logger = logging.getLogger("theano_cuda_ndarray.nvcc_compiler")
_logger.setLevel(logging.WARN)
def error(*args):
    """Log *args*, space-joined, at ERROR level on the module logger."""
    # Lazy %-style args are the logging-module convention (the final
    # interpolation only happens if the record is emitted).
    _logger.error("ERROR: %s", ' '.join(str(a) for a in args))
def warning(*args):
    """Log *args*, space-joined, at WARNING level on the module logger."""
    # Lazy %-style args are the logging-module convention (the final
    # interpolation only happens if the record is emitted).
    _logger.warning("WARNING: %s", ' '.join(str(a) for a in args))
def info(*args):
    """Log *args*, space-joined, at INFO level on the module logger."""
    # Lazy %-style args are the logging-module convention (the final
    # interpolation only happens if the record is emitted).
    _logger.info("INFO: %s", ' '.join(str(a) for a in args))
def debug(*args):
    """Log *args*, space-joined, at DEBUG level on the module logger."""
    # Lazy %-style args are the logging-module convention (the final
    # interpolation only happens if the record is emitted).
    _logger.debug("DEBUG: %s", ' '.join(str(a) for a in args))
def
nvcc_module_compile_str
(
module_name
,
src_code
,
location
=
None
,
include_dirs
=
[],
lib_dirs
=
[],
libs
=
[],
preargs
=
[]):
"""
:param module_name: string (this has been embedded in the src_code
:param src_code: a complete c or c++ source listing for the module
:param location: a pre-existing filesystem directory where the cpp file and .so will be written
:param include_dirs: a list of include directory names (each gets prefixed with -I)
:param lib_dirs: a list of library search path directory names (each gets prefixed with -L)
:param libs: a list of libraries to link with (each gets prefixed with -l)
:param preargs: a list of extra compiler arguments
:returns: dynamically-imported python module of the compiled code.
"""
preargs
=
[]
if
preargs
is
None
else
list
(
preargs
)
preargs
.
append
(
'-fPIC'
)
no_opt
=
False
include_dirs
=
std_include_dirs
()
+
include_dirs
libs
=
std_libs
()
+
[
'cudart'
]
+
libs
lib_dirs
=
std_lib_dirs
()
+
[
os
.
path
.
join
(
os
.
getenv
(
'CUDA_ROOT'
),
'lib'
)]
+
lib_dirs
cppfilename
=
os
.
path
.
join
(
location
,
'mod.cu'
)
cppfile
=
file
(
cppfilename
,
'w'
)
debug
(
'Writing module C++ code to'
,
cppfilename
)
ofiles
=
[]
rval
=
None
cppfile
.
write
(
src_code
)
cppfile
.
close
()
lib_filename
=
os
.
path
.
join
(
location
,
'
%
s.
%
s'
%
(
module_name
,
get_lib_extension
()))
debug
(
'Generating shared lib'
,
lib_filename
)
cmd
=
[
'nvcc'
,
'-shared'
,
'-g'
]
+
[
pa
for
pa
in
preargs
if
pa
.
startswith
(
'-O'
)]
cmd
.
extend
([
'-Xcompiler'
,
','
.
join
(
pa
for
pa
in
preargs
if
not
pa
.
startswith
(
'-O'
))])
cmd
.
extend
(
'-I
%
s'
%
idir
for
idir
in
include_dirs
)
cmd
.
extend
([
'-o'
,
lib_filename
])
cmd
.
append
(
cppfilename
)
cmd
.
extend
([
'-L
%
s'
%
ldir
for
ldir
in
lib_dirs
])
cmd
.
extend
([
'-l
%
s'
%
l
for
l
in
libs
])
debug
(
'Running cmd'
,
' '
.
join
(
cmd
))
p
=
subprocess
.
Popen
(
cmd
,
stderr
=
subprocess
.
PIPE
)
stderr
=
p
.
communicate
()[
1
]
if
p
.
returncode
:
# filter the output from the compiler
for
l
in
stderr
.
split
(
'
\n
'
):
if
not
l
:
continue
# filter out the annoying declaration warnings
try
:
if
l
[
l
.
index
(
':'
):]
.
startswith
(
': warning: variable'
):
continue
if
l
[
l
.
index
(
':'
):]
.
startswith
(
': warning: label'
):
continue
except
:
pass
print
l
print
'==============================='
for
i
,
l
in
enumerate
(
src_code
.
split
(
'
\n
'
)):
print
i
+
1
,
l
raise
Exception
(
'nvcc return status'
,
p
.
returncode
)
#touch the __init__ file
file
(
os
.
path
.
join
(
location
,
"__init__.py"
),
'w'
)
.
close
()
return
dlimport
(
lib_filename
)
opt.py
0 → 100644
浏览文件 @
602e87d8
from theano import tensor, gof
# NOTE: `tensor` is imported again here; only `scalar` is new on this line.
from theano import tensor, scalar
# Brings in the GPU Ops and transfer helpers used by the optimizers below
# (GpuFromHost, HostFromGpu, GpuElemwise, GpuDimShuffle, gpu_from_host,
# host_from_gpu, ...).
from .basic_ops import *
@gof.local_optimizer([GpuFromHost(), None])
def local_gpu_host_gpu(node):
    """Collapse gpu_from_host(host_from_gpu(x)) into x."""
    matches = tensor.opt.opt.check_chain(node, GpuFromHost(), HostFromGpu())
    if matches:
        # The round-trip transfer is a no-op: reuse the inner GPU variable.
        inner_node = node.inputs[0].owner
        return [inner_node.inputs[0]]
    return False
tensor.opt.register_specialize(local_gpu_host_gpu, 'gpu')
@gof.local_optimizer([HostFromGpu(), None])
def local_host_gpu_host(node):
    """Collapse host_from_gpu(gpu_from_host(x)) into x."""
    matches = tensor.opt.opt.check_chain(node, HostFromGpu(), GpuFromHost())
    if matches:
        # The round-trip transfer is a no-op: reuse the inner host variable.
        inner_node = node.inputs[0].owner
        return [inner_node.inputs[0]]
    return False
tensor.opt.register_specialize(local_host_gpu_host, 'gpu')
@gof.local_optimizer([])
def local_gpu_elemwise(node):
    """Replace an Elemwise whose inputs come off the GPU with a GpuElemwise.

    Inputs are wrapped in gpu_from_host and the result is brought back with
    host_from_gpu, so the graph's host-side interface is unchanged.
    """
    if not isinstance(node.op, tensor.Elemwise):
        return False

    def _from_gpu(var):
        # True when `var` was produced by a HostFromGpu transfer.
        return hasattr(var.owner, 'op') and isinstance(var.owner.op, HostFromGpu)

    if not any(_from_gpu(i) for i in node.inputs):
        return False

    # move the add to a GpuAdd
    gpu_op = GpuElemwise(node.op.scalar_op, node.op.inplace_pattern)
    gpu_inputs = [gpu_from_host(i) for i in node.inputs]
    return [host_from_gpu(gpu_op(*gpu_inputs))]
tensor.opt.register_specialize(local_gpu_elemwise, 'gpu')
@gof.local_optimizer([])
def local_gpu_dimshuffle(node):
    """Replace a DimShuffle of a GPU-resident value with a GpuDimShuffle.

    The input is moved back to the GPU with gpu_from_host and the result is
    returned through host_from_gpu, preserving the host-side interface.
    """
    if not isinstance(node.op, tensor.DimShuffle):
        return False
    x, = node.inputs
    if x.owner is None or not isinstance(x.owner.op, HostFromGpu):
        return False

    # move the add to a GpuAdd
    gpu_op = GpuDimShuffle(node.op.input_broadcastable, node.op.new_order)
    if node.op.inplace:
        result = gpu_op(gpu_from_host(x))
    else:
        # Not inplace: insert a copy so the GPU op cannot clobber its input.
        result = gpu_op(gpu_from_host(tensor.tensor_copy(x)))
    return [host_from_gpu(result)]
tensor.opt.register_specialize(local_gpu_dimshuffle, 'gpu')
var.py
0 → 100644
浏览文件 @
602e87d8
import numpy

from theano import Op, Type, Apply, Variable, Constant
from theano import tensor
# NOTE(review): the `shared_constructor` imported here is shadowed by the
# function of the same name defined later in this file -- confirm which one
# set_shared_for_numpy() is meant to call.
from theano.compile.sandbox.sharedvalue import shared, SharedVariable, shared_constructor

# Project-local pieces: the CudaNdarray graph type, the C-level value
# filter, and the host<->gpu transfer Ops.
from .type import CudaNdarrayType
from .type_support import filter as type_support_filter
from .basic_ops import HostFromGpu, GpuFromHost
class _operators(tensor.basic._tensor_py_operators):
    """Mixin of properties and conversion methods for CudaNdarray Variables.

    Arithmetic operators inherited from _tensor_py_operators build graphs of
    TensorType variables by default; the optimization pass (specialization)
    later inserts pure GPU implementations.  This approach relieves the
    Cuda-Ops of having to deal with input argument checking and gradients.
    """

    def _as_TensorVariable(self):
        # Convert to a host-side tensor by inserting a GPU->host transfer.
        return HostFromGpu()(self)

    def _as_CudaNdarrayVariable(self):
        # Already a CudaNdarray variable: nothing to convert.
        return self

    @property
    def dtype(self):
        # CudaNdarray storage is always single-precision float.
        return 'float32'

    @property
    def broadcastable(self):
        return self.type.broadcastable

    @property
    def ndim(self):
        return self.type.ndim
class CudaNdarrayVariable(Variable, _operators):
    """Graph Variable for CudaNdarrayType; behavior comes from the mixins."""
    pass
# Register as the Variable class that CudaNdarrayType instantiates.
CudaNdarrayType.Variable = CudaNdarrayVariable
class CudaNdarrayConstant(Constant, _operators):
    """Graph Constant for CudaNdarrayType; behavior comes from the mixins."""
    pass
# Register as the Constant class that CudaNdarrayType instantiates.
CudaNdarrayType.Constant = CudaNdarrayConstant
class CudaNdarraySharedVariable(SharedVariable, _operators):
    """Shared variable whose storage is a CudaNdarray on the device."""

    def _get_value(self):
        # Pull the device storage back into a numpy ndarray.
        return numpy.asarray(self.container.value)

    def _set_value(self, new_value):
        # container does the filtering
        self.container.value = new_value

    value = property(_get_value, _set_value)

    def filter_update(self, other):
        """Coerce `other` into a CudaNdarray variable usable as an update.

        Anything exposing _as_CudaNdarrayVariable is converted directly; a
        TensorType variable with matching dtype and broadcastable pattern is
        wrapped in a host->GPU transfer; everything else raises TypeError.
        """
        if hasattr(other, '_as_CudaNdarrayVariable'):
            return other._as_CudaNdarrayVariable()

        compatible = (isinstance(other.type, tensor.TensorType)
                and other.type.dtype == self.dtype
                and other.broadcastable == self.broadcastable)
        if not compatible:
            raise TypeError((other, other.type))
        return GpuFromHost()(other)
# Register as the SharedVariable class that CudaNdarrayType instantiates.
CudaNdarrayType.SharedVariable = CudaNdarraySharedVariable
def shared_constructor(value, name, strict=False):
    """SharedVariable constructor for CudaNdarrayType.

    :param value: float32 data for the shared variable; when ``strict`` is
        False anything numpy.asarray accepts is converted first
    :param name: name for the new shared variable
    :param strict: when True, ``value`` must already be a float32 ndarray
    :returns: a CudaNdarraySharedVariable holding ``value``
    :raises TypeError: if the (converted) value is not a float32 ndarray

    NOTE(review): this definition shadows the ``shared_constructor``
    decorator imported from theano.compile.sandbox.sharedvalue at the top of
    the file -- confirm whether set_shared_for_numpy() was meant to register
    this function with that decorator.
    """
    if strict:
        _value = value
    else:
        _value = numpy.asarray(value, dtype='float32')
    if not isinstance(_value, numpy.ndarray):
        raise TypeError('ndarray required')
    if _value.dtype.num != CudaNdarrayType.typenum:
        raise TypeError('float32 ndarray required')

    # Use _value.shape (not value.shape): in the non-strict branch `value`
    # may be e.g. a plain list, which has no .shape attribute.
    # No dimension is marked broadcastable (0 == False).
    bcast = [0 for b in _value.shape]
    cuda_type = CudaNdarrayType(broadcastable=bcast)
    return CudaNdarraySharedVariable(type=cuda_type, value=_value, name=name,
            strict=strict)
def unset_shared_for_numpy():
    """Remove the GPU handler for ndarray shared variables (unsupported)."""
    raise NotImplementedError
def set_shared_for_numpy():
    """
    Set the gpu_tensor_constructor as the handler for ndarray

    NOTE(review): the local ``shared_constructor`` function defined above
    shadows the decorator of the same name imported from
    theano.compile.sandbox.sharedvalue, so this call passes the local
    constructor to itself (which would raise TypeError('ndarray required')).
    It presumably intended to register the local constructor with the
    imported decorator -- confirm and disambiguate the two names.
    """
    shared_constructor(shared_constructor)
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论