Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
P
pytensor
项目
项目
详情
活动
周期分析
仓库
仓库
文件
提交
分支
标签
贡献者
图表
比较
统计图
议题
0
议题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
日程
统计图
Wiki
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
testgroup
pytensor
Commits
a489cb3e
提交
a489cb3e
authored
11月 20, 2008
作者:
James Bergstra
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
added c implementation of DimShuffle, some small optimizations to the C opwise linker
上级
5cb8526d
显示空白字符变更
内嵌
并排
正在显示
6 个修改的文件
包含
123 行增加
和
26 行删除
+123
-26
Makefile
benchmark/autoencoder/Makefile
+1
-2
aa.py
benchmark/autoencoder/aa.py
+6
-3
cc.py
theano/gof/cc.py
+13
-8
link.py
theano/gof/link.py
+26
-8
wraplinker.py
theano/sandbox/wraplinker.py
+4
-2
elemwise.py
theano/tensor/elemwise.py
+73
-3
没有找到文件。
benchmark/autoencoder/Makefile
浏览文件 @
a489cb3e
aa.x
:
aa.cc
g++
-O3
-ffast-math
-ftree-vectorize
aa.cc
-o
aa.x
-L
${
PUB_PREFIX
}
/lib
-lgsl
-lmkl
#g++ aa.cc -o aa.x -L${PUB_PREFIX}/lib -lgsl -lmkl
g++
-O3
-ffast-math
aa.cc
-o
aa.x
-L
${
PUB_PREFIX
}
/lib
-lgsl
${
THEANO_BLAS_LDFLAGS
}
clean
:
rm
aa.x
benchmark/autoencoder/aa.py
浏览文件 @
a489cb3e
...
...
@@ -213,6 +213,7 @@ def local_sub_to_gemm(node):
#TODO: we actually want to get any scalar here, not necessarily a constant
mulleft_const
=
opt
.
local_mul_canonizer
.
get_constant
(
mulleft
)
if
mulleft_const
is
not
None
:
assert
mulleft_const
.
size
()
==
1
mulleft_const
=
mulleft_const
.
flatten
()[
0
]
#subleft - (mulleft_const * ?)
if
mulright
.
owner
and
(
mulright
.
owner
.
op
==
T
.
add
):
...
...
@@ -422,8 +423,10 @@ class M(module.Module):
self
.
step
=
module
.
Method
([
x
],
err
,
updates
=
dict
(
updates
))
mod
=
M
()
#mode = 'FAST_RUN'
mode
=
ProfileMode
(
optimizer
=
'fast_run'
,
linker
=
theano
.
gof
.
OpWiseCLinker
())
mode
=
'FAST_RUN'
#mode = ProfileMode(optimizer='fast_run', linker=theano.gof.OpWiseCLinker())
mode
=
Mode
(
optimizer
=
'fast_run'
,
linker
=
theano
.
gof
.
OpWiseCLinker
(
nice_errors
=
True
))
mode
=
Mode
(
optimizer
=
'fast_run'
,
linker
=
'c'
)
print
mod
.
pretty
(
mode
=
mode
)
m
=
mod
.
make
(
mode
=
mode
)
...
...
@@ -443,6 +446,6 @@ try:
mode
.
print_summary
()
pass
except
:
raise
pass
theano/gof/cc.py
浏览文件 @
a489cb3e
...
...
@@ -686,14 +686,15 @@ class CLinker(link.Linker):
instantiate
.
customize
.
add_support_code
(
support_code
)
instantiate
.
customize
.
add_support_code
(
self
.
struct_code
)
instantiate
.
customize
.
add_support_code
(
static
)
for
extra_arg
in
(
"-w"
,
#-w means suppress all warnings
):
#"-O3",
#"-ffast-math",
for
extra_arg
in
(
"-O2"
,
"-ffast-math"
,
#"-fprefetch-loop-arrays",
#"-ftree-vect-loop-version",
#"-ftree-loop-optimize",
#"-ftree-vectorize"):
"-w"
#-w means suppress all warnings
):
instantiate
.
customize
.
add_extra_compile_arg
(
extra_arg
)
for
arg
in
self
.
compile_args
():
instantiate
.
customize
.
add_extra_compile_arg
(
arg
)
...
...
@@ -736,7 +737,6 @@ def _execute(cthunk, init_tasks, tasks, error_storage):
else
:
return
tasks
[
failure_code
-
n
]
def
execute
():
execute
.
cthunk
=
cthunk
failure
=
cutils
.
run_cthunk
(
cthunk
)
if
failure
:
task
,
taskname
,
id
=
find_task
(
failure
)
...
...
@@ -748,6 +748,7 @@ def _execute(cthunk, init_tasks, tasks, error_storage):
exc_value
=
exc_type
(
_exc_value
,
task
)
exc_value
.
__thunk_trace__
=
trace
# this can be used to retrieve the location where the Op was declared
raise
exc_type
,
exc_value
,
exc_trace
execute
.
cthunk
=
cthunk
return
execute
...
...
@@ -770,9 +771,12 @@ class OpWiseCLinker(link.LocalLinker):
__cache__
=
{}
def
__init__
(
self
,
fallback_on_perform
=
True
):
def
__init__
(
self
,
fallback_on_perform
=
True
,
nice_errors
=
True
):
self
.
env
=
None
self
.
fallback_on_perform
=
fallback_on_perform
self
.
nice_errors
=
nice_errors
def
accept
(
self
,
env
,
no_recycling
=
[]):
if
self
.
env
is
not
None
and
self
.
env
is
not
env
:
...
...
@@ -842,7 +846,9 @@ class OpWiseCLinker(link.LocalLinker):
else
:
no_recycling
=
[
storage_map
[
r
]
for
r
in
no_recycling
if
r
not
in
env
.
inputs
]
f
=
link
.
streamline
(
env
,
thunks
,
order
,
no_recycling
=
no_recycling
,
profiler
=
profiler
)
f
=
link
.
streamline
(
env
,
thunks
,
order
,
no_recycling
=
no_recycling
,
nice_errors
=
self
.
nice_errors
)
return
f
,
[
link
.
Container
(
input
,
storage
)
for
input
,
storage
in
zip
(
env
.
inputs
,
input_storage
)],
\
[
link
.
Container
(
output
,
storage
,
True
)
for
output
,
storage
in
zip
(
env
.
outputs
,
output_storage
)],
\
...
...
@@ -850,7 +856,6 @@ class OpWiseCLinker(link.LocalLinker):
def
_default_checker
(
x
,
y
):
"""WRITEME
Default checker for DualLinker. This checks that the
...
...
theano/gof/link.py
浏览文件 @
a489cb3e
...
...
@@ -5,6 +5,7 @@ from type import Type
import
sys
,
traceback
from
copy
import
copy
from
cutils
import
run_cthunk
__excepthook
=
sys
.
excepthook
...
...
@@ -225,9 +226,27 @@ def clear_storage_thunk(stg):
thunk
.
inputs
=
[
stg
]
return
thunk
def
streamline
(
env
,
thunks
,
order
,
no_recycling
=
[],
profiler
=
None
):
"""WRITEME"""
if
profiler
is
None
:
def
streamline
(
env
,
thunks
,
order
,
no_recycling
=
[],
profiler
=
None
,
nice_errors
=
True
):
"""WRITEME
:param env:
:param thunks: the list of program instructions
:param order: the list of apply instances that gave rise to the thunks (same order as thunks)
:param no_recycling: storage elements that cannot be 'recycled' by repeatedly executing the
program. These storage elements are cleared before re-running.
:param profiler: deprecated
:param nice_errors: run in such a way that the double-traceback is printed. This costs a
bit of performance in the inner python loop.
"""
if
profiler
is
not
None
:
raise
NotImplementedError
()
if
nice_errors
:
def
f
():
for
x
in
no_recycling
:
x
[
0
]
=
None
...
...
@@ -237,14 +256,13 @@ def streamline(env, thunks, order, no_recycling = [], profiler = None):
except
:
raise_with_op
(
node
)
else
:
# don't worry about raise_with_op, just go a little faster.
#there is a mix of python and c thunks
def
f
():
for
x
in
no_recycling
:
x
[
0
]
=
None
def
g
():
for
thunk
,
node
in
zip
(
thunks
,
order
):
profiler
.
profile_node
(
thunk
,
node
)
profiler
.
profile_env
(
g
,
env
)
f
.
profiler
=
profiler
for
thunk
in
thunks
:
thunk
()
return
f
class
LocalLinker
(
Linker
):
...
...
theano/sandbox/wraplinker.py
浏览文件 @
a489cb3e
...
...
@@ -171,13 +171,15 @@ class ProfileMode(Mode):
%
(
max
(
0
,
len
(
atimes
)
-
15
),
sum
(
t
for
t
,
a
in
atimes
[
15
:]))
n_ops_to_print
=
20
print
'Op-wise summary: <fraction of local_time spent on this kind of Op> <Op name>'
otimes
=
[(
t
/
local_time
,
a
,
self
.
op_cimpl
[
a
])
for
a
,
t
in
op_time
.
items
()]
otimes
.
sort
()
otimes
.
reverse
()
for
t
,
a
,
ci
in
otimes
[:
15
]:
for
t
,
a
,
ci
in
otimes
[:
n_ops_to_print
]:
print
'
\t
%.3
f
\t
%
s
%
s'
%
(
t
,
'*'
if
ci
else
' '
,
a
)
print
' ... (remaining
%
i Ops account for
%.2
f of the runtime)'
\
%
(
max
(
0
,
len
(
otimes
)
-
15
),
sum
(
t
for
t
,
a
,
ci
in
otimes
[
15
:]))
%
(
max
(
0
,
len
(
otimes
)
-
n_ops_to_print
),
sum
(
t
for
t
,
a
,
ci
in
otimes
[
n_ops_to_print
:]))
print
'(*) Op is running a c implementation'
theano/tensor/elemwise.py
浏览文件 @
a489cb3e
...
...
@@ -103,16 +103,18 @@ class DimShuffle(Op):
for
i
,
b
in
enumerate
(
input_broadcastable
):
if
i
not
in
new_order
:
# we want to drop this dimension because it's not a value in new_order
if
b
==
1
:
if
b
==
1
:
# 1 aka True
self
.
drop
.
append
(
i
)
else
:
# we cannot drop non-broadcastable dimensions
raise
NotImplemented
Error
(
"You cannot drop a non-broadcastable dimension."
)
raise
Value
Error
(
"You cannot drop a non-broadcastable dimension."
)
else
:
i2j
[
i
]
=
j
j
+=
1
# transposition of non-broadcastable dimensions
# This is how the dimensions will be permuted, without accounting for the extra
# 'x' broadcastable dimensions to insert.
self
.
shuffle
=
[
i2j
[
x
]
for
x
in
new_order
if
x
!=
'x'
]
# list of dimensions of the output that are broadcastable and were not in the original input
...
...
@@ -144,7 +146,8 @@ class DimShuffle(Op):
and
self
.
input_broadcastable
==
other
.
input_broadcastable
def
__hash__
(
self
):
return
hash
(
self
.
inplace
)
^
hash
(
self
.
new_order
)
^
hash
(
self
.
input_broadcastable
)
return
hash
(
type
(
self
))
^
hash
(
self
.
inplace
)
\
^
hash
(
self
.
new_order
)
^
hash
(
self
.
input_broadcastable
)
def
__str__
(
self
):
if
self
.
inplace
:
...
...
@@ -175,6 +178,73 @@ class DimShuffle(Op):
storage
[
0
]
=
res
def
c_code
(
self
,
node
,
name
,
(
input
,),
(
res
,),
sub
):
def
statements
(
lst
):
return
';
\n
'
.
join
(
lst
)
+
';'
nd_in
=
len
(
self
.
input_broadcastable
)
nd_out
=
len
(
self
.
new_order
)
check_input_nd
=
[(
'if (
%(input)
s->nd != '
+
str
(
nd_in
)
+
')'
'{PyErr_SetString(PyExc_NotImplementedError, "input nd");
%(fail)
s;}'
)]
clear_output
=
[
'if (
%(res)
s) {Py_XDECREF(
%(res)
s);}'
]
shape_statements
=
[
'npy_intp dimensions[
%
i]'
%
nd_out
]
shape_statements
+=
[(
'dimensions['
+
str
(
i
)
+
'] =
%(input)
s->dimensions['
+
str
(
o
)
+
']'
)
if
o
!=
'x'
else
(
'dimensions['
+
str
(
i
)
+
'] = 1'
)
for
i
,
o
in
enumerate
(
self
.
new_order
)]
strides_statements
=
[
'npy_intp strides[
%
i]'
%
nd_out
]
strides_statements
+=
[(
'strides['
+
str
(
i
)
+
'] =
%(input)
s->strides['
+
str
(
o
)
+
']'
)
if
o
!=
'x'
else
(
'strides['
+
str
(
i
)
+
'] = 0'
)
for
i
,
o
in
enumerate
(
self
.
new_order
)]
if
self
.
inplace
:
print
"INPLACE"
get_base
=
[
'{ PyArrayObject * base =
%(input)
s'
,
'Py_INCREF((PyObject*)base)'
]
else
:
print
"NOT INPLACE"
get_base
=
[(
'{ PyArrayObject * base = (PyArrayObject*)PyArray_FromAny((PyObject*)
%(input)
s, NULL,'
'0, 0, NPY_ALIGNED|NPY_ENSURECOPY, NULL)'
)]
alloc_output
=
[(
'
%(res)
s = (PyArrayObject*)PyArray_New(&PyArray_Type, '
''
+
str
(
nd_out
)
+
', dimensions, '
'PyArray_TYPE(base), strides, '
'base->data, base->descr->elsize, '
'PyArray_FLAGS(base), NULL)'
),
'
%(res)
s->base = (PyObject*)base'
,
'}'
]
full_code
=
statements
(
check_input_nd
+
clear_output
+
shape_statements
+
strides_statements
+
get_base
+
alloc_output
)
if
0
:
print
'C_CODE'
print
''
print
self
print
"IN BROAD"
,
self
.
input_broadcastable
print
"NEW ORDER"
,
self
.
new_order
print
"SHUFFLE"
,
self
.
shuffle
print
"AUGMENT"
,
self
.
augment
print
'------------'
print
''
print
full_code
if
0
:
import
sys
sys
.
exit
()
return
full_code
%
dict
(
locals
(),
**
sub
)
def
grad
(
self
,
(
x
,
),
(
gz
,
)):
gz
=
as_tensor
(
gz
)
grad_order
=
[
'x'
]
*
len
(
x
.
type
.
broadcastable
)
...
...
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论