Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
P
pytensor
项目
项目
详情
活动
周期分析
仓库
仓库
文件
提交
分支
标签
贡献者
图表
比较
统计图
议题
0
议题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
日程
统计图
Wiki
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
testgroup
pytensor
Commits
375b82ef
提交
375b82ef
authored
1月 07, 2014
作者:
AlOa
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Add openmp to elemwise non contiguous case
上级
0a92ffef
隐藏空白字符变更
内嵌
并排
正在显示
2 个修改的文件
包含
68 行增加
和
32 行删除
+68
-32
elemwise.py
theano/tensor/elemwise.py
+3
-12
elemwise_cgen.py
theano/tensor/elemwise_cgen.py
+65
-20
没有找到文件。
theano/tensor/elemwise.py
浏览文件 @
375b82ef
...
@@ -1029,14 +1029,6 @@ class Elemwise(OpenMPOp):
...
@@ -1029,14 +1029,6 @@ class Elemwise(OpenMPOp):
# which is allocated, OR, if there are any aliased outputs,
# which is allocated, OR, if there are any aliased outputs,
# the index of the last of these aliased outputs.
# the index of the last of these aliased outputs.
# We declare the scalar variables used in the inner loop to do
# the element-wise computation. Aliased scalar variables need
# not be declared, as they are #defined in defines
task_decl
=
""
.
join
([
"
%
s&
%
s_i = *
%
s_iter;
\n
"
%
(
dtype
,
name
,
name
)
for
name
,
dtype
in
izip
(
inames
+
list
(
real_onames
),
idtypes
+
list
(
real_odtypes
))])
# We generate the C code of the inner loop using the scalar op
# We generate the C code of the inner loop using the scalar op
task_code
=
self
.
scalar_op
.
c_code
(
task_code
=
self
.
scalar_op
.
c_code
(
Apply
(
self
.
scalar_op
,
Apply
(
self
.
scalar_op
,
...
@@ -1051,7 +1043,6 @@ class Elemwise(OpenMPOp):
...
@@ -1051,7 +1043,6 @@ class Elemwise(OpenMPOp):
code
=
"""
code
=
"""
{
{
%(defines)
s
%(defines)
s
%(task_decl)
s
%(task_code)
s
%(task_code)
s
%(undefs)
s
%(undefs)
s
}
}
...
@@ -1069,14 +1060,14 @@ class Elemwise(OpenMPOp):
...
@@ -1069,14 +1060,14 @@ class Elemwise(OpenMPOp):
loop_orders
=
orders
+
[
range
(
nnested
)]
*
len
(
real_onames
),
loop_orders
=
orders
+
[
range
(
nnested
)]
*
len
(
real_onames
),
dtypes
=
(
idtypes
+
list
(
real_odtypes
)),
dtypes
=
(
idtypes
+
list
(
real_odtypes
)),
loop_tasks
=
all_code
,
loop_tasks
=
all_code
,
sub
=
sub
)
sub
=
sub
,
reduce
=
False
,
openmp
=
self
.
openmp
)
else
:
else
:
loop
=
cgen
.
make_reordered_loop
(
loop
=
cgen
.
make_reordered_loop
(
init_loop_orders
=
orders
+
[
range
(
nnested
)]
*
len
(
real_onames
),
init_loop_orders
=
orders
+
[
range
(
nnested
)]
*
len
(
real_onames
),
olv_index
=
olv_index
,
olv_index
=
olv_index
,
dtypes
=
(
idtypes
+
list
(
real_odtypes
)),
dtypes
=
(
idtypes
+
list
(
real_odtypes
)),
inner_task
=
code
,
inner_task
=
code
,
sub
=
sub
)
sub
=
sub
,
openmp
=
self
.
openmp
)
# If all inputs and outputs are contiguous
# If all inputs and outputs are contiguous
# and the scalar op defines optimized code for that case
# and the scalar op defines optimized code for that case
...
@@ -1562,7 +1553,7 @@ for(int i=0;i<PyArray_NDIM(%(iname)s);i++){
...
@@ -1562,7 +1553,7 @@ for(int i=0;i<PyArray_NDIM(%(iname)s);i++){
all_code
=
[
task0_decl
+
code1
]
all_code
=
[
task0_decl
+
code1
]
loop
=
cgen
.
make_loop
(
loop
=
cgen
.
make_loop
(
[
order
,
range
(
nnested
)
+
[
'x'
]
*
len
(
axis
)],
[
order
,
range
(
nnested
)
+
[
'x'
]
*
len
(
axis
)],
[
idtype
,
adtype
],
all_code
,
sub
)
[
idtype
,
adtype
],
all_code
,
sub
,
reduce
=
True
)
end
=
""
end
=
""
if
adtype
!=
odtype
:
if
adtype
!=
odtype
:
...
...
theano/tensor/elemwise_cgen.py
浏览文件 @
375b82ef
import
theano
def
make_declare
(
loop_orders
,
dtypes
,
sub
):
def
make_declare
(
loop_orders
,
dtypes
,
sub
):
...
@@ -171,7 +172,7 @@ def make_alloc(loop_orders, dtype, sub, fortran='0'):
...
@@ -171,7 +172,7 @@ def make_alloc(loop_orders, dtype, sub, fortran='0'):
"""
%
dict
(
locals
(),
**
sub
)
"""
%
dict
(
locals
(),
**
sub
)
def
make_loop
(
loop_orders
,
dtypes
,
loop_tasks
,
sub
):
def
make_loop
(
loop_orders
,
dtypes
,
loop_tasks
,
sub
,
reduce
=
False
,
openmp
=
None
):
"""
"""
Make a nested loop over several arrays and associate specific code
Make a nested loop over several arrays and associate specific code
to each level of nesting.
to each level of nesting.
...
@@ -195,9 +196,37 @@ def make_loop(loop_orders, dtypes, loop_tasks, sub):
...
@@ -195,9 +196,37 @@ def make_loop(loop_orders, dtypes, loop_tasks, sub):
@type sub: a dictionary.
@type sub: a dictionary.
@param sub: Maps 'lv#' to a suitable variable name.
@param sub: Maps 'lv#' to a suitable variable name.
The 'lvi' variable corresponds to the ith element of loop_orders.
The 'lvi' variable corresponds to the ith element of loop_orders.
@type reduce: boolean
@param reduce: True if this function is called from CAReduce,
False if it is called from Elemwise, because in Elemwise the code
must be rearranged in order to use OpenMP.
"""
"""
def
loop_over_elemwise
(
preloop
,
code
,
indices
,
i
):
iterv
=
'ITER_
%
i'
%
i
update
=
""
suitable_n
=
"1"
for
j
,
index
in
enumerate
(
indices
):
var
=
sub
[
'lv
%
i'
%
j
]
dtype
=
dtypes
[
j
]
update
+=
"
%(dtype)
s &
%(var)
s_i = * (
%(var)
s_iter +
%(iterv)
s *
%(var)
s_jump
%(index)
s_
%(i)
s );
\n
"
%
locals
()
if
index
!=
'x'
:
suitable_n
=
"
%(var)
s_n
%(index)
s"
%
locals
()
if
openmp
:
openmp_minsize
=
theano
.
config
.
openmp_minsize
forloop
=
"""#pragma omp parallel for if(
%(suitable_n)
s >=
%(openmp_minsize)
s)
\n
"""
%
locals
()
else
:
forloop
=
""
forloop
+=
"""for (int
%(iterv)
s = 0;
%(iterv)
s<
%(suitable_n)
s;
%(iterv)
s++)"""
%
locals
()
return
"""
%(preloop)
s
%(forloop)
s {
%(update)
s
%(code)
s
}
"""
%
locals
()
def
loop_over
(
preloop
,
code
,
indices
,
i
):
def
loop_over
_reduce
(
preloop
,
code
,
indices
,
i
):
iterv
=
'ITER_
%
i'
%
i
iterv
=
'ITER_
%
i'
%
i
update
=
""
update
=
""
suitable_n
=
"1"
suitable_n
=
"1"
...
@@ -229,6 +258,11 @@ def make_loop(loop_orders, dtypes, loop_tasks, sub):
...
@@ -229,6 +258,11 @@ def make_loop(loop_orders, dtypes, loop_tasks, sub):
s
=
preloops
.
get
(
0
,
""
)
s
=
preloops
.
get
(
0
,
""
)
else
:
else
:
s
=
""
s
=
""
if
reduce
:
loop_over
=
loop_over_reduce
else
:
loop_over
=
loop_over_elemwise
for
i
,
(
pre_task
,
task
),
indices
in
reversed
(
zip
(
xrange
(
len
(
loop_tasks
)
-
1
),
loop_tasks
,
zip
(
*
loop_orders
))):
for
i
,
(
pre_task
,
task
),
indices
in
reversed
(
zip
(
xrange
(
len
(
loop_tasks
)
-
1
),
loop_tasks
,
zip
(
*
loop_orders
))):
s
=
loop_over
(
preloops
.
get
(
i
,
""
)
+
pre_task
,
s
+
task
,
indices
,
i
)
s
=
loop_over
(
preloops
.
get
(
i
,
""
)
+
pre_task
,
s
+
task
,
indices
,
i
)
...
@@ -236,7 +270,7 @@ def make_loop(loop_orders, dtypes, loop_tasks, sub):
...
@@ -236,7 +270,7 @@ def make_loop(loop_orders, dtypes, loop_tasks, sub):
return
"{
%
s}"
%
s
return
"{
%
s}"
%
s
def
make_reordered_loop
(
init_loop_orders
,
olv_index
,
dtypes
,
inner_task
,
sub
):
def
make_reordered_loop
(
init_loop_orders
,
olv_index
,
dtypes
,
inner_task
,
sub
,
openmp
=
None
):
'''A bit like make_loop, but when only the inner-most loop executes code.
'''A bit like make_loop, but when only the inner-most loop executes code.
All the loops will be reordered so that the loops over the output tensor
All the loops will be reordered so that the loops over the output tensor
...
@@ -325,7 +359,7 @@ def make_reordered_loop(init_loop_orders, olv_index, dtypes, inner_task, sub):
...
@@ -325,7 +359,7 @@ def make_reordered_loop(init_loop_orders, olv_index, dtypes, inner_task, sub):
++
%(ovar)
s_loops_it;
++
%(ovar)
s_loops_it;
"""
%
locals
()
"""
%
locals
()
## Get sorted strides
and jumps
## Get sorted strides
# Get strides in the initial order
# Get strides in the initial order
def
get_loop_strides
(
loop_order
,
i
):
def
get_loop_strides
(
loop_order
,
i
):
"""
"""
...
@@ -344,7 +378,7 @@ def make_reordered_loop(init_loop_orders, olv_index, dtypes, inner_task, sub):
...
@@ -344,7 +378,7 @@ def make_reordered_loop(init_loop_orders, olv_index, dtypes, inner_task, sub):
return
r
return
r
# We declare the initial strides as a 2D array, nvars x nnested
# We declare the initial strides as a 2D array, nvars x nnested
declare_strides
_jumps
=
"""
declare_strides
=
"""
int init_strides[
%(nvars)
i][
%(nnested)
i] = {
int init_strides[
%(nvars)
i][
%(nnested)
i] = {
%(strides)
s
%(strides)
s
};"""
%
dict
(
};"""
%
dict
(
...
@@ -355,46 +389,57 @@ def make_reordered_loop(init_loop_orders, olv_index, dtypes, inner_task, sub):
...
@@ -355,46 +389,57 @@ def make_reordered_loop(init_loop_orders, olv_index, dtypes, inner_task, sub):
for
i
,
lo
in
enumerate
(
init_loop_orders
)
for
i
,
lo
in
enumerate
(
init_loop_orders
)
if
len
(
lo
)
>
0
))
if
len
(
lo
)
>
0
))
# Declare (sorted) stride and
jumps
for each variable
# Declare (sorted) strides for each variable
# we iterate from innermost loop to outermost loop
# we iterate from innermost loop to outermost loop
declare_strides
_jumps
+=
"""
declare_strides
+=
"""
std::vector< std::pair<int, int> >::reverse_iterator
%(ovar)
s_loops_rit;
std::vector< std::pair<int, int> >::reverse_iterator
%(ovar)
s_loops_rit;
"""
%
locals
()
"""
%
locals
()
for
i
in
xrange
(
nvars
):
for
i
in
xrange
(
nvars
):
var
=
sub
[
"lv
%
i"
%
i
]
var
=
sub
[
"lv
%
i"
%
i
]
declare_strides
_jumps
+=
"""
declare_strides
+=
"""
%(ovar)
s_loops_rit =
%(ovar)
s_loops.rbegin();"""
%
locals
()
%(ovar)
s_loops_rit =
%(ovar)
s_loops.rbegin();"""
%
locals
()
adjust
=
"0"
for
j
in
reversed
(
range
(
nnested
)):
for
j
in
reversed
(
range
(
nnested
)):
jump
=
"(
%
s) - (
%
s)"
%
(
"
%(var)
s_stride_l
%(j)
i"
%
locals
(),
adjust
)
declare_strides
+=
"""
declare_strides_jumps
+=
"""
int
%(var)
s_stride_l
%(j)
i = init_strides[
%(i)
i][
%(ovar)
s_loops_rit->second];
int
%(var)
s_stride_l
%(j)
i = init_strides[
%(i)
i][
%(ovar)
s_loops_rit->second];
int
%(var)
s_jump_l
%(j)
i =
%(jump)
s;
++
%(ovar)
s_loops_rit;
++
%(ovar)
s_loops_rit;
"""
%
locals
()
"""
%
locals
()
adjust
=
"TOTAL_
%(j)
i *
%(var)
s_stride_l
%(j)
i"
%
locals
()
declare_iter
=
""
declare_iter
=
""
for
i
,
dtype
in
enumerate
(
dtypes
):
for
i
,
dtype
in
enumerate
(
dtypes
):
var
=
sub
[
"lv
%
i"
%
i
]
var
=
sub
[
"lv
%
i"
%
i
]
declare_iter
+=
"
%(var)
s_iter = (
%(dtype)
s*)(PyArray_DATA(
%(var)
s));
\n
"
%
locals
()
declare_iter
+=
"
%(var)
s_iter = (
%(dtype)
s*)(PyArray_DATA(
%(var)
s));
\n
"
%
locals
()
pointer_update
=
''
for
j
in
xrange
(
nvars
):
var
=
sub
[
"lv
%
i"
%
j
]
pointer_update
+=
"
%(dtype)
s &
%(var)
s_i = * (
%(var)
s_iter"
%
locals
()
tot_jump
=
''
for
i
in
reversed
(
range
(
nnested
)):
iterv
=
'ITER_
%
i'
%
i
pointer_update
+=
"+
%(var)
s_stride_l
%(i)
i*
%(iterv)
s"
%
locals
()
pointer_update
+=
");
\n
"
loop
=
inner_task
loop
=
inner_task
for
i
in
reversed
(
range
(
nnested
)):
for
i
in
reversed
(
range
(
nnested
)):
iterv
=
'ITER_
%
i'
%
i
iterv
=
'ITER_
%
i'
%
i
total
=
'TOTAL_
%
i'
%
i
total
=
'TOTAL_
%
i'
%
i
update
=
''
update
=
''
for
j
in
xrange
(
nvars
):
forloop
=
''
var
=
sub
[
"lv
%
i"
%
j
]
# The pointers are defined only in the innermost loop
update
+=
"
%(var)
s_iter +=
%(var)
s_jump_l
%(i)
i;
\n
"
%
locals
()
if
i
==
nnested
-
1
:
update
=
pointer_update
if
i
==
0
:
if
openmp
:
openmp_minsize
=
theano
.
config
.
openmp_minsize
forloop
+=
"""#pragma omp parallel for if(
%(total)
s >=
%(openmp_minsize)
s)
\n
"""
%
locals
()
forloop
+=
"for(int
%(iterv)
s = 0;
%(iterv)
s<
%(total)
s;
%(iterv)
s++)"
%
locals
()
loop
=
"""
loop
=
"""
for (int
%(iterv)
s =
%(total)
s;
%(iterv)
s;
%(iterv)
s--)
%(forloop)
s
{ // begin loop
%(i)
i
{ // begin loop
%(i)
i
%(loop)
s
%(update)
s
%(update)
s
%(loop)
s
} // end loop
%(i)
i
} // end loop
%(i)
i
"""
%
locals
()
"""
%
locals
()
...
@@ -402,7 +447,7 @@ def make_reordered_loop(init_loop_orders, olv_index, dtypes, inner_task, sub):
...
@@ -402,7 +447,7 @@ def make_reordered_loop(init_loop_orders, olv_index, dtypes, inner_task, sub):
'{'
,
'{'
,
order_loops
,
order_loops
,
declare_totals
,
declare_totals
,
declare_strides
_jumps
,
declare_strides
,
declare_iter
,
declare_iter
,
loop
,
loop
,
'}
\n
'
,
'}
\n
'
,
...
...
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论