Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
P
pytensor
项目
项目
详情
活动
周期分析
仓库
仓库
文件
提交
分支
标签
贡献者
图表
比较
统计图
议题
0
议题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
日程
统计图
Wiki
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
testgroup
pytensor
Commits
9e79f3a0
提交
9e79f3a0
authored
12月 06, 2022
作者:
Adrian Seyboldt
提交者:
Adrian Seyboldt
1月 04, 2023
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Initial version of llvm elemwise impl
上级
38dc6c9f
全部展开
隐藏空白字符变更
内嵌
并排
正在显示
3 个修改的文件
包含
274 行增加
和
0 行删除
+274
-0
elemwise.py
pytensor/link/numba/dispatch/elemwise.py
+0
-0
elemwise_codegen.py
pytensor/link/numba/dispatch/elemwise_codegen.py
+231
-0
helpers.py
pytensor/link/numba/dispatch/helpers.py
+43
-0
没有找到文件。
pytensor/link/numba/dispatch/elemwise.py
浏览文件 @
9e79f3a0
差异被折叠。
点击展开。
pytensor/link/numba/dispatch/elemwise_codegen.py
0 → 100644
浏览文件 @
9e79f3a0
from
llvmlite
import
ir
from
numba
import
types
from
numba.np
import
arrayobj
from
numba.core
import
cgutils
import
numba
import
numpy
as
np
def compute_itershape(
    ctx,
    builder: ir.IRBuilder,
    in_shapes,
    broadcast_pattern,
):
    """Derive the per-dimension iteration shape from the input shapes.

    For every dimension, the length is taken from the inputs whose
    broadcast flag for that dimension is falsy. When several inputs
    qualify, the last one in ``in_shapes`` wins. A dimension for which
    every input is flagged as broadcast gets the constant 64-bit integer 1.

    Parameters
    ----------
    ctx
        Not used by this function.
    builder
        Not used by this function.
    in_shapes
        One indexable shape per input. The number of dimensions is read
        from ``len(in_shapes[0])``.
    broadcast_pattern
        One indexable sequence of per-dimension flags per input, paired
        with ``in_shapes`` position by position.

    Returns
    -------
    list
        One entry per dimension: either an element of an input shape or
        the ``ir.IntType(64)`` constant 1.
    """
    unit_length = ir.IntType(64)(1)
    n_dims = len(in_shapes[0])

    iter_shape = []
    for axis in range(n_dims):
        # TODO Error checking...
        # What if all shapes are 0?
        axis_length = None
        for input_bc, input_shape in zip(broadcast_pattern, in_shapes):
            if not input_bc[axis]:
                # TODO raise an error if a length was already recorded for
                # this axis and it differs from this one.
                axis_length = input_shape[axis]
            # TODO (broadcast case) raise an error if the length != 1.
        # Axes that no input pins down have length one.
        iter_shape.append(unit_length if axis_length is None else axis_length)
    return iter_shape
def make_outputs(
    ctx,
    builder: ir.IRBuilder,
    iter_shape,
    out_bc,
    dtypes,
    inplace,
    inputs,
    input_types,
):
    """Collect the output arrays and their numba array types.

    Outputs listed in ``inplace`` reuse the corresponding input object and
    input type. Every other output is allocated through numba's internal
    ``arrayobj._empty_nd_impl`` as a C-layout array whose broadcast
    dimensions have length 1 and whose other dimensions follow
    ``iter_shape``.

    Parameters
    ----------
    ctx, builder
        Forwarded to ``arrayobj._empty_nd_impl``; ``builder.function`` is
        also used to tag the first function argument as ``noalias``.
    iter_shape
        Per-dimension lengths of the iteration space.
    out_bc
        One sequence of per-dimension broadcast flags per output.
    dtypes
        One dtype per output, anything accepted by ``np.dtype``.
    inplace
        Anything ``dict`` accepts, mapping an output index to the index of
        the input it overwrites.
    inputs, input_types
        Input array objects and their types, indexed by input position.

    Returns
    -------
    tuple
        ``(arrays, array_types)``, two lists with one entry per output.
    """
    unit_length = ir.IntType(64)(1)
    inplace_map = dict(inplace)

    out_arrays = []
    out_types: list[types.Array] = []
    for out_idx, (bc_pattern, out_dtype) in enumerate(zip(out_bc, dtypes)):
        if out_idx in inplace_map:
            source_idx = inplace_map[out_idx]
            out_arrays.append(inputs[source_idx])
            out_types.append(input_types[source_idx])
            # We need to incref once we return the inplace objects
            continue

        element_type = numba.from_dtype(np.dtype(out_dtype))
        array_type = types.Array(element_type, len(iter_shape), "C")
        out_types.append(array_type)

        alloc_shape = [
            unit_length if is_broadcast else length
            for length, is_broadcast in zip(iter_shape, bc_pattern)
        ]
        # This is actually an internal numba function; a public allocation
        # helper might be usable here instead.
        out_arrays.append(
            arrayobj._empty_nd_impl(ctx, builder, array_type, alloc_shape)
        )

    # Without any inplace operation all output arrays are freshly
    # allocated, so they cannot alias. Informing llvm can make it easier
    # to vectorize.
    if not inplace_map:
        # The first argument is the output pointer
        builder.function.args[0].add_attribute("noalias")

    return out_arrays, out_types
def make_loop_call(
    typingctx,
    context: numba.core.base.BaseContext,
    builder: ir.IRBuilder,
    scalar_func,
    scalar_signature,
    iter_shape,
    inputs,
    outputs,
    input_bc,
    output_bc,
    input_types,
    output_types,
) -> None:
    """Emit the nested loops that apply ``scalar_func`` element by element.

    One ``cgutils.for_range`` loop is opened per entry of ``iter_shape``.
    Inside the innermost loop the input elements are loaded, the compiled
    scalar function is called, and each result is either stored into its
    output array or added to a per-output accumulator. The loops are then
    closed from the innermost outwards, and accumulators are written back
    to their output arrays during that phase.

    Parameters
    ----------
    typingctx
        Not used by this function.
    context
        Numba context used to compile and call the scalar function.
    builder
        IR builder that receives all emitted instructions.
    scalar_func
        Object whose ``dispatcher`` attribute is compiled as a subroutine.
    scalar_signature
        Signature used to compile and call the scalar function. If its
        ``return_type`` is a ``types.Tuple`` the result is unpacked into
        one value per output; otherwise it is treated as a single output.
    iter_shape
        Loop length for each dimension, outermost first.
    inputs, outputs
        Array objects exposing ``shape``, ``strides`` and ``data``.
    input_bc, output_bc
        Per-array sequences of per-dimension flags. A truthy flag makes
        the index for that dimension the constant zero.
    input_types, output_types
        Array types matching ``inputs`` / ``outputs``. Their ``layout`` is
        read, and the output types are passed to ``arrayobj.store_item``.

    Returns
    -------
    None
        The function only emits IR through ``builder``.

    Notes
    -----
    ``zip(..., strict=True)`` is used throughout, so mismatched argument
    lengths raise ``ValueError`` (requires Python 3.10+).
    """
    # Splatted into `get_item_pointer2` after the index list for input loads.
    # NOTE(review): presumably the `wraparound` and `boundscheck` flags --
    # confirm against the numba `cgutils` signature.
    safe = (False, False)
    n_outputs = len(outputs)

    # Debugging aid, left disabled:
    # context.printf(builder, "iter shape: " + ', '.join(["%i"] * len(iter_shape)) + "\n", *iter_shape)

    # Lower the code of the scalar function so that we can use it in the inner loop
    # Caching is set to false to avoid a numba bug TODO ref?
    inner_func = context.compile_subroutine(
        builder,
        # I don't quite understand why we need to access `dispatcher` here.
        # The object does seem to be a dispatcher already? But it is missing
        # attributes...
        scalar_func.dispatcher,
        scalar_signature,
        caching=False,
    )
    inner = inner_func.fndesc

    # Extract shape and stride information from the array.
    # For later use in the loop body to do the indexing
    def extract_array(aryty, obj):
        """Return ``(data, shape, strides, layout)`` for one array."""
        shape = cgutils.unpack_tuple(builder, obj.shape)
        strides = cgutils.unpack_tuple(builder, obj.strides)
        data = obj.data
        layout = aryty.layout
        return (data, shape, strides, layout)

    # TODO I think this is better than the noalias attribute
    # for the input, but self_ref isn't supported in a released
    # llvmlite version yet
    # mod = builder.module
    # domain = mod.add_metadata([], self_ref=True)
    # input_scope = mod.add_metadata([domain], self_ref=True)
    # output_scope = mod.add_metadata([domain], self_ref=True)
    # input_scope_set = mod.add_metadata([input_scope, output_scope])
    # output_scope_set = mod.add_metadata([input_scope, output_scope])

    # From here on `inputs` / `outputs` hold the unpacked 4-tuples rather
    # than the array objects that were passed in.
    inputs = [
        extract_array(aryty, ary)
        for aryty, ary in zip(input_types, inputs, strict=True)
    ]

    outputs = [
        extract_array(aryty, ary)
        for aryty, ary in zip(output_types, outputs, strict=True)
    ]

    zero = ir.Constant(ir.IntType(64), 0)

    # Setup loops and initialize accumulators for outputs
    # This part corresponds to opening the loops
    loop_stack = []
    loops = []
    # One `(accumulator pointer, dimension it was created at)` pair per
    # output; `(None, None)` means the output is written directly.
    output_accumulator = [(None, None)] * n_outputs
    for dim, length in enumerate(iter_shape):
        # Find outputs that only have accumulations left
        for output in range(n_outputs):
            if output_accumulator[output][0] is not None:
                continue
            # Every remaining dimension of this output is flagged as
            # broadcast, so from here on it is accumulated into one slot.
            if all(output_bc[output][dim:]):
                # Zero constant of the type the output's data pointer
                # points to.
                value = outputs[output][0].type.pointee(0)
                accu = cgutils.alloca_once_value(builder, value)
                output_accumulator[output] = (accu, dim)

        # The loop context managers are entered by hand so that a variable
        # number of loops can stay open; they are exited at the end.
        loop = cgutils.for_range(builder, length)
        loop_stack.append(loop)
        loops.append(loop.__enter__())

    # Code in the inner most loop...
    idxs = [loopval.index for loopval in loops]

    # Load values from input arrays
    input_vals = []
    for array_info, bc in zip(inputs, input_bc, strict=True):
        # Broadcast dimensions always read index zero.
        idxs_bc = [zero if bc else idx for idx, bc in zip(idxs, bc, strict=True)]
        ptr = cgutils.get_item_pointer2(context, builder, *array_info, idxs_bc, *safe)
        val = builder.load(ptr)
        # val.set_metadata("alias.scope", input_scope_set)
        # val.set_metadata("noalias", output_scope_set)
        input_vals.append(val)

    # Call scalar function
    output_values = context.call_internal(
        builder,
        inner,
        scalar_signature,
        input_vals,
    )
    if isinstance(scalar_signature.return_type, types.Tuple):
        output_values = cgutils.unpack_tuple(builder, output_values)
    else:
        output_values = [output_values]

    # Update output value or accumulators respectively
    for i, ((accu, _), value) in enumerate(
        zip(output_accumulator, output_values, strict=True)
    ):
        if accu is not None:
            load = builder.load(accu)
            # load.set_metadata("alias.scope", output_scope_set)
            # load.set_metadata("noalias", input_scope_set)
            # NOTE(review): `fadd` is a floating-point add, so accumulation
            # presumably only works for float outputs -- confirm.
            new_value = builder.fadd(load, value)
            builder.store(new_value, accu)
            # TODO belongs to noalias scope
            # store.set_metadata("alias.scope", output_scope_set)
            # store.set_metadata("noalias", input_scope_set)
        else:
            idxs_bc = [
                zero if bc else idx
                for idx, bc in zip(idxs, output_bc[i], strict=True)
            ]
            ptr = cgutils.get_item_pointer2(context, builder, *outputs[i], idxs_bc)
            # store = builder.store(value, ptr)
            arrayobj.store_item(context, builder, output_types[i], value, ptr)
            # store.set_metadata("alias.scope", output_scope_set)
            # store.set_metadata("noalias", input_scope_set)

    # Close the loops and write accumulator values to the output arrays
    # NOTE(review): `accu_depth` was recorded above as an index into
    # `iter_shape` (0 = outermost loop), while `depth` here counts from the
    # innermost loop because the stack is reversed. Confirm that comparing
    # the two directly is intended.
    for depth, loop in enumerate(loop_stack[::-1]):
        for output, (accu, accu_depth) in enumerate(output_accumulator):
            if accu_depth == depth:
                idxs_bc = [
                    zero if bc else idx
                    for idx, bc in zip(idxs, output_bc[output], strict=True)
                ]
                ptr = cgutils.get_item_pointer2(context, builder, *outputs[output], idxs_bc)
                load = builder.load(accu)
                # load.set_metadata("alias.scope", output_scope_set)
                # load.set_metadata("noalias", input_scope_set)
                # store = builder.store(load, ptr)
                arrayobj.store_item(context, builder, output_types[output], load, ptr)
                # store.set_metadata("alias.scope", output_scope_set)
                # store.set_metadata("noalias", input_scope_set)
        loop.__exit__(None, None, None)

    return
pytensor/link/numba/dispatch/helpers.py
0 → 100644
浏览文件 @
9e79f3a0
from
numba
import
njit
,
types
from
numba.core
import
cgutils
from
numba.extending
import
intrinsic
def tuple_mapper(item_map_func):
    """Build a numba intrinsic that maps ``item_map_func`` over tuples.

    The returned intrinsic takes any number of tuples as star-args. For
    each position it calls ``item_map_func`` with the items found at that
    position in every input tuple, and it returns the results as a tuple.

    Parameters
    ----------
    item_map_func
        Function whose type is resolved and which is compiled once per
        tuple position.

    Returns
    -------
    The ``@intrinsic``-decorated ``map_tuple`` function.

    Notes
    -----
    The member types of the input tuples are zipped with ``strict=True``,
    so input tuples of different lengths raise ``ValueError`` during
    typing (requires Python 3.10+).
    """

    @intrinsic
    def map_tuple(typingctx, *input_tuples):
        # Resolve one call signature per tuple position.
        member_types = [tuple_type.types for tuple_type in input_tuples]
        signatures = []
        for item_types in zip(*member_types, strict=True):
            signatures.append(
                typingctx.resolve_function_type(item_map_func, item_types, {})
            )

        output_type = types.Tuple([item_sig.return_type for item_sig in signatures])
        signature = output_type(types.StarArgTuple(input_tuples))

        def codegen(context, builder, signature, args):
            # The star-args arrive packed as a single tuple of tuples.
            (packed_tuples,) = args
            unpacked = [
                cgutils.unpack_tuple(builder, packed)
                for packed in cgutils.unpack_tuple(builder, packed_tuples)
            ]

            # Compile and call the item function once per position.
            results = []
            for item_values, item_sig in zip(
                zip(*unpacked), signatures, strict=True
            ):
                compiled = context.compile_subroutine(
                    builder, item_map_func, item_sig
                )
                results.append(
                    context.call_internal(
                        builder, compiled.fndesc, item_sig, item_values
                    )
                )

            return context.make_tuple(builder, output_type, results)

        return signature, codegen

    return map_tuple
@njit
def check_broadcasting(array, bcs, shape):
    """Assert that ``array`` matches ``shape`` under the flags in ``bcs``.

    The array must have as many dimensions as ``shape`` has entries. A
    dimension whose flag is truthy must have length 1; any other dimension
    must have the length given in ``shape``. A violation fails an
    ``assert``.
    """
    assert len(shape) == array.ndim
    for is_broadcast, actual, expected in zip(bcs, array.shape, shape):
        if not is_broadcast:
            assert actual == expected
            continue
        # Broadcast dimensions must have length one.
        assert actual == 1
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论