Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
P
pytensor
项目
项目
详情
活动
周期分析
仓库
仓库
文件
提交
分支
标签
贡献者
图表
比较
统计图
议题
0
议题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
日程
统计图
Wiki
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
testgroup
pytensor
Commits
9f80bdcd
Unverified
提交
9f80bdcd
authored
6月 16, 2025
作者:
Ricardo Vieira
提交者:
GitHub
6月 16, 2025
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Fix bug in gradient of Blockwise'd Scan (#1482)
* Avoid pytest warning for variable name * Respect core type shape in gradient of Blockwise * Refactor Blockwise L_op
上级
b218ffe3
显示空白字符变更
内嵌
并排
正在显示
2 个修改的文件
包含
93 行增加
和
60 行删除
+93
-60
blockwise.py
pytensor/tensor/blockwise.py
+36
-51
test_blockwise.py
tests/tensor/test_blockwise.py
+57
-9
没有找到文件。
pytensor/tensor/blockwise.py
浏览文件 @
9f80bdcd
...
...
@@ -344,81 +344,66 @@ class Blockwise(COp):
return
[[
True
for
_
in
node
.
outputs
]
for
_
in
node
.
inputs
]
def
_bgrad
(
self
,
inputs
,
outputs
,
ograds
):
# Grad, with respect to broadcasted versions of inputs
def
as_core
(
t
,
core_t
):
# Inputs could be NullType or DisconnectedType
if
isinstance
(
t
.
type
,
NullType
|
DisconnectedType
):
return
t
return
core_t
.
type
()
def
L_op
(
self
,
inputs
,
outputs
,
output_gradients
):
batch_ndim
=
self
.
batch_ndim
(
outputs
[
0
]
.
owner
)
# Obtain core_op gradients
with
config
.
change_flags
(
compute_test_value
=
"off"
):
safe_inputs
=
[
tensor
(
dtype
=
inp
.
type
.
dtype
,
shape
=
(
None
,)
*
len
(
sig
))
for
inp
,
sig
in
zip
(
inputs
,
self
.
inputs_sig
,
strict
=
True
)
]
core_node
=
self
.
_create_dummy_core_node
(
safe_inputs
)
core_inputs
=
[
as_core
(
inp
,
core_inp
)
for
inp
,
core_inp
in
zip
(
inputs
,
core_node
.
inputs
,
strict
=
True
)
tensor
(
dtype
=
inp
.
type
.
dtype
,
shape
=
inp
.
type
.
shape
[
batch_ndim
:],
)
for
inp
in
inputs
]
core_ograds
=
[
as_core
(
ograd
,
core_ograd
)
for
ograd
,
core_ograd
in
zip
(
ograds
,
core_node
.
outputs
,
strict
=
True
)
core_outputs
=
self
.
_create_dummy_core_node
(
core_inputs
)
.
outputs
# Define core output_gradients, but keep original disconnected/null output_gradients (if any)
core_output_gradients
=
[
output_grad
if
isinstance
(
output_grad
.
type
,
NullType
|
DisconnectedType
)
else
core_output
.
type
()
for
output_grad
,
core_output
in
zip
(
output_gradients
,
core_outputs
,
strict
=
True
)
]
# FIXME: These core_outputs do not depend on core_inputs, not pretty
# It's not neccessarily a problem because if they are referenced by the gradient,
# they get replaced later in vectorize. But if the Op was to make any decision
# by introspecting the dependencies of output on inputs it would fail badly!
core_outputs
=
core_node
.
outputs
core_igrads
=
self
.
core_op
.
L_op
(
core_inputs
,
core_outputs
,
core_ograds
)
core_input_gradients
=
self
.
core_op
.
L_op
(
core_inputs
,
core_outputs
,
core_output_gradients
)
igrads
=
vectorize_graph
(
[
core_igrad
for
core_igrad
in
core_igrads
if
core_igrad
is
not
None
],
# Vectorize core gradients to original inputs
input_gradients
=
list
(
vectorize_graph
(
core_input_gradients
,
replace
=
dict
(
zip
(
core_inputs
+
core_outputs
+
core_ograd
s
,
inputs
+
outputs
+
ograd
s
,
core_inputs
+
core_outputs
+
core_output_gradient
s
,
inputs
+
outputs
+
output_gradient
s
,
strict
=
True
,
)
),
)
)
igrads_iter
=
iter
(
igrads
)
return
[
None
if
core_igrad
is
None
else
next
(
igrads_iter
)
for
core_igrad
in
core_igrads
]
def
L_op
(
self
,
inputs
,
outs
,
ograds
):
from
pytensor.tensor.math
import
sum
as
pt_sum
# Compute grad with respect to broadcasted input
rval
=
self
.
_bgrad
(
inputs
,
outs
,
ograds
)
# Sum out the broadcasted dimensions
batch_ndims
=
self
.
batch_ndim
(
outs
[
0
]
.
owner
)
batch_shape
=
outs
[
0
]
.
type
.
shape
[:
batch_ndims
]
# Sum out the broadcasted batch dimensions
batch_shape
=
outputs
[
0
]
.
type
.
shape
[:
batch_ndim
]
for
i
,
(
inp
,
sig
)
in
enumerate
(
zip
(
inputs
,
self
.
inputs_sig
,
strict
=
True
)):
if
isinstance
(
rval
[
i
]
.
type
,
NullType
|
DisconnectedType
):
if
isinstance
(
input_gradients
[
i
]
.
type
,
NullType
|
DisconnectedType
):
continue
assert
inp
.
type
.
ndim
==
batch_ndim
s
+
len
(
sig
)
assert
inp
.
type
.
ndim
==
batch_ndim
+
len
(
sig
)
to_sum
=
[
if
to_sum
:
=
[
j
for
j
,
(
inp_s
,
out_s
)
in
enumerate
(
zip
(
inp
.
type
.
shape
,
batch_shape
,
strict
=
False
)
)
if
inp_s
==
1
and
out_s
!=
1
]
if
to_sum
:
rval
[
i
]
=
pt_sum
(
rval
[
i
],
axis
=
to_sum
,
keepdims
=
True
)
]:
input_gradients
[
i
]
=
input_gradients
[
i
]
.
sum
(
axis
=
to_sum
,
keepdims
=
True
)
return
rval
return
input_gradients
def
_create_node_gufunc
(
self
,
node
:
Apply
,
impl
)
->
Callable
:
"""Define (or retrieve) the node gufunc used in `perform`.
...
...
tests/tensor/test_blockwise.py
浏览文件 @
9f80bdcd
...
...
@@ -6,11 +6,11 @@ import pytest
import
scipy.linalg
import
pytensor
from
pytensor
import
In
,
config
,
function
from
pytensor
import
In
,
config
,
function
,
scan
from
pytensor.compile
import
get_default_mode
,
get_mode
from
pytensor.gradient
import
grad
from
pytensor.graph
import
Apply
,
Op
from
pytensor.graph.replace
import
vectorize_node
from
pytensor.graph.replace
import
vectorize_
graph
,
vectorize_
node
from
pytensor.raise_op
import
assert_op
from
pytensor.tensor
import
diagonal
,
dmatrix
,
log
,
ones_like
,
scalar
,
tensor
,
vector
from
pytensor.tensor.blockwise
import
Blockwise
,
vectorize_node_fallback
...
...
@@ -162,13 +162,13 @@ class MyTestOp(Op):
raise
NotImplementedError
(
"Test Op should not be present in final graph"
)
test_op
=
MyTestOp
()
my_
test_op
=
MyTestOp
()
def
test_vectorize_node_default_signature
():
vec
=
tensor
(
shape
=
(
None
,))
mat
=
tensor
(
shape
=
(
5
,
None
))
node
=
test_op
.
make_node
(
vec
,
mat
)
node
=
my_
test_op
.
make_node
(
vec
,
mat
)
vect_node
=
vectorize_node
(
node
,
mat
,
mat
)
assert
isinstance
(
vect_node
.
op
,
Blockwise
)
and
isinstance
(
...
...
@@ -179,9 +179,9 @@ def test_vectorize_node_default_signature():
with
pytest
.
raises
(
ValueError
,
match
=
"Signature not provided nor found in core_op MyTestOp"
):
Blockwise
(
test_op
)
Blockwise
(
my_
test_op
)
vect_node
=
Blockwise
(
test_op
,
signature
=
"(m),(n)->(m),(n)"
)
.
make_node
(
vec
,
mat
)
vect_node
=
Blockwise
(
my_
test_op
,
signature
=
"(m),(n)->(m),(n)"
)
.
make_node
(
vec
,
mat
)
assert
vect_node
.
outputs
[
0
]
.
type
.
shape
==
(
5
,
None
,
...
...
@@ -198,7 +198,7 @@ def test_blockwise_shape():
inp_test
=
np
.
zeros
((
5
,
4
,
3
),
dtype
=
config
.
floatX
)
# Shape can be inferred from inputs
op
=
Blockwise
(
test_op
,
signature
=
"(m, n) -> (n, m)"
)
op
=
Blockwise
(
my_
test_op
,
signature
=
"(m, n) -> (n, m)"
)
out
=
op
(
inp
)
assert
out
.
type
.
shape
==
(
5
,
None
,
None
)
...
...
@@ -210,7 +210,7 @@ def test_blockwise_shape():
assert
tuple
(
shape_fn
(
inp_test
))
==
(
5
,
3
,
4
)
# Shape can only be partially inferred from inputs
op
=
Blockwise
(
test_op
,
signature
=
"(m, n) -> (m, k)"
)
op
=
Blockwise
(
my_
test_op
,
signature
=
"(m, n) -> (m, k)"
)
out
=
op
(
inp
)
assert
out
.
type
.
shape
==
(
5
,
None
,
None
)
...
...
@@ -233,7 +233,7 @@ def test_blockwise_shape():
inp1_test
=
np
.
zeros
((
7
,
1
,
4
,
3
),
dtype
=
config
.
floatX
)
inp2_test
=
np
.
zeros
((
1
,
5
,
4
,
3
),
dtype
=
config
.
floatX
)
op
=
Blockwise
(
test_op
,
signature
=
"(m, n), (m, n) -> (n, m), (m, k)"
)
op
=
Blockwise
(
my_
test_op
,
signature
=
"(m, n), (m, n) -> (n, m), (m, k)"
)
outs
=
op
(
inp1
,
inp2
)
assert
outs
[
0
]
.
type
.
shape
==
(
7
,
5
,
None
,
None
)
assert
outs
[
1
]
.
type
.
shape
==
(
7
,
5
,
None
,
None
)
...
...
@@ -650,3 +650,51 @@ def test_gradient_mixed_discrete_output_core_op():
np
.
ones
(
12
,
dtype
=
config
.
floatX
),
strict
=
True
,
)
def
test_blockwise_grad_core_type
():
class
StrictCoreTypeOp
(
Op
):
def
make_node
(
self
,
x
):
assert
x
.
type
.
shape
[
-
1
]
==
2
return
Apply
(
self
,
[
x
],
[
x
.
type
()])
def
perform
(
self
,
node
,
inputs
,
output_storage
):
output_storage
[
0
][
0
]
=
inputs
[
0
]
+
1
def
L_op
(
self
,
inputs
,
outputs
,
output_grads
):
[
x
]
=
inputs
assert
x
.
type
.
shape
==
(
2
,)
return
[
x
.
zeros_like
()]
strict_core_type_op
=
StrictCoreTypeOp
()
block_strict_core_type_op
=
Blockwise
(
strict_core_type_op
,
signature
=
"(a)->(a)"
)
x
=
tensor
(
"x"
,
shape
=
(
5
,
2
),
dtype
=
"float64"
)
y
=
block_strict_core_type_op
(
x
)
assert
y
.
type
.
shape
==
(
5
,
2
)
grad_y
=
grad
(
y
.
sum
(),
x
)
assert
grad_y
.
type
.
shape
==
(
5
,
2
)
np
.
testing
.
assert_allclose
(
grad_y
.
eval
({
x
:
np
.
ones
((
5
,
2
))}),
np
.
zeros
((
5
,
2
)),
)
def
test_scan_gradient_core_type
():
n_steps
=
3
seq
=
tensor
(
"seq"
,
shape
=
(
n_steps
,
1
),
dtype
=
"float64"
)
out
,
_
=
scan
(
lambda
s
:
s
,
sequences
=
[
seq
],
n_steps
=
n_steps
,
)
vec_seq
=
tensor
(
"vec_seq"
,
shape
=
(
None
,
n_steps
,
1
),
dtype
=
"float64"
)
vec_out
=
vectorize_graph
(
out
,
replace
=
{
seq
:
vec_seq
})
grad_sit_sot0
=
grad
(
vec_out
.
sum
(),
vec_seq
)
np
.
testing
.
assert_allclose
(
grad_sit_sot0
.
eval
({
vec_seq
:
np
.
ones
((
4
,
n_steps
,
1
))}),
np
.
ones
((
4
,
n_steps
,
1
)),
)
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论