testgroup / pytensor / Commits

Commit f4c3ad95, authored Jan 16, 2008 by bergstrj@iro.umontreal.ca
Parent: 40f0270a

    rewrite

Showing 1 changed file with 196 additions and 36 deletions.

grad.py  +196  -36
--- a/grad.py
+++ b/grad.py
 import gof
 import core
 
-class _GradD(dict):
-    """A dictionary-like class, into which derivative expressions may be added"""
-    def add(self, r, dr):
-        """Add dv to the sum of gradients associated with v"""
-        if r is core.UNDEFINED:
-            pass
-        elif r in self:
-            self[r] = self[r] + dr
-        else:
-            self[r] = dr
-
-def expand_grad(i, o, cost_derivs):
-    grad_d = _GradD(cost_derivs)
-    core.build_mode()
-    for op in gof.graph.io_toposort(i, o).__reversed__():
-        op.update_gradient(grad_d)
-        # inputgs = op.grad(*(op.inputs + [grad_d[output] for output in op.outputs]))
-        # if not isinstance(inputgs, (list, tuple)):
-        #     inputgs = [inputgs] * len(op.inputs)
-        # for input, inputg in zip(op.inputs, inputgs):
-        #     grad_d.add(input, inputg)
-    core.pop_mode()
-    return grad_d
-
-def grad(cost, wrt, cost_grad=1.0):
-    assert core.current_mode() != 'eval'
-    # cost, wrt = core.wrap(cost), core.wrap(wrt)
-    cost_derivs = expand_grad([wrt], [cost], {cost: core.wrap(cost_grad)})
-    # print wrt
-    # for k, v in cost_derivs.items():
-    #     print k, v
-    ret = cost_derivs.get(wrt, None)
-    if ret is core.UNDEFINED:
-        raise Exception("The gradient wrt %s is undefined." % wrt)
-    return ret
+class Grad(object):
+    """A dictionary-like class, into which derivative expressions may be added.
+
+    This class maps keys to their ids to deal with the ndarray, which is not
+    hashable.
+
+    Attributes: None
+
+    Methods:
+        add()
+        bprop()
+        __call__()
+        __getitem__()
+    """
+    def __init__(self, dct={}):
+        self.map = {}
+        self.outputs = []
+        for key, val in dct.items():
+            self.add_output(key, val)
+
+    def __contains__(self, item):
+        return id(item) in self.map
+
+    def __getitem__(self, item):
+        """Map item to its id and retrieve it."""
+        return self.map[id(item)]
+
+    def __setitem__(self, item, val):
+        """Map item to its id and store internally."""
+        self.map[id(item)] = val
+
+    def add_output(self, r, dr):
+        self.add(r, dr)
+        self.outputs.append(r)
+
+    def add(self, r, dr):
+        """Add dr to the sum of gradients associated with r.
+
+        This function should be fed as follows:
+
+        if dr is UNDEFINED:
+            r could be anything
+        elif dr is core.UNCOMPUTED:
+            r may be uncomputed or a NumpyR
+        else (dr is an instance of NumpyR):
+            r may be uncomputed or a NumpyR
+        """
+        if dr is core.UNDEFINED:
+            self[r] = core.UNDEFINED
+            # nothing to do
+        else:
+            if r.data is core.UNCOMPUTED or dr.data is core.UNCOMPUTED:
+                pass
+            else:
+                # try some hacky checks to catch obvious mistakes
+                if not hasattr(r.data, 'shape'):
+                    raise ValueError(('Grad::add r lacks shape: type=',
+                                      type(r.data)))
+                if not hasattr(dr.data, 'shape'):
+                    raise ValueError(('Grad::add dr lacks shape: type=',
+                                      type(dr.data)))
+                if r.data.shape != dr.data.shape:
+                    raise ValueError(('Grad::add r, dr shape mismatch',
+                                      r.data.shape, dr.data.shape))
+            # add dr to self[r]
+            #print 'grad::add', id(r),
+            if r in self:
+                print 'adding to self'
+                self[r] = self[r] + dr
+            else:
+                self[r] = dr
+
+    def bprop(self):
+        """Build a backpropagation graph.
+
+        The gradient associated with each value is stored in <self>, which
+        behaves like a dictionary. The idea is that when we call
+        op.update_gradient(self), the op's update_gradient function calls
+        back into <self>.add(), and says what gradient term goes with each of
+        its inputs. Most of the time, the gradients of the op's outputs are
+        necessary for the op to compute the gradient wrt its inputs, so
+        op.update_gradient will usually call <self>.__getitem__ (via the
+        [] notation).
+
+        It is essential that the gradient of an op's outputs be fully computed
+        before op.update_gradient is called, or else key errors may be raised
+        and incorrect gradients will be computed.
+
+        bprop sets the omega evaluation mode to 'build', so no computations
+        or allocations are done by bprop.
+        """
+        core.build_mode()
+        try:
+            outputs = self.outputs
+            inputs = gof.graph.inputs(outputs)
+            for op in gof.graph.io_toposort(inputs, outputs).__reversed__():
+                op.update_gradient(self)
+        finally:
+            core.pop_mode()
+
+    def __call__(self, item):
+        """Return a derivative term.
+
+        If the current omega evaluation mode is 'build_eval' then the node is
+        computed if necessary.
+        """
+        rval = self[item]
+        if core.current_mode() == 'build_eval':
+            rval.compute()
+        return rval
+
+def grad(cost, param=None, cost_grad=1.0):
+    """Return a symbolic expression of the gradient of <cost> wrt <param>.
+
+    If <param> is None, then return a Grad instance, from which the gradients
+    of multiple objects can be retrieved using the __getitem__ or __call__
+    methods (as in function currying in languages such as Scheme and OCaml).
+
+    If <param> is not None, then return the gradient expression for
+    d cost / d param.
+    """
+    if core.current_mode() == 'eval':
+        raise NotImplementedError(
+            'Gradient-related functions are not available in eval mode')
+    rval = Grad({cost: core.wrap(cost_grad)})
+    rval.bprop()
+    if param is None:
+        return rval
+    else:
+        return rval(param)
+
+#
+# UNIT TEST
+#
+
+import unittest
+import numpy
+import compile
+
+class _testCase(unittest.TestCase):
+    def setUp(self):
+        numpy.random.seed(1)
+        core.build_eval_mode()
+
+    def matinv(self, dim):
+        w = core.wrap(numpy.random.rand(dim, dim))
+        wi = core.wrap(numpy.random.rand(dim, dim))
+        ident = core.wrap(numpy.identity(dim))
+        for i in xrange(300):
+            wwi = core.dot(w, wi)
+            diff = wwi - ident
+            ssdiff = core.sum((diff ** 2))
+            if i == 0:
+                str0 = str_ssdiff = str(ssdiff)
+            #print ssdiff
+            g = grad(ssdiff)
+            gw = g(w)
+            w.data += -0.4 * gw.data
+        return str0, str(ssdiff)
+
+    def matinv_compiled(self, dim):
+        w = core.wrap(numpy.random.rand(dim, dim))
+        wi = core.wrap(numpy.random.rand(dim, dim))
+        ident = core.wrap(numpy.identity(dim))
+        wwi = core.dot(w, wi)
+        diff = wwi - ident
+        ssdiff = core.sum((diff ** 2))
+        str0 = str_ssdiff = str(ssdiff)
+        #print ssdiff
+        g = grad(ssdiff)
+        gw = g(w)
+        prog = compile.single(g(w), ssdiff)
+        for i in xrange(300):
+            prog()
+            w.data += -0.4 * gw.data
+        return str0, str(ssdiff)
+
+    def test0(self):
+        self.assertEqual(('2.67327580893', '0.000438649434819'),
+                         self.matinv(3))
+
+    def test1(self):
+        self.assertEqual(('2.67327580893', '0.000438649434819'),
+                         self.matinv_compiled(3))
+
+    def tearDown(self):
+        core.pop_mode()
+
+if __name__ == '__main__':
+    suite = unittest.TestLoader().loadTestsFromTestCase(_testCase)
+    unittest.TextTestRunner(verbosity=3).run(suite)
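The docstring of grad() describes a curried interface: called without a param, it returns a Grad instance that can be indexed repeatedly. A minimal usage sketch, patterned on the matinv test above and assuming the same core API (core.wrap, core.dot, core.sum, the mode functions) behaves as it does there; it is meant to run in the context of this module:

    import numpy
    import core

    core.build_eval_mode()                 # as in _testCase.setUp
    w = core.wrap(numpy.random.rand(3, 3))
    wi = core.wrap(numpy.random.rand(3, 3))
    ident = core.wrap(numpy.identity(3))
    cost = core.sum((core.dot(w, wi) - ident) ** 2)

    g = grad(cost)        # param=None: returns a Grad instance (curried form)
    gw = g(w)             # d cost / d w
    gwi = g(wi)           # d cost / d wi, from the same backprop pass
    gw2 = grad(cost, w)   # one-shot form, equivalent to grad(cost)(w)
    core.pop_mode()

The curried form amortizes one bprop() pass over the graph across all the parameters whose gradients are requested.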
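The Grad docstring notes that keys are mapped to their ids because ndarray objects are not hashable. A self-contained sketch of that technique, independent of gof/core (the IdMap name is hypothetical; it mirrors Grad.map):

    import numpy

    class IdMap(object):
        # Key on id(obj) instead of obj itself, so unhashable objects
        # such as ndarrays can be used as keys. Caveat: the caller must
        # keep the original objects alive, because CPython can recycle
        # an id once its object is garbage-collected.
        def __init__(self):
            self.map = {}
        def __contains__(self, item):
            return id(item) in self.map
        def __getitem__(self, item):
            return self.map[id(item)]
        def __setitem__(self, item, val):
            self.map[id(item)] = val

    a = numpy.zeros((2, 2))        # not hashable: a plain dict would reject it
    d = IdMap()
    d[a] = 'gradient for a'
    assert a in d and d[a] == 'gradient for a'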
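bprop's docstring states the key invariant: an op's output gradients must be fully accumulated before op.update_gradient runs, which visiting ops in reversed topological order guarantees. A toy sketch of that pattern on scalar nodes (Node, toposort, and the pass-through gradient rule are hypothetical stand-ins for gof ops and op.update_gradient):

    class Node(object):
        def __init__(self, inputs=()):
            self.inputs = list(inputs)

    def toposort(outputs):
        # depth-first post-order: every node appears after all its inputs
        order, seen = [], set()
        def visit(n):
            if id(n) in seen:
                return
            seen.add(id(n))
            for i in n.inputs:
                visit(i)
            order.append(n)
        for o in outputs:
            visit(o)
        return order

    def bprop(outputs, grads):
        # walk nodes in reverse topological order, so each node's gradient
        # is complete before it is propagated to the node's inputs
        for node in reversed(toposort(outputs)):
            g = grads.get(id(node), 0.0)
            for i in node.inputs:      # toy rule: pass the gradient through
                grads[id(i)] = grads.get(id(i), 0.0) + g
        return grads

    x = Node(); y = Node()
    z = Node([x, y])                   # z = f(x, y)
    cost = Node([z])
    print(bprop([cost], {id(cost): 1.0})[id(x)])   # -> 1.0

As in Grad, the gradient table is keyed by id, and visiting a node only after everything downstream of it mirrors the "gradients fully computed before update_gradient" requirement.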