提交 f53a585d authored 作者: Ian Goodfellow's avatar Ian Goodfellow

upgraded some ops' grad methods to support DisconnectedType

上级 345e4745
...@@ -14,6 +14,7 @@ from theano.compile import optdb ...@@ -14,6 +14,7 @@ from theano.compile import optdb
from theano.gof import Apply from theano.gof import Apply
from theano.tensor.nnet.sigm import sigmoid, softplus from theano.tensor.nnet.sigm import sigmoid, softplus
from theano.gradient import DisconnectedType
############ ############
...@@ -76,6 +77,10 @@ class SoftmaxWithBias(gof.Op): ...@@ -76,6 +77,10 @@ class SoftmaxWithBias(gof.Op):
def grad(self, inp, grads): def grad(self, inp, grads):
x, b = inp x, b = inp
g_sm, = grads g_sm, = grads
if isinstance(g_sm.type, DisconnectedType):
return [ DisconnectedType()(), DisconnectedType()() ]
sm = softmax_with_bias(x, b) sm = softmax_with_bias(x, b)
dx = softmax_grad(g_sm, sm) dx = softmax_grad(g_sm, sm)
db = tensor.sum(dx, axis=0) db = tensor.sum(dx, axis=0)
...@@ -710,28 +715,40 @@ class CrossentropySoftmaxArgmax1HotWithBias(gof.Op): ...@@ -710,28 +715,40 @@ class CrossentropySoftmaxArgmax1HotWithBias(gof.Op):
def grad(self, inp, grads):
    """Gradients of this op w.r.t. its inputs ``(x, b, y_idx)``.

    The op has three outputs (nll, softmax, argmax); each incoming
    gradient that is not disconnected contributes terms to the input
    gradients.  Inputs that receive no contribution are reported as
    disconnected.

    Parameters
    ----------
    inp : tuple
        ``(x, b, y_idx)`` — the op's symbolic inputs.
    grads : tuple
        ``(g_nll, g_sm, g_am)`` — gradients on the three outputs.

    Returns
    -------
    list
        ``[d_x, d_b, d_y_idx]`` where each entry is either a summed
        gradient expression or ``DisconnectedType()()``.
    """
    x, b, y_idx = inp
    g_nll, g_sm, g_am = grads

    # Per-input accumulators; an input whose list stays empty is
    # reported as disconnected.
    dx_terms = []
    db_terms = []
    d_idx_terms = []

    if not isinstance(g_nll.type, DisconnectedType):
        # Gradient flowing in through the NLL output.
        nll, sm = crossentropy_softmax_1hot_with_bias(x, b, y_idx)
        dx = crossentropy_softmax_1hot_with_bias_dx(g_nll, sm, y_idx)
        db = tensor.sum(dx, axis=[0])
        dx_terms.append(dx)
        db_terms.append(db)

    if not isinstance(g_sm.type, DisconnectedType):
        # Gradient flowing in through the softmax output; delegate to
        # SoftmaxWithBias's own gradient.
        dx, db = softmax_with_bias.grad((x, b), (g_sm,))
        dx_terms.append(dx)
        db_terms.append(db)

    if not isinstance(g_am.type, DisconnectedType):
        # argmax is integer-valued, so no gradient flows through it;
        # contribute zeros so the inputs still count as connected when
        # an incoming gradient is present.
        dx_terms.append(x.zeros_like())
        db_terms.append(b.zeros_like())
        d_idx_terms.append(y_idx.zeros_like())

    def _sum_or_disconnected(terms):
        # Sum all contributions; no contributions means the input is
        # disconnected from every output that carries a gradient.
        if not terms:
            return DisconnectedType()()
        total = terms[0]
        for term in terms[1:]:
            total = total + term
        return total

    return [_sum_or_disconnected(terms)
            for terms in [dx_terms, db_terms, d_idx_terms]]
def c_headers(self):
    """C++ headers required by this op's generated C code."""
    headers = ['<iostream>', '<cmath>']
    return headers
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论