提交 7cfd2879，作者：Frédéric Bastien，提交者：GitHub

Merge pull request #5486 from affanv14/gradchange

Change grad to L_op
差异被折叠。
......@@ -32,12 +32,12 @@ class Erf(UnaryScalarOp):
else:
super(Erf, self).impl(x)
def grad(self, inp, grads):
x, = inp
def L_op(self, inputs, outputs, grads):
x, = inputs
gz, = grads
if x.type in complex_types:
raise NotImplementedError()
if self(x).type in discrete_types:
if outputs[0].type in discrete_types:
if x.type in discrete_types:
return [x.zeros_like(dtype=theano.config.floatX)]
else:
......@@ -63,12 +63,12 @@ class Erfc(UnaryScalarOp):
else:
super(Erfc, self).impl(x)
def grad(self, inp, grads):
x, = inp
def L_op(self, inputs, outputs, grads):
x, = inputs
gz, = grads
if x.type in complex_types:
raise NotImplementedError()
if self(x).type in discrete_types:
if outputs[0].type in discrete_types:
if x.type in discrete_types:
return [x.zeros_like(dtype=theano.config.floatX)]
else:
......@@ -110,12 +110,12 @@ class Erfcx(UnaryScalarOp):
else:
super(Erfcx, self).impl(x)
def grad(self, inp, grads):
x, = inp
def L_op(self, inputs, outputs, grads):
x, = inputs
gz, = grads
if x.type in complex_types:
raise NotImplementedError()
if self(x).type in discrete_types:
if outputs[0].type in discrete_types:
if x.type in discrete_types:
return [x.zeros_like(dtype=theano.config.floatX)]
else:
......@@ -146,12 +146,12 @@ class Erfinv(UnaryScalarOp):
else:
super(Erfinv, self).impl(x)
def grad(self, inp, grads):
x, = inp
def L_op(self, inputs, outputs, grads):
x, = inputs
gz, = grads
if x.type in complex_types:
raise NotImplementedError()
if self(x).type in discrete_types:
if outputs[0].type in discrete_types:
if x.type in discrete_types:
return [x.zeros_like(dtype=theano.config.floatX)]
else:
......@@ -179,12 +179,12 @@ class Erfcinv(UnaryScalarOp):
else:
super(Erfcinv, self).impl(x)
def grad(self, inp, grads):
x, = inp
def L_op(self, inputs, outputs, grads):
x, = inputs
gz, = grads
if x.type in complex_types:
raise NotImplementedError()
if self(x).type in discrete_types:
if outputs[0].type in discrete_types:
if x.type in discrete_types:
return [x.zeros_like(dtype=theano.config.floatX)]
else:
......@@ -216,12 +216,12 @@ class Gamma(UnaryScalarOp):
else:
super(Gamma, self).impl(x)
def grad(self, inputs, gout):
def L_op(self, inputs, outputs, gout):
(x,) = inputs
(gz,) = gout
if x.type in complex_types:
raise NotImplementedError()
if self(x).type in discrete_types:
if outputs[0].type in discrete_types:
if x.type in discrete_types:
return [x.zeros_like(dtype=theano.config.floatX)]
else:
......@@ -253,12 +253,12 @@ class GammaLn(UnaryScalarOp):
else:
super(GammaLn, self).impl(x)
def grad(self, inp, grads):
x, = inp
def L_op(self, inputs, outputs, grads):
x, = inputs
gz, = grads
if x.type in complex_types:
raise NotImplementedError()
if self(x).type in discrete_types:
if outputs[0].type in discrete_types:
if x.type in discrete_types:
return [x.zeros_like(dtype=theano.config.floatX)]
else:
......
......@@ -602,7 +602,7 @@ second dimension
ograds = [x.zeros_like() for x in outs]
ograds[idx] = theano.tensor.ones_like(out)
bgrads = self._bgrad(inputs, ograds)
bgrads = self._bgrad(inputs, outs, ograds)
rop_out = None
for jdx, (inp, eval_point) in enumerate(izip(inputs,
......@@ -636,7 +636,7 @@ second dimension
def L_op(self, inputs, outs, ograds):
# compute grad with respect to broadcasted input
rval = self._bgrad(inputs, ograds)
rval = self._bgrad(inputs, outs, ograds)
# TODO: make sure that zeros are clearly identifiable
# to the gradient.grad method when the outputs have
......@@ -684,7 +684,7 @@ second dimension
return rval
def _bgrad(self, inputs, ograds):
def _bgrad(self, inputs, outputs, ograds):
# returns grad, with respect to broadcasted versions of inputs
with change_flags(compute_test_value='off'):
......@@ -695,7 +695,10 @@ second dimension
scalar_inputs = list(map(as_scalar, inputs))
scalar_ograds = list(map(as_scalar, ograds))
scalar_igrads = self.scalar_op.grad(scalar_inputs, scalar_ograds)
scalar_outputs = self.scalar_op.make_node(
*[get_scalar_type(dtype=i.type.dtype).make_variable()
for i in inputs]).outputs
scalar_igrads = self.scalar_op.L_op(scalar_inputs, scalar_outputs, scalar_ograds)
for igrad in scalar_igrads:
assert igrad is not None, self.scalar_op
......@@ -711,6 +714,8 @@ second dimension
return r
if r in scalar_inputs:
return inputs[scalar_inputs.index(r)]
if r in scalar_outputs:
return outputs[scalar_outputs.index(r)]
if r in scalar_ograds:
return ograds[scalar_ograds.index(r)]
node = r.owner
......
......@@ -100,15 +100,14 @@ class SoftmaxWithBias(gof.Op):
# data type matches.
output_storage[0][0] = e_x.astype(x_dtype, copy=False)
def L_op(self, inp, outputs, grads):
    """Gradient of softmax-with-bias w.r.t. its inputs ``(x, b)``.

    Parameters
    ----------
    inp : sequence of 2 variables
        ``(x, b)`` — the input matrix and the bias vector.
    outputs : sequence of 1 variable
        The forward outputs of this Op; ``outputs[0]`` is the softmax
        result, reused here instead of recomputing it (the point of
        ``L_op`` over the older ``grad`` interface).
    grads : sequence of 1 variable
        ``(g_sm,)`` — gradient of the cost w.r.t. the softmax output.

    Returns
    -------
    list
        ``[dx, db]``; both are disconnected when ``g_sm`` is.
    """
    x, b = inp
    g_sm, = grads
    # Caller explicitly marked the output gradient as disconnected:
    # propagate disconnectedness to both inputs.
    if isinstance(g_sm.type, DisconnectedType):
        return [DisconnectedType()(), DisconnectedType()()]
    # Reuse the already-computed forward output rather than rebuilding
    # softmax_with_bias(x, b).
    dx = softmax_grad(g_sm, outputs[0])
    # Bias gradient: sum the input gradient over the batch axis.
    # NOTE(review): assumes axis 0 is the batch dimension — consistent
    # with the rest of this file, but confirm against the Op's c_code.
    db = tensor.sum(dx, axis=0)
    return dx, db
......@@ -440,18 +439,17 @@ class Softmax(gof.Op):
sm = e_x / e_x.sum(axis=1)[:, None]
output_storage[0][0] = sm
def L_op(self, inp, outputs, grads):
    """Gradient of softmax w.r.t. its single input.

    ``outputs[0]`` is the forward softmax result; reusing it (instead of
    recomputing ``softmax_op(x)`` as the old ``grad`` method did) avoids
    duplicating the forward computation in the gradient graph.

    Parameters
    ----------
    inp : sequence of 1 variable
        ``(x,)`` — the softmax input.
    outputs : sequence of 1 variable
        The forward outputs of this Op.
    grads : sequence of 1 variable
        ``(g_sm,)`` — gradient w.r.t. the softmax output.

    Returns
    -------
    list
        A single-element list with the gradient w.r.t. ``x``.
    """
    x, = inp
    g_sm, = grads
    return [softmax_grad(g_sm, outputs[0])]
def R_op(self, inputs, eval_points):
    """R-operator of softmax.

    The Jacobian of softmax is symmetric, so the R_op coincides with the
    L_op applied to the evaluation points. The forward output needed by
    ``L_op`` is rebuilt here as ``self(*inputs)``.
    """
    # An undefined evaluation point makes the whole R_op undefined.
    if None in eval_points:
        return [None]
    # L_op requires the Op's outputs as its second argument.
    return self.L_op(inputs, [self(*inputs)], eval_points)
def infer_shape(self, node, shape):
    """Softmax is elementwise over rows: output shape equals input shape."""
    return shape
......@@ -1060,7 +1058,7 @@ class CrossentropySoftmaxArgmax1HotWithBias(gof.Op):
db_terms.append(db)
if not isinstance(g_sm.type, DisconnectedType):
dx, db = softmax_with_bias.grad((x, b), (g_sm, ))
dx, db = softmax_with_bias.L_op((x, b), [softmax_with_bias(x, b)], (g_sm, ))
dx_terms.append(dx)
db_terms.append(db)
......
......@@ -562,15 +562,14 @@ class Pool(OpenMPOp):
pad, self.ndim)
return [shp]
def grad(self, inp, grads):
x, ws, stride, pad = inp
def L_op(self, inputs, outputs, grads):
x, ws, stride, pad = inputs
gz, = grads
disc = [DisconnectedType()() for i in inp[1:]]
disc = [DisconnectedType()() for i in inputs[1:]]
if self.mode == 'max':
maxout = self(x, ws, stride, pad)
return [MaxPoolGrad(ndim=self.ndim,
ignore_border=self.ignore_border)(
x, maxout, gz, ws=ws, stride=stride, pad=pad)] + disc
x, outputs[0], gz, ws=ws, stride=stride, pad=pad)] + disc
else:
return [AveragePoolGrad(ndim=self.ndim,
ignore_border=self.ignore_border,
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论