提交 7cfd2879 作者: Frédéric Bastien 提交者: GitHub

Merge pull request #5486 from affanv14/gradchange

Change grad to L_op
差异被折叠。
...@@ -32,12 +32,12 @@ class Erf(UnaryScalarOp): ...@@ -32,12 +32,12 @@ class Erf(UnaryScalarOp):
else: else:
super(Erf, self).impl(x) super(Erf, self).impl(x)
def grad(self, inp, grads): def L_op(self, inputs, outputs, grads):
x, = inp x, = inputs
gz, = grads gz, = grads
if x.type in complex_types: if x.type in complex_types:
raise NotImplementedError() raise NotImplementedError()
if self(x).type in discrete_types: if outputs[0].type in discrete_types:
if x.type in discrete_types: if x.type in discrete_types:
return [x.zeros_like(dtype=theano.config.floatX)] return [x.zeros_like(dtype=theano.config.floatX)]
else: else:
...@@ -63,12 +63,12 @@ class Erfc(UnaryScalarOp): ...@@ -63,12 +63,12 @@ class Erfc(UnaryScalarOp):
else: else:
super(Erfc, self).impl(x) super(Erfc, self).impl(x)
def grad(self, inp, grads): def L_op(self, inputs, outputs, grads):
x, = inp x, = inputs
gz, = grads gz, = grads
if x.type in complex_types: if x.type in complex_types:
raise NotImplementedError() raise NotImplementedError()
if self(x).type in discrete_types: if outputs[0].type in discrete_types:
if x.type in discrete_types: if x.type in discrete_types:
return [x.zeros_like(dtype=theano.config.floatX)] return [x.zeros_like(dtype=theano.config.floatX)]
else: else:
...@@ -110,12 +110,12 @@ class Erfcx(UnaryScalarOp): ...@@ -110,12 +110,12 @@ class Erfcx(UnaryScalarOp):
else: else:
super(Erfcx, self).impl(x) super(Erfcx, self).impl(x)
def grad(self, inp, grads): def L_op(self, inputs, outputs, grads):
x, = inp x, = inputs
gz, = grads gz, = grads
if x.type in complex_types: if x.type in complex_types:
raise NotImplementedError() raise NotImplementedError()
if self(x).type in discrete_types: if outputs[0].type in discrete_types:
if x.type in discrete_types: if x.type in discrete_types:
return [x.zeros_like(dtype=theano.config.floatX)] return [x.zeros_like(dtype=theano.config.floatX)]
else: else:
...@@ -146,12 +146,12 @@ class Erfinv(UnaryScalarOp): ...@@ -146,12 +146,12 @@ class Erfinv(UnaryScalarOp):
else: else:
super(Erfinv, self).impl(x) super(Erfinv, self).impl(x)
def grad(self, inp, grads): def L_op(self, inputs, outputs, grads):
x, = inp x, = inputs
gz, = grads gz, = grads
if x.type in complex_types: if x.type in complex_types:
raise NotImplementedError() raise NotImplementedError()
if self(x).type in discrete_types: if outputs[0].type in discrete_types:
if x.type in discrete_types: if x.type in discrete_types:
return [x.zeros_like(dtype=theano.config.floatX)] return [x.zeros_like(dtype=theano.config.floatX)]
else: else:
...@@ -179,12 +179,12 @@ class Erfcinv(UnaryScalarOp): ...@@ -179,12 +179,12 @@ class Erfcinv(UnaryScalarOp):
else: else:
super(Erfcinv, self).impl(x) super(Erfcinv, self).impl(x)
def grad(self, inp, grads): def L_op(self, inputs, outputs, grads):
x, = inp x, = inputs
gz, = grads gz, = grads
if x.type in complex_types: if x.type in complex_types:
raise NotImplementedError() raise NotImplementedError()
if self(x).type in discrete_types: if outputs[0].type in discrete_types:
if x.type in discrete_types: if x.type in discrete_types:
return [x.zeros_like(dtype=theano.config.floatX)] return [x.zeros_like(dtype=theano.config.floatX)]
else: else:
...@@ -216,12 +216,12 @@ class Gamma(UnaryScalarOp): ...@@ -216,12 +216,12 @@ class Gamma(UnaryScalarOp):
else: else:
super(Gamma, self).impl(x) super(Gamma, self).impl(x)
def grad(self, inputs, gout): def L_op(self, inputs, outputs, gout):
(x,) = inputs (x,) = inputs
(gz,) = gout (gz,) = gout
if x.type in complex_types: if x.type in complex_types:
raise NotImplementedError() raise NotImplementedError()
if self(x).type in discrete_types: if outputs[0].type in discrete_types:
if x.type in discrete_types: if x.type in discrete_types:
return [x.zeros_like(dtype=theano.config.floatX)] return [x.zeros_like(dtype=theano.config.floatX)]
else: else:
...@@ -253,12 +253,12 @@ class GammaLn(UnaryScalarOp): ...@@ -253,12 +253,12 @@ class GammaLn(UnaryScalarOp):
else: else:
super(GammaLn, self).impl(x) super(GammaLn, self).impl(x)
def grad(self, inp, grads): def L_op(self, inputs, outputs, grads):
x, = inp x, = inputs
gz, = grads gz, = grads
if x.type in complex_types: if x.type in complex_types:
raise NotImplementedError() raise NotImplementedError()
if self(x).type in discrete_types: if outputs[0].type in discrete_types:
if x.type in discrete_types: if x.type in discrete_types:
return [x.zeros_like(dtype=theano.config.floatX)] return [x.zeros_like(dtype=theano.config.floatX)]
else: else:
......
...@@ -602,7 +602,7 @@ second dimension ...@@ -602,7 +602,7 @@ second dimension
ograds = [x.zeros_like() for x in outs] ograds = [x.zeros_like() for x in outs]
ograds[idx] = theano.tensor.ones_like(out) ograds[idx] = theano.tensor.ones_like(out)
bgrads = self._bgrad(inputs, ograds) bgrads = self._bgrad(inputs, outs, ograds)
rop_out = None rop_out = None
for jdx, (inp, eval_point) in enumerate(izip(inputs, for jdx, (inp, eval_point) in enumerate(izip(inputs,
...@@ -636,7 +636,7 @@ second dimension ...@@ -636,7 +636,7 @@ second dimension
def L_op(self, inputs, outs, ograds): def L_op(self, inputs, outs, ograds):
# compute grad with respect to broadcasted input # compute grad with respect to broadcasted input
rval = self._bgrad(inputs, ograds) rval = self._bgrad(inputs, outs, ograds)
# TODO: make sure that zeros are clearly identifiable # TODO: make sure that zeros are clearly identifiable
# to the gradient.grad method when the outputs have # to the gradient.grad method when the outputs have
...@@ -684,7 +684,7 @@ second dimension ...@@ -684,7 +684,7 @@ second dimension
return rval return rval
def _bgrad(self, inputs, ograds): def _bgrad(self, inputs, outputs, ograds):
# returns grad, with respect to broadcasted versions of inputs # returns grad, with respect to broadcasted versions of inputs
with change_flags(compute_test_value='off'): with change_flags(compute_test_value='off'):
...@@ -695,7 +695,10 @@ second dimension ...@@ -695,7 +695,10 @@ second dimension
scalar_inputs = list(map(as_scalar, inputs)) scalar_inputs = list(map(as_scalar, inputs))
scalar_ograds = list(map(as_scalar, ograds)) scalar_ograds = list(map(as_scalar, ograds))
scalar_igrads = self.scalar_op.grad(scalar_inputs, scalar_ograds) scalar_outputs = self.scalar_op.make_node(
*[get_scalar_type(dtype=i.type.dtype).make_variable()
for i in inputs]).outputs
scalar_igrads = self.scalar_op.L_op(scalar_inputs, scalar_outputs, scalar_ograds)
for igrad in scalar_igrads: for igrad in scalar_igrads:
assert igrad is not None, self.scalar_op assert igrad is not None, self.scalar_op
...@@ -711,6 +714,8 @@ second dimension ...@@ -711,6 +714,8 @@ second dimension
return r return r
if r in scalar_inputs: if r in scalar_inputs:
return inputs[scalar_inputs.index(r)] return inputs[scalar_inputs.index(r)]
if r in scalar_outputs:
return outputs[scalar_outputs.index(r)]
if r in scalar_ograds: if r in scalar_ograds:
return ograds[scalar_ograds.index(r)] return ograds[scalar_ograds.index(r)]
node = r.owner node = r.owner
......
...@@ -100,15 +100,14 @@ class SoftmaxWithBias(gof.Op): ...@@ -100,15 +100,14 @@ class SoftmaxWithBias(gof.Op):
# data type matches. # data type matches.
output_storage[0][0] = e_x.astype(x_dtype, copy=False) output_storage[0][0] = e_x.astype(x_dtype, copy=False)
def grad(self, inp, grads): def L_op(self, inp, outputs, grads):
x, b = inp x, b = inp
g_sm, = grads g_sm, = grads
if isinstance(g_sm.type, DisconnectedType): if isinstance(g_sm.type, DisconnectedType):
return [DisconnectedType()(), DisconnectedType()()] return [DisconnectedType()(), DisconnectedType()()]
sm = softmax_with_bias(x, b) dx = softmax_grad(g_sm, outputs[0])
dx = softmax_grad(g_sm, sm)
db = tensor.sum(dx, axis=0) db = tensor.sum(dx, axis=0)
return dx, db return dx, db
...@@ -440,18 +439,17 @@ class Softmax(gof.Op): ...@@ -440,18 +439,17 @@ class Softmax(gof.Op):
sm = e_x / e_x.sum(axis=1)[:, None] sm = e_x / e_x.sum(axis=1)[:, None]
output_storage[0][0] = sm output_storage[0][0] = sm
def grad(self, inp, grads): def L_op(self, inp, outputs, grads):
x, = inp x, = inp
g_sm, = grads g_sm, = grads
sm = softmax_op(x) return [softmax_grad(g_sm, outputs[0])]
return [softmax_grad(g_sm, sm)]
def R_op(self, inputs, eval_points): def R_op(self, inputs, eval_points):
# I think the Jacobian is symmetric so the R_op # I think the Jacobian is symmetric so the R_op
# is the same as the grad # is the same as the grad
if None in eval_points: if None in eval_points:
return [None] return [None]
return self.grad(inputs, eval_points) return self.L_op(inputs, [self(*inputs)], eval_points)
def infer_shape(self, node, shape): def infer_shape(self, node, shape):
return shape return shape
...@@ -1060,7 +1058,7 @@ class CrossentropySoftmaxArgmax1HotWithBias(gof.Op): ...@@ -1060,7 +1058,7 @@ class CrossentropySoftmaxArgmax1HotWithBias(gof.Op):
db_terms.append(db) db_terms.append(db)
if not isinstance(g_sm.type, DisconnectedType): if not isinstance(g_sm.type, DisconnectedType):
dx, db = softmax_with_bias.grad((x, b), (g_sm, )) dx, db = softmax_with_bias.L_op((x, b), [softmax_with_bias(x, b)], (g_sm, ))
dx_terms.append(dx) dx_terms.append(dx)
db_terms.append(db) db_terms.append(db)
......
...@@ -562,15 +562,14 @@ class Pool(OpenMPOp): ...@@ -562,15 +562,14 @@ class Pool(OpenMPOp):
pad, self.ndim) pad, self.ndim)
return [shp] return [shp]
def grad(self, inp, grads): def L_op(self, inputs, outputs, grads):
x, ws, stride, pad = inp x, ws, stride, pad = inputs
gz, = grads gz, = grads
disc = [DisconnectedType()() for i in inp[1:]] disc = [DisconnectedType()() for i in inputs[1:]]
if self.mode == 'max': if self.mode == 'max':
maxout = self(x, ws, stride, pad)
return [MaxPoolGrad(ndim=self.ndim, return [MaxPoolGrad(ndim=self.ndim,
ignore_border=self.ignore_border)( ignore_border=self.ignore_border)(
x, maxout, gz, ws=ws, stride=stride, pad=pad)] + disc x, outputs[0], gz, ws=ws, stride=stride, pad=pad)] + disc
else: else:
return [AveragePoolGrad(ndim=self.ndim, return [AveragePoolGrad(ndim=self.ndim,
ignore_border=self.ignore_border, ignore_border=self.ignore_border,
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论