提交 14ab4945 authored 作者: Frédéric Bastien

Merge pull request #1537 from lamblin/fix_grad_none

Do not return None in scalar op's grad, add tests
差异被折叠。
......@@ -2,11 +2,12 @@
#as scipy is not always available, we treat them separatly
import numpy
import theano
from theano.scalar.basic import (UnaryScalarOp, BinaryScalarOp,
exp, upgrade_to_float,
float_types)
from theano.scalar.basic import (upgrade_to_float_no_complex,
complex_types,
complex_types, discrete_types,
upcast)
imported_scipy_special = False
......@@ -32,12 +33,15 @@ class Erf(UnaryScalarOp):
gz, = grads
if x.type in complex_types:
raise NotImplementedError()
elif x.type in float_types:
cst = numpy.asarray(2. / numpy.sqrt(numpy.pi),
dtype=upcast(x.type.dtype, gz.type.dtype))
return gz * cst * exp(-x * x),
else:
return None,
if self(x).type in discrete_types:
if x.type in discrete_types:
return [x.zeros_like(dtype=theano.config.floatX)]
else:
return [x.zeros_like()]
cst = numpy.asarray(2. / numpy.sqrt(numpy.pi),
dtype=upcast(x.type.dtype, gz.type.dtype))
return gz * cst * exp(-x * x),
def c_code(self, node, name, inp, out, sub):
x, = inp
......@@ -60,12 +64,15 @@ class Erfc(UnaryScalarOp):
gz, = grads
if x.type in complex_types:
raise NotImplementedError()
elif x.type in float_types:
cst = numpy.asarray(2. / numpy.sqrt(numpy.pi),
dtype=upcast(x.type.dtype, gz.type.dtype))
return - gz * cst * exp(-x * x),
else:
return None,
if self(x).type in discrete_types:
if x.type in discrete_types:
return [x.zeros_like(dtype=theano.config.floatX)]
else:
return [x.zeros_like()]
cst = numpy.asarray(2. / numpy.sqrt(numpy.pi),
dtype=upcast(x.type.dtype, gz.type.dtype))
return - gz * cst * exp(-x * x),
def c_code(self, node, name, inp, out, sub):
x, = inp
......@@ -99,12 +106,15 @@ class Erfinv(UnaryScalarOp):
gz, = grads
if x.type in complex_types:
raise NotImplementedError()
elif x.type in float_types:
cst = numpy.asarray(numpy.sqrt(numpy.pi) / 2.,
dtype=upcast(x.type.dtype, gz.type.dtype))
return gz * cst * exp(erfinv(x) ** 2),
else:
return None,
if self(x).type in discrete_types:
if x.type in discrete_types:
return [x.zeros_like(dtype=theano.config.floatX)]
else:
return [x.zeros_like()]
cst = numpy.asarray(numpy.sqrt(numpy.pi) / 2.,
dtype=upcast(x.type.dtype, gz.type.dtype))
return gz * cst * exp(erfinv(x) ** 2),
# TODO: erfinv() is not provided by the C standard library
#def c_code(self, node, name, inp, out, sub):
......@@ -129,12 +139,15 @@ class Erfcinv(UnaryScalarOp):
gz, = grads
if x.type in complex_types:
raise NotImplementedError()
elif x.type in float_types:
cst = numpy.asarray(numpy.sqrt(numpy.pi) / 2.,
dtype=upcast(x.type.dtype, gz.type.dtype))
return - gz * cst * exp(erfcinv(x) ** 2),
else:
return None,
if self(x).type in discrete_types:
if x.type in discrete_types:
return [x.zeros_like(dtype=theano.config.floatX)]
else:
return [x.zeros_like()]
cst = numpy.asarray(numpy.sqrt(numpy.pi) / 2.,
dtype=upcast(x.type.dtype, gz.type.dtype))
return - gz * cst * exp(erfcinv(x) ** 2),
# TODO: erfcinv() is not provided by the C standard library
#def c_code(self, node, name, inp, out, sub):
......@@ -159,6 +172,14 @@ class Gamma(UnaryScalarOp):
super(Gamma, self).impl(x)
def grad(self, (x, ), (gz, )):
    """Gradient of the gamma function w.r.t. its input.

    Raises NotImplementedError for complex inputs.  When the op's
    output is discrete, the op is treated as non-differentiable and a
    zero gradient is returned instead of None (this is what PR #1537
    changes); the zero uses floatX when x itself is discrete, else
    x's own dtype.
    """
    if x.type in complex_types:
        raise NotImplementedError()
    # Discrete output => not differentiable; return zeros, never None.
    if self(x).type in discrete_types:
        if x.type in discrete_types:
            return [x.zeros_like(dtype=theano.config.floatX)]
        else:
            return [x.zeros_like()]
    # d/dx gamma(x) = gamma(x) * psi(x), scaled by the output gradient.
    return gz * gamma(x) * psi(x),
def c_code(self, node, name, (x, ), (z, ), sub):
......@@ -190,6 +211,14 @@ class GammaLn(UnaryScalarOp):
def grad(self, inp, grads):
    """Gradient of log-gamma w.r.t. its input.

    Raises NotImplementedError for complex inputs.  When the op's
    output is discrete, a zero gradient is returned instead of None
    (floatX when x itself is discrete, else x's own dtype).
    """
    x, = inp
    gz, = grads
    if x.type in complex_types:
        raise NotImplementedError()
    # Discrete output => not differentiable; return zeros, never None.
    if self(x).type in discrete_types:
        if x.type in discrete_types:
            return [x.zeros_like(dtype=theano.config.floatX)]
        else:
            return [x.zeros_like()]
    # d/dx log(gamma(x)) = psi(x), scaled by the output gradient.
    return [gz * psi(x)]
def c_code(self, node, name, inp, out, sub):
......@@ -224,7 +253,6 @@ class Psi(UnaryScalarOp):
def grad(self, inputs, outputs_gradients):
raise NotImplementedError()
return [None]
def c_support_code(self):
return (
......
......@@ -419,6 +419,54 @@ def makeTester(name, op, expected, checks=None, good=None, bad_build=None,
finally:
config.warn.sum_div_dimshuffle_bug = backup
def test_grad_none(self):
    """Check that ``self.op.grad`` never returns None as an input gradient.

    Builds symbolic inputs matching each entry in ``self.good``, builds
    output-gradient variables of a matching (float) dtype, calls
    ``self.op.grad``, and asserts no None appears in the result.  Ops
    whose grad is not implemented are skipped, not failed.
    """
    # Check that None is never returned as input gradient
    # when calling self.op.grad
    # We use all values in self.good because this has to be true
    # whether or not the values work for utt.verify_grad.
    if skip:
        raise SkipTest(skip)
    if not hasattr(self.op, 'grad'):
        # This is not actually an Op
        return
    for testname, inputs in self.good.items():
        inputs = [copy(input) for input in inputs]
        # Symbolic variables mirroring each concrete input's dtype and
        # broadcastable pattern (dims of size 1 are broadcastable).
        inputrs = [TensorType(
            dtype=input.dtype,
            broadcastable=[shape_elem == 1
                           for shape_elem in input.shape]
        )() for input in inputs]
        if (isinstance(self.expected, dict)
                and testname in self.expected):
            expecteds = self.expected[testname]
            # with numpy version, when we print a number and read it
            # back, we don't get exactly the same result, so we accept
            # rounding error in that case.
        else:
            expecteds = self.expected(*inputs)
        if not isinstance(expecteds, (list, tuple)):
            expecteds = (expecteds, )
        # One output-gradient variable per expected output; discrete
        # output dtypes get floatX since gradients are floats.
        out_grad_vars = []
        for out in expecteds:
            if str(out.dtype) in tensor.discrete_dtypes:
                dtype = floatX
            else:
                dtype = str(out.dtype)
            bcast = [shape_elem == 1 for shape_elem in out.shape]
            var = TensorType(dtype=dtype, broadcastable=bcast)()
            out_grad_vars.append(var)
        try:
            in_grad_vars = self.op.grad(inputrs, out_grad_vars)
        except (gof.utils.MethodNotDefined, NotImplementedError):
            # A missing grad is acceptable; returning None is not.
            pass
        else:
            assert None not in in_grad_vars
Checker.__name__ = name
return Checker
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论