Commit 83d77e84 authored by lamblin

Merge pull request #1326 from delallea/notimplemented_elemwise_grad

Fixed crash for unimplemented elemwise gradient
...@@ -127,7 +127,8 @@ class DimShuffle(Op): ...@@ -127,7 +127,8 @@ class DimShuffle(Op):
for i, j in enumerate(new_order): for i, j in enumerate(new_order):
if j != 'x': if j != 'x':
# There is a bug in numpy that results in isinstance(x, int) returning False for numpy integers. # There is a bug in numpy that results in isinstance(x, int)
# returning False for numpy integers.
# See <http://projects.scipy.org/numpy/ticket/2235>. # See <http://projects.scipy.org/numpy/ticket/2235>.
if not isinstance(j, (int, numpy.integer)): if not isinstance(j, (int, numpy.integer)):
raise TypeError( raise TypeError(
...@@ -135,7 +136,7 @@ class DimShuffle(Op): ...@@ -135,7 +136,7 @@ class DimShuffle(Op):
if j >= len(input_broadcastable): if j >= len(input_broadcastable):
raise ValueError(("new_order[%d] is %d, but the input " raise ValueError(("new_order[%d] is %d, but the input "
"only has %d axes.") % "only has %d axes.") %
(i,j,len(input_broadcastable))) (i, j, len(input_broadcastable)))
if j in new_order[(i + 1):]: if j in new_order[(i + 1):]:
raise ValueError(( raise ValueError((
"The same input dimension may not appear twice in the " "The same input dimension may not appear twice in the "
...@@ -659,11 +660,9 @@ class Elemwise(Op): ...@@ -659,11 +660,9 @@ class Elemwise(Op):
def grad(self, inputs, ograds): def grad(self, inputs, ograds):
outs = self(*inputs) outs = self(*inputs)
if not isinstance(outs, (list,tuple)): if not isinstance(outs, (list, tuple)):
outs = [ outs ] outs = [outs]
#compute grad with respect to broadcasted input #compute grad with respect to broadcasted input
rval = self._bgrad(inputs, ograds) rval = self._bgrad(inputs, ograds)
...@@ -694,7 +693,6 @@ class Elemwise(Op): ...@@ -694,7 +693,6 @@ class Elemwise(Op):
new_rval.append(elem) new_rval.append(elem)
return new_rval return new_rval
#sum out the broadcasted dimensions #sum out the broadcasted dimensions
for i, ipt in enumerate(inputs): for i, ipt in enumerate(inputs):
if rval[i] is None: if rval[i] is None:
...@@ -758,7 +756,7 @@ class Elemwise(Op): ...@@ -758,7 +756,7 @@ class Elemwise(Op):
def transform(r): def transform(r):
# From a graph of ScalarOps, make a graph of Broadcast ops. # From a graph of ScalarOps, make a graph of Broadcast ops.
if isinstance(r.type, DisconnectedType): if isinstance(r.type, (NullType, DisconnectedType)):
return r return r
if r in scalar_inputs: if r in scalar_inputs:
return inputs[scalar_inputs.index(r)] return inputs[scalar_inputs.index(r)]
...@@ -1183,7 +1181,8 @@ class CAReduce(Op): ...@@ -1183,7 +1181,8 @@ class CAReduce(Op):
if axis is None: if axis is None:
self.axis = axis self.axis = axis
# There is a bug in numpy that results in isinstance(x, int) returning False for numpy integers. # There is a bug in numpy that results in isinstance(x, int) returning
# False for numpy integers.
# See <http://projects.scipy.org/numpy/ticket/2235>. # See <http://projects.scipy.org/numpy/ticket/2235>.
elif isinstance(axis, (int, numpy.integer)): elif isinstance(axis, (int, numpy.integer)):
self.axis = (axis,) self.axis = (axis,)
......
...@@ -98,6 +98,7 @@ class test_DimShuffle(unittest_tools.InferShapeTester): ...@@ -98,6 +98,7 @@ class test_DimShuffle(unittest_tools.InferShapeTester):
y = x.dimshuffle(('x',) * (numpy.MAXDIMS + 1)) y = x.dimshuffle(('x',) * (numpy.MAXDIMS + 1))
self.assertRaises(ValueError, y.eval, {x: 0}) self.assertRaises(ValueError, y.eval, {x: 0})
class test_Broadcast(unittest.TestCase): class test_Broadcast(unittest.TestCase):
def setUp(self): def setUp(self):
unittest_tools.seed_rng() unittest_tools.seed_rng()
...@@ -749,7 +750,8 @@ class T_mean_dtype(unittest.TestCase): ...@@ -749,7 +750,8 @@ class T_mean_dtype(unittest.TestCase):
x = tensor.matrix(dtype=input_dtype) x = tensor.matrix(dtype=input_dtype)
for sum_dtype in imap(str, theano.scalar.all_types): for sum_dtype in imap(str, theano.scalar.all_types):
axis = axes[idx % len(axes)] axis = axes[idx % len(axes)]
# If the inner sum cannot be created, it will raise a TypeError. # If the inner sum cannot be created, it will raise a
# TypeError.
try: try:
mean_var = x.mean(dtype=sum_dtype, axis=axis) mean_var = x.mean(dtype=sum_dtype, axis=axis)
except TypeError: except TypeError:
...@@ -757,10 +759,11 @@ class T_mean_dtype(unittest.TestCase): ...@@ -757,10 +759,11 @@ class T_mean_dtype(unittest.TestCase):
else: else:
# Executed if no TypeError was raised # Executed if no TypeError was raised
if sum_dtype in tensor.discrete_dtypes: if sum_dtype in tensor.discrete_dtypes:
assert mean_var.dtype == 'float64', (mean_var.dtype, sum_dtype) assert mean_var.dtype == 'float64', (
(mean_var.dtype, sum_dtype))
else: else:
assert mean_var.dtype == sum_dtype, (mean_var.dtype, sum_dtype) assert mean_var.dtype == sum_dtype, (
(mean_var.dtype, sum_dtype))
# Check that we can take the gradient, when implemented # Check that we can take the gradient, when implemented
if "complex" in mean_var.dtype: if "complex" in mean_var.dtype:
continue continue
...@@ -920,7 +923,7 @@ class T_prod_without_zeros_dtype(unittest.TestCase): ...@@ -920,7 +923,7 @@ class T_prod_without_zeros_dtype(unittest.TestCase):
def test_prod_without_zeros_custom_dtype(self): def test_prod_without_zeros_custom_dtype(self):
""" """
Test the ability to provide your own output dtype for a ProdWithoutZeros(). Test ability to provide your own output dtype for a ProdWithoutZeros().
""" """
# We try multiple axis combinations even though axis should not matter. # We try multiple axis combinations even though axis should not matter.
axes = [None, 0, 1, [0], [1], [0, 1]] axes = [None, 0, 1, [0], [1], [0, 1]]
...@@ -936,7 +939,7 @@ class T_prod_without_zeros_dtype(unittest.TestCase): ...@@ -936,7 +939,7 @@ class T_prod_without_zeros_dtype(unittest.TestCase):
def test_prod_without_zeros_custom_acc_dtype(self): def test_prod_without_zeros_custom_acc_dtype(self):
""" """
Test the ability to provide your own acc_dtype for a ProdWithoutZeros(). Test ability to provide your own acc_dtype for a ProdWithoutZeros().
""" """
# We try multiple axis combinations even though axis should not matter. # We try multiple axis combinations even though axis should not matter.
axes = [None, 0, 1, [0], [1], [0, 1]] axes = [None, 0, 1, [0], [1], [0, 1]]
...@@ -1010,7 +1013,8 @@ def test_gt_grad(): ...@@ -1010,7 +1013,8 @@ def test_gt_grad():
T = theano.tensor T = theano.tensor
input_ = T.vector(dtype=floatX) input_ = T.vector(dtype=floatX)
random_values = numpy.random.RandomState(1234).uniform(low=-1, high=1, size=(2,2)) random_values = numpy.random.RandomState(1234).uniform(
low=-1, high=1, size=(2, 2))
W_values = numpy.asarray(random_values, dtype=floatX) W_values = numpy.asarray(random_values, dtype=floatX)
W = theano.shared(value=W_values, name='weights') W = theano.shared(value=W_values, name='weights')
correct_score = T.dot(input_, W) correct_score = T.dot(input_, W)
...@@ -1032,15 +1036,17 @@ if __name__ == '__main__': ...@@ -1032,15 +1036,17 @@ if __name__ == '__main__':
unittest.TextTestRunner().run(suite) unittest.TextTestRunner().run(suite)
""" """
def test_clip_grad():
    """Check the symbolic gradient of clip via finite differences."""
    def clip_fn(value, lower, upper):
        return theano.tensor.clip(value, lower, upper)

    # The sample vector deliberately contains one entry below the lower
    # bound, one strictly inside [lower, upper], and one above the upper
    # bound, so all three branches of clip are exercised.
    sample = numpy.asarray([-1., 0.5, 2.])
    unittest_tools.verify_grad(clip_fn, [sample, 0., 1.])
def test_clip_grad_int(): def test_clip_grad_int():
...@@ -1048,10 +1054,40 @@ def test_clip_grad_int(): ...@@ -1048,10 +1054,40 @@ def test_clip_grad_int():
x = tensor.iscalar() x = tensor.iscalar()
y = tensor.iscalar() y = tensor.iscalar()
z = tensor.iscalar() z = tensor.iscalar()
c = tensor.clip(x,y,z) c = tensor.clip(x, y, z)
tensor.grad(c, [x, y, z]) tensor.grad(c, [x, y, z])
def test_not_implemented_elemwise_grad():
    """
    Regression test for unimplemented gradient in an Elemwise Op.

    Builds a scalar Op whose gradient w.r.t. its first input is declared
    not implemented, wraps it in an Elemwise, and checks that:
      * taking the gradient w.r.t. the other (implemented) input works
        (this used to crash), and
      * requesting the not-implemented gradient raises
        theano.gradient.NullTypeGradError instead of crashing.
    """
    class TestOp(scalar.ScalarOp):
        def __init__(self):
            self.output_types_preference = scalar.upgrade_to_float

        def impl(self, n, x):
            return x * n

        def grad(self, inputs, output_grads):
            # NOTE: tuple parameters in the signature
            # (`def grad(self, (n, x), (gz,))`) are Python-2-only syntax
            # (removed by PEP 3113); unpack explicitly instead. Theano
            # calls grad positionally, so the interface is unchanged.
            n, x = inputs
            gz, = output_grads
            dy_dx = n
            return [theano.gradient.grad_not_implemented(self, 0, n),
                    gz * dy_dx]

    test_op = tensor.Elemwise(TestOp())
    x = tensor.scalar()
    # The call to `grad` used to crash.
    tensor.grad(test_op(2, x), x)
    # Verify that trying to use the not implemented gradient fails.
    try:
        tensor.grad(test_op(x, 2), x)
        assert False
    except theano.gradient.NullTypeGradError:
        pass
if __name__ == '__main__': if __name__ == '__main__':
t = TestElemwise('setUp') t = TestElemwise('setUp')
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论