提交 1ddb1eda authored 作者: Razvan Pascanu's avatar Razvan Pascanu

Merge pull request #203 from nouiz/max_and_argmax_grad

Implement the grad for inner dimensions in MaxAndArgMax. The pull request is fine. There is one question about the gradient of max that I'm not sure how to tackle, but the changes introduced in this code do not change the behaviour of Theano in this respect. The problem is: how should the grad of max look if there are several instances of the max value? Should all of them get the full gradient (as happens now), or should only one of them receive the gradient?
......@@ -1941,32 +1941,36 @@ class MaxAndArgmax(Op):
return [eval_points[0][arange(eval_points[0].shape[0]),
max_pos], None]
def grad(self, inp, grads):
# @warning: This only works if axis is 0, else the max is
# broadcasted wrong in the call to eq.
# @note: This function should work correctly for L{vector}s.
# (x, y), (gz, gw)
# gz*dz/dx + gw*dw/dx, gz*dz/dy + gw*dw/dy
# gMax * dMax/dx + gArgMax * dArgMax/dx, gMax * dMax/daxis + gArgMax * dArgMax/daxis
# g_max has one less dimension than x, so you need to complete g_max to x's shape
# when axis=0 the broadcasting mechanism does it automatically
# gMax * dMax/dx + gArgMax * dArgMax/dx,
# gMax * dMax/daxis + gArgMax * dArgMax/daxis
# g_max has one less dimension than x, so you need to complete
# g_max to x's shape when axis=0 the broadcasting mechanism
# does it automatically
x, axis = inp
g_max, g_max_idx = grads
if not ( axis.data == 0 or axis.data == x.ndim-1):
raise NotImplementedError('MaxAndArgmax gradient with axis corresponding to internal dimension')
if axis.data==0:
g_max_pad = shape_padleft(g_max)
else:
g_max_pad = shape_padright(g_max)
xmax = max(x, axis)
if axis.data==0:
xmax_pad = shape_padleft(xmax)
else:
xmax_pad = shape_padright(xmax)
# Raise the g_max and xmax to the same number of dim as the input.
pattern = []
out_dim = 0
for i in range(inp[0].ndim):
if i == axis.data:
pattern.append('x')
else:
pattern.append(out_dim)
out_dim += 1
g_max_pad = DimShuffle(g_max.broadcastable, pattern)(g_max)
xmax_pad = DimShuffle(xmax.broadcastable, pattern)(xmax)
# Set the grad to the correct position.
g_x = eq(xmax_pad, x) * g_max_pad
return g_x, None
def __str__(self):
return self.__class__.__name__
......
......@@ -1561,13 +1561,16 @@ class T_max_and_argmax(unittest.TestCase):
n = as_tensor_variable(data)
def check_grad_max(data, max_grad_data, axis=None):
"""
Why is this needed? Isn't verify_grad enough?
"""
#This work only for axis in [0,None]
assert axis in [0,None]
z = numpy.zeros_like(data)
z = z.flatten()
argmax=numpy.argmax(data,axis=axis)
if argmax.ndim==0:
z[numpy.argmax(data,axis=axis)]+=1
z[argmax]+=1
else:
for id,v in enumerate(argmax):
z[v*numpy.prod(data.shape[data.ndim-1:axis:-1])+id]+=1
......@@ -1592,6 +1595,14 @@ class T_max_and_argmax(unittest.TestCase):
utt.verify_grad(lambda v: max_and_argmax(v.flatten())[1], [data])
check_grad_max(data,eval_outputs(grad(max_and_argmax(n.flatten())[0],n)))
# Test 4d inner dimensions
data = numpy.random.rand(2, 3, 4, 5)
n = as_tensor_variable(data)
for i in [0, 1, 2, 3]:
utt.verify_grad(lambda v: max_and_argmax(v, axis=[i])[0], [data])
utt.verify_grad(lambda v: max_and_argmax(v, axis=[i])[1], [data])
class T_argmin_argmax(unittest.TestCase):
def setUp(self):
utt.seed_rng()
......
......@@ -3,7 +3,12 @@
Tests for the R operator / L operator
For the list of ops with the R op defined, with or without a missing test,
see this file: doc/library/tensor/basic.txt
For functions to automatically test your Rop implementation, look at
the docstrings of the functions: check_mat_rop_lop, check_rop_lop,
and check_nondiff_rop.
"""
......@@ -41,7 +46,9 @@ class BreakRop(Op):
break_op = BreakRop()
class test_RopLop(unittest.TestCase):
class RopLop_checker(unittest.TestCase):
""" Don't perform any tests, but provide the functions to test the
Rop to classes that inherit from it."""
def setUp(self):
# Using vectors make things a lot simpler for generating the same
......@@ -56,6 +63,8 @@ class test_RopLop(unittest.TestCase):
5+self.rng.randint(30))
def check_nondiff_rop(self, y):
""" If your op is not differentiable (so you can't define Rop),
test that an error is raised."""
raised = False
try:
tmp = tensor.Rop(y, self.x, self.v)
......@@ -67,6 +76,24 @@ class test_RopLop(unittest.TestCase):
' is not differentiable'))
def check_mat_rop_lop(self, y, out_shape):
""" Test the Rop/Lop when the input is a matrix and the output is a vector
:param y: the output variable of the op applied to self.mx
:param out_shape: Used to generate a random tensor
corresponding to the evaluation point of the Rop
(i.e. the tensor with which you multiply the
Jacobian). It should be a tuple of ints.
If the Op has more than 1 input, one of them must be mx; the
others must be shared variables/constants. We will test only
against the input self.mx, so you must call
check_mat_rop_lop/check_rop_lop for the other inputs.
We expect all inputs/outputs have dtype floatX.
If you want to test an out with an output matrix, add a sum
after the Op you want to test.
"""
vx = numpy.asarray(self.rng.uniform(size=self.mat_in_shape), theano.config.floatX)
vv = numpy.asarray(self.rng.uniform(size=self.mat_in_shape), theano.config.floatX)
yv = tensor.Rop(y, self.mx, self.mv)
......@@ -97,9 +124,12 @@ class test_RopLop(unittest.TestCase):
v2 = scan_f(vx,vv)
assert numpy.allclose(v1,v2), ('LOP mismatch: %s %s' % (v1, v2))
def check_rop_lop(self, y, out_shape):
"""
As check_mat_rop_lop, except the input is self.x, which is a
vector. The output is still a vector.
"""
# TEST ROP
vx = numpy.asarray(self.rng.uniform(size=self.in_shape), theano.config.floatX)
vv = numpy.asarray(self.rng.uniform(size=self.in_shape), theano.config.floatX)
......@@ -138,6 +168,7 @@ class test_RopLop(unittest.TestCase):
assert numpy.allclose(v1,v2), ('LOP mismatch: %s %s' % (v1, v2))
class test_RopLop(RopLop_checker):
def test_shape(self):
self.check_nondiff_rop( self.x.shape[0])
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论