提交 834ad203 authored 作者: Frederic's avatar Frederic

Implement the grad for inner dimensions in MaxAndArgMax.

上级 dba5815e
......@@ -1941,32 +1941,36 @@ class MaxAndArgmax(Op):
return [eval_points[0][arange(eval_points[0].shape[0]),
max_pos], None]
def grad(self, inp, grads):
# @warning: This only works if axis is 0, else the max is
# broadcasted wrong in the call to eq.
# @note: This function should work correctly for L{vector}s.
# (x, y), (gz, gw)
# gz*dz/dx + gw*dw/dx, gz*dz/dy + gw*dw/dy
# gMax * dMax/dx + gArgMax * dArgMax/dx, gMax * dMax/daxis + gArgMax * dArgMax/daxis
# g_max has one less dimension than x, so you need to complete g_max to x's shape
# when axis=0 the broadcasting mechanism does it automatically
# gMax * dMax/dx + gArgMax * dArgMax/dx,
# gMax * dMax/daxis + gArgMax * dArgMax/daxis
# g_max has one less dimension than x, so you need to complete
# g_max to x's shape when axis=0 the broadcasting mechanism
# does it automatically
x, axis = inp
g_max, g_max_idx = grads
if not ( axis.data == 0 or axis.data == x.ndim-1):
raise NotImplementedError('MaxAndArgmax gradient with axis corresponding to internal dimension')
if axis.data==0:
g_max_pad = shape_padleft(g_max)
else:
g_max_pad = shape_padright(g_max)
xmax = max(x, axis)
if axis.data==0:
xmax_pad = shape_padleft(xmax)
else:
xmax_pad = shape_padright(xmax)
# Raise g_max and xmax to the same number of dimensions as the input.
pattern = []
out_dim = 0
for i in range(inp[0].ndim):
if i == axis.data:
pattern.append('x')
else:
pattern.append(out_dim)
out_dim += 1
g_max_pad = DimShuffle(g_max.broadcastable, pattern)(g_max)
xmax_pad = DimShuffle(xmax.broadcastable, pattern)(xmax)
# Set the grad to the correct position.
g_x = eq(xmax_pad, x) * g_max_pad
return g_x, None
def __str__(self):
return self.__class__.__name__
......
......@@ -1561,13 +1561,16 @@ class T_max_and_argmax(unittest.TestCase):
n = as_tensor_variable(data)
def check_grad_max(data, max_grad_data, axis=None):
"""
Why this is needed? verify_grad is not enought?
"""
#This work only for axis in [0,None]
assert axis in [0,None]
z = numpy.zeros_like(data)
z = z.flatten()
argmax=numpy.argmax(data,axis=axis)
if argmax.ndim==0:
z[numpy.argmax(data,axis=axis)]+=1
z[argmax]+=1
else:
for id,v in enumerate(argmax):
z[v*numpy.prod(data.shape[data.ndim-1:axis:-1])+id]+=1
......@@ -1592,6 +1595,14 @@ class T_max_and_argmax(unittest.TestCase):
utt.verify_grad(lambda v: max_and_argmax(v.flatten())[1], [data])
check_grad_max(data,eval_outputs(grad(max_and_argmax(n.flatten())[0],n)))
# Test 4d inner dimensions
data = numpy.random.rand(2, 3, 4, 5)
n = as_tensor_variable(data)
for i in [0, 1, 2, 3]:
utt.verify_grad(lambda v: max_and_argmax(v, axis=[i])[0], [data])
utt.verify_grad(lambda v: max_and_argmax(v, axis=[i])[1], [data])
class T_argmin_argmax(unittest.TestCase):
def setUp(self):
utt.seed_rng()
......
......@@ -3,7 +3,12 @@
Tests for the R operator / L operator
For the list of ops with Rop defined, with or without missing tests, see this file.
For the list of op with r op defined, with or without missing test
see this file: doc/library/tensor/basic.txt
For function to automatically test your Rop implementation, look at
the docstring of the functions: check_mat_rop_lop, check_rop_lop,
check_nondiff_rop,
"""
......@@ -41,7 +46,9 @@ class BreakRop(Op):
break_op = BreakRop()
class test_RopLop(unittest.TestCase):
class RopLop_checker(unittest.TestCase):
""" Don't peform any test, but provide the function to test the
Rop to class that inherit from it."""
def setUp(self):
# Using vectors make things a lot simpler for generating the same
......@@ -56,6 +63,8 @@ class test_RopLop(unittest.TestCase):
5+self.rng.randint(30))
def check_nondiff_rop(self, y):
""" If you op is not differentiable(so you can't define Rop)
test that an error is raised."""
raised = False
try:
tmp = tensor.Rop(y, self.x, self.v)
......@@ -67,6 +76,24 @@ class test_RopLop(unittest.TestCase):
' is not differentiable'))
def check_mat_rop_lop(self, y, out_shape):
""" Test the Rop/Lop when input is a matrix and the output is a vector
:param y: the output variable of the op applied to self.mx
:param out_shape: Used to generate a random tensor
corresponding to the evaluation point of the Rop
(i.e. the tensor with which you multiply the
Jacobian). It should be a tuple of ints.
If the Op has more than 1 input, one of them must be mx; the
others must be shared variables/constants. We will test only
against the input self.mx, so you must call
check_mat_rop_lop/check_rop_lop for the other inputs.
We expect all inputs/outputs have dtype floatX.
If you want to test an out with an output matrix, add a sum
after the Op you want to test.
"""
vx = numpy.asarray(self.rng.uniform(size=self.mat_in_shape), theano.config.floatX)
vv = numpy.asarray(self.rng.uniform(size=self.mat_in_shape), theano.config.floatX)
yv = tensor.Rop(y, self.mx, self.mv)
......@@ -97,9 +124,12 @@ class test_RopLop(unittest.TestCase):
v2 = scan_f(vx,vv)
assert numpy.allclose(v1,v2), ('LOP mismatch: %s %s' % (v1, v2))
def check_rop_lop(self, y, out_shape):
"""
As check_mat_rop_lop, except the input is self.x witch is a
vector. The output is still a vector.
"""
# TEST ROP
vx = numpy.asarray(self.rng.uniform(size=self.in_shape), theano.config.floatX)
vv = numpy.asarray(self.rng.uniform(size=self.in_shape), theano.config.floatX)
......@@ -138,6 +168,7 @@ class test_RopLop(unittest.TestCase):
assert numpy.allclose(v1,v2), ('LOP mismatch: %s %s' % (v1, v2))
class test_RopLop(RopLop_checker):
def test_shape(self):
self.check_nondiff_rop( self.x.shape[0])
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论