提交 834ad203 authored 作者: Frederic's avatar Frederic

Implement the grad for inner dimensions in MaxAndArgMax.

上级 dba5815e
...@@ -1941,32 +1941,36 @@ class MaxAndArgmax(Op): ...@@ -1941,32 +1941,36 @@ class MaxAndArgmax(Op):
return [eval_points[0][arange(eval_points[0].shape[0]), return [eval_points[0][arange(eval_points[0].shape[0]),
max_pos], None] max_pos], None]
def grad(self, inp, grads): def grad(self, inp, grads):
# @warning: This only works if axis is 0, else the max is
# broadcasted wrong in the call to eq.
# @note: This function should work correctly for L{vector}s. # @note: This function should work correctly for L{vector}s.
# (x, y), (gz, gw) # (x, y), (gz, gw)
# gz*dz/dx + gw*dw/dx, gz*dz/dy + gw*dw/dy # gz*dz/dx + gw*dw/dx, gz*dz/dy + gw*dw/dy
# gMax * dMax/dx + gArgMax * dArgMax/dx, gMax * dMax/daxis + gArgMax * dArgMax/daxis # gMax * dMax/dx + gArgMax * dArgMax/dx,
# g_max has one less dimension than x, so you need to complete g_max to x's shape # gMax * dMax/daxis + gArgMax * dArgMax/daxis
# when axis=0 the broadcasting mechanism does it automatically # g_max has one less dimension than x, so you need to complete
# g_max to x's shape when axis=0 the broadcasting mechanism
# does it automatically
x, axis = inp x, axis = inp
g_max, g_max_idx = grads g_max, g_max_idx = grads
if not ( axis.data == 0 or axis.data == x.ndim-1):
raise NotImplementedError('MaxAndArgmax gradient with axis corresponding to internal dimension')
if axis.data==0:
g_max_pad = shape_padleft(g_max)
else:
g_max_pad = shape_padright(g_max)
xmax = max(x, axis) xmax = max(x, axis)
if axis.data==0:
xmax_pad = shape_padleft(xmax) # Raise the g_max and xmax to the same number of dim as the input.
else: pattern = []
xmax_pad = shape_padright(xmax) out_dim = 0
for i in range(inp[0].ndim):
if i == axis.data:
pattern.append('x')
else:
pattern.append(out_dim)
out_dim += 1
g_max_pad = DimShuffle(g_max.broadcastable, pattern)(g_max)
xmax_pad = DimShuffle(xmax.broadcastable, pattern)(xmax)
# Set the grad to the correct position.
g_x = eq(xmax_pad, x) * g_max_pad g_x = eq(xmax_pad, x) * g_max_pad
return g_x, None return g_x, None
def __str__(self): def __str__(self):
return self.__class__.__name__ return self.__class__.__name__
......
...@@ -1561,13 +1561,16 @@ class T_max_and_argmax(unittest.TestCase): ...@@ -1561,13 +1561,16 @@ class T_max_and_argmax(unittest.TestCase):
n = as_tensor_variable(data) n = as_tensor_variable(data)
def check_grad_max(data, max_grad_data, axis=None): def check_grad_max(data, max_grad_data, axis=None):
"""
Why this is needed? verify_grad is not enought?
"""
#This work only for axis in [0,None] #This work only for axis in [0,None]
assert axis in [0,None] assert axis in [0,None]
z = numpy.zeros_like(data) z = numpy.zeros_like(data)
z = z.flatten() z = z.flatten()
argmax=numpy.argmax(data,axis=axis) argmax=numpy.argmax(data,axis=axis)
if argmax.ndim==0: if argmax.ndim==0:
z[numpy.argmax(data,axis=axis)]+=1 z[argmax]+=1
else: else:
for id,v in enumerate(argmax): for id,v in enumerate(argmax):
z[v*numpy.prod(data.shape[data.ndim-1:axis:-1])+id]+=1 z[v*numpy.prod(data.shape[data.ndim-1:axis:-1])+id]+=1
...@@ -1592,6 +1595,14 @@ class T_max_and_argmax(unittest.TestCase): ...@@ -1592,6 +1595,14 @@ class T_max_and_argmax(unittest.TestCase):
utt.verify_grad(lambda v: max_and_argmax(v.flatten())[1], [data]) utt.verify_grad(lambda v: max_and_argmax(v.flatten())[1], [data])
check_grad_max(data,eval_outputs(grad(max_and_argmax(n.flatten())[0],n))) check_grad_max(data,eval_outputs(grad(max_and_argmax(n.flatten())[0],n)))
# Test 4d inner dimensions
data = numpy.random.rand(2, 3, 4, 5)
n = as_tensor_variable(data)
for i in [0, 1, 2, 3]:
utt.verify_grad(lambda v: max_and_argmax(v, axis=[i])[0], [data])
utt.verify_grad(lambda v: max_and_argmax(v, axis=[i])[1], [data])
class T_argmin_argmax(unittest.TestCase): class T_argmin_argmax(unittest.TestCase):
def setUp(self): def setUp(self):
utt.seed_rng() utt.seed_rng()
......
...@@ -3,7 +3,12 @@ ...@@ -3,7 +3,12 @@
Tests for the R operator / L operator Tests for the R operator / L operator
For the list of op with r op defined, with or without missing test see this file: defined see this file For the list of op with r op defined, with or without missing test
see this file: doc/library/tensor/basic.txt
For function to automatically test your Rop implementation, look at
the docstring of the functions: check_mat_rop_lop, check_rop_lop,
check_nondiff_rop,
""" """
...@@ -41,7 +46,9 @@ class BreakRop(Op): ...@@ -41,7 +46,9 @@ class BreakRop(Op):
break_op = BreakRop() break_op = BreakRop()
class test_RopLop(unittest.TestCase): class RopLop_checker(unittest.TestCase):
""" Don't peform any test, but provide the function to test the
Rop to class that inherit from it."""
def setUp(self): def setUp(self):
# Using vectors make things a lot simpler for generating the same # Using vectors make things a lot simpler for generating the same
...@@ -56,6 +63,8 @@ class test_RopLop(unittest.TestCase): ...@@ -56,6 +63,8 @@ class test_RopLop(unittest.TestCase):
5+self.rng.randint(30)) 5+self.rng.randint(30))
def check_nondiff_rop(self, y): def check_nondiff_rop(self, y):
""" If you op is not differentiable(so you can't define Rop)
test that an error is raised."""
raised = False raised = False
try: try:
tmp = tensor.Rop(y, self.x, self.v) tmp = tensor.Rop(y, self.x, self.v)
...@@ -67,6 +76,24 @@ class test_RopLop(unittest.TestCase): ...@@ -67,6 +76,24 @@ class test_RopLop(unittest.TestCase):
' is not differentiable')) ' is not differentiable'))
def check_mat_rop_lop(self, y, out_shape): def check_mat_rop_lop(self, y, out_shape):
""" Test the Rop/Lop when input is a matrix and the output is a vector
:param y: the output variable of the op applied to self.mx
:param out_shape: Used to generate a random tensor
corresponding to the evaluation point of the Rop
(i.e. the tensor with which you multiply the
Jacobian). It should be a tuple of ints.
        If the Op has more than 1 input, one of them must be mx, the
        others must be shared variables/constants. We will test only
        against the input self.mx, so you must call
        check_mat_rop_lop/check_rop_lop for the other inputs.
We expect all inputs/outputs have dtype floatX.
        If you want to test an Op with an output matrix, add a sum
        after the Op you want to test.
"""
vx = numpy.asarray(self.rng.uniform(size=self.mat_in_shape), theano.config.floatX) vx = numpy.asarray(self.rng.uniform(size=self.mat_in_shape), theano.config.floatX)
vv = numpy.asarray(self.rng.uniform(size=self.mat_in_shape), theano.config.floatX) vv = numpy.asarray(self.rng.uniform(size=self.mat_in_shape), theano.config.floatX)
yv = tensor.Rop(y, self.mx, self.mv) yv = tensor.Rop(y, self.mx, self.mv)
...@@ -97,9 +124,12 @@ class test_RopLop(unittest.TestCase): ...@@ -97,9 +124,12 @@ class test_RopLop(unittest.TestCase):
v2 = scan_f(vx,vv) v2 = scan_f(vx,vv)
assert numpy.allclose(v1,v2), ('LOP mismatch: %s %s' % (v1, v2)) assert numpy.allclose(v1,v2), ('LOP mismatch: %s %s' % (v1, v2))
def check_rop_lop(self, y, out_shape): def check_rop_lop(self, y, out_shape):
"""
        As check_mat_rop_lop, except the input is self.x, which is a
        vector. The output is still a vector.
"""
# TEST ROP # TEST ROP
vx = numpy.asarray(self.rng.uniform(size=self.in_shape), theano.config.floatX) vx = numpy.asarray(self.rng.uniform(size=self.in_shape), theano.config.floatX)
vv = numpy.asarray(self.rng.uniform(size=self.in_shape), theano.config.floatX) vv = numpy.asarray(self.rng.uniform(size=self.in_shape), theano.config.floatX)
...@@ -138,6 +168,7 @@ class test_RopLop(unittest.TestCase): ...@@ -138,6 +168,7 @@ class test_RopLop(unittest.TestCase):
assert numpy.allclose(v1,v2), ('LOP mismatch: %s %s' % (v1, v2)) assert numpy.allclose(v1,v2), ('LOP mismatch: %s %s' % (v1, v2))
class test_RopLop(RopLop_checker):
def test_shape(self): def test_shape(self):
self.check_nondiff_rop( self.x.shape[0]) self.check_nondiff_rop( self.x.shape[0])
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论