提交 a72f34bc authored 作者: carriepl's avatar carriepl

Merge pull request #2362 from aalmah/ticket_2257

Ticket #2257: tile function with grad support
...@@ -4047,6 +4047,8 @@ def flatten(x, outdim=1): ...@@ -4047,6 +4047,8 @@ def flatten(x, outdim=1):
class Tile(Op): class Tile(Op):
""" """
DEPRECATED: use tile() instead.
Construct an array by repeating the input x according to reps pattern. Construct an array by repeating the input x according to reps pattern.
Tiles its input according to reps. The length of reps is the number of Tiles its input according to reps. The length of reps is the number of
...@@ -4069,6 +4071,9 @@ class Tile(Op): ...@@ -4069,6 +4071,9 @@ class Tile(Op):
return self.__class__.__name__ + "{ndim=%d}" % self.ndim return self.__class__.__name__ + "{ndim=%d}" % self.ndim
def make_node(self, x, reps): def make_node(self, x, reps):
warnings.warn((
"Tile op is deprecated, use tile function instead."),
stacklevel=3)
x = as_tensor_variable(x) x = as_tensor_variable(x)
reps = as_tensor_variable(reps) reps = as_tensor_variable(reps)
return gof.Apply(self, [x, reps], [tensor(x.type.dtype, [False] * return gof.Apply(self, [x, reps], [tensor(x.type.dtype, [False] *
...@@ -4139,18 +4144,19 @@ def tile(x, reps, ndim=None): ...@@ -4139,18 +4144,19 @@ def tile(x, reps, ndim=None):
raise ValueError("if specified, ndim must be equal to both x.ndim and " raise ValueError("if specified, ndim must be equal to both x.ndim and "
"len(reps)") "len(reps)")
if not hasattr(tile, 'op'):
tile.op = {}
if ndim is None: if ndim is None:
ndim = len(reps) ndim = len(reps)
reps = list(reps)
# backport shape = [x.shape[i] for i in xrange(ndim)]
# ndim = len(reps) if ndim is None else ndim alloc_shape = reps + shape
# not sure if len(shp) is going to work. y = alloc(x, *alloc_shape)
if ndim not in tile.op: shuffle_ind = numpy.arange(ndim*2).reshape(2, ndim)
tile.op[ndim] = Tile(ndim) shuffle_ind = shuffle_ind.transpose().flatten()
return tile.op[ndim](x, reps) y = y.dimshuffle(*shuffle_ind)
new_shapes = [sh*reps[i] for i, sh in enumerate(shape)]
y = y.reshape(new_shapes)
return y
class ARange(Op): class ARange(Op):
......
...@@ -4831,22 +4831,34 @@ def test_tile(): ...@@ -4831,22 +4831,34 @@ def test_tile():
x_ = rng.randn(2, 4, 3).astype(config.floatX) x_ = rng.randn(2, 4, 3).astype(config.floatX)
assert numpy.all(f(x_) == numpy.tile(x_, (2, 3, 4))) assert numpy.all(f(x_) == numpy.tile(x_, (2, 3, 4)))
# Test the four-dimensional case.
x = tensor4()
f = function([x], tile(x, (2, 3, 4, 6)))
x_ = rng.randn(2, 4, 3, 5).astype(config.floatX)
assert numpy.all(f(x_) == numpy.tile(x_, (2, 3, 4, 6)))
def test_tile_grad():
def grad_tile(x, reps, np_x):
    """Check the gradient of ``tile(x, reps).sum()`` with respect to ``x``.

    x: symbolic tensor variable that gets tiled.
    reps: sequence of repetition counts passed to ``tile``.
    np_x: numeric array fed to the compiled gradient function.

    Raises AssertionError if any entry of the computed gradient differs
    from ``numpy.prod(reps)``.
    """
    y = tile(x, reps)
    z = y.sum()
    g = theano.function([x], grad(z, x))
    # Cast the numeric input to the dtype of the symbolic variable, in the
    # same spirit as the other tile tests, which cast their data with
    # ``astype(config.floatX)``.
    # NOTE(review): presumably the compiled function rejects float64 data
    # when x is float32 -- confirm against theano.function's input checks.
    grad_res = g(numpy.asarray(np_x, dtype=x.type.dtype))
    # The gradient should be the product of the tiling dimensions
    # (since the gradients are additive through the tiling operation).
    assert numpy.all(grad_res == numpy.prod(reps))
# XXX: It turns out that almost no cases of the tile gradient actually work.
# This is a test that should pass if the proper implementation is filled in.
def test_tile_grad_3d():
# N.B.: we should also use verify_grad in this test.
raise SkipTest() # Remove me when this is implemented.
rng = numpy.random.RandomState(utt.fetch_seed()) rng = numpy.random.RandomState(utt.fetch_seed())
w = rng.randn(3, 4, 2)
w_tiled = numpy.tile(w, (2, 3, 4)) # test vector
x = tensor.tensor3() grad_tile(vector('x'), [3], rng.randn(5))
c = (as_tensor_variable(w_tiled) * tile(x, (2, 3, 4))).sum() # test matrix
f = function([x], grad(c, x)) grad_tile(matrix('x'), [3, 4], rng.randn(2, 3))
x_ = rng.randn(3, 4, 2) # test tensor3
# The gradient should be w, multiplied by its tiling dimensions (since grad_tile(tensor3('x'), [3, 4, 5], rng.randn(2, 4, 3))
# the gradients are additive through the tiling operation) # test tensor4
assert numpy.all(f(x_) == 2 * 3 * 4 * w) grad_tile(tensor4('x'), [3, 4, 5, 6], rng.randn(2, 4, 3, 5))
class TestARange(unittest.TestCase): class TestARange(unittest.TestCase):
...@@ -6865,21 +6877,22 @@ class TestInferShape(utt.InferShapeTester): ...@@ -6865,21 +6877,22 @@ class TestInferShape(utt.InferShapeTester):
# [Reshape(ndim)(adtens4, aivec)], # [Reshape(ndim)(adtens4, aivec)],
# [adtens4_val, [1, 3, 10, 4]], Reshape) # [adtens4_val, [1, 3, 10, 4]], Reshape)
# Tile # Tile op is deprecated so the tile function doesn't use it
# anymore, we'll test here the op directly
advec = dvector() advec = dvector()
advec_val = rand(5) advec_val = rand(5)
aivec_val = [3] aivec_val = [3]
ndim = 1 ndim = 1
self._compile_and_check([advec], self._compile_and_check([advec],
[tile(advec, aivec_val, ndim)], [Tile(ndim)(advec, aivec_val)],
[advec_val], Tile) [advec_val], Tile)
admat = dmatrix() admat = dmatrix()
admat_val = rand(2, 4) admat_val = rand(2, 4)
aivec_val = [2, 3] aivec_val = [2, 3]
ndim = None ndim = 2
self._compile_and_check([admat], self._compile_and_check([admat],
[tile(admat, aivec_val)], [Tile(ndim)(admat, aivec_val)],
[admat_val], Tile) [admat_val], Tile)
adtens4 = dtensor4() adtens4 = dtensor4()
...@@ -6887,9 +6900,10 @@ class TestInferShape(utt.InferShapeTester): ...@@ -6887,9 +6900,10 @@ class TestInferShape(utt.InferShapeTester):
aivec_val = [2, 3, 1, 4] aivec_val = [2, 3, 1, 4]
ndim = 4 ndim = 4
self._compile_and_check([adtens4], self._compile_and_check([adtens4],
[tile(adtens4, aivec_val, ndim)], [Tile(ndim)(adtens4, aivec_val)],
[adtens4_val], Tile) [adtens4_val], Tile)
class TestTensorInstanceMethods(unittest.TestCase): class TestTensorInstanceMethods(unittest.TestCase):
def setUp(self): def setUp(self):
self.vars = matrices('X', 'Y') self.vars = matrices('X', 'Y')
......
...@@ -51,6 +51,7 @@ from theano.tensor import ( ...@@ -51,6 +51,7 @@ from theano.tensor import (
join, join,
Subtensor, Subtensor,
TensorType, TensorType,
Tile,
) )
from theano.tensor.elemwise import DimShuffle from theano.tensor.elemwise import DimShuffle
from theano.tests import unittest_tools as utt from theano.tests import unittest_tools as utt
...@@ -1723,7 +1724,7 @@ class test_local_subtensor_make_vector(unittest.TestCase): ...@@ -1723,7 +1724,7 @@ class test_local_subtensor_make_vector(unittest.TestCase):
assert len(prog) == 1 assert len(prog) == 1
assert isinstance(prog[0].op, theano.compile.ops.DeepCopyOp) assert isinstance(prog[0].op, theano.compile.ops.DeepCopyOp)
assert f(0, 1, 2) == 0 assert f(0, 1, 2) == 0
def test_slice_idx_stop(self): def test_slice_idx_stop(self):
x, y, z = tensor.lscalars('xyz') x, y, z = tensor.lscalars('xyz')
v = make_vector(x, y, z) v = make_vector(x, y, z)
...@@ -1735,7 +1736,7 @@ class test_local_subtensor_make_vector(unittest.TestCase): ...@@ -1735,7 +1736,7 @@ class test_local_subtensor_make_vector(unittest.TestCase):
assert len(prog[0].inputs) == 2 assert len(prog[0].inputs) == 2
r = f(0, 1, 2) r = f(0, 1, 2)
assert r[0] == 0 and r[1] == 1 assert r[0] == 0 and r[1] == 1
def test_slice_idx_step(self): def test_slice_idx_step(self):
x, y, z = tensor.lscalars('xyz') x, y, z = tensor.lscalars('xyz')
v = make_vector(x, y, z) v = make_vector(x, y, z)
...@@ -1747,7 +1748,7 @@ class test_local_subtensor_make_vector(unittest.TestCase): ...@@ -1747,7 +1748,7 @@ class test_local_subtensor_make_vector(unittest.TestCase):
assert len(prog[0].inputs) == 2 assert len(prog[0].inputs) == 2
r = f(0, 1, 2) r = f(0, 1, 2)
assert r[0] == 0 and r[1] == 2 assert r[0] == 0 and r[1] == 2
def test_AdvancedSubtensor1_idx(self): def test_AdvancedSubtensor1_idx(self):
x, y, z = tensor.lscalars('xyz') x, y, z = tensor.lscalars('xyz')
v = make_vector(x, y, z) v = make_vector(x, y, z)
...@@ -3061,6 +3062,24 @@ class Test_local_useless_alloc(unittest.TestCase): ...@@ -3061,6 +3062,24 @@ class Test_local_useless_alloc(unittest.TestCase):
op_classes = [node.op.__class__ for node in f.maker.fgraph.toposort()] op_classes = [node.op.__class__ for node in f.maker.fgraph.toposort()]
assert tensor.Alloc not in op_classes assert tensor.Alloc not in op_classes
def test2(self):
    """Check for Alloc in the graph of ``tile`` with all-ones reps.

    The intent is to test that Alloc never gets instantiated during
    optimization. The graph is compiled with the 'local_useless_alloc'
    optimization excluded, and the op classes of the compiled graph are
    then inspected.
    """
    mode = mode_opt.excluding('local_useless_alloc')
    x = tensor.matrix('x')
    y = tensor.tile(x, (1,) * 2)
    f = function([x], [y], mode=mode)
    op_classes = [node.op.__class__ for node in f.maker.fgraph.toposort()]
    # We are supposed to test that tensor.Alloc is NOT in op_classes, but
    # since the proper optimization is not currently implemented that
    # check would fail. Once the correct optimization is in place,
    # tensor.Alloc should no longer appear in op_classes and this assert
    # has to be inverted.
    # The op classes are reported in the assertion message instead of
    # being printed unconditionally on every run.
    assert tensor.Alloc in op_classes, op_classes
class test_shapeoptimizer(unittest.TestCase): class test_shapeoptimizer(unittest.TestCase):
def setUp(self): def setUp(self):
...@@ -3404,6 +3423,8 @@ def test_local_mul_specialize(): ...@@ -3404,6 +3423,8 @@ def test_local_mul_specialize():
class T_Tile(unittest.TestCase): class T_Tile(unittest.TestCase):
def test_local_useless_tile(self): def test_local_useless_tile(self):
# Tile op is deprecated so the tile function doesn't use it
# anymore, we'll test here the op directly
v = T.vector() v = T.vector()
m = T.matrix() m = T.matrix()
mode = None mode = None
...@@ -3412,7 +3433,7 @@ class T_Tile(unittest.TestCase): ...@@ -3412,7 +3433,7 @@ class T_Tile(unittest.TestCase):
for var, data in [(v, [1, 2, 3]), (m, [[1, 2], [3, 4]])]: for var, data in [(v, [1, 2, 3]), (m, [[1, 2], [3, 4]])]:
# Currently, only a repeat pattern == ndim is supported. # Currently, only a repeat pattern == ndim is supported.
for ndim in [var.ndim]: # range(1, var.ndim): for ndim in [var.ndim]: # range(1, var.ndim):
f = theano.function([var], T.tile(var, (1,)*ndim), mode=mode) f = theano.function([var], Tile(ndim)(var, (1,)*ndim), mode=mode)
topo = f.maker.fgraph.toposort() topo = f.maker.fgraph.toposort()
assert len(topo) == 1 assert len(topo) == 1
assert isinstance(topo[0].op, compile.DeepCopyOp) assert isinstance(topo[0].op, compile.DeepCopyOp)
...@@ -3422,6 +3443,7 @@ class T_Tile(unittest.TestCase): ...@@ -3422,6 +3443,7 @@ class T_Tile(unittest.TestCase):
# replace it with a DimShuffle to add the extra parameter. # replace it with a DimShuffle to add the extra parameter.
# But it isn't supported for now, so assert that we raise an # But it isn't supported for now, so assert that we raise an
# error. # error.
self.assertRaises(ValueError, T.tile, v, (1,)*(v.ndim+1)) self.assertRaises(ValueError, T.tile, v, (1,)*(v.ndim+1))
# If the repeat parameter is shorter than m.ndim, it should # If the repeat parameter is shorter than m.ndim, it should
# pad the repeat pattern on the left with 1. It is not supported for now. # pad the repeat pattern on the left with 1. It is not supported for now.
...@@ -3429,6 +3451,7 @@ class T_Tile(unittest.TestCase): ...@@ -3429,6 +3451,7 @@ class T_Tile(unittest.TestCase):
#topo = f.maker.fgraph.toposort() #topo = f.maker.fgraph.toposort()
#assert len(topo) == 1 #assert len(topo) == 1
#assert isinstance(topo[0].op, DimShuffe) #assert isinstance(topo[0].op, DimShuffe)
self.assertRaises(ValueError, T.tile, m, (1,)*(m.ndim-1)) self.assertRaises(ValueError, T.tile, m, (1,)*(m.ndim-1))
#f = theano.function([var], T.tile(m, (1,)*(m.ndim-1))) #f = theano.function([var], T.tile(m, (1,)*(m.ndim-1)))
#topo = f.maker.fgraph.toposort() #topo = f.maker.fgraph.toposort()
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论