Commit a72f34bc authored by carriepl

Merge pull request #2362 from aalmah/ticket_2257

Ticket #2257: tile function with grad support
......@@ -4047,6 +4047,8 @@ def flatten(x, outdim=1):
class Tile(Op):
"""
DEPRECATED: use tile() instead.
Construct an array by repeating the input x according to reps pattern.
Tiles its input according to reps. The length of reps is the number of
......@@ -4069,6 +4071,9 @@ class Tile(Op):
return self.__class__.__name__ + "{ndim=%d}" % self.ndim
def make_node(self, x, reps):
warnings.warn((
"Tile op is deprecated, use tile function instead."),
stacklevel=3)
x = as_tensor_variable(x)
reps = as_tensor_variable(reps)
return gof.Apply(self, [x, reps], [tensor(x.type.dtype, [False] *
......@@ -4139,18 +4144,19 @@ def tile(x, reps, ndim=None):
raise ValueError("if specified, ndim must be equal to both x.ndim and "
"len(reps)")
if not hasattr(tile, 'op'):
tile.op = {}
if ndim is None:
ndim = len(reps)
# backport
# ndim = len(reps) if ndim is None else ndim
# not sure if len(shp) is going to work.
if ndim not in tile.op:
tile.op[ndim] = Tile(ndim)
return tile.op[ndim](x, reps)
reps = list(reps)
shape = [x.shape[i] for i in xrange(ndim)]
alloc_shape = reps + shape
y = alloc(x, *alloc_shape)
shuffle_ind = numpy.arange(ndim*2).reshape(2, ndim)
shuffle_ind = shuffle_ind.transpose().flatten()
y = y.dimshuffle(*shuffle_ind)
new_shapes = [sh*reps[i] for i, sh in enumerate(shape)]
y = y.reshape(new_shapes)
return y
class ARange(Op):
......
......@@ -4831,22 +4831,34 @@ def test_tile():
x_ = rng.randn(2, 4, 3).astype(config.floatX)
assert numpy.all(f(x_) == numpy.tile(x_, (2, 3, 4)))
# Test the four-dimensional case.
x = tensor4()
f = function([x], tile(x, (2, 3, 4, 6)))
x_ = rng.randn(2, 4, 3, 5).astype(config.floatX)
assert numpy.all(f(x_) == numpy.tile(x_, (2, 3, 4, 6)))
def test_tile_grad():
def grad_tile(x, reps, np_x):
    # Helper: tile `x` by `reps`, sum the result to a scalar, and check
    # that the gradient w.r.t. x is numpy.prod(reps) everywhere.  Each
    # input element appears prod(reps) times in the tiled output, and
    # gradients are additive through the tiling operation, so every
    # entry of the gradient must equal the product of the repetitions.
    y = tile(x, reps)
    z = y.sum()
    g = theano.function([x], grad(z, x))
    grad_res = g(np_x)
    # The gradient should be the product of the tiling dimensions
    # (since the gradients are additive through the tiling operation)
    assert numpy.all(grad_res == numpy.prod(reps))
# XXX: It turns out that almost no cases of the tile gradient actually work.
# This is a test that should pass if the proper implementation is filled in.
def test_tile_grad_3d():
# N.B.: we should also use verify_grad in this test.
raise SkipTest() # Remove me when this is implemented.
rng = numpy.random.RandomState(utt.fetch_seed())
w = rng.randn(3, 4, 2)
w_tiled = numpy.tile(w, (2, 3, 4))
x = tensor.tensor3()
c = (as_tensor_variable(w_tiled) * tile(x, (2, 3, 4))).sum()
f = function([x], grad(c, x))
x_ = rng.randn(3, 4, 2)
# The gradient should be w, multiplied by its tiling dimensions (since
# the gradients are additive through the tiling operation)
assert numpy.all(f(x_) == 2 * 3 * 4 * w)
# test vector
grad_tile(vector('x'), [3], rng.randn(5))
# test matrix
grad_tile(matrix('x'), [3, 4], rng.randn(2, 3))
# test tensor3
grad_tile(tensor3('x'), [3, 4, 5], rng.randn(2, 4, 3))
# test tensor4
grad_tile(tensor4('x'), [3, 4, 5, 6], rng.randn(2, 4, 3, 5))
class TestARange(unittest.TestCase):
......@@ -6865,21 +6877,22 @@ class TestInferShape(utt.InferShapeTester):
# [Reshape(ndim)(adtens4, aivec)],
# [adtens4_val, [1, 3, 10, 4]], Reshape)
# Tile
# The Tile op is deprecated, so the tile function no longer uses it;
# here we test the op directly instead.
advec = dvector()
advec_val = rand(5)
aivec_val = [3]
ndim = 1
self._compile_and_check([advec],
[tile(advec, aivec_val, ndim)],
[Tile(ndim)(advec, aivec_val)],
[advec_val], Tile)
admat = dmatrix()
admat_val = rand(2, 4)
aivec_val = [2, 3]
ndim = None
ndim = 2
self._compile_and_check([admat],
[tile(admat, aivec_val)],
[Tile(ndim)(admat, aivec_val)],
[admat_val], Tile)
adtens4 = dtensor4()
......@@ -6887,9 +6900,10 @@ class TestInferShape(utt.InferShapeTester):
aivec_val = [2, 3, 1, 4]
ndim = 4
self._compile_and_check([adtens4],
[tile(adtens4, aivec_val, ndim)],
[Tile(ndim)(adtens4, aivec_val)],
[adtens4_val], Tile)
class TestTensorInstanceMethods(unittest.TestCase):
def setUp(self):
self.vars = matrices('X', 'Y')
......
......@@ -51,6 +51,7 @@ from theano.tensor import (
join,
Subtensor,
TensorType,
Tile,
)
from theano.tensor.elemwise import DimShuffle
from theano.tests import unittest_tools as utt
......@@ -3061,6 +3062,24 @@ class Test_local_useless_alloc(unittest.TestCase):
op_classes = [node.op.__class__ for node in f.maker.fgraph.toposort()]
assert tensor.Alloc not in op_classes
def test2(self):
    # Test that Alloc never gets instantiated during optimization.
    # The 'local_useless_alloc' optimization is excluded so that the
    # Alloc produced by tile() is not removed by that pass.
    mode = mode_opt.excluding('local_useless_alloc')
    x = tensor.matrix('x')
    # tile with a repeat pattern of all ones is a no-op candidate.
    y = tensor.tile(x, (1,)*2)
    f = function([x], [y], mode=mode)
    op_classes = [node.op.__class__ for node in f.maker.fgraph.toposort()]
    print op_classes  # NOTE(review): leftover debug print -- consider removing
    # We are supposed to test that tensor.Alloc is NOT in op_classes,
    # but the proper optimization is not implemented yet, so the graph
    # still contains an Alloc node.  Once the correct optimization is
    # in place, tensor.Alloc should no longer appear in op_classes and
    # the assert below must be inverted.
    assert tensor.Alloc in op_classes
class test_shapeoptimizer(unittest.TestCase):
def setUp(self):
......@@ -3404,6 +3423,8 @@ def test_local_mul_specialize():
class T_Tile(unittest.TestCase):
def test_local_useless_tile(self):
# The Tile op is deprecated, so the tile function no longer uses it;
# here we test the op directly instead.
v = T.vector()
m = T.matrix()
mode = None
......@@ -3412,7 +3433,7 @@ class T_Tile(unittest.TestCase):
for var, data in [(v, [1, 2, 3]), (m, [[1, 2], [3, 4]])]:
# Currently, only a repeat pattern of length == ndim is supported.
for ndim in [var.ndim]: # range(1, var.ndim):
f = theano.function([var], T.tile(var, (1,)*ndim), mode=mode)
f = theano.function([var], Tile(ndim)(var, (1,)*ndim), mode=mode)
topo = f.maker.fgraph.toposort()
assert len(topo) == 1
assert isinstance(topo[0].op, compile.DeepCopyOp)
......@@ -3422,6 +3443,7 @@ class T_Tile(unittest.TestCase):
# replace it with a DimShuffle to add the extra parameter.
# But it isn't supported for now, so assert that we raise an
# error.
self.assertRaises(ValueError, T.tile, v, (1,)*(v.ndim+1))
# If the repeat parameter is shorter than m.ndim, it should
# pad the repeat pattern with 1 on the left. It is not supported for now.
......@@ -3429,6 +3451,7 @@ class T_Tile(unittest.TestCase):
#topo = f.maker.fgraph.toposort()
#assert len(topo) == 1
#assert isinstance(topo[0].op, DimShuffle)
self.assertRaises(ValueError, T.tile, m, (1,)*(m.ndim-1))
#f = theano.function([var], T.tile(m, (1,)*(m.ndim-1)))
#topo = f.maker.fgraph.toposort()
......
Markdown format
0%
You added 0 people to this discussion. Please proceed with caution.
Please finish editing this comment first!
Register or sign in to post a comment.