Commit dfdcd682 authored by Xavier Bouthillier

Merge pull request #3318 from Thrandis/ccw

Numpy-like interface for stack.
...@@ -583,6 +583,20 @@ dimensions, see :meth:`_tensor_py_operators.dimshuffle`.
:type n_ones: int
:param n_ones: number of dimensions to be added to `x`
.. function:: shape_padaxis(t, axis)
Reshape `t` by adding 1 at the dimension `axis`. Note that this new
dimension will be broadcastable. To make it non-broadcastable,
see :func:`unbroadcast`.

:type t: any TensorVariable (or compatible)
:param t: variable to be reshaped
:type axis: int
:param axis: the axis at which to add the new dimension to `t`
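A short doctest-style illustration (not part of the original docstring; it assumes the conventional ``import theano.tensor as T``):

>>> x = T.matrix()
>>> T.shape_padaxis(x, axis=1).ndim   # new broadcastable axis between the two existing ones
3
>>> T.shape_padaxis(x, axis=-1).ndim  # negative axes count from the end, as in NumPy
3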
.. autofunction:: unbroadcast(x, *axes)
.. autofunction:: addbroadcast(x, *axes)
...@@ -678,6 +692,26 @@ Creating Tensor
except for the main diagonal, whose values are equal to one. The output
will have the same dtype as `x`.
.. function:: stack(tensors, axis=0)
Warning: The interface stack(*tensors) is deprecated!
Return a Tensor representing the arguments all stacked up into a single
Tensor (of rank one greater).
:param tensors: a list or a tuple of one or more tensors of the same rank.
:param axis: the axis along which the tensors will be stacked. Default value is 0.
:returns: A tensor such that rval[0] == tensors[0], rval[1] == tensors[1], etc.
>>> x0 = T.scalar()
>>> x1 = T.scalar()
>>> x2 = T.scalar()
>>> x = T.stack([x0, x1, x2])
>>> x.ndim # x is a vector of length 3.
1
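An additional illustration of the `axis` argument (not in the original docs; it assumes ``import numpy as np`` and float64 inputs):

>>> import numpy as np
>>> a = T.dmatrix()
>>> b = T.dmatrix()
>>> T.stack([a, b], axis=1).eval({a: np.zeros((2, 3)), b: np.ones((2, 3))}).shape
(2, 2, 3)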
.. function:: stack(*tensors)
Return a Tensor representing the arguments all stacked up into a single Tensor.
...
...@@ -856,7 +856,7 @@ def local_gpu_careduce(node):
new_in_shp.append(x_shape[i])
new_greduce = GpuCAReduce(new_mask, scalar_op)
- reshaped_x = x.reshape(tensor.stack(*new_in_shp))
+ reshaped_x = x.reshape(tensor.stack(new_in_shp))
gpu_reshaped_x = as_cuda_ndarray_variable(reshaped_x)
reshaped_gpu_inputs = [gpu_reshaped_x]
if new_greduce.supports_c_code(reshaped_gpu_inputs):
...@@ -865,7 +865,7 @@ def local_gpu_careduce(node):
if reduce_reshaped_x.ndim != out.ndim:
rval = reduce_reshaped_x.reshape(
- tensor.stack(*shape_of[out]))
+ tensor.stack(shape_of[out]))
else:
rval = reduce_reshaped_x
else:
...
...@@ -595,7 +595,7 @@ def local_gpua_careduce(node):
dtype=getattr(node.op, 'dtype', None),
acc_dtype=getattr(node.op, 'acc_dtype', None))
- reshaped_x = x.reshape(tensor.stack(*new_in_shp))
+ reshaped_x = x.reshape(tensor.stack(new_in_shp))
gpu_reshaped_x = gpu_from_host(reshaped_x)
gvar = greduce(gpu_reshaped_x)
# We need to have the make node called, otherwise the mask can
...@@ -607,7 +607,7 @@ def local_gpua_careduce(node):
if reduce_reshaped_x.ndim != node.outputs[0].ndim:
unreshaped_reduce = reduce_reshaped_x.reshape(
- tensor.stack(*shape_of[node.outputs[0]]))
+ tensor.stack(shape_of[node.outputs[0]]))
else:
unreshaped_reduce = reduce_reshaped_x
return [unreshaped_reduce]
...
...@@ -3013,7 +3013,7 @@ class HStack(gof.op.Op):
split = tensor.Split(len(inputs))(gz, 1,
tensor.stack(
- *[x.shape[1]
+ [x.shape[1]
for x in inputs]))
if not isinstance(split, list):
split = [split]
...@@ -3094,7 +3094,7 @@ class VStack(HStack):
split = tensor.Split(len(inputs))(gz, 0,
tensor.stack(
- *[x.shape[0]
+ [x.shape[0]
for x in inputs]))
if not isinstance(split, list):
split = [split]
...
...@@ -185,7 +185,7 @@ def as_tensor_variable(x, name=None, ndim=None):
if isinstance(x, (tuple, list)) and python_any(isinstance(xi, Variable)
for xi in x):
try:
- return stack(*x)
+ return stack(x)
except (TypeError, ValueError):
pass
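In other words, a Python list that mixes symbolic scalars and constants is now handed to `stack` as a single list argument. A minimal sketch of the behaviour (illustrative only, assuming standard Theano imports):

import theano.tensor as T

i = T.iscalar('i')
v = T.as_tensor_variable([i, 2, 3])  # routed through stack([...]) internally
assert v.ndim == 1                   # a length-3 vector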
...@@ -1682,7 +1682,7 @@ def smallest(*args):
a, b = args
return switch(a < b, a, b)
else:
- return min(stack(*args), axis=0)
+ return min(stack(args), axis=0)
@constructor
...@@ -1697,7 +1697,7 @@ def largest(*args):
a, b = args
return switch(a > b, a, b)
else:
- return max(stack(*args), axis=0)
+ return max(stack(args), axis=0)
##########################
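For more than two arguments, `smallest` and `largest` stack the inputs along a new leading axis and reduce over it. A quick sketch of the resulting behaviour (illustrative, not part of the patch):

import theano
import theano.tensor as T

a, b, c = T.dvectors('a', 'b', 'c')
f = theano.function([a, b, c], T.smallest(a, b, c))  # == min(stack([a, b, c]), axis=0)
f([1., 5.], [3., 2.], [0., 9.])  # elementwise minimum -> array([0., 2.])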
...@@ -3803,7 +3803,7 @@ class Join(Op):
if 'float' in out_dtype or 'complex' in out_dtype:
# assume that this is differentiable
split = Split(len(tensors))
- split_gz = split(gz, axis, stack(*[shape(x)[axis]
+ split_gz = split(gz, axis, stack([shape(x)[axis]
for x in tensors]))
# If there is only one split, it might not be in a list.
if not isinstance(split_gz, list):
...@@ -3960,16 +3960,78 @@ def shape_padright(t, n_ones=1):
@constructor
- def stack(*tensors):
+ def shape_padaxis(t, axis):
"""Reshape `t` by adding 1 at the dimension `axis`.
See Also
--------
shape_padleft
shape_padright
Dimshuffle
"""
_t = as_tensor_variable(t)
ndim = _t.ndim + 1
if not -ndim <= axis < ndim:
msg = 'axis {0} is out of bounds [-{1}, {1})'.format(axis, ndim)
raise IndexError(msg)
if axis < 0:
axis += ndim
pattern = [i for i in xrange(_t.type.ndim)]
pattern.insert(axis, 'x')
return DimShuffle(_t.broadcastable, pattern)(_t)
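The axis normalisation and the DimShuffle pattern above can be traced with a small pure-Python helper (hypothetical, for illustration only; it mirrors the logic but is not part of the patch):

def padaxis_pattern(ndim_in, axis):
    # mirror of the bounds check and 'x' insertion used by shape_padaxis
    ndim_out = ndim_in + 1
    if not -ndim_out <= axis < ndim_out:
        raise IndexError('axis {0} is out of bounds [-{1}, {1})'.format(axis, ndim_out))
    if axis < 0:
        axis += ndim_out
    pattern = list(range(ndim_in))
    pattern.insert(axis, 'x')
    return pattern

assert padaxis_pattern(2, 1) == [0, 'x', 1]    # (r, c) -> (r, 1, c)
assert padaxis_pattern(2, -1) == [0, 1, 'x']   # (r, c) -> (r, c, 1)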
@constructor
def stack(*tensors, **kwargs):
"""Insert the arguments as slices into a tensor of 1 rank greater. """Insert the arguments as slices into a tensor of 1 rank greater.
The size in dimension 0 of the result will be equal to the number The size in dimension `axis` of the result will be equal to the number
of tensors passed. of tensors passed.
Note: The interface stack(*tensors) is deprecated, you should use
stack(tensors, axis=0) instead.
Parameters
----------
tensors : list or tuple of tensors
A list of tensors to be stacked.
axis : int
The index of the new axis. Default value is 0.
""" """
- if len(tensors) == 0:
- raise Exception('theano.tensor.stack(*tensors) must have at least'
+ # ---> Remove this when moving to the new interface:
+ if not tensors and not kwargs:
+ raise Exception('theano.tensor.stack(tensors, axis) must have at least'
' one parameter')
if not kwargs and not isinstance(tensors[0], (list, tuple)):
warnings.warn('stack(*tensors) interface is deprecated, use'
' stack(tensors, axis=0) instead.', DeprecationWarning,
stacklevel=3)
axis = 0
elif 'tensors' in kwargs:
tensors = kwargs['tensors']
if 'axis' in kwargs:
axis = kwargs['axis']
else:
axis = 0
else:
if len(tensors) == 2:
axis = tensors[1]
elif 'axis' in kwargs:
axis = kwargs['axis']
else:
axis = 0
tensors = tensors[0]
# <--- Until here.
if len(tensors) == 0:
raise Exception('tensors is empty. You should at least provide one'
' tensor to theano.tensor.stack(tensors, axis).')
# If all tensors are scalars of the same type, call make_vector.
# It makes the graph simpler, by not adding DimShuffles and Rebroadcasts
...@@ -3991,7 +4053,7 @@ def stack(*tensors):
tensors = list(map(as_tensor_variable, tensors))
dtype = scal.upcast(*[i.dtype for i in tensors])
return theano.tensor.opt.MakeVector(dtype)(*tensors)
- return join(0, *[shape_padleft(t, 1) for t in tensors])
+ return join(axis, *[shape_padaxis(t, axis) for t in tensors])
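The new return line makes `stack(tensors, axis)` behave like `numpy.stack`: every input gains a broadcastable axis at `axis` and the results are joined there. A quick numerical check of that equivalence (illustrative only, not from the patch):

import numpy as np
import theano.tensor as T

a, b = T.dmatrices('a', 'b')
va = np.arange(6.).reshape(2, 3)
vb = np.arange(6., 12.).reshape(2, 3)

stacked = T.stack([a, b], axis=1).eval({a: va, b: vb})
joined = T.join(1, T.shape_padaxis(a, 1), T.shape_padaxis(b, 1)).eval({a: va, b: vb})
assert np.allclose(stacked, joined)
assert stacked.shape == (2, 2, 3)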
@constructor
...@@ -5662,7 +5724,7 @@ def stacklists(arg):
"""
if isinstance(arg, (tuple, list)):
- return stack(*list(map(stacklists, arg)))
+ return stack(list(map(stacklists, arg)))
else:
return arg
...
...@@ -83,7 +83,7 @@ class Fourier(gof.Op):
list(shape_a[axis.data + 1:]))
else:
l = len(shape_a)
- shape_a = tensor.stack(*shape_a)
+ shape_a = tensor.stack(shape_a)
out_shape = tensor.concatenate((shape_a[0: axis], [n],
shape_a[axis + 1:]))
n_splits = [1] * l
...
...@@ -365,7 +365,7 @@ def _infer_ndim_bcast(ndim, shape, *args):
if len(pre_v_shape) == 0:
v_shape = tensor.constant([], dtype='int32')
else:
- v_shape = tensor.stack(*pre_v_shape)
+ v_shape = tensor.stack(pre_v_shape)
elif shape is None:
# The number of drawn samples will be determined automatically,
...
...@@ -3380,7 +3380,7 @@ class T_Join_and_Split(unittest.TestCase):
a = as_tensor_variable(1)
b = as_tensor_variable(2.0)
c = tensor._shared(numpy.asarray(3.0, dtype=self.floatX))
- s = stack(a, b, c)
+ s = stack([a, b, c])
want = numpy.array([1, 2, 3])
out = self.eval_outputs_and_check_vector([s], opt.MakeVector())
self.assertTrue((out == want).all())
...@@ -3389,7 +3389,7 @@ class T_Join_and_Split(unittest.TestCase):
a = self.shared(numpy.asarray(1., dtype=self.floatX))
b = as_tensor_variable(2.)
c = as_tensor_variable(3.)
- s = stack(a, b, c)
+ s = stack([a, b, c])
want = numpy.array([1, 2, 3])
out = self.eval_outputs_and_check_vector([s])
...@@ -3401,7 +3401,7 @@ class T_Join_and_Split(unittest.TestCase):
to int64"""
a = tensor.scalar('a', dtype=self.floatX)
b = tensor.scalar('b', dtype=self.floatX)
- s = stack(a, b, a, b)
+ s = stack([a, b, a, b])
f = function([a, b], s, mode=self.mode)
val = f(1, 2)
# print val
...@@ -3416,7 +3416,7 @@ class T_Join_and_Split(unittest.TestCase):
even when the scalars don't have the same dtype.'''
a = tensor.iscalar('a')
b = tensor.lscalar('b')
- s = stack(a, b, a, b)
+ s = stack([a, b, a, b])
f = function([a, b], s, mode=self.mode)
val = f(1, 2)
self.assertTrue(numpy.all(val == [1, 2, 1, 2]))
...@@ -3432,7 +3432,7 @@ class T_Join_and_Split(unittest.TestCase):
b = tensor.lscalar('b')
# test when the constant is the first element.
# The first element is used in a special way
- s = stack(10, a, b, numpy.int8(3))
+ s = stack([10, a, b, numpy.int8(3)])
f = function([a, b], s, mode=self.mode)
val = f(1, 2)
self.assertTrue(numpy.all(val == [10, 1, 2, 3]))
...@@ -3441,11 +3441,65 @@ class T_Join_and_Split(unittest.TestCase):
assert len([n for n in topo if isinstance(n, type(self.join_op))]) == 0
assert f.maker.fgraph.outputs[0].dtype == 'int64'
def test_stack_new_interface(self):
"""Test the new numpy-like interface: stack(tensors, axis=0)."""
# Testing against old interface
warnings.simplefilter('always', DeprecationWarning)
a = tensor.imatrix('a')
b = tensor.imatrix('b')
s1 = stack(a, b)
s2 = stack([a, b])
f = function([a, b], [s1, s2], mode=self.mode)
v1, v2 = f([[1, 2]], [[3, 4]])
self.assertTrue(v1.shape == v2.shape)
self.assertTrue(numpy.all(v1 == v2))
# Testing axis parameter
s3 = stack([a, b], 1)
f = function([a, b], s3, mode=self.mode)
v3 = f([[1, 2]], [[3, 4]])
v4 = numpy.array([[[1, 2], [3, 4]]])
self.assertTrue(v3.shape == v4.shape)
self.assertTrue(numpy.all(v3 == v4))
# Testing negative axis
v1 = [[1, 2, 3], [4, 5, 6]]
v2 = [[7, 8, 9], [10, 11, 12]]
s = stack([a, b], axis=-1)
f = function([a, b], s, mode=self.mode)
v = numpy.zeros((2, 3, 2))
v[:,:,0] = v1
v[:,:,1] = v2
out = f(v1, v2)
self.assertTrue(v.shape == out.shape)
self.assertTrue(numpy.all(v == out))
s = stack([a, b], axis=-2)
f = function([a, b], s, mode=self.mode)
v = numpy.zeros((2, 2, 3))
v[:,0,:] = v1
v[:,1,:] = v2
out = f(v1, v2)
self.assertTrue(v.shape == out.shape)
self.assertTrue(numpy.all(v == out))
# Testing out-of-bounds axis
self.assertRaises(IndexError, stack, [a, b], 4)
self.assertRaises(IndexError, stack, [a, b], -4)
# Testing deprecation warning
with warnings.catch_warnings(record=True) as w:
s = stack(a, b)
assert len(w) == 1
assert issubclass(w[-1].category, DeprecationWarning)
with warnings.catch_warnings(record=True) as w:
s = stack([a, b])
s = stack([a, b], 1)
s = stack([a, b], axis=1)
s = stack(tensors=[a, b])
s = stack(tensors=[a, b], axis=1)
assert not w
def test_stack_hessian(self):
# Test the gradient of stack when used in hessian, see gh-1589
a = tensor.dvector('a')
b = tensor.dvector('b')
- A = stack(a, b)
+ A = stack([a, b])
B = A.T.dot(A)
Ha, Hb = hessian(B.sum(), [a, b])
...@@ -3544,7 +3598,7 @@ class T_Join_and_Split(unittest.TestCase):
a = self.shared(numpy.array([1, 2, 3], dtype=self.floatX))
b = as_tensor_variable(numpy.array([7, 8, 9], dtype=self.floatX))
- s = stack(a, b)
+ s = stack([a, b])
want = numpy.array([[1, 2, 3], [7, 8, 9]])
out = self.eval_outputs_and_check_join([s])
self.assertTrue((out == want).all())
...@@ -5971,7 +6025,7 @@ class test_tensordot(unittest.TestCase):
def test_smallest_stack():
sx, sy = dscalar(), dscalar()
- rval = inplace_func([sx, sy], stack(sx, sy))(-4.0, -2.0)
+ rval = inplace_func([sx, sy], stack([sx, sy]))(-4.0, -2.0)
assert type(rval) == numpy.ndarray
assert [-4, -2] == list(rval)
...@@ -6610,13 +6664,13 @@ def test_dimshuffle_duplicate():
class T_get_scalar_constant_value(unittest.TestCase):
def test_get_scalar_constant_value(self):
- a = tensor.stack(1, 2, 3)
+ a = tensor.stack([1, 2, 3])
assert get_scalar_constant_value(a[0]) == 1
assert get_scalar_constant_value(a[1]) == 2
assert get_scalar_constant_value(a[2]) == 3
b = tensor.iscalar()
- a = tensor.stack(b, 2, 3)
+ a = tensor.stack([b, 2, 3])
self.assertRaises(tensor.basic.NotScalarConstantError, get_scalar_constant_value, a[0])
assert get_scalar_constant_value(a[1]) == 2
assert get_scalar_constant_value(a[2]) == 3
...@@ -6624,7 +6678,7 @@ class T_get_scalar_constant_value(unittest.TestCase):
# For now get_scalar_constant_value goes through only MakeVector and Join of
# scalars.
v = tensor.ivector()
- a = tensor.stack(v, [2], [3])
+ a = tensor.stack([v, [2], [3]])
self.assertRaises(tensor.NotScalarConstantError, get_scalar_constant_value, a[0])
self.assertRaises(tensor.NotScalarConstantError, get_scalar_constant_value, a[1])
self.assertRaises(tensor.NotScalarConstantError, get_scalar_constant_value, a[2])
...