提交 6a1ac4b0 authored 作者: Razvan Pascanu's avatar Razvan Pascanu

Scan now uses Rebroadcast

上级 9243f3ef
......@@ -450,13 +450,6 @@ TensorVariable
Returns a view of this tensor with permuted dimensions. Typically the
pattern will include the integers 0, 1, ... ndim-1, and any number of
'x' characters in dimensions where this tensor should be broadcasted.
Besides 'x' you can also use 'f' for dimensions of length 1 which (from
the point of view of Theano) are not broadcastable. Note that for
NumPy, a dimension of length 1 is implicitly broadcastable, therefore
the same will be true for dimensions marked with 'f' in the value of
the symbolic variable. Nonetheless, when you try to compile/write a
symbolic expression that tries to broadcast a dimension marked with
'f', Theano will catch it and will throw an exception.
See :func:`dimshuffle`.
......
......@@ -592,14 +592,15 @@ def scan(fn, sequences=[], outputs_info=[], non_sequences=[],
elif not isinstance(input.variable, SharedVariable):
inner_fn_inputs.append(input.variable)
n_fixed_steps = int(n_steps) if type(n_steps) in (float,int) else None
# check if it is actually a Theano constant
try :
n_fixed_steps = opt.get_constant_value(n_steps)
except:
n_fixed_steps = None
if type(n_steps) in (float,int):
n_fixed_steps = int(n_steps)
else:
# check if it is actually a Theano constant
try :
n_fixed_steps = opt.get_constant_value(n_steps)
except:
n_fixed_steps = None
print '>>> ',n_fixed_steps
if (n_steps == None or n_steps == numpy.inf or n_steps == numpy.nan) and n_seqs == 0 :
raise ValueError('Scan does not know for how many steps to iterate. '
'You need to provide the number of steps through the '
......@@ -1383,8 +1384,6 @@ class ScanRemoveFromGraph(Optimizer):
op = node.op
# If it is a scan Op
if isinstance(op, Scan) and op.n_fixed_steps != None:
print ':::::::::',op.n_fixed_steps
print '---------', abs(op.n_fixed_steps) < 2
if abs(op.n_fixed_steps) < 2:
# Step 1 replace the inputs of the inner function
# with the inputs of scan
......@@ -1498,8 +1497,7 @@ class ScanRemoveFromGraph(Optimizer):
for idx in xrange(len(my_outs)):
t = my_outs[idx]
p = ['f'] + [i for i in range(t.type.ndim)]
nwout = elemwise.DimShuffle(t.broadcastable,p)(t)
nwout = tensor.Rebroadcast((0,False))(tensor.shape_padleft(t))
env.replace(node.outputs[idx],nwout)
# we are done ...
......
......@@ -904,18 +904,15 @@ class _tensor_py_operators:
def dimshuffle(self, *pattern):
"""Reorder the dimensions of this variable, optionally inserting broadcasted dimensions.
:param pattern: list/tuple of int mixed with 'x' for broadcastable dimensions and 'f' for
non-broadcastable dimensions
:param pattern: list/tuple of int mixed with 'x' for broadcastable dimensions
For example, to create a 3D view of a [2D] matrix, call ``dimshuffle([0,'x',1])``. This
will create a 3D view such that the middle dimension is an implicit broadcasted
dimension. To do the same thing on the transpose of that matrix, call ``dimshuffle([1,
'x', 0])``. 'f' behaves exactly like 'x' (i.e. adds a 1-length dimension at that position)
except that, from the viewpoint of Theano, this dimension is not broadcastable.
'x', 0])``.
This function supports the pattern passed as a tuple, or as a variable-length argument (e.g. ``a.dimshuffle(pattern)`` is equivalent to ``a.dimshuffle(*pattern)`` where ``pattern`` is a list/tuple of ints mixed with 'x' characters).
For more information, see `DimShuffle`.
"""
if (len(pattern) == 1) and (isinstance(pattern[0], (list, tuple))):
......
......@@ -43,17 +43,6 @@ class DimShuffle(Op):
dimension and a numerical index represents the dimension of the same
rank in the tensor passed to perform.
Note (2.04.2010 RP) Added 'f', which means that we insert a non-broadcastable
dimension. This is useful because Theano is in some cases strongly
typed, and will not allow you to replace (in an optimization, for example)
identical tensors whose broadcastable patterns differ. Note that numpy
does not offer this option (from what I have researched): a dimension of
length 1 is automatically broadcastable. This will be true for the
value of the Theano variable as well on the fixed dimension (the one with 'f').
However, when you express your computation symbolically, Theano should catch
the fact that you are trying to broadcast an unbroadcastable dimension and will
not allow it (it throws an exception).
Examples:
DimShuffle((False, False, False), ['x', 2, 'x', 0, 1])
......@@ -102,9 +91,6 @@ class DimShuffle(Op):
If new_order[i] is 'x', the output's ith dimension will
be 1 and Broadcast operations will be allowed to do broadcasting
over that dimension.
If new_order[i] is 'f', the output's ith dimension will
be 1 and Broadcast operations will not be allowed to do broadcasting
over that dimension.
If input.broadcastable[i] == False then i must be found in new_order.
Broadcastable dimensions, on the other hand, can be discarded.
......@@ -134,10 +120,10 @@ class DimShuffle(Op):
# transposition of non-broadcastable dimensions
# This is how the dimensions will be permuted, without accounting for the extra
# 'x' broadcastable dimensions to insert.
self.shuffle = [i2j[x] for x in new_order if x != 'x' and x != 'f']
self.shuffle = [i2j[x] for x in new_order if x != 'x']
# list of dimensions of the output that are broadcastable and were not in the original input
self.augment = [i for i, x in enumerate(new_order) if x == 'x' or x == 'f']
self.augment = [i for i, x in enumerate(new_order) if x == 'x']
if self.inplace:
self.view_map = {0: [0]}
......@@ -161,8 +147,6 @@ class DimShuffle(Op):
for value in self.new_order:
if value == 'x':
ob.append(True)
elif value == 'f':
ob.append(False)
else:
ob.append(ib[value])
......@@ -251,7 +235,7 @@ class DimShuffle(Op):
shape_statements = ['npy_intp dimensions[%i]'%nd_out]
for i, o in enumerate(self.new_order):
if o != 'x' and o != 'f':
if o != 'x':
shape_statements += [('dimensions['+str(i)+'] = %(basename)s->dimensions['+str(o)+']')]
else:
shape_statements += [('dimensions['+str(i)+'] = 1')]
......@@ -266,7 +250,7 @@ class DimShuffle(Op):
#set the strides of the non-broadcasted dimensions
for i, o in enumerate(self.new_order):
if o != 'x' and o != 'f':
if o != 'x':
strides_statements += [('strides['+str(i)+'] = %(basename)s->strides['+str(o)+']')]
else:
strides_statements += [('strides['+str(i)+'] = 0')]
......@@ -333,7 +317,7 @@ class DimShuffle(Op):
gz = as_tensor_variable(gz)
grad_order = ['x'] * len(x.type.broadcastable)
for i, v in enumerate(self.new_order):
if v != 'x' and v !='f':
if v != 'x':
grad_order[v] = i
return [DimShuffle(gz.type.broadcastable, grad_order, inplace=True)(Elemwise(scalar.identity)(gz))]
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论