提交 78d2b9a7 authored 作者: Razvan Pascanu's avatar Razvan Pascanu

Updated the interface and documentation of scan following James suggestions; I…

Updated the interface and documentation of scan following James suggestions; I also implemented a map function using scan
上级 90cf38f3
......@@ -32,8 +32,7 @@ The equivalent Theano code would be
# Symbolic description of the result
result,updates = theano.scan(fn = lambda x_tm1,A: x_tm1*A,\
sequences = [], \
initial_states = T.ones_like(A),\
info_outputs = T.ones_like(A),\
non_sequences = A, \
n_steps = k)
......@@ -46,13 +45,12 @@ construct a function (using a lambda expression) that given `x_tm1` and
is the value of our output at time step ``t-1``. Therefore
``x_t`` (value of output at time `t`) is `A` times value of output
at `t-1`.
Next we assign an empty list to ``sequences`` (since we do not need to
iterate over anything) and initialize the output as a tensor with same
shape as A filled with ones. We give A as a non sequence parameter and
tell scan to iterate for k steps.
Next we initialize the output as a tensor with same
shape as A filled with ones. We give A to scan as a non sequence parameter and
specify the number of steps k to iterate over our lambda expression.
Scan will return a tuple, containing our result (``result``) and a
dictionary of updates ( empty for this example). Note that the result
dictionary of updates ( empty in this case). Note that the result
is not a matrix, but a 3D tensor containing the value of ``A**k`` for
each step. We want the last value ( after k steps ) so we compile
a function to return just that.
......@@ -69,8 +67,8 @@ that our RNN is defined as follows :
y(n) = W^{out} x(n- 3)
Note that this network is far away from a classical recurrent neural
network and might be in practice useless. The reason we defined as such
Note that this network is far from a classical recurrent neural
network and might be useless. The reason we defined as such
is to better illustrate the features of scan.
In this case we have a sequence over which we need to iterate ``u``,
......@@ -89,12 +87,15 @@ construct a function that computes one iteration step :
return [x_t, y_t]
As naming convention for the variables we used ``a_tmb`` to mean ``a`` at
``t-b`` and ``a_tpb`` to be ``a`` at ``t+b``.
Note the order in which the parameters are given, and in which the
result is returned. Try to respect chronological order among
the taps ( time slices of sequences or outputs) used. In practice what
is crucial to happen for the computation to work is to give the slices
in the same order as provided in the ``sequence_taps``/``outputs_taps`` dictionaries and to have same
order of inputs here as when applying scan. Given that we have all
the taps ( time slices of sequences or outputs) used. For scan it is crucial only
for the variables representing the different time taps to be in the same order
as the one in which these taps are given. Also, not only taps should respect
an order, but also variables, since this is how scan figures out what should
be represented by what. Given that we have all
the Theano variables needed we construct our RNN as follows :
.. code-block:: python
......@@ -106,12 +107,10 @@ the Theano variables needed we construct our RNN as follows :
# y[-1]
([x_vals, y_vals],updates) = theano.scan(fn = oneStep, \
sequences = [u], \
initial_states = [x0,y0], \
non_sequences = [W,W_in_1,W_in_2,W_feedback, W_out], \
sequences_taps = {0:[-4,0] },\
outputs_taps = {0:[-3,-1] },)
([x_vals, y_vals],updates) = theano.scan(fn = oneStep, \
sequences = dict(input = u, taps= [-4,-0]), \
info_outputs = [dict(initial = x0, taps = [-3,-1]),y0], \
non_sequences = [W,W_in_1,W_in_2,W_feedback, W_out])
# for second input y, scan adds -1 in output_taps by default
......@@ -153,7 +152,7 @@ the following:
sample = theano.tensor.vector()
values, updates = theano.scan( OneStep, [],sample, [], n_steps = 10 )
values, updates = theano.scan( OneStep, info_outputs = sample, n_steps = 10 )
gibbs10 = theano.function([sample], values[-1], updates = updates)
......@@ -177,7 +176,7 @@ afterwards. Look at this example :
.. code-block:: python
a = theano.shared(1)
values,updates = theano.scan( lambda : {a:a+1}, [],[],[], n_steps = 10 )
values,updates = theano.scan( lambda : {a:a+1}, n_steps = 10 )
In this case the lambda expression does not require any input parameters
and returns an update dictionary which tells how ``a`` should be updated
......
......@@ -60,12 +60,18 @@ def hash_listsDictsTuples(x):
pass
return hash_value
def _map(fn, sequences, non_sequences=[]):
#TODO
#UGLY HACK: instead of figuring out how many outputs there are, we
# will assume there are less than 100 of them
return scan(fn, sequences=sequences,
outputs_taps=dict([(i,[]) for i in xrange(100)]))
###################################
## Implement specific function calls : map, reduce, generate
def map(fn, sequences, non_sequences = [], n_steps =0, truncate_gradient = -1, \
go_backwards = False, mode = 'FAST_RUN'):
return scan(fn, sequences= sequences, non_sequences = non_sequences,
truncate_gradient = truncate_gradient, go_backwards = go_backwards,
mode = mode)
# CONSIDER ALTERNATE CALLING CONVENTIONS:
# simple:
......@@ -91,11 +97,13 @@ def _map(fn, sequences, non_sequences=[]):
# If the larger (in absolute values) the sequence_taps, the shorter the output
# right? If the sequence_taps = {0: [-10, 10]}, and I pass an input with 22
# rows, then the scan will output something of length <=2 right?
#
#
# ANSWER:
# Yes, actually it will be exactly 2 ( if there are no other constraints)
def scan(fn, sequences=[], initial_states=[], non_sequences=[], inplace_map={}, \
sequences_taps={}, outputs_taps = {}, n_steps = 0, \
truncate_gradient = -1, go_backwards = False,
def scan(fn, sequences=[], info_outputs=[], non_sequences=[],
n_steps = 0, truncate_gradient = -1, go_backwards = False,
mode = None):
'''Function that constructs and applies a Scan op
......@@ -108,14 +116,14 @@ def scan(fn, sequences=[], initial_states=[], non_sequences=[], inplace_map={},
should have the following order:
* all time slices of the first sequence (as given in the
``sequences`` list) ordered chronologically
``sequences`` list) ordered in the same fashion as the time taps provided
* all time slices of the second sequence (as given in the
``sequences`` list) ordered chronologically
``sequences`` list) ordered in the same fashion as the time taps provided
* ...
* all time slices of the first output (as given in the
``initial_state`` list) ordered chronologically
``initial_state`` list) ordered in the same fashion as the time taps provided
* all time slices of the second output (as given in the
``initial_state`` list) ordered chronologically
``initial_state`` list) ordered in the same fashion as the time taps provided
* ...
* all other parameters over which scan doesn't iterate given
......@@ -128,21 +136,50 @@ def scan(fn, sequences=[], initial_states=[], non_sequences=[], inplace_map={},
them as a tuple : (outputs, updates) or (updates, outputs).
Outputs can be just a theano expression if you have only one outputs or
a list of theano expressions. Updates can be given either as a list of
a list of theano expressions. Updates can be given either as a list of tuples or
as a dictionary. If you have a list of outputs, the order of these
should match that of their ``initial_states``.
:param sequences:
list of Theano variables over which scan needs to iterate.
:param initial_states:
list of Theano variables containing the initial state used for the
output. Note that if the function applied recursively uses only the
previous value of the output or none, this initial state should have
list of Theano variables or dictionaries containing Theano variables over which
scan needs to iterate. The reason you might want to wrap a certain Theano
variable in a dictionary is to provide auxiliary information about how to iterate
over that variable. For example this is how you specify that you want to use
several time slices of this sequence at each iteration step. The dictionary
should have the following keys :
* ``input`` -- Theano variable representing the sequence
* ``taps`` -- temporal taps to use for this sequence. They are given as a list
of ints, where a value ``k`` means that at iteration step ``t`` scan needs to
also provide the slice ``t+k``. The order in which you provide these int values
here is the same order in which the slices will be provided to ``fn``.
If you do not wrap a variable in a dictionary, scan will do it for you, under
the assumption that you use only one slice, defined as a tap of offset 0. This
means that at step ``t`` scan will provide the slice at position ``t``.
:param info_outputs:
list of Theano variables or dictionaries containing Theano variables used
to initialize the outputs of scan. As before (for ``sequences``) the reason
you would wrap a Theano variable in a dictionary is to provide additional
information about how scan should deal with that specific output. The dictionary
should contain the following keys:
* ``initial`` -- Theano variable containing the initial state of the output
* ``taps`` -- temporal taps to use for this output. The taps are given as a
list of ints (only negative, since you cannot use future values of outputs),
with the same meaning as for ``sequences`` (see above).
* ``inplace`` -- theano variable pointing to one of the input sequences; this
flag tells scan that the output should be computed in the memory space occupied
by that input sequence. Note that scan will only do this if allowed by the
rest of your computational graph.
If the function applied recursively uses only the
previous value of the output, the initial state should have
same shape as one time step of the output; otherwise, the initial state
should have the same number of dimensions as output. This can easily be
understand through an example. For computing ``y[t]`` let assume that we
need ``y[t-1]``, ``y[t-2]`` and ``y(t-4)``. Through an abuse of
should have the same number of dimensions as output. This is easily
understood through an example. For computing ``y[t]`` let us assume that we
need ``y[t-1]``, ``y[t-2]`` and ``y[t-4]``. Through an abuse of
notation, when ``t = 0``, we would need values for ``y[-1]``, ``y[-2]``
and ``y[-4]``. These values are provided by the initial state of ``y``,
which should have the same number of dimensions as ``y``, where the first
......@@ -150,52 +187,28 @@ def scan(fn, sequences=[], initial_states=[], non_sequences=[], inplace_map={},
case is 4. If ``init_y`` is the variable containing the initial state
of ``y``, then ``init_y[0]`` corresponds to ``y[-4]``, ``init_y[1]``
corresponds to ``y[-3]``, ``init_y[2]`` corresponds to ``y[-2]``,
``init_y[3]`` corresponds to ``y[-1]``. By default, scan is set to use
the last time step for each output.
``init_y[3]`` corresponds to ``y[-1]``. The default behaviour of scan is
the following :
* if you do not wrap an output in a dictionary, scan will wrap it for you
assuming that you use only the last step of the output ( i.e. it makes your tap
value list equal to [-1]) and that it is not computed inplace
* if you wrap an output in a dictionary but you do not provide any taps, but
you provide an initial state it will assume that you are using only a tap value
of -1
* if you wrap an output in a dictionary but you do not provide any initial state,
it assumes that you are not using any form of taps
:param non_sequences:
Parameters over which scan should not iterate. These parameters are
given at each time step to the function applied recursively.
:param inplace_map:
Dictionary describing outputs computed *inplace*. ``inplace_map`` is a
dictionary where keys are output indexes, and values are sequence
indexes. Assigning a value ``j`` to a key ``i`` means that output
number ``j`` will be computed inplace (in the same memory buffer) as the
input number ``i``.
:param sequences_taps:
Dictionary describing what slices of the input sequences scan should
use. At each step of the iteration you can use different slices of your
input sequences(called here taps), and this dictionary lets you define
exactly that. The keys of the dictionary are sequence indexes, the
values are list of numbers. Having the following entry ``i :
[k_1,k_2,k_3]``, means that at step ``t``, for sequence ``x``, that has
the index ``i`` in the list of sequences, you would use the values
``x[t+k_1]``, ``x[t+k_2]`` and ``x[t+k_3]``. ``k_1``, ``k_2``, ``k_3``
values can be positive or negative, and the sequence for which you request these
taps should be large enough to accommodate them. If in the chronological
order, ``k`` is the first past value of sequence ``x``, then index 0 of
``x`` will correspond to step ``k`` (if ``k`` is -3, then, abusing
notation ``x[0]`` will be seen by scan as ``x[-3]``). If you do not want
to use any taps for a given sequence you need to set the corresponding
entry in the dictionary to the empty list. By default, for each sequence
that is not represented in the dictionary scan will assume that at
every step it needs to provide the current value of that sequence.
:param outputs_taps:
Dictionary describing what slices of the input sequences scan should
use. The ``outputs_taps`` are defined in an analogous way to
``sequences_taps``, just that the taps are for the outputs generated by
scan. As such they can only be negative, i.e. refer to past value of
outputs. By default scan will expect to use for any output the last time
step, if nothing else is specified.
:param n_steps:
Number of steps to iterate. Sometimes you want to either enforce a fixed
number of steps, or you might not even have any sequences you want to
iterate over, but rather just to repeat some computation for a fixed
number of steps. It can be a theano scalar or a number.
Number of steps to iterate. If this value is provided scan will run only for
this amount of steps (given that the input sequences are sufficiently long).
If there is no input sequence (for example in case of a generator network) scan
will iterate for this number of steps. It can be a theano scalar or a number.
:param truncate_gradient:
Number of steps to use in truncated BPTT. If you compute gradients
......@@ -221,10 +234,10 @@ def scan(fn, sequences=[], initial_states=[], non_sequences=[], inplace_map={},
else:
seqs = sequences
if not (type(initial_states) in (list,tuple)):
init_outs = [initial_states]
if not (type(info_outputs) in (list,tuple)):
info_outs = [info_outputs]
else:
init_outs = initial_states
info_outs = info_outputs
if not (type(non_sequences) in (list,tuple)):
non_seqs = [non_sequences]
......@@ -233,49 +246,85 @@ def scan(fn, sequences=[], initial_states=[], non_sequences=[], inplace_map={},
# compute number of sequences and number of seqs
# compute number of sequences and number of outputs
n_seqs = len(seqs)
n_init_outs = len(init_outs)
n_outs = len(info_outs)
# update sequences_taps[idx] to contain 0 if it is not defined
inplace_map = {}
sequences_taps = {}
outputs_taps = {}
# wrap sequences in a dictionary if they are not already
# in the same pass create a sequences_taps dictionary
for i in xrange(n_seqs):
if not sequences_taps.has_key(i):
sequences_taps.update({i:[0]})
# if input sequence is not actually used by the recursive function
elif sequences_taps[i] == []:
sequences_taps.__delitem__(i)
elif not (type(sequences_taps[i]) in (list,tuple)):
sequences_taps[i] = [sequences_taps[i]]
# update outputs_taps[idx] to contain -1 if it is not defined
for i in xrange(n_init_outs):
if not outputs_taps.has_key(i):
outputs_taps.update({i:[-1]})
elif outputs_taps[i] == []:
outputs_taps.__delitem__(i)
elif not(type(outputs_taps[i]) in (list,tuple)):
outputs_taps[i] = [outputs_taps[i]]
if not type(seqs[i]) == dict :
seqs[i] = dict(input=seqs[i], taps=[0])
# see if taps values are provided as a list
elif seqs[i].get('taps',None):
if not type(seqs[i]['taps']) in (tuple,list):
seqs[i]['taps'] = [seqs[i]['taps']]
else:
seqs[i][taps] = [0]
if seqs[i].get('taps',None):
sequences_taps[i] = seqs[i]['taps']
# wrap outputs info in a dictionary if they are not already
# in the same pass create a init_outs_taps dictionary and a inplace map
for i in xrange(n_outs):
if info_outs[i]:
if not type(info_outs[i]) == dict:
info_outs[i] = dict(initial=info_outs[i], taps = [-1])
# if there is no initial state but there are taps
elif (not info_outs[i].get('initial',None)) and(info_outs[i].get('taps',None)):
raise ValueError('If you are using slices of an output you need to '\
'provide a initial state for it', info_outs[i])
elif info_outs[i].get('initial',None) and (not info_outs[i].get('taps',None)):
info_outs[i]['taps'] = [-1]
else:
info_outs[i] = dict()
if info_outs[i].get('taps', None):
outputs_taps[i] = info_outs[i]['taps']
if info_outs[i].get('inplace', None):
# look for that variable to get the index
found = None
for k in xrange(n_seqs):
if seqs[k].get('input', None) == info_outs[i].get('inplace',None):
found = k
if found != None:
inplace_map[i] = k
else:
raise ValueError('Asked to compute in place of a non-input variable',\
info_outs[i].get('inplace', None))
# create theano inputs for the recursive function
args = []
_ins = 0
_outs = 0
for (i,seq) in enumerate(seqs):
if sequences_taps.has_key(i):
for k in xrange(len(sequences_taps[i])):
args += [seq[0].type() ]
_ins += 1
for (i,init_out) in enumerate(init_outs):
if outputs_taps.has_key(i):
for k in xrange(len(outputs_taps[i])):
if outputs_taps[i] == [-1]:
args += [init_out.type() ]
_outs += 1
else:
args += [init_out[0].type() ]
_outs += 1
# go through sequences picking up time slices as needed
for seq in seqs:
if seq.get('taps', None):
slices = [ seq['input'][0].type() for k in seq['taps'] ]
args += slices
_ins += len(seq['taps'])
# go through outputs picking up time slices as needed
for init_out in info_outs:
if init_out.get('taps', None) == [-1]:
args += [init_out['initial'].type()]
_outs += 1
elif init_out.get('taps',None):
if numpy.any(numpy.array(init_out.get('taps',[])) > 0):
raise ValueError('Can not use future taps of outputs', init_out)
slices = [ init_out['initial'][0].type() for k in init_out['taps'] ]
args += slices
_outs += len(init_out['taps'])
# remove shared variables from the non sequences list
noshared = []
for non_seq in non_seqs:
if not isinstance(non_seq, theano.compile.SharedVariable):
......@@ -331,26 +380,39 @@ def scan(fn, sequences=[], initial_states=[], non_sequences=[], inplace_map={},
ls_outputs = [ sout.variable for sout in dummy_f.maker.outputs]
update_map = {}
n_actual_outs = len(dummy_f.maker.outputs)
shared_outs = []
shared_non_seqs = []
givens = {}
# if the number of outputs to the function does not match the number of
# assumed outputs
if len(ls_outputs) != n_outs:
if info_outs == []:
# We know how to deal with this case, assume that none of the outputs
# are required to have any sort of time taps
# we just need to update the number of actual outputs
n_outs = len(ls_outputs)
else:
raise ValueError('There has been a terrible mistake in our input arguments'
' and scan is totally lost. Make sure that you indicate for every '
' output what taps you want to use, or None, if you do not want to '
' use any !')
ls_inputs=[inp.variable for inp in \
dummy_f.maker.expanded_inputs[:_ins+_outs]]
fromIdx = _ins + _outs
stored_steps_output = [ 0 for i in xrange(n_actual_outs)]
stored_steps_output = [ 0 for i in xrange(n_outs)]
# add shared variable that act as outputs
#
n_outs = n_actual_outs
n_outs_extended = n_outs
for inp in dummy_f.maker.expanded_inputs[fromIdx:] :
if isinstance(inp.variable, theano.compile.SharedVariable) and inp.update:
ls_inputs.append(inp.variable.type())
ls_outputs += [inp.update]
update_map[ inp.variable ] = n_outs
outputs_taps[ n_outs ] = [-1]
n_outs += 1
update_map[ inp.variable ] = n_outs_extended
outputs_taps[ n_outs_extended ] = [-1]
n_outs_extended += 1
stored_steps_output += [1]
shared_outs += [inp.variable]
givens[inp.variable] = ls_inputs[-1]
......@@ -365,15 +427,17 @@ def scan(fn, sequences=[], initial_states=[], non_sequences=[], inplace_map={},
ls_inputs.append(inp.variable)
# Create the Scan op object
local_op = Scan( (ls_inputs,ls_outputs, givens ), n_seqs, n_outs, inplace_map,
sequences_taps, outputs_taps, truncate_gradient,
local_op = Scan( (ls_inputs,ls_outputs, givens ), n_seqs, n_outs_extended,
inplace_map, sequences_taps, outputs_taps, truncate_gradient,
go_backwards, stored_steps_output, mode)
# Call the object on the input sequences, initial values for outs,
# and non sequences
unwrapped_seqs = [ seq.get('input',theano.tensor.as_tensor(0)) for seq in seqs ]
unwrapped_outs = [ out.get('initial',theano.tensor.as_tensor(0)) for out in info_outs ]
values = local_op( *( [theano.tensor.as_tensor(n_steps)] \
+ seqs \
+ init_outs \
+ unwrapped_seqs \
+ unwrapped_outs \
+ shared_outs \
+ noshared
+ shared_non_seqs))
......@@ -383,11 +447,11 @@ def scan(fn, sequences=[], initial_states=[], non_sequences=[], inplace_map={},
for k in update_map.keys():
update_map[k] = values [ update_map[k] ]
if n_actual_outs != n_outs :
if n_actual_outs == 1:
if n_outs != n_outs_extended :
if n_outs == 1:
values = values[0]
else:
values = values[:n_actual_outs]
values = values[:n_outs]
return (values, update_map)
......@@ -618,7 +682,7 @@ class Scan(theano.Op):
def scan(self,fn, args, n_seqs, n_outs, seqs_taps, outs_taps, n_steps,
def scan(self, fn, args, n_seqs, n_outs, seqs_taps, outs_taps, n_steps,
go_backwards, inplace_map):
y = []
......@@ -704,9 +768,21 @@ class Scan(theano.Op):
#update outputs
for j in xrange(n_outs):
if self.stored_steps_output[j] <1:
y[j][i] = something[j]
# if you have provided no size for the missing output you might find yourself
# here with an incorrect array .. if that happens reallocate memory for the needed
# array
try :
y[j][i] = something[j]
except :
y[j] = numpy.empty( (n_steps,)+something[j].shape , dtype =
something[j].dtype)
y[j][i] = something[j]
elif self.stored_steps_output[j] == 1:
y[j] = something[j]
try:
y[j] = something[j]
except:
y[j] = numpy.empty( something[j].shape, dtype = something[j].dtype)
y[j] = something[j]
else:
raise NotImplementedError('This will be implemented in the near future')
return y
......
......@@ -103,7 +103,7 @@ class T_Scan(unittest.TestCase):
# generator network, only one output , type scalar ; no sequence or
# non sequence arguments
def test_1(self):
def test_generator_one_output_scalar(self):
def f_pow2(x_tm1):
......@@ -117,7 +117,7 @@ class T_Scan(unittest.TestCase):
assert(compareArrays(f1(1,3), [2,4,8]))
# simple rnn, one input, one state, weights for each; input/state are
# vectors, weights are scalars
def test_2(self):
def test_one_sequence_one_output_weights(self):
def f_rnn(u_t,x_tm1,W_in, W):
......@@ -138,9 +138,9 @@ class T_Scan(unittest.TestCase):
# simple rnn, one input, one state, weights for each; input/state are
# vectors, weights are scalars; using shared variables
def test_3(self):
def test_one_sequence_one_output_weights_shared(self):
u = theano.tensor.dvector()
u = theano.tensor.dvector()
x0 = theano.tensor.dscalar()
W_in = theano.shared(.1, name = 'w_in')
W = theano.shared(1., name ='w')
......@@ -158,7 +158,7 @@ class T_Scan(unittest.TestCase):
# some rnn with multiple outputs and multiple inputs; other dimension
# instead of scalars/vectors
def test_4(self):
def test_multiple_inputs_multiple_outputs(self):
W_in2 = theano.shared(numpy.array([1.,2.]), name='win2')
W = theano.shared(numpy.array([[2.,1.],[1.,1.]]), name='w')
......@@ -191,7 +191,7 @@ class T_Scan(unittest.TestCase):
# simple rnn, one input, one state, weights for each; input/state are
# vectors, weights are scalars; using shared variables and past
# taps (sequences and outputs)
def test_5(self):
def test_using_taps_input_output(self):
u = theano.tensor.dvector()
x0 = theano.tensor.dvector()
......@@ -201,8 +201,8 @@ class T_Scan(unittest.TestCase):
def f_rnn_shared(u_tm2, x_tm1, x_tm2):
return u_tm2*W_in+x_tm1*W+x_tm2
Y, updates = theano.scan(f_rnn_shared, u,x0, [], \
sequences_taps = {0:[-2]}, outputs_taps = {0:[-1,-2]})
Y, updates = theano.scan(f_rnn_shared, dict(input=u, taps=-2),
dict(initial = x0, taps = [-1,-2]), [])
f7 = theano.function([u,x0], Y, updates = updates)
v_u = numpy.asarray([1.,2.,3.,4.])
......@@ -213,7 +213,7 @@ class T_Scan(unittest.TestCase):
# simple rnn, one input, one state, weights for each; input/state are
# vectors, weights are scalars; using shared variables and past
# taps (sequences and outputs) and future taps for sequences
def test_6(self):
def test_past_future_taps_shared(self):
u = theano.tensor.dvector()
x0 = theano.tensor.dvector()
......@@ -223,8 +223,8 @@ class T_Scan(unittest.TestCase):
def f_rnn_shared(u_tm2,u_tp2, x_tm1, x_tm2):
return (u_tm2+u_tp2)*W_in+x_tm1*W+x_tm2
Y,updts = theano.scan(f_rnn_shared, u,x0, [], \
sequences_taps = {0:[-2,2]}, outputs_taps = {0:[-1,-2]})
Y,updts = theano.scan(f_rnn_shared, dict( input = u, taps=[-2,2]),\
dict(initial = x0, taps = [-1,-2]), [])
f8 = theano.function([u,x0], Y, updates = updts)
v_u = numpy.array([1.,2.,3.,4.,5.,6.])
......@@ -234,7 +234,7 @@ class T_Scan(unittest.TestCase):
assert (compareArrays( out, f8(v_u, v_x0) ) )
# simple rnn ; compute inplace
def test_7(self):
def test_inplace(self):
u = theano.tensor.dvector()
mu = theano.Param( u, mutable = True)
......@@ -244,8 +244,7 @@ class T_Scan(unittest.TestCase):
def f_rnn_shared(u_t, x_tm1):
return u_t*W_in + x_tm1*W
Y, updts = theano.scan(f_rnn_shared, u, x0,[], \
inplace_map={0:0} )
Y, updts = theano.scan(f_rnn_shared, u, dict( initial = x0, inplace = u),[] )
f9 = theano.function([mu,x0], Y , updates = updts)
v_u = numpy.array([1.,2.,3.])
v_x0 = numpy.array(1.)
......@@ -257,7 +256,7 @@ class T_Scan(unittest.TestCase):
assert (compareArrays(v_u, out))
# Shared variable with updates
def test_8(self):
def test_shared_arguments_with_updates(self):
W1_vals = numpy.random.rand(20,30)
W2_vals = numpy.random.rand(30,20)
u1_vals = numpy.random.rand(3,20)
......@@ -266,11 +265,11 @@ class T_Scan(unittest.TestCase):
y1_vals = numpy.random.rand(20)
y2_vals = numpy.random.rand(30)
W1 = theano.shared(W1_vals)
W2 = theano.shared(W2_vals)
W1 = theano.shared(W1_vals,'W1')
W2 = theano.shared(W2_vals,'W2')
u1 = theano.shared(u1_vals)
y1 = theano.shared(y1_vals)
u1 = theano.shared(u1_vals,'u1')
y1 = theano.shared(y1_vals,'y1')
def f(u1_t, u2_t, y0_tm3, y0_tm2, y0_tm1, y1_tm1):
y0_t = theano.dot(theano.dot(u1_t,W1),W2) + 0.1*y0_tm1 + \
......@@ -279,18 +278,17 @@ class T_Scan(unittest.TestCase):
y2_t = theano.dot(u1_t, W1)
nwW1 = W1 + .1
nwW2 = W2 + .05
return ([y0_t, y1_t, y2_t], [(W1,nwW1), (W2, nwW2)])
return ([y0_t, y1_t, y2_t], [( W1,nwW1), (W2, nwW2)])
u2 = theano.tensor.matrix()
y0 = theano.tensor.matrix()
y2 = theano.tensor.vector()
u2 = theano.tensor.matrix('u2')
y0 = theano.tensor.matrix('y0')
Y,upds = theano.scan(f, [u1,u2], [y0,y1,y2],[], outputs_taps = {0:[-3,-2,-1], 2:[]})
Y,upds = theano.scan(f, [u1,u2], [ dict(initial = y0, taps = [-3,-2,-1]),y1, None])
f = theano.function([u2,y0,y2], Y, updates = upds)
f = theano.function([u2,y0], Y, updates = upds)
vls = f(u2_vals, y0_vals, y2_vals)
vls = f(u2_vals, y0_vals)
# do things in numpy
v_y0 = numpy.zeros((6,20))
......@@ -308,7 +306,7 @@ class T_Scan(unittest.TestCase):
vW1 = vW1 + .1
vW2 = vW2 + .05
def test_9(self):
def test_gibbs_chain(self):
W_vals = numpy.random.rand(20,30) -.5
vis_val = numpy.random.binomial(1,0.5, size=(3,20))
......@@ -331,8 +329,7 @@ class T_Scan(unittest.TestCase):
return trng.binomial(vsample.shape,1,vsample)
v_vals, updts = theano.scan(f, [], [vis],[], n_steps = 10,
sequences_taps = {}, outputs_taps = {})
v_vals, updts = theano.scan(f, [], [vis],[], n_steps = 10)
my_f = theano.function([vis], v_vals[-1], updates = updts)
......@@ -356,19 +353,16 @@ class T_Scan(unittest.TestCase):
assert (compareArrays(t_res, n_res))
def test_10(self):
s = theano.shared(1)
def test_only_shared_no_input_no_output(self):
def f_pow2():
return {s: 2*s}
n_steps = theano.tensor.dscalar()
Y, updts = theano.scan(f_pow2, [],[], [],n_steps = n_steps)
f1 = theano.function([n_steps], Y, updates = updts)
f1(3)
assert compareArrays(s.value, 8)
s = theano.shared(1)
def f_pow2():
return {s: 2*s}
n_steps = theano.tensor.dscalar()
Y, updts = theano.scan(f_pow2, [],[], [],n_steps = n_steps)
f1 = theano.function([n_steps], Y, updates = updts)
f1(3)
assert compareArrays(s.value, 8)
'''
# test gradient simple network
......@@ -386,14 +380,12 @@ class T_Scan(unittest.TestCase):
'''
def test_map_functionality(self):
raise SkipTest('Map functionality not implemented yet')
def f_rnn(u_t):
return u_t + 3
u = theano.tensor.dvector()
Y, updts = theano.scan(f_rnn, sequences=u, outputs_taps={0:[]})
Y, updts = theano.scan(f_rnn, u, [None])
f2 = theano.function([u], Y, updates = updts)
v_u = numpy.array([1.,2.,3.,4.])
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论