提交 9563676f authored 作者: Razvan Pascanu's avatar Razvan Pascanu

Deleted sandbox versions

上级 edeb4be8
"""Provide Scan and related functions
Scanning a function over sequential input(s) producing sequential output(s).
Scanning is a general form of recurrence, which can be used for looping.
The idea is that you 'scan' a function along some input sequence, producing
an output at each time-step that can be seen (but not modified) by the
function at the next time-step. (Technically, the function can see the
previous K time-steps of your outputs and L time steps of your inputs,
future and past.)
So for example, ``sum()`` could be computed by scanning the ``z+x_i``
function over a list, given an initial state of ``z=0``.
Special cases:
- A ``reduce()`` operation can be performed by returning only the last
output of a scan.
- A ``map()`` operation can be performed by applying a function that
ignores each previous output.
Often a for loop can be expressed as a scan() operation, and scan is the
closest that theano comes to looping.
This module provides scanning functionality with the `Scan` Op.
"""
__docformat__ = 'restructedtext en'
import theano
from theano.tensor import opt
from theano import gof
from theano.compile import optdb
# Logging function for sending warning or info
import logging
_logger = logging.getLogger('theano.scan')
def warning(*msg):
_logger.warning('WARNING theano.scan: '+' '.join(msg))
def info(*msg):
_logger.info('INFO theano.scan: '+' '.join(msg))
# Hashing a dictionary or a list or a tuple or any type that is hashable with
# the hash() function
def hash_listsDictsTuples(x):
hash_value = 0
if type(x) == dict :
for k,v in x.iteritems():
hash_value ^= hash_listsDictsTuples(k)
hash_value ^= hash_listsDictsTuples(v)
elif type(x) in (list,tuple):
for v in x:
hash_value ^= hash_listsDictsTuples(v)
else:
try:
hash_value ^= hash(x)
except:
pass
return hash_value
def scan(fn, sequences, initial_states, non_sequences, inplace_map={},
sequences_taps={}, outputs_taps = {},
n_steps = theano.tensor.zero(),
truncate_gradient = -1, go_backwards = False, mode = 'FAST_RUN'):
'''Function that constructs and applies a scan op
:param fn: given variables representing all the slices of input and
past values of outputs and other non sequences parameters, fn should
produce variables describing the output of one time step of scan.
The order in which the argument to this function are given is very
important. You should have the following order:
* all time slices of the first sequence (as given in the ``sequences``
list) ordered cronologically
* all time slices of the second sequence (as given in the
``sequences`` list) ordered cronologically
..
* all time slices of the first output (as given in the
``initial_state`` list) ordered cronologically
* all time slices of the second otuput (as given in the
``initial_state`` list) ordered cronologically
...
* all other parameters over which scan doesn't iterate given in
the same order as in ``non_sequences``
The outputs of these function should have the same order as in the list
``initial_states`
:param sequences: list of Theano variables over which scan needs to
iterate
:param initial_states: list of Theano variables containing the initial
state used for the output. Note that if the function applied recursively
uses only the previous value of the output or none, this initial state
should have same shape as one time step of the output; otherwise, the
initial state should have the same number of dimension as output. This
can easily be understand through an example. For computing y(t) let
assume that we need y(t-1), y(t-2) y(t-4). Through an abuse of notation,
when t = 0, we would need values for y(-1), y(-2) and y(-4). These values
are provided by the initial state of y, which should have same number
of dimension as y, where the first dimension should be 4 in this case.
If init_y is the initial values of y, then init_y[0] corresponds to
y[-4], init_y[1] corresponds to y[-3], init_y[2] corresponds to y[-2],
init_y[3] corresponds to y[-1]. By default, scan is set to use the
last time step for each output.
:param non_sequences: These are parameters used by the recursive function
over which scan shouldn't iterate
:param inplace_map: It is a dictionary where keys are output indexes,
and values are sequence indexes. Assigning to a key a value, means that
the output represented by key will be computed inplace (in the same
memory buffer) as the input represented by the value
:param sequences_taps: At each step you can use different time slices
of sequences, and this dictionary lets you define exactly that. The
keys of the dictionary are sequence indexes, the values are list of
numbers. Having the following entry : i : [t_1,t_2,t_3], means that
at time step k, for sequence x, that has the index i in the list of
sequences, you would use the values x[k+t_1], x[k+t_2], x[k+t_3].
t_1, t_2, t_3 values can be positive or negative. If you do not want
to use any time slice of the sequence you need to give to that entry
the empy list. By default, for each entry the dictionary will contain
the list [0].
:param outputs_taps: This has the same meaning as the parameter
sequences_taps, with the only differences that these taps or for
the outputs, and that they have to be negative (smaller than 0). To
enforce an output to not use any past values, you have the specify
in the dictionary for that entry the emty list, otherwise, by default,
scan will expect to use the last time step
:param n_steps: in case you do not have any sequences over which you want
to iterate, but rather apply some set of computation for a number of
steps, or when you want to restrict to a certain length, you provide
that length as n_steps. It can be a theano scalar or a value.
:param truncate_gradient: if you compute gradients through a scan op,
this can be computed using backpropagation through time. As such you
have the option to truncate the BPTT to a given number of steps (to
increase speed). If set to -1 no truncation is done.
:param go_backwards: This give you the option to move backwards through
your sequences instead of forward
'''
# check if inputs are just single variables instead of lists
if not (type(sequences) in (list, tuple)):
seqs = [sequences]
else:
seqs = sequences
if not (type(initial_states) in (list,tuple)):
init_outs = [initial_states]
else:
init_outs = initial_states
if not (type(non_sequences) in (list,tuple)):
non_seqs = [non_sequences]
else:
non_seqs = non_sequences
# compute number of sequences and number of seqs
n_seqs = len(seqs)
n_outs = len(init_outs)
# update sequences_taps[idx] to contain 0 if it is not defined
for i in xrange(n_seqs):
if not sequences_taps.has_key(i):
sequences_taps.update({i:[0]})
# if input sequence is not actually used by the recursive function
elif sequences_taps[i] == []:
sequences_taps.__delitem__(i)
elif not (type(sequences_taps[i]) in (list,tuple)):
sequences_taps[i] = [sequences_taps[i]]
# update outputs_taps[idx] to contain -1 if it is not defined
for i in xrange(n_outs):
if not outputs_taps.has_key(i):
outputs_taps.update({i:[-1]})
elif outputs_taps[i] == []:
outputs_taps.__delitem__(i)
elif not(type(outputs_taps[i]) in (list,tuple)):
outputs_taps[i] = [outputs_taps[i]]
'''
# update keep_outputs list
for i in xrange(n_outs):
if not keep_outputs.has_key(i):
keep_outputs[i] = True
elif not keep_outputs[i]:
if outputs_taps[i] != [-1]:
keep_outputs[i] = True
warning('You need to keep past value of outputs if you use'\
'past taps of output different from -1')
'''
keep_outputs = [ 0 for i in xrange(n_outs)]
# create theano inputs for the recursive function
args = []
for (i,seq) in enumerate(seqs):
if sequences_taps.has_key(i):
for k in xrange(len(sequences_taps[i])):
args += [seq[0].type() ]
for (i,init_out) in enumerate(init_outs):
if outputs_taps.has_key(i):
for k in xrange(len(outputs_taps[i])):
if outputs_taps[i] == [-1]:
args += [init_out.type() ]
else:
args += [init_out[0].type() ]
args += non_seqs
next_outs = fn(*args)
if not (type(next_outs) in (list,tuple)):
next_outs = [next_outs]
# Create the Scan op object
local_op = Scan( (args,next_outs), n_seqs,n_outs,inplace_map,
sequences_taps, outputs_taps, truncate_gradient,
go_backwards, keep_outputs, mode)
# Call the object on the input sequences, initial values for outs,
# and non sequences
return local_op( *( [theano.tensor.as_tensor(n_steps)] \
+ seqs \
+ init_outs \
+ non_seqs))
class Scan(theano.Op):
def __init__(self,(inputs, outputs),n_seqs, n_outs,
inplace_map={}, seqs_taps={}, outs_taps={},
truncate_gradient = -1,
go_backwards = False, keep_outputs = {},
mode = 'FAST_RUN', inplace=False):
'''
:param (inputs,outputs): inputs and outputs Theano variables that
describe the function that is applied recursively
:param n_seqs: number of sequences over which scan will have to iterate
:param n_outs: number of outputs of the scan op
:param inplace_map: see scan function above
:param seqs_taps: see scan function above
:param outs_taps: see scan function above
:param truncate_gradient: number of steps after which scan should truncate
-1 implies no truncation
:param go_bacwards: see scan funcion above
:param keep_outputs: a list of booleans of same size as the number of
outputs; the value at position ``i`` in the list corresponds to the
``i-th`` output, and it tells how many steps (from the end towards
the begining) of the outputs you really need and should return;
given this information, scan can know (if possible) to allocate only
the amount of memory needed to compute that many entries
'''
# check inplace map
for _out,_in in inplace_map.iteritems():
if _out > n_outs:
raise ValueError(('Inplace map reffers to an unexisting'\
'output %d')% _out)
if _in > n_seqs:
raise ValueError(('Inplace map reffers to an unexisting'\
'input sequence %d')%_in)
if (_in >= 0) and (min(seqs_taps[_in]) < 0):
raise ValueError(('Input sequence %d uses past values that '\
'will be overwritten by inplace operation')%_in)
#check sequences past taps
for k,v in seqs_taps.iteritems():
if k > n_seqs:
raise ValueError(('Sequences past taps dictionary reffers to '
'an unexisting sequence %d')%k)
#check outputs past taps
for k,v in outs_taps.iteritems():
if k > n_outs:
raise ValueError(('Sequences past taps dictionary reffers to '
'an unexisting sequence %d')%k)
if max(v) > -1:
raise ValueError(('Can not require future value %d of output' \
' %d')%(k,max(v)))
self.destroy_map = {}
if inplace:
for i in inplace_map.keys():
self.destroy_map.update({i: [inplace_map[i]] } )
self.seqs_taps = seqs_taps
self.outs_taps = outs_taps
self.n_seqs = n_seqs
self.n_outs = n_outs
self.n_args = n_seqs+n_outs+1
self.inplace_map = inplace_map
self.keep_outputs = keep_outputs
self.inplace = inplace
self.inputs = inputs
self.outputs = outputs
self.truncate_gradient = truncate_gradient
self.go_backwards = go_backwards
self.fn = theano.function(inputs,outputs, mode = mode)
g_y = [outputs[0].type()]
def compute_gradient(y, g_y):
gmap = theano.gradient.grad_sources_inputs( \
[(y,g_y)], theano.gof.graph.inputs([y]), False)
def zero(p):
return theano.tensor.TensorConstant(theano.tensor.TensorType(\
dtype=p.type.dtype, broadcastable=[]),
theano._asarray(0,dtype = p.type.dtype))
return [gmap.get(p, zero(p)) for p in inputs]
g_args = compute_gradient( outputs[0], g_y[-1])
# for all outputs compute gradients and then sum them up
for y in outputs[1:]:
g_y += [y.type()]
g_args_y = compute_gradient( y,g_y[-1])
for i in xrange(len(g_args)):
g_args[i] += g_args_y[i]
self.g_ins = g_y+inputs
self.g_outs = g_args
def make_node(self,*inputs):
n_args = len(inputs)
if n_args < self.n_args :
err = 'There should be at least '+str(self.n_args)+ 'arguments'
raise ValueError(err)
# Create list of output datatypes
out_types = []
for i in xrange(self.n_seqs+1, self.n_seqs+self.n_outs+1):
if not (inputs[i] == []):
if self.outs_taps.has_key(i-1-self.n_seqs) and \
(self.outs_taps[i-self.n_seqs-1]==[-1]) and \
(self.keep_outputs[i-1-self.n_seqs] != 1):
out_types += [theano.tensor.Tensor(dtype=inputs[i].dtype, \
broadcastable=(False,)+inputs[i].broadcastable)()]
elif not self.keep_outputs[i-1-self.n_seqs] == 1:
out_types += [ inputs[i].type()]
else:
out_types += [theano.tensor.Tensor(dtype=inputs[i].dtype,\
broadcastable=(False,)+inputs[i].broadcastable[1:])()]
else:
raise ValueError(('You need to provide initial state for outputs'
' such that scan can infer what dataype they are'))
return theano.Apply(self,inputs, out_types)
def __eq__(self,other):
rval = type(self) == type(other)
if rval:
rval = (self.inputs == other.inputs) and \
(self.outputs == other.outputs) and \
(self.keep_outputs == other.keep_outputs) and \
(self.g_ins == other.g_ins) and \
(self.g_outs == other.g_outs) and \
(self.seqs_taps == other.seqs_taps) and \
(self.outs_taps == other.outs_taps) and \
(self.inplace_map == other.inplace_map) and \
(self.n_seqs == other.n_seqs) and\
(self.inplace == other.inplace) and\
(self.go_backwards == other.go_backwards) and\
(self.truncate_gradient == other.truncate_gradient) and\
(self.n_outs == other.n_outs) and\
(self.n_args == other.n_args)
return rval
def __hash__(self):
return hash(type(self)) ^ \
hash(self.n_seqs) ^ \
hash(self.n_outs) ^ \
hash(self.inplace) ^\
hash(self.go_backwards) ^\
hash(self.truncate_gradient) ^\
hash(self.n_args) ^ \
hash_listsDictsTuples(self.outputs) ^ \
hash_listsDictsTuples(self.inputs) ^ \
hash_listsDictsTuples(self.g_ins) ^ \
hash_listsDictsTuples(self.g_outs) ^ \
hash_listsDictsTuples(self.seqs_taps) ^\
hash_listsDictsTuples(self.outs_taps) ^\
hash_listsDictsTuples(self.keep_outputs)
def perform(self,node,args, outs):
n_steps = 0
if (self.n_seqs ==0 ) and (args[0] == 0):
raise ValueError('Scan does not know over how many steps it '
'should iterate! No input sequence or number of steps to '
'iterate given !')
if (args[0] != 0):
n_steps = args[0]
for i in xrange(self.n_seqs):
if self.seqs_taps.has_key(i):
# compute actual length of the sequence ( we need to see what
# past taps this sequence has, and leave room for them
seq_len = args[i+1].shape[0] + min(self.seqs_taps[i])
if max( self.seqs_taps[i]) > 0:
# using future values, so need to end the sequence earlier
seq_len -= max(self.seqs_taps[i])
if n_steps == 0 :
# length of the sequences, leaving room for the largest
n_steps = seq_len
if seq_len != n_steps :
warning(('Input sequence %d has a shorter length then the '
'expected number of steps %d')%(i,n_steps))
n_steps = min(seq_len,n_steps)
# check if we deal with an inplace operation
inplace_map = self.inplace_map
if not self.inplace: #if it was not optimized to work inplace
inplace_map = {}
# check lengths of init_outs
for i in xrange(self.n_seqs+1, \
self.n_seqs+self.n_outs+1):
if self.outs_taps.has_key(i-self.n_seqs-1):
if self.outs_taps[i-self.n_seqs-1] == [-1]:
args[i] = numpy.array([args[i]])
req_size = abs(min(self.outs_taps[i-self.n_seqs-1]))-1
if args[i].shape[0] < req_size:
warning(('Initial state for output %d has fewer values then '
'required by the maximal past value %d. Scan will use 0s'
' for missing values')%(i-self.n_iterable-1,req_size))
self.n_steps = n_steps
y = self.scan(self.fn, args[1:],self.n_seqs, self.n_outs,
self.seqs_taps, self.outs_taps, n_steps, self.go_backwards,
inplace_map)
# write to storage
for i in xrange(self.n_outs):
outs[i][0]=y[i]
def scan(self,fn, args, n_seqs, n_outs, seqs_taps, outs_taps, n_steps,
go_backwards, inplace_map):
y = []
for i in xrange(n_outs):
if inplace_map.has_key(i) and (inplace_map[i] >= 0):
y += [args[inplace_map[i]]]
else:
if self.keep_outputs[i] < 1 :
y_shape = (n_steps,)+args[i+n_seqs].shape[1:]
elif self.keep_outputs[i] == 1:
y_shape = args[i+n_seqs].shape[1:]
else:
y_shape = (self.keep_outputs[i],)+args[i+n_seqs].shape[1:]
y += [numpy.empty(y_shape,
dtype=args[i+n_seqs].dtype)]
seqs_mins = {}
for j in xrange(n_seqs):
if seqs_taps.has_key(j):
seqs_mins.update({j: min(seqs_taps[j])})
outs_mins = {}
initOuts_size = {}
for j in xrange(n_outs):
if outs_taps.has_key(j):
outs_mins.update({j: min(outs_taps[j])})
initOuts_size.update({j: args[n_seqs+j].shape[0]})
for i in xrange(n_steps):
fn_args = []
# sequences over which scan iterates
# check to see if we are scaning them backwards or no
_i = i
if go_backwards:
_i = n_steps-1-i
for j in xrange(n_seqs):
if seqs_taps.has_key(j):
ls_taps = seqs_taps[j]
min_tap = seqs_mins[j]
for tap_value in ls_taps:
k = _i - min_tap + tap_value
fn_args += [args[j][k]]
# past values of outputs
for j in xrange(n_outs):
if outs_taps.has_key(j):
ls_taps = outs_taps[j]
min_tap = outs_mins[j]
sz = initOuts_size[j]
for tap_value in ls_taps:
if i + tap_value < 0:
k = i + sz + tap_value
if k < 0:
# past value not provided.. issue a warning and use 0s
fn_args += [numpy.zeros(args[j+n_seqs][0].shape)]
warning(('Past value %d for output %d not given in inital '
'out') % (j,tap_value))
else:
fn_args += [args[j+n_seqs][k]]
else:
if self.keep_outputs[j] < 1:
fn_args += [y[j][i + tap_value]]
elif self.keep_outputs[j] == 1:
fn_args += [y[j] ]
else:
raise NotImplementedError('in the near future')
# get the non-iterable sequences
fn_args += list(args[(n_seqs+n_outs):])
# compute output
something = fn(*fn_args)
#update outputs
for j in xrange(n_outs):
if self.keep_outputs[j] <1:
y[j][i] = something[j]
elif self.keep_outputs[j] == 1:
y[j] = something[j]
else:
raise NotImplementedError('in the near future')
return y
def grad(self, args, g_outs):
if True:
#((self.updates.keys() != []) or (self.inplace_map.keys() != [])\
# or numpy.any(self.keep_outputs)):
# warning('Can not compute gradients if inplace or updates ' \
# 'are used or if you do not keep past value of outputs.'\
# 'Use force_gradient if you know for sure '\
# 'that the gradient can be computed automatically.')
warning('Gradient not fully tested yet !')
return [None for i in args]
else:
# forward pass
y = self(*args)
if not( type(y) in (list,tuple)):
y = [y]
# backwards pass
for i in xrange(len(y)):
if g_outs[i] == None:
g_outs[i] = theano.tensor.zeros_like(y[i])
g_args = [self.n_steps]+g_outs + y
# check if go_backwards is true
if self.go_backwards:
for seq in args[1:self.n_seqs]:
g_args += [seq[::-1]]
else:
g_args += args[1:self.n_seqs]
g_args += args[1+self.n_seqs: ]
g_scan = ScanGrad((self.g_ins,self.g_outs), self.n_seqs, \
self.n_outs,self.seqs_taps, self.outs_taps,
self.truncate_gradient)
return g_scan(g_args)
@gof.local_optimizer([None])
def scan_make_inplace(node):
op = node.op
if isinstance(op, Scan) and (not op.inplace) \
and (op.inplace_map.keys() != []):
return Scan((op.inputs, op.outputs) , op.n_seqs, \
op.n_outs, op.inplace_map, op.seqs_taps, op.outs_taps, \
op.force_gradient, op.truncate_gradient, \
op.go_backwards, inplace=True \
).make_node(*node.inputs).outputs
return False
optdb.register('scan_make_inplace', opt.in2out(scan_make_inplace,\
ignore_newtrees=True), 75, 'fast_run', 'inplace')
class ScanGrad(theano.Op):
"""Gradient Op for Scan"""
def __init__(self,(g_ins, g_outs) , n_seqs, n_outs,
seqs_taps = {}, outs_taps= {}, truncate_gradient = -1):
self.grad_fn = theano.function(g_ins, g_outs)
self.inputs = g_ins
self.outputs = g_outs
self.n_seqs = n_seqs
self.truncate_gradient = truncate_gradient
self.n_outs = n_outs
self.seqs_taps = seqs_taps
self.outs_taps = outs_taps
self.destroy_map = {}
def __eq__(self,other):
rval = type(self) == type(other)
if rval:
rval = (self.inputs == other.inputs) and \
(self.outputs == other.outputs) and \
(self.n_seqs == other.n_seqs) and \
(self.n_outs == other.n_outs) and \
(self.truncate_gradient == other.truncate_gradient) and\
(self.seqs_taps == other.seqs_taps) and \
(self.outs_taps == other.outs_taps)
return rval
def __hash__(self):
return hash(type(self)) ^ \
hash(self.n_seqs) ^ \
hash(self.n_outs) ^ \
hash(self.truncate_gradient) ^\
hash_list(self.inputs) ^ \
hash_list(self.outputs) ^ \
hash_dict(self.seqs_taps) ^ \
hash_dict(self.outs_taps)
def make_node(self, *args):
# input of the gradient op :
# | g_outs | y | seqs | outs | non_seqs |
# | n_outs | n_outs | n_seqs | n_outs | unknown |
# return
# | grad of seqs | grad of outs | grad of non_seqs |
# | n_seqs | n_outs | unknown |
return theano.Apply(self, list(args),
[i.type() for i in args[1+2*self.n_outs:] ])
def perform(self, node, args, storage):
# get scan inputs
n_steps = args[0]
inputs = args[2*self.n_outs+1:]
seqs = inputs[:self.n_seqs]
seeds = inputs[self.n_seqs:self.n_seqs+self.n_outs]
non_seqs = inputs[self.n_outs+self.n_seqs:]
# generate space for gradient
g_seqs = [numpy.zeros_like(k) for k in seqs]
g_seeds = [numpy.zeros_like(k) for k in seeds]
g_non_seqs = [numpy.zeros_like(k) for k in non_seqs]
# get gradient from above
g_outs = args[:self.n_outs]
# get the output of the scan operation
outs = args[self.n_outs:2*self.n_outs]
# go back through time to 0 or n_steps - truncate_gradient
lower_limit = n_steps - self.truncate_gradient
if lower_limit > n_steps-1:
the_range = xrange(n_steps-1,-1,-1)
elif lower_limit < -1:
the_range = xrange(n_steps-1,-1,-1)
else:
the_range = xrange(n_steps-1, lower_limit,-1)
seqs_mins = {}
for j in xrange(self.n_seqs):
if self.seqs_taps.has_key(j):
seqs_mins.update({j: min(self.seqs_taps[j])})
outs_mins = {}
seed_size = {}
for j in xrange(self.n_outs):
if self.outs_taps.has_key(j):
outs_mins.update({j: min(self.outs_taps[j])})
seed_size.update({j: g_seeds[j].shape[0]})
for i in the_range:
# time slice of inputs
_ins = []
for j in xrange(self.n_seqs):
if self.seqs_taps.has_key(j):
ls_taps = self.seqs_taps[j]
min_tap = seqs_mins[j]
for tap_value in ls_taps:
k = i - min_tap + tap_value
_ins += [ins[j][k]]
# time slice of outputs + taps
_outs = []
for j in xrange(self.n_outs):
if self.outs_taps.has_key(j):
ls_taps = self.outs_taps[j]
min_tap = outs_mins[j]
seed_sz = seed_size[j]
for tap_value in ls_taps:
if i + tap_value < 0:
k = i + seed_sz + tap_value
if k < 0 :
#past value not provided .. issue a warning and use 0
_outs += [numpy.zeros(seeds[j][0].shape)]
warning('Past value %d for output $d not given' \
%(j,tap_value))
else:
_outs += [seeds[j][k]]
else:
_outs += [outs[j][i + tap_value]]
g_out = [arg[i] for arg in g_outs]
grad_args = g_out + _ins + _outs + non_seqs
grads=self.grad_fn(*grad_args)
# get gradient for inputs
pos = 0
for j in xrange(self.n_seqs):
if self.seqs_taps.has_key(j):
ls_taps = self.seqs_taps[j]
min_tap = seqs_mins[j]
for tap_value in ls_taps :
k = i - min_tap + tap_value
g_ins[j][k] += grads[pos]
pos += 1
# get gradient for outputs
for j in xrange(self.n_outs):
if self.outs_taps.has_key(j):
ls_taps = self.outs_taps[j]
min_tap = outs_mins[j]
seed_sz = seed_size[j]
for tap_value in ls_taps:
if i+tap_value < 0 :
k = i + seed_sz + tap_value
if k > 0 :
g_seeds[j][k] += grads[pos]
pos += 1
for j in xrange(len(g_non_seqs)):
g_non_seqs[j] += grads[j+pos]
# return the gradient
for i,v in enumerate(g_ins + g_seeds+ g_non_seqs):
storage[i][0] = v
import unittest
import theano
import theano.sandbox.scan
import random
import numpy.random
from theano.tests import unittest_tools as utt
def verify_grad(op, pt, n_tests=2, rng=None, eps = None, tol = None,
mode = None, cast_to_output_type = False):
pt = [numpy.array(p) for p in pt]
_type_tol = dict( float32=1e-2, float64=1e-4)
if tol is None:
tol = max(_type_tol[str(p.dtype)] for p in pt)
if rng is None:
rng = numpy.random
utt.seed_rng()
def function(inputs, outputs):
if mode is None:
f = theano.function(inputs, outputs, accept_inplace=True)
else:
f = theano.function(inputs,outputs,accept_inplace=True, mode=mode)
return f
for test_num in xrange(n_tests):
tensor_pt=[theano.tensor.value(p.copy(),name='input %i'%i)
for i,p in enumerate(pt)]
# op outputs
o_outputs = op(*tensor_pt)
if not (type(o_outputs) in (list,tuple)):
o_outputs = [ o_outputs ]
o_fn = function(tensor_pt, o_outputs)
o_fn_outs = o_fn(*[p.copy() for p in pt])
if not type(o_fn_outs) in (list,tuple):
o_fn_outs = [o_fn_outs]
random_projection = rng.rand(*o_fn_outs[0].shape)
if cast_to_output_type:
random_projection = numpy.array(random_projection,
dtype = o_fn_outs[0].dtype)
t_r = theano.tensor.as_tensor_variable(random_projection)
cost = theano.tensor.sum( t_r * o_outputs[0])
for i, o in enumerate(o_fn_outs[1:] ):
random_projection = rng.rand(*o.shape)
if cast_to_output_type:
random_projection = numpy.array(random_projection,
dtype=o_outputs[i].dtype)
t_r = theano.tensor.as_tensor_variable(random_projection)
cost += theano.tensor.sum( t_r * o_outputs[i])
cost_fn = function(tensor_pt, cost)
num_grad = theano.tensor.numeric_grad(cost_fn,[p.copy() for p in pt],eps)
g_cost = theano.tensor.as_tensor_variable(1.0,name='g_cost')
if cast_to_output_type:
g_cost = cast(g_cost, o_output.dtype)
symbolic_grad = theano.tensor.grad(cost, tensor_pt, g_cost)
grad_fn = function(tensor_pt,symbolic_grad)
analytic_grad = grad_fn(*[p.copy() for p in pt])
if not isinstance(analytic_grad, (list,tuple)):
analytic_grad = [analytic_grad]
max_err, max_err_pos = num_grad.max_err(analytic_grad)
if max_err > tol:
raise Exception(theano.tensor.verify_grad.E_grad,
(max_err, tol, max_err_pos))
def compareArrays(a,b):
if type(a) in (list,tuple):
a = numpy.array(a)
if type(b) in (list, tuple):
b = numpy.array(b)
return numpy.all( abs(a-b) < 1e-5)
class T_Scan(unittest.TestCase):
def setUp(self):
utt.seed_rng()
# generator network, only one output , type scalar ; no sequence or
# non sequence arguments
def test_1(self):
def f_pow2(x_tm1):
return 2*x_tm1
s = theano.tensor.dscalar()
n_steps = theano.tensor.dscalar()
Y = theano.sandbox.scan.scan(f_pow2, [],s, [],n_steps = n_steps)
f1 = theano.function([s,n_steps], Y)
assert(compareArrays(f1(1,3), [2,4,8]))
# simple rnn, one input, one state, weights for each; input/state are
# vectors, weights are scalars
def test_2(self):
def f_rnn(u_t,x_tm1,W_in, W):
return u_t*W_in+x_tm1*W
u = theano.tensor.dvector()
x0 = theano.tensor.dscalar()
W_in = theano.tensor.dscalar()
W = theano.tensor.dscalar()
Y = theano.sandbox.scan.scan(f_rnn, u,x0,[W_in,W])
f2 = theano.function([u,x0,W_in,W], Y)
v_u = numpy.array([1.,2.,3.,4.])
v_x0 = numpy.array(1)
v_out = numpy.array([1.1,1.3,1.6,2.])
assert(compareArrays( f2(v_u,v_x0,.1,1), v_out ) )
# simple rnn, one input, one state, weights for each; input/state are
# vectors, weights are scalars; using shared variables
def test_3(self):
u = theano.tensor.dvector()
x0 = theano.tensor.dscalar()
W_in = theano.shared(.1, name = 'w_in')
W = theano.shared(1., name ='w')
def f_rnn_shared(u_t,x_tm1):
return u_t*W_in+x_tm1*W
Y = theano.sandbox.scan.scan(f_rnn_shared, u,x0,[])
f3 = theano.function([u,x0], Y)
v_u = numpy.array([1.,2.,3.,4.])
v_x0 = numpy.array(1.)
v_out = numpy.array([1.1,1.3,1.6,2.])
assert(compareArrays(f3(v_u,v_x0),v_out))
# some rnn with multiple outputs and multiple inputs; other dimension
# instead of scalars/vectors
def test_4(self):
W_in2 = theano.shared(numpy.array([1.,2.]), name='win2')
W = theano.shared(numpy.array([[2.,1.],[1.,1.]]), name='w')
W_out = theano.shared(numpy.array([.5,1.]), name = 'wout')
W_in1 = theano.tensor.dmatrix('win')
u1 = theano.tensor.dmatrix('u1')
u2 = theano.tensor.dvector('u2')
x0 = theano.tensor.dvector('x0')
y0 = theano.tensor.dscalar('y0')
def f_rnn_cmpl(u1_t, u2_t, x_tm1, y_tm1, W_in1):
return [theano.dot(u1_t,W_in1) + u2_t* W_in2 + \
theano.dot(x_tm1, W), theano.dot(x_tm1, W_out)]
Y = theano.sandbox.scan.scan(f_rnn_cmpl,[u1,u2],[x0,y0],W_in1)
f4 = theano.function([u1,u2,x0,y0,W_in1], Y)
v_u1 = numpy.array([[1.,2.],[1.,2.],[1.,2.]])
v_u2 = numpy.array([1.,2.,3.])
v_x0 = numpy.array([0.,0.])
v_y0 = numpy.array(1)
v_Win1 = numpy.array([[1.,1.],[1.,1.]])
v_x = numpy.array([[4.,5.],[18.,16.],[58.,43.]])
v_y = numpy.array([0.,7.,25.])
(x,y) = f4( v_u1, v_u2, v_x0, v_y0, v_Win1)
assert( compareArrays(x,v_x))
assert( compareArrays(y,v_y))
'''
# basic ESN using updates
def test_5(self):
W_in = theano.shared(numpy.array([1.,1.]), name='win')
W = theano.shared(numpy.array([[.1,0.],[.0,.1]]),name='w')
W_out= theano.shared(numpy.array([.5,1.]), name='wout')
u = theano.tensor.dvector('u')
x = theano.shared(numpy.array([0.,0.]),'x')
y0 = theano.tensor.dscalar('y0')
def f_ESN(u_t):
return ( theano.dot(x,W_out), \
{ x: W_in*u_t + theano.dot(x,W) } )
Y = theano.sandbox.scan.scan(f_ESN,u,y0,[],outputs_taps={0:[]})
f5 = theano.function([u,y0],Y)
v_u = numpy.array([1.,2.,3.])
v_y0 = numpy.array(0.)
v_out = numpy.array([0.,1.5,3.15])
out = f5( v_u, v_y0 )
assert( compareArrays(v_out, out))
# basic ESN using updates ; moving backwards
def test_6(self):
W_in = theano.shared(numpy.array([1.,1.]), name='win')
W = theano.shared(numpy.array([[.1,0.],[.0,.1]]),name='w')
W_out= theano.shared(numpy.array([.5,1.]), name='wout')
u = theano.tensor.dvector('u')
x = theano.shared(numpy.array([0.,0.]),'x')
y0 = theano.tensor.dvector('y0')
def f_ESN(u_t):
return ( theano.dot(x,W_out), \
{ x: W_in*u_t + theano.dot(x,W) } )
Y = theano.sandbox.scan.scan(f_ESN,u,y0,[],outputs_taps={0:[]}, \
go_backwards = True)
f6 = theano.function([u,y0],Y)
v_u = numpy.array([1.,2.,3.])
v_y0 = numpy.array([0])
v_out = numpy.array([0.,4.5,3.45])
out = f6(v_u, v_y0)
assert( compareArrays(out, v_out))
'''
# simple rnn, one input, one state, weights for each; input/state are
# vectors, weights are scalars; using shared variables and past
# taps (sequences and outputs)
def test_7(self):
u = theano.tensor.dvector()
x0 = theano.tensor.dvector()
W_in = theano.shared(.1, name = 'w_in')
W = theano.shared(1., name ='w')
def f_rnn_shared(u_tm2, x_tm1, x_tm2):
return u_tm2*W_in+x_tm1*W+x_tm2
Y = theano.sandbox.scan.scan(f_rnn_shared, u,x0, [], \
sequences_taps = {0:[-2]}, outputs_taps = {0:[-1,-2]})
f7 = theano.function([u,x0], Y)
v_u = numpy.asarray([1.,2.,3.,4.])
v_x0 = numpy.asarray([1.,2.])
out = numpy.asarray([3.1,5.3])
assert (compareArrays( out, f7(v_u, v_x0)))
# simple rnn, one input, one state, weights for each; input/state are
# vectors, weights are scalars; using shared variables and past
# taps (sequences and outputs) and future taps for sequences
def test_8(self):
u = theano.tensor.dvector()
x0 = theano.tensor.dvector()
W_in = theano.shared(.1, name = 'w_in')
W = theano.shared(1., name ='w')
def f_rnn_shared(u_tm2,u_tp2, x_tm1, x_tm2):
return (u_tm2+u_tp2)*W_in+x_tm1*W+x_tm2
Y = theano.sandbox.scan.scan(f_rnn_shared, u,x0, [], \
sequences_taps = {0:[-2,2]}, outputs_taps = {0:[-1,-2]})
f8 = theano.function([u,x0], Y)
v_u = numpy.array([1.,2.,3.,4.,5.,6.])
v_x0 = numpy.array([1.,2.])
out = numpy.array([3.6, 6.4])
assert (compareArrays( out, f8(v_u, v_x0) ) )
'''
# simple rnn ; compute inplace
def test_9(self):
u = theano.tensor.dvector()
mu = theano.Param( u, mutable = True)
x0 = theano.tensor.dvector()
W_in = theano.shared(.1)
W = theano.shared(1.)
def f_rnn_shared(u_t, x_tm1):
return (u_t*W_in + x_tm1*W, {})
Y = theano.sandbox.scan.scan(f_rnn_shared, u, x0,[], \
inplace_map={0:0} )
f9 = theano.function([mu,x0], Y , #mode = 'FAST_RUN')
mode = 'DEBUG_MODE')
v_u = numpy.array([1.,2.,3.])
v_x0 = numpy.array([1.])
out = f9(v_u, v_x0)
v_out = numpy.array([1.1,1.3,1.6])
assert (compareArrays(out, v_out))
print v_u
assert (compareArrays(v_u, out))
'''
# test gradient simple network
def test_10(self):
pass
'''
TO TEST:
- test gradient (one output)
- test gradient (multiple outputs)
- test gradient (go_bacwards)
- test gradient (multiple outputs / some uncomputable )
- test gradient (truncate_gradient)
- test gradient (force_gradient)
- test_gradient (taps past/future)
'''
if __name__ == '__main__':
unittest.main()
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论