提交 de635cb9 authored 作者: Frederic Bastien's avatar Frederic Bastien

merge without conflict.

......@@ -69,8 +69,8 @@ FancyModule = Module
from printing import \
pprint, pp
import scan as scan_module
from scan import scan, map, reduce, foldl, foldr, Scan, ScanGrad
import scan_module
from scan_module import scan, map, reduce, foldl, foldr, clone
import tensor
import scalar
......
......@@ -6,6 +6,242 @@ from theano.compile import orig_function, In, Out
from theano.compile.sharedvalue import SharedVariable, shared
import numpy # for backport to 2.4, to get any().
def rebuild_collect_shared( outputs
                          , inputs = None
                          , replace = None
                          , updates = None
                          , rebuild_strict = True
                          , copy_inputs_over = True
                          , no_default_updates = False
                          ):
    """
    Function that allows replacing subgraphs of a computational
    graph.

    It returns a set of dictionaries and lists which collect (partial?)
    different information about shared variables. This info is required by
    `pfunc`.

    :type outputs: list of Theano Variables (or Theano expressions)
    :param outputs: list of Theano variables or expressions representing the
                    outputs of the computational graph

    :type inputs: list of Theano Variables (or Theano expressions)
    :param inputs: list of Theano variables or expressions representing the
                   inputs of the computational graph (or None)

    :type replace: dict
    :param replace: dictionary describing which subgraphs should be
                    replaced by what

    :type updates: dict
    :param updates: dictionary describing updates expressions for shared
                    variables

    :type rebuild_strict: bool
    :param rebuild_strict: flag, if true the type of all inputs should be
                           the same as the one for the current node

    :type copy_inputs_over: bool
    :param copy_inputs_over: flag; if False it will clone inputs

    :type no_default_updates: either bool or list of Variables
    :param no_default_updates: if True, do not perform any automatic update
                               on Variables. If False (default), perform
                               them all. Else, perform automatic updates
                               on all Variables that are neither in
                               "updates" nor in "no_default_updates".
    """
    ## This function implements similar functionality as graph.clone
    ## and it should be merged with that
    clone_d = {}
    update_d = {}
    update_expr = []
    # list of shared inputs that are used as inputs of the graph
    shared_inputs = []

    def clone_v_get_shared_updates(v, copy_inputs_over):
        '''
        Clones a variable and its inputs recursively until all are in
        clone_d. Also appends all shared variables met along the way to
        shared_inputs, and their default_update (if applicable) to update_d
        and update_expr.
        '''
        # this co-recurses with clone_a
        assert v is not None
        if v in clone_d:
            return clone_d[v]
        if v.owner:
            # cloning the owner apply registers clones of all its outputs
            # (including v) in clone_d as a side effect
            clone_a(v.owner, copy_inputs_over)
            return clone_d.setdefault(v, v)
        elif isinstance(v, SharedVariable):
            if v not in shared_inputs:
                shared_inputs.append(v)
            if hasattr(v, 'default_update'):
                # Check that v should not be excluded from the default
                # updates list
                if (no_default_updates is False or
                    (isinstance(no_default_updates, list) and
                     v not in no_default_updates)):
                    # Do not use default_update if a "real" update was
                    # provided
                    if v not in update_d:
                        v_update = v.filter_update(v.default_update)
                        if v_update.type != v.type:
                            raise TypeError(
                                ('an update must have the same type as '
                                 'the original shared variable')
                                , (v, v.type, v_update, v_update.type))
                        update_d[v] = v_update
                        update_expr.append((v, v_update))
        if not copy_inputs_over and not isinstance(v, Constant):
            ### Cloning shared variables implies copying their underlying
            ### memory buffer ??
            return clone_d.setdefault(v, v.clone())
        else:
            return clone_d.setdefault(v, v)

    def clone_a(a, copy_inputs_over):
        '''
        Clones an apply node and its inputs recursively until all are in
        clone_d. It co-recurses with clone_v_get_shared_updates.
        '''
        if a is None:
            return None
        if a not in clone_d:
            for i in a.inputs:
                clone_v_get_shared_updates(i, copy_inputs_over)
            clone_d[a] = a.clone_with_new_inputs(
                    [clone_d[i] for i in a.inputs],
                    strict = rebuild_strict)
            for old_o, new_o in zip(a.outputs, clone_d[a].outputs):
                clone_d.setdefault(old_o, new_o)
        return clone_d[a]

    # initialize the clone_d mapping with the replace dictionary
    if replace is None:
        replace = []
    try:
        replace_pairs = replace.items()
    except AttributeError:
        # `replace` is already a sequence of (original, replacement) pairs
        replace_pairs = replace

    for v_orig, v_repl in replace_pairs:
        if not isinstance(v_orig, Variable):
            raise TypeError('given keys must be Variable', v_orig)
        if not isinstance(v_repl, Variable):
            v_repl = shared(v_repl)
        assert v_orig not in clone_d
        clone_d[v_orig] = clone_v_get_shared_updates(v_repl,
                                                     copy_inputs_over)

    if inputs is None:
        inputs = []

    def clone_inputs(i):
        # explicit inputs are cloned only when copy_inputs_over is False
        if not copy_inputs_over:
            return clone_d.setdefault(i, i.clone())
        else:
            return clone_d.setdefault(i, i)

    input_variables = [clone_inputs(i) for i in inputs]

    # It was decided, as a first step, to prevent shared variables from
    # being used as function inputs. Although it is technically possible,
    # it is also not clear when/how to use the value of that shared
    # variable (is it a default? ignored?, if the shared variable changes,
    # does that function default also change?).
    shared_vars_as_inputs = [v for v in input_variables
                             if isinstance(v, SharedVariable)]
    if shared_vars_as_inputs:
        # Bugfix: report the offending shared variable itself; the previous
        # code interpolated `v`, a stale variable left over from an earlier
        # loop, into the message.
        raise TypeError(('Cannot use a shared variable (%s) as explicit '
                         'input. Consider substituting a non-shared'
                         ' variable via the `givens` parameter')
                        % shared_vars_as_inputs[0])

    # Fill update_d and update_expr with provided updates
    if updates is None:
        updates = []
    for (store_into, update_val) in iter_over_pairs(updates):
        if not isinstance(store_into, SharedVariable):
            raise TypeError('update target must be a SharedVariable'
                            , store_into)
        if store_into in update_d:
            raise ValueError(('this shared variable already has an update '
                              'expression'),
                             (store_into, update_d[store_into]))
        # typically this might be a cast()
        update_val = store_into.filter_update(update_val)
        if update_val.type != store_into.type:
            err_msg = ( 'an update must have the same type as the '
                        'original shared variable(dest, dest.type, '
                        'update_val, update_val.type)')
            err_arg = ( store_into
                      , store_into.type
                      , update_val
                      , update_val.type)
            raise TypeError(err_msg, err_arg)
        update_d[store_into] = update_val
        update_expr.append((store_into, update_val))

    # Elements of "outputs" are here cloned to "cloned_outputs"
    if isinstance(outputs, list):
        cloned_outputs = []
        for v in outputs:
            if isinstance(v, Variable):
                cloned_v = clone_v_get_shared_updates(v, copy_inputs_over)
                cloned_outputs.append(cloned_v)
            elif isinstance(v, Out):
                cloned_v = clone_v_get_shared_updates(v.variable,
                                                      copy_inputs_over)
                cloned_outputs.append(Out(cloned_v, borrow=v.borrow))
            else:
                raise TypeError(('outputs must be theano Variable or '
                                 'Out instances'), v)
            #computed_list.append(cloned_v)
    else:
        if isinstance(outputs, Variable):
            cloned_v = clone_v_get_shared_updates(outputs, copy_inputs_over)
            cloned_outputs = cloned_v
            #computed_list.append(cloned_v)
        elif isinstance(outputs, Out):
            cloned_v = clone_v_get_shared_updates(outputs.variable,
                                                  copy_inputs_over)
            cloned_outputs = Out(cloned_v, borrow=outputs.borrow)
            #computed_list.append(cloned_v)
        elif outputs is None:
            cloned_outputs = [] # TODO: get Function.__call__ to return None
        else:
            raise TypeError(('output must be a theano Variable or Out '
                             'instance (or list of them)')
                            , outputs)

    # Iterate over update_expr, cloning its elements, and updating
    # shared_inputs, update_d and update_expr from the SharedVariables
    # we discover.
    # If the variable to be updated is a shared variable not already
    # in shared_inputs, add it.
    # Note: we extend update_expr while iterating over it.
    i = 0
    while i < len(update_expr):
        v, v_update = update_expr[i]
        cloned_v_update = clone_v_get_shared_updates(v_update,
                                                     copy_inputs_over)
        update_d[v] = cloned_v_update
        if isinstance(v, SharedVariable) and v not in shared_inputs:
            shared_inputs.append(v)
        i += 1

    return ( input_variables, cloned_outputs
           , [clone_d, update_d, update_expr, shared_inputs] )
class Param(object):
def __init__(self, variable, default=None, name=None, mutable=False,
strict=False, allow_downcast=None, implicit=None):
......@@ -93,7 +329,7 @@ def pfunc(params, outputs=None, mode=None, updates=[], givens=[],
# off to compile.function
# (There it will be cloned again, unnecessarily, because it doesn't know that we already
# cloned it.)
#
#
# First, it clones the replacements named in the givens argument, and points each Var1 to
# the clone of Var2.
# Then it sets the inputs in the clone dictionary.
......@@ -111,158 +347,33 @@ def pfunc(params, outputs=None, mode=None, updates=[], givens=[],
and not isinstance(no_default_updates, list):
raise TypeError("no_default_update should be either a boolean or a list")
clone_d = {}
# Updates as list and dictionary.
# They will both store the 'default_update' expressions (where applicable).
# The dictionary (update_d) is used to look up the existence of the keys, and to store
# the final [cloned] update expressions.
# The list of pairs (update_expr) is used to iterate in a consistent order while adding
# new pairs.
update_d = {}
update_expr = []
# list of shared inputs that are used as inputs of the graph
shared_inputs = []
def clone_v_get_shared_updates(v):
'''Clone a variable and its inputs recursively until all are in clone_d.
Also appends all shared variables met along the way to shared_inputs,
and their default_update (if applicable) to update_d and update_expr.
'''
# this method co-recurses with clone_a
assert v is not None
if v in clone_d:
return clone_d[v]
if v.owner:
clone_a(v.owner)
elif isinstance(v, SharedVariable):
if v not in shared_inputs:
shared_inputs.append(v)
if hasattr(v, 'default_update'):
# Check that v should not be excluded from the default updates list
if no_default_updates is False or\
(isinstance(no_default_updates, list) and\
v not in no_default_updates):
# Do not use default_update if a "real" update was provided
if v not in update_d:
v_update = v.filter_update(v.default_update)
if v_update.type != v.type:
raise TypeError('an update must have the same type as the original shared variable',
(v, v.type, v_update, v_update.type))
update_d[v] = v_update
update_expr.append((v, v_update))
return clone_d.setdefault(v, v)
def clone_a(a):
# this method co-recurses with clone_v_get_shared_updates
if a is None:
return None
if a not in clone_d:
for i in a.inputs:
clone_v_get_shared_updates(i)
clone_d[a] = a.clone_with_new_inputs([clone_d[i] for i in a.inputs],
strict = rebuild_strict)
for old_o, new_o in zip(a.outputs, clone_d[a].outputs):
clone_d.setdefault(old_o, new_o)
return clone_d[a]
# initialize the clone_d mapping with the `givens` argument
try:
givens = givens.items() # converts a dictionary to the sort of list that we want.
except:
pass
for v_orig, v_repl in givens:
if not isinstance(v_orig, Variable):
raise TypeError('given keys must be Variable', v_orig)
if not isinstance(v_repl, Variable):
v_repl = shared(v_repl)
assert v_orig not in clone_d
clone_d[v_orig] = clone_v_get_shared_updates(v_repl)
# transform params into theano.compile.In objects.
inputs = [_pfunc_param_to_in(p, allow_downcast=allow_input_downcast)
for p in params]
#Switch inputs to cloned variables
input_variables = [clone_d.setdefault(i.variable, i.variable) for i in inputs]
in_variables = [ input.variable for input in inputs ]
output_vars = rebuild_collect_shared(
outputs
, in_variables
, replace = givens
, updates = updates
, rebuild_strict = True
, copy_inputs_over = True
, no_default_updates = no_default_updates )
# extracting the arguments
input_variables, cloned_outputs, other_stuff = output_vars
clone_d, update_d, update_expr, shared_inputs = other_stuff
for i, iv in zip(inputs, input_variables):
i.variable = iv
# It was decided, as a first step, to prevent shared variables from being
# used as function inputs. Although it is technically possible, it is also not clear
# when/how to use the value of that shared variable (is it a default? ignored?, if the
# shared variable changes, does that function default also change?).
if numpy.any([isinstance(v, SharedVariable) for v in input_variables]):
raise TypeError(('Cannot use a shared variable (%s) as explicit input.'
' Consider substituting a non-shared'
' variable via the `givens` parameter') % v)
# Fill update_d and update_expr with provided updates
for (store_into, update_val) in iter_over_pairs(updates):
if not isinstance(store_into, SharedVariable):
raise TypeError('update target must be a SharedVariable', store_into)
if store_into in update_d:
raise ValueError('this shared variable already has an update expression',
(store_into, update_d[store_into]))
update_val = store_into.filter_update(update_val) # typically this might be a cast()
if update_val.type != store_into.type:
err_msg = 'an update must have the same type as the original shared variable(dest, dest.type, update_val, update_val.type)'
err_arg = (store_into, store_into.type, update_val, update_val.type)
raise TypeError(err_msg, err_arg )
update_d[store_into] = update_val
update_expr.append((store_into, update_val))
# Elements of "outputs" are here cloned to "cloned_outputs"
if isinstance(outputs, list):
cloned_outputs = []
for v in outputs:
if isinstance(v, Variable):
cloned_v = clone_v_get_shared_updates(v)
cloned_outputs.append(cloned_v)
elif isinstance(v, Out):
cloned_v = clone_v_get_shared_updates(v.variable)
cloned_outputs.append(Out(cloned_v, borrow=v.borrow))
else:
raise TypeError('outputs must be theano Variable or Out instances', v)
#computed_list.append(cloned_v)
else:
if isinstance(outputs, Variable):
cloned_v = clone_v_get_shared_updates(outputs)
cloned_outputs = cloned_v
#computed_list.append(cloned_v)
elif isinstance(outputs, Out):
cloned_v = clone_v_get_shared_updates(outputs.variable)
cloned_outputs = Out(cloned_v, borrow=outputs.borrow)
#computed_list.append(cloned_v)
elif outputs is None:
cloned_outputs = [] # TODO: get Function.__call__ to return None
else:
raise TypeError('output must be a theano Variable or Out instance (or list of them)', outputs)
# Iterate over update_expr, cloning its elements, and updating
# shared_inputs, update_d and update_expr from the SharedVariables
# we discover.
# If the variable to be updated is a shared variable not already
# in shared_inputs, add it.
# Note: we extend update_expr while iterating over it.
i = 0
while i<len(update_expr):
v, v_update = update_expr[i]
cloned_v_update = clone_v_get_shared_updates(v_update)
update_d[v] = cloned_v_update
if isinstance(v, SharedVariable) and v not in shared_inputs:
shared_inputs.append(v)
i += 1
for sv in shared_inputs:
if sv in update_d:
si = In(variable=sv, value=sv.container, mutable=True,
borrow=True, update=update_d[sv])
else:
si = In(variable=sv, value=sv.container,
si = In(variable=sv, value=sv.container,
mutable=False, borrow=True)
inputs.append(si)
......@@ -280,7 +391,7 @@ def _pfunc_param_to_in(param, strict=False, allow_downcast=None):
return In(variable=param, strict=strict, allow_downcast=allow_downcast)
elif isinstance(param, Param):
return In(
variable=param.variable,
variable=param.variable,
name=param.name,
value=param.default,
mutable=param.mutable,
......@@ -306,5 +417,6 @@ def iter_over_pairs(pairs):
if isinstance(pairs, dict):
return pairs.iteritems()
else:
return pairs
return pairs
"""
This module provides the Scan Op
Scanning is a general form of recurrence, which can be used for looping.
The idea is that you *scan* a function along some input sequence, producing
an output at each time-step that can be seen (but not modified) by the
function at the next time-step. (Technically, the function can see the
previous K time-steps of your outputs and L time steps (from the past and
future) of your inputs.)
So for example, ``sum()`` could be computed by scanning the ``z+x_i``
function over a list, given an initial state of ``z=0``.
Special cases:
* A *reduce* operation can be performed by returning only the last
output of a ``scan``.
* A *map* operation can be performed by applying a function that
ignores previous steps of the outputs.
Often a for-loop can be expressed as a ``scan()`` operation, and ``scan`` is
the closest that theano comes to looping. The advantage of using ``scan``
over for loops is that it allows the number of iterations to be a part of
the symbolic graph.
The Scan Op should typically be used by calling any of the following
functions: ``scan()``, ``map()``, ``reduce()``, ``foldl()``,
``foldr()``.
"""
__docformat__ = 'restructedtext en'
__authors__ = ( "Razvan Pascanu "
"Frederic Bastien "
"James Bergstra "
"Pascal Lamblin " )
__copyright__ = "(c) 2010, Universite de Montreal"
__contact__ = "Razvan Pascanu <r.pascanu@gmail>"
import logging
import numpy
import theano
import tensor
import misc.safe_asarray as safe_asarray
from tensor import opt, TensorType
import gof
from gof import Optimizer, toolbox, Op, Apply, Variable
from compile import optdb, SharedVariable, function, Param
import compile
import gradient
from gof.python25 import all
# Logging function for sending warning or info
_logger = logging.getLogger('theano.scan')
def warning(*msg):
    """Emit a scan warning: the message parts are joined with spaces."""
    text = ' '.join(msg)
    _logger.warning('WARNING theano.scan: ' + text)
def info(*msg):
    """Emit a scan info message: the message parts are joined with spaces."""
    text = ' '.join(msg)
    _logger.info('INFO theano.scan: ' + text)
# Hashing a dictionary/list/tuple by xoring the hash of each element
def hash_listsDictsTuples(x):
    """
    Recursively hash ``x`` by xor-ing the hashes of its elements.

    dicts contribute the hash of every key and value; lists and tuples
    contribute the hash of every element; any other value is hashed
    directly.  An unhashable leaf contributes 0 (best-effort behaviour).
    """
    hash_value = 0
    if type(x) == dict :
        for k, v in x.iteritems():
            hash_value ^= hash_listsDictsTuples(k)
            hash_value ^= hash_listsDictsTuples(v)
    elif type(x) in (list, tuple):
        for v in x:
            hash_value ^= hash_listsDictsTuples(v)
    else:
        try:
            hash_value ^= hash(x)
        except TypeError:
            # Bugfix: only swallow the "unhashable type" error; the
            # previous bare except hid every other failure as well.
            pass
    return hash_value
# The ``map`` view of Scan Op.
def map( fn
       , sequences
       , non_sequences = None
       , truncate_gradient = -1
       , go_backwards = False
       , mode = None
       , name = None ):
    """
    Similar behaviour as python's map.

    :param fn: The function that ``map`` applies at each iteration step
               (see ``scan`` for more info).
    :param sequences: List of sequences over which ``map`` iterates
                      (see ``scan`` for more info).
    :param non_sequences: List of arguments passed to ``fn``. ``map`` will
                          not iterate over these arguments (see ``scan`` for
                          more info).
    :param truncate_gradient: See ``scan``.
    :param go_backwards: Boolean value that decides the direction of
                         iteration. True means that sequences are parsed
                         from the end towards the beginning, while False
                         is the other way around.
    :param mode: See ``scan``.
    :param name: See ``scan``.
    """
    # A map is simply a scan that never looks at past outputs, hence the
    # empty outputs_info.
    return scan(fn=fn,
                sequences=sequences,
                outputs_info=[],
                non_sequences=non_sequences,
                truncate_gradient=truncate_gradient,
                go_backwards=go_backwards,
                mode=mode,
                name=name)
# The ``reduce`` view of Scan Op.
def reduce( fn
          , sequences
          , outputs_info
          , non_sequences = None
          , go_backwards = False
          , mode = None
          , name = None ):
    """
    Similar behaviour as python's reduce.

    :param fn: The function that ``reduce`` applies at each iteration step
               (see ``scan`` for more info).
    :param sequences: List of sequences over which ``reduce`` iterates
                      (see ``scan`` for more info)
    :param outputs_info: List of dictionaries describing the outputs of
                         reduce (see ``scan`` for more info).
    :param non_sequences: List of arguments passed to ``fn``. ``reduce`` will
                          not iterate over these arguments (see ``scan`` for
                          more info).
    :param go_backwards: Boolean value that decides the direction of
                         iteration. True means that sequences are parsed
                         from the end towards the beginning, while False
                         is the other way around.
    :param mode: See ``scan``.
    :param name: See ``scan``.
    """
    # Makes sure the outputs_info is a list.
    if type(outputs_info) not in (list, tuple):
        outs_info = [outputs_info]
    else:
        outs_info = list(outputs_info)

    for i, out_info in enumerate(outs_info):
        if out_info:
            if not type(out_info) == dict:
                # Specifies that it should return only the last step.
                outs_info[i] = dict(
                    initial = out_info, return_steps = 1, store_steps = 1)
            else:
                # Specifies that it should return only the last step.
                outs_info[i]['store_steps'] = 1
                outs_info[i]['return_steps'] = 1
                # NOTE : If the user asks for more than the last step,
                # it means he does not understand ``reduce``. We could
                # issue a warning in that case
    return scan( fn = fn
               , sequences = sequences
               , outputs_info = outs_info
               , non_sequences = non_sequences
               , go_backwards = go_backwards
               # Bugfix: was truncate_gradient = 1, which truncated BPTT to
               # a single step and gave wrong gradients for a reduction over
               # the whole sequence; -1 means full backpropagation through
               # time (the documented scan default).
               , truncate_gradient = -1
               , mode = mode
               , name = name )
# The ``foldl`` view of Scan Op.
def foldl( fn
         , sequences
         , outputs_info
         , non_sequences = None
         , mode = None
         , name = None ):
    """
    Similar behaviour as haskell's foldl.

    :param fn: The function that ``foldl`` applies at each iteration step
               (see ``scan`` for more info).
    :param sequences: List of sequences over which ``foldl`` iterates
                      (see ``scan`` for more info)
    :param outputs_info: List of dictionaries describing the outputs of
                         reduce (see ``scan`` for more info).
    :param non_sequences: List of arguments passed to `fn`. ``foldl`` will
                          not iterate over these arguments (see ``scan`` for
                          more info).
    :param mode: See ``scan``.
    :param name: See ``scan``.
    """
    # foldl is a reduce that walks the sequences front-to-back.
    return reduce(fn=fn,
                  sequences=sequences,
                  outputs_info=outputs_info,
                  non_sequences=non_sequences,
                  go_backwards=False,
                  mode=mode,
                  name=name)
# The ``foldr`` view of Scan Op.
def foldr( fn
         , sequences
         , outputs_info
         , non_sequences = None
         , mode = None
         , name = None ):
    """
    Similar behaviour as haskell's foldr.

    :param fn: The function that ``foldr`` applies at each iteration step
               (see ``scan`` for more info).
    :param sequences: List of sequences over which ``foldr`` iterates
                      (see ``scan`` for more info)
    :param outputs_info: List of dictionaries describing the outputs of
                         reduce (see ``scan`` for more info).
    :param non_sequences: List of arguments passed to `fn`. ``foldr`` will
                          not iterate over these arguments (see ``scan`` for
                          more info).
    :param mode: See ``scan``.
    :param name: See ``scan``.
    """
    # foldr is a reduce that walks the sequences back-to-front.
    return reduce(fn=fn,
                  sequences=sequences,
                  outputs_info=outputs_info,
                  non_sequences=non_sequences,
                  go_backwards=True,
                  mode=mode,
                  name=name)
#
# QUESTION:
# If the larger (in absolute values) the sequence_taps, the shorter the output
# right? If the sequence_taps = {0: [-10, 10]}, and I pass an input with 22
# rows, then the scan will output something of length <=2 right?
#
# ANSWER:
# Yes, actually it will be exactly 2 ( if there are no other constraints)
def scan( fn
, sequences = None
, outputs_info = None
, non_sequences = None
, n_steps = None
, truncate_gradient = -1
, go_backwards = False
, mode = None
, name = None ):
"""
This function constructs and applies a Scan op to the provided
arguments.
:param fn:
``fn`` is a function that describes the operations involved in one step
of ``scan``. ``fn`` should construct variables describing the output of
one iteration step. It should expect as input theano variables
representing all the time slices of the input sequences and outputs,
and all other arguments given to scan as ``non_sequences``. The order
in which scan passes this variables to ``fn`` is the following :
* all time slices of the first sequence
* all time slices of the second sequence
* ...
* all time slices of the last sequence
* all time slices of the first output
* all time slices of the second otuput
* ...
* all time slices of the last output
* all other arguments (the list given as `non_sequences` to
scan)
The order of the sequences is the same as the one in the list
`sequences` given to scan. The order of the outputs is the same
as the order of ``output_info``. For any sequence or output the
order of the time slices is the same as the order of the time
taps provided. For example if one writes the following :
.. code-block:: python
scan(fn, sequences = [ dict( Sequence1, taps = [-3,2,-1])
, Sequence2
, dict( Sequence3, taps = 3) ]
, outputs_info = [ dict( Output1, taps = [-3,-5])
, dict( Output2, taps = None)
, Output3 ]
, non_sequences = [ Argument1, Argument2])
``fn`` should expect the following arguments in this given order:
#. ``Sequence1[t-3]``
#. ``Sequence1[t+2]``
#. ``Sequence1[t-1]``
#. ``Sequence2[t]``
#. ``Sequence3[t+3]``
#. ``Output1[t-3]``
#. ``Output1[t-5]``
#. ``Output3[t-1]``
#. ``Argument1``
#. ``Argument2``
The list of ``non_sequences`` can also contain shared variables
used in the function, though ``scan`` is able to figure those
out on its own so they can be skipped. For the clarity of the
code we recommend though to provide them to scan.
The function is expected to return two things. One is a list of
outputs ordered in the same order as ``outputs_info``, with the
difference that there should be only one output variable per
output initial state (even if no tap value is used). Secondly
`fn` should return an update dictionary ( that tells how to
update any shared variable after each iteration ste). The
dictionary can optionally be given as a list of tuples. There is
no constraint on the order of these two lists, ``fn`` can return
either ``(outputs_list, update_dictionary)`` or ``(update_dictionary,
outputs_list)`` or just one of the two (in case the other is
empty).
:param sequences:
``sequences`` is the list of Theano variables or dictionaries
describing the sequences ``scan`` has to iterate over. If a
sequence is given as wrapped in a dictionary a set of optional
information can be provided about the sequence. The dictionary
should have the following keys:
* ``input`` (*mandatory*) -- Theano variable representing the
sequence.
* ``taps`` -- Temporal taps of the sequence required by ``fn``.
They are provided as a list of integers, where a value ``k`` implies
that at iteration step ``t`` scan will pass to ``fn`` the slice
``t+k``. Default value is ``[0]``
Any Theano variable in the list ``sequences`` is automatically
wrapped into a dictionary where ``taps`` is set to ``[0]``.
:param outputs_info:
``outputs_info`` is the list of Theano variables or dictionaries
describing the initial state of the outputs computed
recurrently. When this initial state is given as a dictionary,
optional information can be provided about the output corresponding
to these initial states. The dictionary should have the following
keys:
* ``initial`` -- Theano variable that represents the initial
state of a given output. In case the output is not computed
recursively (think of a map) and does not require an initial
state this field can be skipped. Given that only the previous
time step of the output is used by ``fn`` the initial state
should have the same shape as the output. If multiple time
taps are used, the initial state should have one extra
dimension that should cover all the possible taps. For example
if we use ``-5``, ``-2`` and ``-1`` as past taps, at step 0,
``fn`` will require (by an abuse of notation) ``output[-5]``,
``output[-2]`` and ``output[-1]``. This will be given by
the initial state, which in this case should have the shape
(5,)+output.shape. If this variable containing the initial
state is called ``init_y`` then ``init_y[0]`` *corresponds to*
``output[-5]``; ``init_y[1]`` *corresponds to* ``output[-4]``;
``init_y[2]`` corresponds to ``output[-3]``; ``init_y[3]``
coresponds to ``output[-2]``; ``init_y[4]`` corresponds to
``output[-1]``. While this order might seem strange, it comes
naturally from splitting an array at a given point. Assume that
we have a array ``x``, and we choose ``k`` to be time step
``0``. Then our initial state would be ``x[:k]``, while the
output will be ``x[k:]``. Looking at this split, elements in
``x[:k]`` are ordered exactly like those in ``init_y``.
* ``taps`` -- Temporal taps of the output that will be passed to
``fn``. They are provided as a list of *negative* integers,
where a value ``k`` implies that at iteration step ``t`` scan will
pass to ``fn`` the slice ``t+k``.
* ``inplace`` -- DEPRECATED. Previously, one could specify with this
option whether the output should overwrite some particular input,
but it is now inferred automatically. If you specify this option
it will be ignored.
* ``return_steps`` -- Integer representing the number of steps
to return for the current steps. For example, if ``k`` is
provided, ``scan`` will return ``output[-k:]``. This is meant as a
hint, based on ``k`` and the past taps of the outputs used, scan
can be smart about the amount of memory it requires to store
intermediate results. If not given, or ``0``, ``scan`` will return
all computed steps.
* ``store_steps`` -- Integer representing the number of
intermediate steps ``scan`` should use for a given output. Use
this key only if you really know what you are doing. In general
it is recommended to let scan decide for you the amount of memory
it should use.
``scan`` will follow this logic if partial information is given:
* If an output is not wrapped in a dictionary, ``scan`` will wrap
it in one assuming that you use only the last step of the output
(i.e. it makes your tap value list equal to [-1]) and that it is
not computed inplace.
* If you wrap an output in a dictionary and you do not provide any
taps but you provide an initial state it will assume that you are
using only a tap value of -1.
* If you wrap an output in a dictionary but you do not provide any
initial state, it assumes that you are not using any form of
taps.
* If you provide ``None`` instead of a variable or a dictionary
``scan`` assumes that you will not use any taps for this output
(like for example in case of a map)
If ``outputs_info`` is an empty list or None, ``scan`` assumes
that no tap is used for any of the outputs. If information is
provided just for a subset of the outputs an exception is
raised (because there is no convention on how scan should map
the provided information to the outputs of ``fn``)
:param non_sequences:
``non_sequences`` is the list of arguments that are passed to
``fn`` at each step. It is not necessary to list shared variables
used in ``fn`` here, since they will be identified automatically.
:param n_steps:
``n_steps`` is the number of steps to iterate given as an int
or Theano scalar. If any of the input sequences do not have
enough elements, scan will produce a warning and run only for
the maximal amount of steps it can. If the *value is 0* the
outputs will have *0 rows*. If the value is negative, ``scan``
runs backwards in time. If the ``go_backwards`` flag is already
set and also ``n_steps`` is negative, ``scan`` will run forward
in time. If ``n_steps`` is not provided, or evaluates to ``None``,
``inf`` or ``NaN``, then ``scan`` will figure out the amount of
steps it should run given its input sequences.
:param truncate_gradient:
``truncate_gradient`` is the number of steps to use in truncated
BPTT (backpropagation through time). If you compute gradients
through a scan op, they are
computed using backpropagation through time. By providing a
different value than -1, you choose to use truncated BPTT instead
of classical BPTT, where you go for only ``truncate_gradient``
number of steps back in time.
:param go_backwards:
``go_backwards`` is a flag indicating if ``scan`` should go
backwards through the sequences. If you think of each sequence
as indexed by time, making this flag True would mean that
``scan`` goes back in time, namely that for any sequence it
starts from the end and goes towards 0.
:param name:
When profiling ``scan`` it is crucial to provide a name for any
instance of ``scan``. The profiler will produce an overall
profile of your code as well as profiles for doing one iteration
step for each instance of ``scan``. The ``name`` of the instance is
how you differentiate between all these profiles.
:param mode:
It is recommended to leave this argument to None, especially
when profiling ``scan`` (otherwise the results are not going to
be accurate). If you prefer the computations of one step of
``scan`` to be done differently compared to the entire function, set
this parameters (see ``theano.function`` for details about
possible values and their meaning).
:rtype: tuple
:return: tuple of the form (outputs, updates); ``outputs`` is either a
Theano variable or a list of Theano variables representing the
outputs of ``scan`` (in the same order as in
``outputs_info``). ``updates`` is a dictionary specifying the
update rules for all shared variables used in the scan
operation. This dictionary should be passed to ``theano.function``
when you compile your function.
"""
# General observation : this code is executed only once, at creation
# of the computational graph, so we don't yet need to be smart about
# anything (to speed things up)
# check if inputs are just single variables instead of lists
if sequences == None:
seqs = []
elif not (type(sequences) in (list, tuple)):
seqs = [sequences]
else:
seqs = sequences
if outputs_info == None:
outs_info = []
elif not (type(outputs_info) in (list,tuple)):
outs_info = [outputs_info]
else:
outs_info = outputs_info
if non_sequences == None:
non_seqs = []
elif not (type(non_sequences) in (list,tuple)):
non_seqs = [non_sequences]
else:
non_seqs = non_sequences
# If we provided a known number of steps (before compilation)
# and if that number is 1 or -1, then we can skip the Scan Op,
# and just apply the inner function once
# To do that we check here to see the nature of n_steps
if type(n_steps) in (float,int):
n_fixed_steps = int(n_steps)
else:
# also check if this value happens to be a constant,
# then we could do the same
try :
n_fixed_steps = opt.get_constant_value(n_steps)
except:
n_fixed_steps = None
# compute number of sequences and number of outputs
n_seqs = len(seqs)
n_outs = len(outs_info)
# initialize the inplace map, sequences map and
# outputs map
''' Details:
The scan op identifies different properties attached
to input tensors by their order in the input list.
These maps ( inplace, sequence_taps, output_taps,
store_steps, return_steps) go from the index of an input to
its properties. Note that inputs are always first, followed
by outputs. Since we always know the number of inputs we
index the outputs from 0 ( so sometimes you will need to
do something like outputs_taps[i-n_ins]
'''
inplace_map = {}
sequences_taps = {}
outputs_taps = {}
# Assume that for any output we want to store everything that it produces
store_steps = []
return_steps = {}
# wrap sequences in a dictionary if they are not already dictionaries
# in the same pass create a sequences_taps dictionary
for i in xrange(n_seqs):
if not type(seqs[i]) == dict :
# if it is not a dictionary make it into one
seqs[i] = dict(input=seqs[i], taps=[0])
# see if taps values are provided as a list
elif seqs[i].get('taps',None):
# users can optionally provide the past value (if is just
# one) as a number instead of a list. Wrap it in a list
# to have a uniform way of dealing with inputs later on
if not type(seqs[i]['taps']) in (tuple,list):
seqs[i]['taps'] = [seqs[i]['taps']]
else:
# See if the user actually provided the None value to taps,
# which would indicate that the sequence was provided but
# not used by the internal function; Only if the user has
# not provided anything add the default [0]
# A possible reason to provide a sequence and not use it is
# if you want to compute the output
# inplace of this input; it is a very unlikely behaviour but
# we do want to cover it for completeness
if not seqs[i].has_key('taps'):
seqs[i][taps] = [0]
# Now that our input is well behaved, collect the taps in the
# sequences_taps map that we will use later in the body of scan
# since inputs will be just tensors there
if seqs[i].get('taps',None):
sequences_taps[i] = seqs[i]['taps']
# wrap outputs info in a dictionary if they are not already
# in one and in the same pass create a init_outs_taps dictionary and a inplace map
for i in xrange(n_outs):
if outs_info[i]:
# If output is a dictionary, collect the number of steps the
# user would like scan to return
if type(outs_info[i]) == dict:
if outs_info[i].get('return_steps', None):
return_steps[i] = outs_info[i]['return_steps']
# If you provide the number of steps to store internally,
# (not advocated in the user documentation), then also
# make sure you are returning only those number of steps
if outs_info[i].get('store_steps', None):
store_steps += [outs_info[i].get('store_steps',None)]
return_steps[i] = outs_info[i].get('store_steps',None)
else:
store_steps += [0]
else:
store_steps += [0]
# trying to collect taps of the output
if not type(outs_info[i]) == dict:
# by default any output has a tap value of -1
outs_info[i] = dict(initial=outs_info[i], taps = [-1])
# if there is no initial state but there are taps
# then return an error because it makes no sense
elif (not outs_info[i].get('initial',None)) and \
(outs_info[i].get('taps',None)):
raise ValueError('If you are using slices of an output you need to '\
'provide an initial state for it', outs_info[i])
# if there is an intial state but no tap, we will add the default value
# for taps, namely [-1] ( previous value); note that this will happen
# even though you have provided for taps the value None, which is a bit
# strange (why would one provide an initial state but tell scan not to
# use it ? ), just that in that case we will throw in a warning message
# pointing out this inconsistency
elif outs_info[i].get('initial',None) and \
( not outs_info[i].get('taps',None)):
if outs_info[i].has_key('taps'):
warning('You are providing an initial state for an output and then '
'tell scan not to use it. Why? Scan will overwrite this setting'
' and use the previous value of the provided initial state. If'
' this is not what you wanted, check your code and do not '
'provide the initial state')
outs_info[i]['taps'] = [-1]
else:
# if the output is a None then replace it with an empty dictionary for
# easing up dealing with this case later one ( we can directly call .has_key
# and things like this
outs_info[i] = dict()
store_steps += [0]
if outs_info[i].get('taps', None):
# Create a separate outputs_taps dictionary with all the outputs taps; This
# is how the Scan Op expects this information, separated from the variables
outputs_taps[i] = outs_info[i]['taps']
if outs_info[i].get('inplace', None):
warning("DEPRECATED: you should not set the inplace parameter for an output in scan(...). "
"This can cause problems for the early stages of the optimizer "
"and there is a late optimization which automatically figures it out.")
# The same is true for the inplace info; it has to go into a separate
# dictionary based on index; Note that the input we're replacing should also
# come as an index, therefore we have to look for it at this point
found = None
for k in xrange(n_seqs):
if seqs[k].get('input', None) == outs_info[i].get('inplace',None):
found = k
if found != None:
# NOTE : inplace_map is identical to destroy_map, i.e. it tells what
# output is computed inplace of what input !!
inplace_map[i] = found
else:
raise ValueError('Asked to compute in place of a non-input variable',\
outs_info[i].get('inplace', None))
# create theano inputs for the recursive function
# note : this is a first batch of possible inputs that will
# be compiled in a dummy function; we used this dummy
# function to detect shared variables and their updates
# and to construct a new and complete list of inputs and outputs
args = [] # list of arguments
dummy_notshared_ins = 0 # number of arguments corresponding to input seqs
dummy_notshared_init_outs = 0 # number of arguments corresponding to output seqs
slice_to_seqs = [] # for each slice index of the corresponding input
# go through sequences picking up time slices as needed
for i,seq in enumerate(seqs):
# Note that you can have something like no taps for
# a sequence, though is highly unlikely in practice
if seq.get('taps', None):
# go through the indicated slice
mintap = numpy.min(seq['taps'])
for k in seq['taps']:
# create one slice of the input
'''
Later on, if we decide not to use scan because we are going
for just one step, it makes things easier if we compute the
correct outputs here. This way we can use the output of the
lambda expression directly to replace the output of scan.
If not we need to use copies, that will be replaced at each
frame by the corresponding slice
'''
if n_fixed_steps not in [1,-1]:
nw_slice = seq['input'][0].type()
elif n_fixed_steps == 1:
nw_slice = seq['input'][k-mintap]
else:
nw_slice = seq['input'][-1+mintap-k]
# Add names to slices for debugging and pretty printing ..
# that is if the input already has a name
if seq['input'].name:
if seq['taps'][k] > 0:
nw_slice.name = seq['input'].name + '[t+%d]'%seq['taps'][k]
elif seq['taps'][k] == 0:
nw_slice.name = seq['input'].name + '[t]'
else:
nw_slice.name = seq['input'].name + '[t%d]'%seq['taps'][k]
args.append(nw_slice)
# Specify to whom this slice belongs
slice_to_seqs.append(i)
# Any slice is not a shared variable, even though the sequence
# from where we pick the slices is shared, therefore we should
# increase the number of notshared inputs to the dummy function
# by the number of slices
dummy_notshared_ins += len(seq['taps'])
# go through outputs picking up time slices as needed
for i,init_out in enumerate(outs_info):
# Note that our convention dictates that if an output uses
# just the previous time step, as an initial state we will only provide
# a tensor of the same dimension as one time step; This makes code
# much cleaner for those who do not use taps. Otherwise they would
# always had to shape_pad_left the initial state .. which is ugly
if init_out.get('taps', None) == [-1]:
if n_fixed_steps in [-1,1]:
args += [init_out['initial']]
else:
args += [init_out['initial'].type()]
# Added name to slices for debugging and pretty printing
if init_out['initial'].name:
args[-1].name = init_out['initial'].name+'[t-1]'
# we need to specify in slice_seqs to which output this
# slice belongs; Because we might get confused afterwards
# if a number is an index of a sequence or an output, and
# because we do not want to create yet another list, we will
# add the number of sequences + the current output. This makes
# decoding easy and spares us from writing a lot of lines
slice_to_seqs += [ i+n_seqs ]
dummy_notshared_init_outs += 1
elif init_out.get('taps',None):
if numpy.any(numpy.array(init_out.get('taps',[])) > 0):
# Make sure we do not have requests for future values of a sequence
# we can not provide such values
raise ValueError('Can not use future taps of outputs', init_out)
# go through the taps
minstep = abs(numpy.min(init_out['taps']))
for k in init_out['taps']:
# create a new slice
if n_fixed_steps in [1,-1]:
nw_slice = init_out['initial'][k+minstep]
else:
nw_slice = init_out['initial'][0].type()
# give it a name or debugging and pretty printing
if init_out['initial'].name:
if k > 0:
nw_slice.name = init_out['initial'].name + '[t+%d]'%k
elif k == 0:
nw_slice.name = init_out['initial'].name + '[t]'
else:
nw_slice.name = init_out['initial'].name + '[t%d]'%k
args.append(nw_slice)
# indicate the output index + n_seqs ( see above why)
slice_to_seqs.append(i + n_seqs)
# add as many slices as there are taps
dummy_notshared_init_outs += len(init_out['taps'])
#NOTE: there is another case, in which we do not want to provide any previous
# value of the output to the inner case; in this case we do not have to do
# anything ..
# remove shared variables from the non sequences list
# such that we can compile the function ( the user has the option to add them when
# writing scan, because in some situations this might make the code more readable)
notshared_other_args = []
for non_seq in non_seqs:
if not isinstance(non_seq, SharedVariable):
notshared_other_args += [non_seq]
# add only the not shared variables to the arguments of the dummy
# function [ a function should not get shared variables as input ]
dummy_args = []
for arg in args:
if not isinstance(arg, SharedVariable):
dummy_args += [arg]
dummy_args += notshared_other_args
# arguments for the lambda expression that gives us the output
# of the inner function
args += non_seqs
# when we apply the lambda expression we get a mixture of update rules
# and outputs that needs to be separated
outputs_updates = fn(*args)
# The code that follows tries to be as flexible as possible allowing the
# user to return the output and updates in any order, and giving the updates
# however he wants ( as a dictionary or a list o pairs ..)
# Is there a way to compress all this by writing it in a more python/functional way?
outputs = []
updates = {}
# we will try now to separate the outputs from the updates
if not type(outputs_updates) in (list,tuple):
if type(outputs_updates) == dict :
# we have just an update dictionary
updates = outputs_updates
else:
outputs = [outputs_updates]
else:
elem0 = outputs_updates[0]
elem1 = outputs_updates[1]
t_el0 = type(elem0)
t_el1 = type(elem1)
if t_el0 == dict or ( t_el0 in (list,tuple) and type(elem0[0]) in (list,tuple)):
# elem0 is the updates dictionary / list
updates = elem0
outputs = elem1
if not type(outputs) in (list,tuple):
outputs = [outputs]
elif ( type(elem1) == dict) or \
( type(elem1) in (list,tuple) and type(elem1[0]) in (list,tuple)):
# elem1 is the updates dictionary / list
updates = elem1
outputs = elem0
if not type(outputs) in (list,tuple):
outputs = [outputs]
else :
if type(outputs_updates) in (list,tuple) and \
(type(outputs_updates[0]) in (list,tuple)):
outputs = []
updates = outputs_updates
else:
outputs = outputs_updates
updates = {}
# in case you return a tuple .. convert it to a list (there are certain
# operation that are not permited on tuples, like element assignment)
outputs = list(outputs)
# If you return numbers (highly unlikely) this will not go well for theano
# We need to convert them to Theano constants
for i,out in enumerate(outputs):
outputs[i] = tensor.as_tensor(out)
# We can now compile a dummy function just to see what shared variable
# we have and what are their update rules (note that the user has
# the option not to pass the shared variable to scan, so we need to
# pick them manually and add them to scan)
# make the compilation as fast as possible by not applying any optimization
# or conversion to C [ note this region is not important for performance
# so we can do stuff as unoptimal as we wish ]
if n_fixed_steps in [-1,1]:
''' We do have a special case here, namely is so might happen that
whatever we have in dummy_args is not sufficient to compile the
function( i.e. missing inputs). Furthermore we might not even need
to compile the function here for this special case. But due to the
way I wrote the code is easier to have a compiled function here
that I can ignore later. Plus it is easier this way to take care
of shared variables with non-default updates. Therefore only for
this case I need to use gof.graph.inputs to look for the real inputs
so that I can compile the function. RP '''
dummy_f = function(filter(lambda x: isinstance(x, gof.Variable) and \
not isinstance(x,SharedVariable) and not isinstance(x,gof.Constant), \
gof.graph.inputs(dummy_args)), outputs, updates = updates, mode = compile.mode.Mode(linker='py',optimizer=None))
else:
dummy_f = function(filter(lambda x: isinstance(x, gof.Variable) and \
not isinstance(x,SharedVariable) and not isinstance(x,gof.Constant), \
dummy_args), outputs, updates = updates, mode = compile.mode.Mode(linker='py',optimizer=None))
# We now look at what outputs our function returns
inner_fn_outs = [ out.variable for out in dummy_f.maker.outputs]
update_map = {}
shared_outs = []
shared_non_seqs = []
givens = {}
# if the number of outputs to the function does not match the number of
# assumed outputs until now (provided by the initial case) there can be
# only one explanation that we now how to deal with. Namely no information
# is provided for any outputs which will indicate that we deal with a map,
# i.e. we never use previous values of outputs
if len(inner_fn_outs) != n_outs:
if outs_info == []:
# We know how to deal with this case, assume that none of the outputs
# are required to have any sort of time taps
# we just need to update the number of actual outputs
n_outs = len(inner_fn_outs)
# other updates :
for i in xrange(n_outs):
outs_info += [ dict() ]
# we also need to re-initialize the store_steps list to match the
# number of outputs
store_steps = [ 0 for i in xrange(n_outs)]
else:
# Otherwise there is a bit of confusion, since Scan works on the index of
# a sequence /output. There are maybe corner cases that could be added here
# or defult behaviour ( like always add the extra outputs at the end !?)
# But I did not bother implementing this, I leave it to the user to clearly
# express what he/she wants to do
raise ValueError('Scan is totally lost. Make sure that you indicate for each'
' output what taps you want to use, or None, if you do not want to'
' use any !')
inner_fn_inputs=[input.variable for input in \
dummy_f.maker.expanded_inputs[:dummy_notshared_ins+dummy_notshared_init_outs]]
# Keep track of the range (place) where you insert shared variables with updates
# Because we will not be able to compute the gradient with respect to those variables
# inner_fn_notshared_ins_idx is from where these shared variables with updates start
inner_fn_notshared_ins_idx = dummy_notshared_ins + dummy_notshared_init_outs
# Because scan is particularly sensitive at the order in which it gets its
# arguments, we need to separete the shared variables that act as outputs
# from those that are not outputs of the network as well
n_extended_outs = n_outs
# Skip the slices that we've added to the inner_fn which will be the first elements
# of f.maker.epanded_inputs and which we know that are not shared
fromIdx = dummy_notshared_ins + dummy_notshared_init_outs
copy_map = {}
for input in dummy_f.maker.expanded_inputs[fromIdx:] :
# If input is a shared variable that gets updated, then
# this shared variable will be an output of our inner function
if isinstance(input.variable, SharedVariable) and input.update:
# Create a copy of it
new_var = input.variable.type()
if input.variable.name:
new_var.name = input.variable.name + '_copy'
copy_map[new_var] = input.variable
inner_fn_inputs.append(new_var)
# add it to the slices at the end
slice_to_seqs += [ n_extended_outs ]
inner_fn_outs += [input.update]
update_map[ input.variable ] = n_extended_outs
# We know that we only have access to the last step
outputs_taps[ n_extended_outs ] = [-1]
n_extended_outs += 1
# we shouldn't try to store more then the last step
# this might not even be a tensor ! ( RandomState )
store_steps += [1]
return_steps[n_extended_outs -1] = 1
shared_outs += [input.variable]
givens[input.variable] = inner_fn_inputs[-1]
# inner_fn_shared_ins_idx stores where we stop having shared variables with updates
inner_fn_shared_ins_idx = len(inner_fn_inputs) - inner_fn_notshared_ins_idx
# Now that we took out the shared variables that have an update rule
# we need to take care of all the other shared variables
for input in dummy_f.maker.expanded_inputs[fromIdx:] :
# make sure that we do not add the same shared variable twice
if isinstance(input.variable, SharedVariable) and not input.update:
shared_non_seqs += [input.variable]
new_var = input.variable.type()
if input.variable.name:
new_var.name = input.variable.name + '_copy'
inner_fn_inputs += [new_var]
slice_to_seqs += [ n_extended_outs]
givens[input.variable] = inner_fn_inputs[-1]
copy_map[inner_fn_inputs[-1]] = input.variable
elif not isinstance(input.variable, SharedVariable):
# also add the normal tensor that are non sequences at the
# end of the inputs intertwingled with the shared variables
inner_fn_inputs.append(input.variable)
# If we haven't provided a number of steps nor did we provide a sequence
# scan will not know how long to iterate
if (n_steps == None or n_steps == numpy.inf or n_steps == numpy.nan) and n_seqs == 0 :
raise ValueError('Scan does not know for how many steps to iterate. '
'You need to provide the number of steps through the '
' ``n_steps`` argument if you do not iterate over any sequence')
# We can now create the Scan Op Object
if n_fixed_steps not in [1,-1]:
if n_steps != None:
n_steps = tensor.as_tensor(n_steps)
else:
n_steps = gof.Constant(gof.generic, 'unknown', '?_steps')
local_op = Scan( (inner_fn_inputs,inner_fn_outs, givens, slice_to_seqs ), n_seqs,
n_extended_outs, inplace_map, sequences_taps, outputs_taps, n_steps,truncate_gradient,
# n_outs, inner_fn_notshared_ins_idx and inner_fn_shared_ins_idx are used by the gradient
# to figure out where in the input are shared variables with updates, for whom I can't compute
# a gradient
n_outs, inner_fn_notshared_ins_idx, inner_fn_shared_ins_idx,
go_backwards, store_steps, return_steps, mode, name = name )
# Shortcut for attaching this property to the Scan op
local_op.copy_map = copy_map
# Call the object on the input sequences, initial values for outs,
# and non sequences
for seq in seqs :
if not seq.get('input', None):
raiseValue('All input sequences should provide')
unwrapped_seqs = [ seq.get('input',tensor.as_tensor(0.)) for seq in seqs ]
unwrapped_outs = [ out.get('initial',tensor.as_tensor(0.)) for out in outs_info ]
values = local_op( *( [n_steps]
+ unwrapped_seqs
+ unwrapped_outs
+ shared_outs
+ notshared_other_args
+ shared_non_seqs))
else:
# If we do not actually need scan
for pos, inner_out in enumerate(inner_fn_outs):
if isinstance(inner_out.type, tensor.TensorType) and store_steps[pos] != 1:
inner_fn_outs[pos] = tensor.unbroadcast( tensor.shape_padleft(inner_out),0)
values = inner_fn_outs
if not type(values) in (tuple, list):
values = [values]
# take out the updates of shared variable and build the dictionary
# that tells what to update and with what value
for val in update_map.keys():
update_map[val] = values [ update_map[val] ]
# Now we need to check the values returned
# if it just one strip the list around it
if n_outs == 1:
# if we need to return just one step or several steps
# note that when we return one step we have two cases, in
# the first one store_steps is set to 1, case in which we don't
# need to take a slice of the output (is already of the right
# dimension) and case 2 when we store more then one step,
# and we actually need to take a slice
if return_steps.has_key(0):
if return_steps[0] > 1:
values = values[0][-return_steps[0]:]
else:
if store_steps[0] == 1:
values = values[0]
else:
values = values[0][-1]
else:
values = values[0]
else:
values = values[:n_outs]
for idx,val in enumerate(values):
if return_steps.has_key(idx):
if return_steps[idx] > 1:
values[idx] = val[-return_steps[idx]:]
else:
if store_steps[idx] == 1:
values[idx] = val
else:
values[idx] = val[-1]
return (values, update_map)
class Scan(Op):
#
# OLD DOCUMENTATION CAN BE FOUND NEAR REVISION 2581
#
    def __init__(self, ins, n_seqs, n_outs,
                 inplace_map={}, seqs_taps={}, outs_taps={},
                 n_steps = gof.Constant(gof.generic, 'unknown', '?_steps'),
                 truncate_gradient = -1, n_outs_not_shared =0,
                 inner_fn_start_shared = 0, inner_fn_end_shared = 0,
                 go_backwards = False, store_steps = {},
                 return_steps={}, mode = None, inplace=False, name = None):
        '''
        Build a Scan op around an already-constructed inner (one-step) graph.

        NOTE(review): several parameters use mutable default values
        (``{}``); none of them is mutated here, but callers should not rely
        on the defaults staying independent across instances.

        :param ins: tuple ``(inputs, outputs, givens, slice_to_seqs)``.
            ``inputs`` and ``outputs`` are the Theano variables describing
            the function applied recursively; ``givens`` replaces shared
            variables with non-shared copies inside the inner function;
            ``slice_to_seqs`` is a convenience list telling, for each input
            slice, the index of the sequence/output it was taken from.
        :param n_seqs: number of sequences scan has to iterate over
        :param n_outs: number of outputs of the scan op
        :param inplace_map: see the ``scan`` function above
        :param seqs_taps: see the ``scan`` function above
        :param outs_taps: see the ``scan`` function above
        :param n_steps: number of steps to iterate; defaults to a symbolic
            "unknown" constant, in which case it is determined at runtime
        :param truncate_gradient: number of steps after which the gradient
            should be truncated; -1 implies no truncation
        :param n_outs_not_shared: number of outputs that are not
            shared-variable updates (used by the gradient code)
        :param inner_fn_start_shared: index where shared variables with
            updates start among the inner-function inputs (gradient code)
        :param inner_fn_end_shared: index where shared variables with
            updates end among the inner-function inputs (gradient code)
        :param go_backwards: see the ``scan`` function above
        :param store_steps: one entry per output; entry ``i`` tells how many
            steps (counted from the end) of output ``i`` really need to be
            kept, letting scan allocate only the memory required to hold
            that many entries when possible
        :param return_steps: see the ``scan`` function above
        :param mode: see the ``scan`` function above
        :param inplace: if True, apply the ``inplace_map`` as a destroy map
            (normally set by a late optimization, not by the user)
        :param name: see the ``scan`` function above
        '''
        inputs, outputs, givens, slice_to_seqs = ins
        # Build the list of output types for any Apply node using this op:
        # outputs that store a single step keep their per-step type; every
        # other output gains a leading (time) dimension.
        self.apply_output_types = []
        for i, o in enumerate(outputs):
            if 1 == store_steps[i]:
                self.apply_output_types.append(o.type)
            else:
                expanded_otype = TensorType(
                    broadcastable=(False,)+o.type.broadcastable,
                    dtype=o.type.dtype)
                self.apply_output_types.append(expanded_otype)
        self.destroy_map = {}
        if inplace:
            for i in inplace_map.keys():
                # n_steps is always the first argument of scan's perform,
                # so input positions are shifted by 1 in the destroy map
                self.destroy_map.update({i: [inplace_map[i]+1] } )
            # make all inplace inputs mutable for the inner function, for
            # extra efficiency
            for idx in xrange(len(inputs)):
                # index of the sequence/output this input slice belongs to
                n_seq = slice_to_seqs[idx]
                if n_seq in inplace_map.keys():
                    # NOTE(review): ``n_seq`` is a sequence/output index
                    # while ``inplace_map`` keys are output indices, and
                    # ``inputs`` is indexed by ``n_seq`` rather than ``idx``
                    # below — this looks suspicious; confirm against the
                    # callers before relying on the inplace path.
                    if type(inputs[n_seq]) is Param:
                        inputs[n_seq].mutable = True
                    else:
                        inputs[n_seq] = Param( inputs[n_seq], mutable = True)
        self.seqs_taps = seqs_taps
        self.outs_taps = outs_taps
        self.n_seqs = n_seqs
        self.n_outs = n_outs
        # +1 accounts for the leading n_steps argument
        self.n_args = n_seqs+n_outs+1
        self.inplace_map = inplace_map
        self.store_steps = store_steps
        self.inplace = inplace
        self.inputs = inputs
        self.return_steps = return_steps
        self.givens = givens
        self.n_outs_not_shared = n_outs_not_shared
        self.inner_fn_start_shared = inner_fn_start_shared
        self.inner_fn_end_shared = inner_fn_end_shared
        self.outputs = outputs
        self.n_steps = n_steps # It will be computed at runtime
        # n_steps is kept around so an optimization can detect whether scan
        # is really needed in the graph: if the number of steps is a
        # constant 1, -1 or 0, scan can be removed from the graph.
        self.mode = mode
        self.name = name
        self.truncate_gradient = truncate_gradient
        self.go_backwards = go_backwards
        self.slice_to_seqs = slice_to_seqs
        mode_instance = compile.mode.get_mode(mode)
        # If we use the default mode and it is a ProfileMode, we must make a
        # copy; otherwise times would be counted twice in the profile:
        # 1) the Scan op's time would include all time spent in the inner
        #    nodes, and
        # 2) the inner nodes would also report their own time.
        # The same is done for the ScanGrad op.
        if mode is None and isinstance(mode_instance, compile.profilemode.ProfileMode):
            mode_instance = compile.profilemode.ProfileMode(
                optimizer=mode_instance.provided_optimizer,
                linker=mode_instance.provided_linker)
            compile.profilemode.prof_mode_instance_to_print.append(mode_instance)
        self.mode_instance = mode_instance
        if self.name:
            self.mode_instance.message=self.name+" sub profile"
        else:
            self.mode_instance.message="Scan sub profile"
        if name is None: name = 'scan_fn'
        # Compile the inner (one-step) function once; it is applied at every
        # iteration of the loop.
        self.fn = function(inputs,outputs, mode = mode_instance, givens = givens,
                           name = name)
        # assert that we don't have shared variables anymore (we replaced
        # them with non-shared versions through ``givens``)
        assert not numpy.any([isinstance(x.variable,SharedVariable) for x in
                              self.fn.maker.inputs])
def __str__(self):
if self.name:
return self.name
else:
return 'scan'
def make_node(self,*inputs):
assert all(isinstance(i, gof.Variable) for i in inputs)
self.n_steps = inputs[0]
return Apply(self, inputs, [t() for t in self.apply_output_types])
def __eq__(self,other):
# the self.apply_output_types are a function of all these things
# no need to compare it as well
rval = type(self) == type(other)
if rval:
rval = (self.inputs == other.inputs) and \
(self.outputs == other.outputs) and \
(self.givens == other.givens) and \
(self.store_steps == other.store_steps) and \
(self.seqs_taps == other.seqs_taps) and \
(self.outs_taps == other.outs_taps) and \
(self.inplace_map == other.inplace_map) and \
(self.return_steps == other.return_steps) and \
(self.n_outs_not_shared == other.n_outs_not_shared) and \
(self.inner_fn_start_shared == other.inner_fn_start_shared) and\
(self.inner_fn_end_shared == other.inner_fn_end_shared) and \
(self.mode == other.mode) and \
(self.n_seqs == other.n_seqs) and\
(self.inplace == other.inplace) and\
(self.go_backwards == other.go_backwards) and\
(self.truncate_gradient == other.truncate_gradient) and\
(self.n_outs == other.n_outs) and\
(self.n_args == other.n_args)
return rval
def __hash__(self):
# the self.apply_output_types are a function of all these things
# no need to compare it as well
return hash(type(self)) ^ \
hash(self.n_seqs) ^ \
hash(self.n_outs) ^ \
hash(self.n_outs_not_shared) ^ \
hash(self.inner_fn_start_shared) ^\
hash(self.inner_fn_end_shared) ^\
hash(self.inplace) ^\
hash(self.go_backwards) ^\
hash(self.truncate_gradient) ^\
hash(self.n_args) ^ \
hash(self.mode) ^\
hash_listsDictsTuples(self.outputs) ^ \
hash_listsDictsTuples(self.inputs) ^ \
hash_listsDictsTuples(self.givens) ^ \
hash_listsDictsTuples(self.seqs_taps) ^\
hash_listsDictsTuples(self.outs_taps) ^\
hash_listsDictsTuples(self.return_steps) ^\
hash_listsDictsTuples(self.store_steps)
def perform(self,node,args, outs):
"""
The args are packed like this:
n_steps
X sequence inputs x_1, x_2, ... x_<self.n_seqs>
Y initial states (u_1, u_2, ... u_<self.n_outs>) for our outputs. Each must have appropriate length (T_1, T_2, ..., T_Y).
W other inputs w_1, w_2, ... w_W
There are at least 1 + self.n_seqs + self.n_outs inputs, and the ones above this number
are passed to the scanned function as non-sequential inputs.
The outputs are more straightforward:
Y sequence outputs y_1, y_2, ... y_<self.n_outs>
"""
n_steps = args[0]
if n_steps != 'unknown':
n_steps = int(n_steps)
if n_steps < 0:
n_steps = abs(n_steps)
go_backwards = not self.go_backwards
else:
go_backwards = self.go_backwards
else:
n_steps = None
go_backwards = self.go_backwards
if (self.n_seqs == 0 ) and (not numpy.isfinite(n_steps) ):
raise ValueError('Scan does not know how many steps it '
'should iterate! Either provide some input sequences from '
'which scan could find out the number of steps, or directly'
'the number of steps you want through the n_steps argument.')
for i in xrange(self.n_seqs):
if self.seqs_taps.has_key(i):
# compute actual length of the sequence ( we need to see what
# past taps this sequence has, and leave room for them
seq_len = args[i+1].shape[0] + min(self.seqs_taps[i])
if max( self.seqs_taps[i]) > 0:
# using future values, so need to end the sequence earlier
seq_len -= max(self.seqs_taps[i])
if n_steps == None :
# length of the sequences, leaving room for the largest
n_steps = seq_len
if seq_len != n_steps :
if seq_len > n_steps:
warning('Input sequence is longer then required. '
'Extra values will be ignored')
else:
warning(' Input sequence is shorter then the number '
'of steps scan was suppose to do. Readjusting'
'the number of steps scan will iterate ... ')
n_steps = min(seq_len,n_steps)
# check if we deal with an inplace operation
inplace_map = self.inplace_map
if not self.inplace: #if it was not optimized to work inplace
inplace_map = {}
# check lengths of init_outs
for i in xrange(self.n_seqs+1, self.n_seqs+self.n_outs+1):
if self.outs_taps.has_key(i-self.n_seqs-1):
if self.outs_taps[i-self.n_seqs-1] != [-1]:
req_size = abs(min(self.outs_taps[i-self.n_seqs-1]))-1
if args[i].shape[0] < req_size:
warning(('Initial state for output %d has fewer values then '
'required by the maximal past value %d. Scan will use 0s'
' for missing values')%(i-self.n_iterable-1,req_size))
y = self.scan(self.fn, args[1:],self.n_seqs, self.n_outs,
self.seqs_taps, self.outs_taps, n_steps, go_backwards,
inplace_map)
for i in xrange(self.n_outs):
if self.store_steps[i] > 1 :
# we need to reorder the steps .. to have them in the correct order
# we use numpy advanced indexing for this
# index order :
index_order = range(self.idx_store_steps[i],self.store_steps[i]) + \
range(self.idx_store_steps[i])
outs[i][0] = y[i][index_order]
else:
outs[i][0] = y[i]
def scan(self, fn, args, n_seqs, n_outs, seqs_taps, outs_taps, n_steps, go_backwards, inplace_map):
''' Actual loop of the scap op perform function '''
# Note that we removed the n_steps from the args for this function, so the
# order of arguments is slightly different compared to perform
y = []
# When you have taps, you need to leave borders in your sequences, initial outputs
# for those taps; here we compute what are those borders for sequences
seqs_mins = {}
for j in xrange(n_seqs):
if seqs_taps.has_key(j):
seqs_mins.update({j: min(seqs_taps[j])})
# create storage space for the outputs ( using corresponding inputs if we are
# dealing with inplace operations
# `idx_store_steps` is a dictionary telling us the current position in y of an
# output where we want to store only the last k steps
self.idx_store_steps = {}
for i in xrange(n_outs):
if inplace_map.has_key(i) and seqs_taps.has_key(inplace_map[i]) and\
seqs_taps[inplace_map[i]] >=0:
y += [args[inplace_map[i]][:n_steps]]
else:
# check if you are using past value .. through in a warning and do not
# work inplace
if inplace_map.has_key(i) and seqs_taps.has_key(inplace_map[i]) and\
seqs_taps[inplace_map[i]] < 0:
warning('Can not work inplace because of past values')
if self.store_steps[i] == 1 :
y+= [ None ]
else:
arg_shape = args[i+n_seqs].shape[1:]
if (not self.outs_taps.has_key(i)) or self.outs_taps[i] == [-1]:
arg_shape = args[i+n_seqs].shape
if self.store_steps[i] < 1 :
y_shape = (n_steps,)+arg_shape
else:
# we need to store only a fixed number of steps of our output
self.idx_store_steps[i] = 0
y_shape = (self.store_steps[i],)+arg_shape
y += [numpy.empty(y_shape, dtype=args[i+n_seqs].dtype)]
# and here we compute the borders for initial states of outputs
outs_mins = {}
initOuts_size = {}
for j in xrange(n_outs):
if outs_taps.has_key(j):
outs_mins.update({j: min(outs_taps[j])})
if self.outs_taps[j] != [-1]:
initOuts_size.update({j: args[n_seqs+j].shape[0]})
else:
initOuts_size.update({j: 0})
############## THE MAIN LOOP ############################
for i in xrange(n_steps):
fn_args = []
# sequences over which scan iterates
# check to see if we are scaning them backwards or no
# and get a new index ``_i`` accordingly
_i = i
if go_backwards:
_i = n_steps-1-i
# collect data from sequences
for j in xrange(n_seqs):
# get borders
if seqs_taps.has_key(j):
ls_taps = seqs_taps[j]
min_tap = seqs_mins[j]
for tap_value in ls_taps:
# use the borders to figure out what value you actually need
k = _i - min_tap + tap_value
fn_args += [args[j][k]]
# past values of outputs
for j in xrange(n_outs):
if outs_taps.has_key(j):
ls_taps = outs_taps[j]
min_tap = outs_mins[j]
sz = initOuts_size[j]
for tap_value in ls_taps:
if i + tap_value < 0:
if sz < 1:
# this is a special case, when our initial state has no
# temporal dimension
fn_args += [args[j+n_seqs] ]
else:
k = i + sz + tap_value
if k < 0:
# past value not provided.. issue a warning and use
# 0s of the correct dtype
fn_args += [numpy.zeros(args[j+n_seqs][0].shape, \
dtype = args[j+n_sqs][0].dtype)]
warning(('Past value %d for output %d not given in '
'inital out') % (j,tap_value))
else:
fn_args += [args[j+n_seqs][k]]
else:
if self.store_steps[j] < 1:
# no limit on how many steps to store from our output
fn_args += [y[j][i + tap_value]]
elif self.store_steps[j] == 1:
# just the last one
fn_args += [y[j] ]
else:
# storing only the last k
# get what idx we want
req_idx = (self.idx_store_steps[j] + tap_value + \
self.store_steps[j])
# we need this modula self.store_steps[j]
req_idx = req_idx % self.store_steps[j]
fn_args += [y[j][req_idx] ]
# get the non-iterable sequences
fn_args += list(args[(n_seqs+n_outs):])
# compute output
something = fn(*fn_args)
#update outputs
for j in xrange(n_outs):
if self.store_steps[j] <1:
# if you have provided no size for the missing output you might
# find yourself here with a incorect array .. if that happens
# realocate memory for the needed array
try :
if hasattr(something[j],'dtype') and (y[j].dtype != \
something[j].dtype) :
raise ValueError('wrong dtype')
y[j][i] = something[j]
except :
y[j]= numpy.empty((n_steps,)+something[j].shape, dtype= \
something[j].dtype)
y[j][i] = something[j]
elif self.store_steps[j] == 1:
try:
if hasattr(something[j],'dtype') and y[j].dtype != \
something[j].dtype:
raise ValueError('wrong dtype')
y[j] = something[j]
except:
y[j] = numpy.empty( something[j].shape, dtype = \
something[j].dtype)
y[j] = something[j]
else:
try:
if hasattr(something[j],'dtype') and y[j].dtype != \
something[j].dtype:
raise ValueError('worng dtype')
y[j][self.idx_store_steps[j]] = something[j]
self.idx_store_steps[j] = (self.idx_store_steps[j] + 1) %\
self.store_steps[j]
except:
y[j] = numpy.empty( (self.store_steps[j],)+something[j].shape, \
dtype = something[j].dtype)
y[j][idx_store_steps[j]] = something[j]
self.idx_store_steps[j] = (self.idx_store_steps[j] + 1) %\
self.store_steps[j]
return y
def grad(self, args, g_outs):
# forward pass - get the outputs after applying scan
scan_outputs = self(*args)
# make sure they are given as a list
if not( type(scan_outputs) in (list,tuple)):
scan_outputs = [scan_outputs]
# get a list of clean inputs ( against which one can compute
# gradients ) [ everything except shared variables with updates ]
clean_inputs = self.inputs[:self.inner_fn_start_shared] + \
self.inputs[self.inner_fn_start_shared + \
self.inner_fn_end_shared:]
clean_inputs = [ self.copy_map.get(x,x) for x in clean_inputs]
s_inputs = [self.copy_map.get(x,x) for x in self.inputs ]
# function that computes the gradient (we sum over the gradients
# with respect to all outputs
def compute_gradient(y, g_y):
gmp = gradient.grad_sources_inputs( \
[(y,g_y)], clean_inputs, False)
def zero(p):
try:
use_dtype = p.type.dtype
except:
use_dtype = theano.config.floatX
return tensor.TensorConstant(tensor.TensorType(\
dtype=use_dtype, broadcastable=[]),
safe_asarray._asarray(0,dtype = use_dtype))
return [gmp.get(p, zero(p)) for p in s_inputs]
# this are g_outs for the inner function (that computes the gradients)
inner_g_outs = []
# the outs of the gradient computting inner function
inner_gfn_outs = []
inner_gfn_ins = []
# Go through the outputs that don't represent update rules
for out in self.outputs[:self.n_outs_not_shared]:
inner_g_out = out.type()
if out.name:
# for debugging add names to all variables I'm creating
g_y.name = 'g_'+out.name
inner_g_outs.append(inner_g_out)
_grad_outs = compute_gradient(out, inner_g_out)
grad_outs = _grad_outs[:self.n_seqs+self.n_outs_not_shared] + \
_grad_outs[self.n_seqs+self.n_outs:]
if not inner_gfn_outs :
inner_gfn_outs = grad_outs
else:
# safety check, some of this inputs might still not be differentiable,
# for those we don't add them to the mix (assume their gradient is 0)
for i,(x,y) in enumerate(zip(grad_outs, inner_gfn_outs)):
if x and y:
inner_gfn_outs[i] = x+y
elif y:
inner_gfn_outs[i] = y
else:
inner_gfn_outs[i] = x
# backwards pass
for i in xrange(len(inner_gfn_outs)):
if inner_gfn_outs[i] == None:
inner_gfn_outs[i] = tensor.zeros_like(clean_inputs[i])
for i in xrange(self.n_outs_not_shared):
# Safety check
if g_outs[i] == None:
try:
# this try is for catching non ndarray inputs (random states)
# it is more of a safety check ( all random states should be
# after n_outs_not_shared ...
g_outs[i] = tensor.zeros_like(scan_outputs[i])
except:
g_outs[i] = theano.tensor.constant(numpy.array(0,dtype=\
theano.config.floatX))
inner_gfn_ins = inner_g_outs + self.inputs
# Make sure you don't have numbers in here
if not isinstance(self.n_steps, Variable):
n_steps = tensor.as_tensor(self.n_steps)
else:
n_steps = self.n_steps
g_args = [n_steps] + g_outs[:self.n_outs_not_shared] \
+ scan_outputs + args[1:]
truncate_gradient = self.truncate_gradient
for x in self.store_steps[:self.n_outs_not_shared]:
if x>0 :
raise ValueError('Can not compute gradients if one does not ',
'store all intermediate results (remove store_steps'
'from the dictionaries describing your outputs)')
g_scan = ScanGrad((inner_gfn_ins, inner_gfn_outs),
self.n_seqs, self.n_outs, self.n_outs_not_shared,
self.go_backwards, self.seqs_taps, self.outs_taps,
truncate_gradient)
g_scan_outs = g_scan(g_args)
if not type(g_scan_outs) in (list, tuple):
g_scan_outs = [ g_scan_outs ]
# We need to add several None's for shared vars with updates
gradients = [None] + g_scan_outs[:self.n_seqs+self.n_outs_not_shared]
gradients += [None for i in xrange(self.n_outs-self.n_outs_not_shared)]
gradients += g_scan_outs[self.n_seqs+self.n_outs_not_shared:]
return gradients
class ScanGrad(Op):
    """Gradient Op for Scan.

    Iterates the compiled inner gradient function backwards through time,
    accumulating gradients w.r.t. the sequences, the initial output states
    and the non-sequence arguments of the corresponding Scan op.
    """
    def __init__(self, grads, n_seqs, n_outs,
                 n_outs_not_shared,
                 go_backwards = False, seqs_taps = None, outs_taps = None,
                 truncate_gradient = -1, mode = None, name = None):
        """
        :param grads: pair (inner gradient inputs, inner gradient outputs)
        :param mode: see scan fct
        :param name: see scan fct
        """
        # BUG FIX: the taps dicts used to be mutable default arguments
        # (shared across all instances); use None sentinels instead --
        # backward compatible since callers passing {} get the same result
        if seqs_taps is None:
            seqs_taps = {}
        if outs_taps is None:
            outs_taps = {}
        g_ins, g_outs = grads
        self.inputs = g_ins
        self.outputs = g_outs
        self.n_outs_not_shared = n_outs_not_shared
        self.n_seqs = n_seqs
        self.go_backwards = go_backwards
        self.truncate_gradient = truncate_gradient
        self.n_outs = n_outs
        self.seqs_taps = seqs_taps
        self.outs_taps = outs_taps
        self.destroy_map = {}
        self.mode = mode
        mode_instance = compile.mode.get_mode(mode)
        # if we use the default mode and it is a ProfileMode we must make a
        # copy, otherwise in the profile time would be counted many times:
        # 1) the scan op's time would include all time spent in the inner
        #    nodes, and
        # 2) the inner scan ops would report their real time again.
        # This is done for both the Scan and ScanGrad op.
        if mode is None and isinstance(mode_instance, compile.profilemode.ProfileMode):
            mode_instance = compile.profilemode.ProfileMode(
                optimizer=mode_instance.provided_optimizer,
                linker=mode_instance.provided_linker)
            compile.profilemode.prof_mode_instance_to_print.append(mode_instance)
        self.mode_instance = mode_instance
        self.mode_instance.message="ScanGrad sub profile"
        if name is None: name = 'scan_grad_fn'
        self.grad_fn = function(g_ins, g_outs, mode = mode_instance, name = name)
    def __eq__(self,other):
        """Structural equality over every field that determines behaviour."""
        rval = type(self) == type(other)
        if rval:
            rval = (self.inputs == other.inputs) and \
            (self.outputs == other.outputs) and \
            (self.n_seqs == other.n_seqs) and \
            (self.n_outs == other.n_outs) and \
            (self.go_backwards == other.go_backwards) and \
            (self.n_outs_not_shared == other.n_outs_not_shared) and\
            (self.truncate_gradient == other.truncate_gradient) and\
            (self.mode == other.mode) and \
            (self.seqs_taps == other.seqs_taps) and \
            (self.outs_taps == other.outs_taps)
        return rval
    def __hash__(self):
        """Hash consistent with ``__eq__`` (same fields)."""
        return hash(type(self)) ^ \
            hash(self.n_seqs) ^ \
            hash(self.n_outs) ^ \
            hash(self.go_backwards) ^\
            hash(self.truncate_gradient) ^\
            hash(self.mode) ^\
            hash_listsDictsTuples(self.inputs) ^ \
            hash_listsDictsTuples(self.outputs) ^ \
            hash_listsDictsTuples(self.seqs_taps) ^ \
            hash_listsDictsTuples(self.outs_taps)
    def make_node(self, *args):
        # input of the gradient op :
        #       | g_outs | y      | seqs   | outs   | non_seqs   |
        #       | n_outs | n_outs | n_seqs | n_outs | unknown    |
        # return
        #       | grad of seqs | grad of outs | grad of non_seqs  |
        #       |   n_seqs     |  n_outs      |  unknown          |
        scan_inputs = args[0][1+self.n_outs_not_shared+self.n_outs:]
        outputs_grad = scan_inputs[:self.n_seqs+self.n_outs_not_shared]
        outputs_grad += scan_inputs[self.n_seqs+self.n_outs:]
        return Apply(self, list(args[0]),
                [i.type() for i in outputs_grad ])
    def perform(self, node, args, storage):
        """Numerically accumulate gradients by stepping backwards in time."""
        # get scan inputs; 'unknown' is the placeholder for a missing count
        n_steps = args[0]
        if n_steps != 'unknown':
            n_steps = int(n_steps)
            if n_steps < 0:
                n_steps = abs(n_steps)
                go_backwards = not self.go_backwards
            else:
                go_backwards = self.go_backwards
        else:
            n_steps = None
            go_backwards = self.go_backwards
        inputs = args[self.n_outs_not_shared+self.n_outs+1:]
        seqs = inputs[:self.n_seqs]
        outInfo = inputs[self.n_seqs:self.n_seqs+self.n_outs]
        non_seqs = inputs[self.n_outs+self.n_seqs:]
        # BUG FIX: guard the None case explicitly; numpy.isfinite(None)
        # raises TypeError and would mask this informative error message
        if (self.n_seqs == 0) and \
                ((n_steps is None) or (not numpy.isfinite(n_steps))):
            raise ValueError('Scan does not know how many steps it '
                'should iterate! Either provide some input sequences from '
                'which scan could find out the number of steps, or directly'
                'the number of steps you want through the n_steps argument.')
        for i in xrange(self.n_seqs):
            if self.seqs_taps.has_key(i):
                # compute actual length of the sequence ( we need to see what
                # past taps this sequence has, and leave room for them )
                seq_len = seqs[i].shape[0] + min(self.seqs_taps[i])
                if max( self.seqs_taps[i]) > 0:
                    # using future values, so need to end the sequence earlier
                    seq_len -= max(self.seqs_taps[i])
                if n_steps is None :
                    # length of the sequences, leaving room for the largest
                    n_steps = seq_len
                if seq_len != n_steps :
                    if seq_len > n_steps:
                        warning('Input sequence is longer then required. '
                                'Extra values will be ignored')
                    else:
                        warning(' Input sequence is shorter then the number '
                                'of steps scan was suppose to do. Readjusting'
                                'the number of steps scan will iterate ... ')
                    n_steps = min(seq_len,n_steps)
        # go back through time to 0 or n_steps - truncate_gradient
        lower_limit = n_steps - self.truncate_gradient
        length = n_steps
        if lower_limit > n_steps-1:
            the_range = xrange(n_steps-1,-1,-1)
            lower_limit = 0
        elif lower_limit < -1:
            the_range = xrange(n_steps-1,-1,-1)
            lower_limit = 0
        else:
            the_range = xrange(n_steps-1, lower_limit-1,-1)
            lower_limit = lower_limit + 1
        # generate space for gradient
        if lower_limit != 0 :
            length = len(the_range)
            g_seqs = []
            # Check for taps ==> you need to enlarge the sequence length
            for j in xrange(self.n_seqs):
                if self.seqs_taps.has_key(j):
                    length = length - min(self.seqs_taps[j])
                    length = length + max(self.seqs_taps[j])
                g_seqs += [ numpy.zeros_like(seqs[j][:length]) ]
        else:
            g_seqs = [numpy.zeros_like(k) for k in seqs]
        g_outInfo = [numpy.zeros_like(k) \
                for k in outInfo[:self.n_outs_not_shared]]
        g_non_seqs = [numpy.zeros_like(k) for k in non_seqs]
        # get gradient on the outputs
        g_outs = [arg.copy() for arg in args[1:self.n_outs_not_shared+1]]
        # get the output of the scan operation
        outs = args[1+self.n_outs_not_shared:self.n_outs_not_shared+self.n_outs+1]
        seqs_mins = {}
        for j in xrange(self.n_seqs):
            if self.seqs_taps.has_key(j):
                seqs_mins.update({j: min(self.seqs_taps[j])})
        outs_mins = {}
        initOuts_size = {}
        for j in xrange(self.n_outs):
            if j >= self.n_outs_not_shared:
                outs_mins.update({j:-1})
                initOuts_size.update({j:0})
            elif self.outs_taps.has_key(j):
                outs_mins.update({j: min(self.outs_taps[j])})
                if self.outs_taps[j] != [-1]:
                    initOuts_size.update({j:g_outInfo[j].shape[0]})
                else:
                    initOuts_size.update({j:0})
        for i in the_range:
            # time slice of inputs
            _ins = []
            _i = i
            if go_backwards:
                _i = n_steps -1 -i
            for j in xrange(self.n_seqs):
                if self.seqs_taps.has_key(j):
                    ls_taps = self.seqs_taps[j]
                    min_tap = seqs_mins[j]
                    for tap_value in ls_taps:
                        k = _i - min_tap + tap_value
                        _ins += [seqs[j][k]]
            # time slice of outputs + taps
            _outs = []
            for j in xrange(self.n_outs):
                if self.outs_taps.has_key(j):
                    ls_taps = self.outs_taps[j]
                    min_tap = outs_mins[j]
                    seed_sz = initOuts_size[j]
                    for tap_value in ls_taps:
                        if i + tap_value < 0:
                            if seed_sz < 1:
                                _outs += [outInfo[j]]
                            else:
                                k = i + seed_sz + tap_value
                                if k < 0 :
                                    # past value not provided .. issue a
                                    # warning and use 0
                                    _outs += [numpy.zeros(outInfo[j][0].shape)]
                                    # BUG FIX: format spec was '$d' (TypeError
                                    # at %-formatting time) and the arguments
                                    # were passed in the wrong order
                                    warning('Past value %d for output %d not given' \
                                            %(tap_value,j))
                                else:
                                    _outs += [outInfo[j][k]]
                        else:
                            if j>= self.n_outs_not_shared:
                                _outs += [outs[j] ]
                            else:
                                _outs += [outs[j][i + tap_value]]
            g_out = []
            g_out = [ arg[i] for arg in g_outs]
            grad_args = g_out + _ins + _outs + non_seqs
            grads=self.grad_fn(*grad_args)
            # get gradient for inputs
            pos = 0
            for j in xrange(self.n_seqs):
                if self.seqs_taps.has_key(j):
                    ls_taps = self.seqs_taps[j]
                    min_tap = seqs_mins[j]
                    for tap_value in ls_taps :
                        k = _i - min_tap + tap_value
                        g_seqs[j][k-lower_limit] += grads[pos]
                        pos += 1
            # get gradient for outputs
            for j in xrange(self.n_outs_not_shared):
                if self.outs_taps.has_key(j):
                    ls_taps = self.outs_taps[j]
                    min_tap = outs_mins[j]
                    seed_sz = initOuts_size[j]
                    for tap_value in ls_taps:
                        if i+tap_value < 0 :
                            k = i + seed_sz + tap_value
                            if  k >= 0 :
                                g_outInfo[j][k] += grads[pos]
                            else:
                                g_outInfo[j] += grads[pos]
                        else:
                            g_outs[j][i+tap_value] += grads[pos]
                        pos += 1
            for j in xrange(len(g_non_seqs)):
                g_non_seqs[j] += grads[j+pos]
        # return the gradient
        for i,v in enumerate(g_seqs + g_outInfo+ g_non_seqs):
            storage[i][0] = v
class ScanSpaceOptimizer(Optimizer):
    """ Graph Optimizer that reduces scan memory consumption.

    For each Scan output that currently stores all its steps, this
    optimizer inspects the clients of that output; when every client is a
    Subtensor taking only index -1 (the last step), the Scan is rebuilt
    with ``store_steps`` limited accordingly.
    """
    def __init__(self):
        # no optimizer-specific state; defer to the base Optimizer
        Optimizer.__init__(self)
    def add_requirements(self,env):
        # this optimizer replaces nodes, so the env must support
        # replace_all_validate
        env.extend(toolbox.ReplaceValidate())
    def apply(self, env):
        """Scan the graph and shrink store_steps of Scan ops where safe."""
        nodelist = list(env.toposort())
        for node in nodelist:
            op = node.op
            # If it is a scan Op
            if isinstance(op, Scan):
                outputs = node.outputs
                store_steps = [0 for x in outputs]
                # check the outputs
                for i,out in enumerate(node.outputs):
                    if op.store_steps[i] == 0 :
                        # if we do not have a range for this output:
                        # start from the largest tap this output uses
                        req_steps = numpy.max(numpy.abs(op.outs_taps.get(i,1)))
                        # look at all its clients
                        for cl,_dx in out.clients:
                            if type(cl) == str:
                                # if the node is actually an output, then
                                # we need to store the entire thing
                                req_steps = None
                                break
                            else:
                                if not isinstance(cl.op,
                                        tensor.basic.Subtensor):
                                    # if any of the clients is not a subtensor
                                    # we also need to store the entire thing
                                    req_steps = None
                                    break
                                else:
                                    # if it is a subtensor, and the first
                                    # dimension index is just -1
                                    if cl.op.idx_list[0] == -1 and req_steps != None:
                                        req_steps = numpy.max([1, req_steps])
                                    else:
                                        # or a constant that evaluates to
                                        # -1
                                        try:
                                            idx = opt.get_constant_value(\
                                                    cl.op.idx_list[0])
                                            if idx== -1:
                                                req_steps = numpy.max([1, req_steps])
                                            else:
                                                req_steps = None
                                                break
                                        except:
                                            # not a known constant -> must
                                            # keep every step
                                            req_steps = None
                                            break
                        if req_steps != None:
                            store_steps[i] = req_steps
                        else:
                            store_steps[i] = 0
                    else:
                        store_steps[i] = op.store_steps[i]
                # rebuild the scan op only if some store_steps changed
                if numpy.any(store_steps!= op.store_steps):
                    new_scan = Scan((op.inputs, op.outputs, op.givens,
                        op.slice_to_seqs),op.n_seqs, op.n_outs,
                        op.inplace_map, op.seqs_taps, op.outs_taps, op.n_steps,
                        op.truncate_gradient, op.n_outs_not_shared,
                        op.inner_fn_start_shared, op.inner_fn_end_shared,
                        op.go_backwards, store_steps, op.return_steps, op.mode,
                        op.inplace, name = op.fn.name).make_node(*node.inputs)
                    # we now need to replace the outputs of scan
                    for i,out in enumerate(node.outputs):
                        # if we are dealing with an output for which
                        # we changed the number of stored steps we
                        # also need to get rid off the subtensor
                        if op.store_steps[i] == 0 and store_steps[i] == 1:
                            # get the output of the subtensor variables
                            outSubTens = [ x[0].outputs[0] for x in out.clients ]
                            new_old = [(x,new_scan.outputs[i]) for x in outSubTens]
                            env.replace_all_validate(new_old,reason =
                                    'scan_space_optimizer')
                        else:
                            env.replace_all_validate([(out,
                                new_scan.outputs[i])], reason =
                                'scan_space_optimizer')
# Register the space optimizer in the global optimizer database at position
# 74, enabled under the 'fast_run' tag.
optdb.register('scanOp_space_optimization', ScanSpaceOptimizer(), 74, 'fast_run')
@gof.local_optimizer([None])
def scan_make_inplace(node):
    """Local optimizer: replace a non-inplace Scan that declares an
    inplace_map with an equivalent Scan configured to work inplace.

    Returns the new node's outputs, or False when the rewrite does not
    apply.
    """
    op = node.op
    if not isinstance(op, Scan):
        return False
    if op.inplace or not op.inplace_map:
        return False
    inplace_op = Scan((op.inputs, op.outputs, op.givens, op.slice_to_seqs),
                      op.n_seqs, op.n_outs, op.inplace_map, op.seqs_taps,
                      op.outs_taps, op.n_steps, op.truncate_gradient,
                      op.n_outs_not_shared, op.inner_fn_start_shared,
                      op.inner_fn_end_shared, op.go_backwards,
                      op.store_steps, op.return_steps, op.mode,
                      inplace=True, name=op.fn.name)
    return inplace_op.make_node(*node.inputs).outputs
# Register the inplace rewrite at position 75 under the 'fast_run' and
# 'inplace' tags, wrapped as a node-by-node optimizer via opt.in2out.
optdb.register('scanOp_make_inplace', opt.in2out(scan_make_inplace,
        ignore_newtrees=True), 75, 'fast_run', 'inplace')
@theano.compile.profilemode.register_profiler_printer
def profile_printer(fct_name, compile_time, fct_call_time, fct_call,
                    apply_time, op_cimpl, message, outputs_size,
                    other_time):
    """Print per-node Scan/ScanGrad overhead (sub-function and sub-op time
    versus total op time) when any such node was profiled."""
    # Scan overhead profile: only print if some Scan/ScanGrad node has
    # non-zero recorded time
    if any([isinstance(node.op, (Scan, ScanGrad)) and v>0 for (_,node),v in apply_time.items()]):
        print
        print 'Scan overhead:'
        print '<Scan op time(s)> <sub scan fct time(s)> <sub scan op time(s)> <sub scan fct time/scan op time(%)> <sub scan op time/scan op time(%)> <node>'
        total_super_scan_time = 0
        total_scan_fct_time = 0
        total_scan_op_time = 0
        for (_,node),v in apply_time.items():
            if isinstance(node.op, (Scan, ScanGrad)) and v > 0:
                # times recorded by the op's private (copied) ProfileMode
                scan_fct_time = sum(node.op.mode_instance.fct_call_time.values())
                scan_op_time = sum(node.op.mode_instance.local_time)
                total_super_scan_time += v
                total_scan_fct_time += scan_fct_time
                total_scan_op_time += scan_op_time
                print ' %5.1es %5.1es %5.1es %5.1f%% %5.1f%%'%(
                    v, scan_fct_time, scan_op_time, scan_fct_time/v*100,
                    scan_op_time/v*100), node
        print ' total %5.1es %5.1es %5.1es %5.1f%% %5.1f%%'%(
            total_super_scan_time, total_scan_fct_time, total_scan_op_time, total_scan_fct_time/total_super_scan_time*100, total_scan_op_time/total_super_scan_time*100)
"""
This module provides the Scan Op
Scanning is a general form of recurrence, which can be used for looping.
The idea is that you *scan* a function along some input sequence, producing
an output at each time-step that can be seen (but not modified) by the
function at the next time-step. (Technically, the function can see the
previous K time-steps of your outputs and L time steps (from the past and
future) of your inputs.)
So for example, ``sum()`` could be computed by scanning the ``z+x_i``
function over a list, given an initial state of ``z=0``.
Special cases:
* A *reduce* operation can be performed by returning only the last
output of a ``scan``.
* A *map* operation can be performed by applying a function that
ignores previous steps of the outputs.
Often a for-loop can be expressed as a ``scan()`` operation, and ``scan`` is
the closest that theano comes to looping. The advantage of using ``scan``
over for loops is that it allows the number of iterations to be a part of
the symbolic graph.
The Scan Op should typically be used by calling any of the following
functions: ``scan()``, ``map()``, ``reduce()``, ``foldl()``,
``foldr()``.
"""
__docformat__ = 'restructedtext en'
__authors__ = ( "Razvan Pascanu "
"Frederic Bastien "
"James Bergstra "
"Pascal Lamblin " )
__copyright__ = "(c) 2010, Universite de Montreal"
__contact__ = "Razvan Pascanu <r.pascanu@gmail>"
import itertools
import logging
import numpy
import sys
from theano import tensor
from theano.tensor import opt, TensorType
from theano import gof
from theano.gof import Optimizer, toolbox
from theano.compile import optdb
from theano.compile.sharedvalue import SharedVariable
from theano.configparser import AddConfigVar, BoolParam
from theano import config
from theano.tensor import opt
import scan_op
from scan import scan
from scan_views import map, reduce, foldl, foldr
import scan_utils
from scan_utils import clone
# Module logger plus small helpers for emitting warnings / info messages.
_logger = logging.getLogger('theano.scan')
def warning(*msg):
    """Log the given message fragments as one scan-prefixed warning."""
    _logger.warning('WARNING theano.scan: %s' % ' '.join(msg))
def info(*msg):
    """Log the given message fragments as one scan-prefixed info message."""
    _logger.info('INFO theano.scan: %s' % ' '.join(msg))
@gof.local_optimizer([None])
def scan_make_inplace(node):
    """Local optimizer: rebuild a non-inplace Scan with inplace enabled.

    Returns the replacement node's outputs, or False when the rewrite
    does not apply.
    """
    op = node.op
    if not isinstance(op, scan_op.Scan) or op.info['inplace']:
        return False
    new_info = op.info.copy()
    new_info['inplace'] = True
    new_op = scan_op.Scan(op.inputs, op.outputs, new_info)
    return new_op.make_node(*node.inputs).outputs
# Register the new-style inplace rewrite at position 75 under the
# 'fast_run' and 'inplace' tags.
optdb.register( 'scanOp_make_inplace'
               , opt.in2out(scan_make_inplace,ignore_newtrees=True)
               , 75
               , 'fast_run'
               , 'inplace')
class ScanSaveMem(Optimizer):
""" Graph Optimizer that reduces scan memory consumption """
    def __init__(self):
        # no optimizer-specific state; defer to the base Optimizer setup
        Optimizer.__init__(self)
    def add_requirements(self,env):
        # this optimizer replaces nodes, so the env must support
        # replace_all_validate
        env.extend(toolbox.ReplaceValidate())
def get_int_val(self,x):
# int/constant
if type(x) in [int, float]:
return int(val)
elif isinstance(val, tensor.Constant):
return int(val.value)
else:
return None
def process_node(self, env, node):
# helpful functions
def select_min(x,y):
if x is None:
return y
if y is None:
return x
return tensor.minimum(x,y)
def select_max(x,y):
if x is None:
return y
if y is None:
return x
return tensor.maximum(x,y)
def sanitize(x):
if x is None:
return None
else:
return tensor.as_tensor_variable(x)
shape_of = node.env.shape_feature.shape_of
# 1. Initialization of variables
# Note 1) We do not actually care about outputs representing shared
# variables (those have no intermediate values) so it is safer to
# ignore them and not change them in any way. To simplify the
# optimizations I construct the variable ``c_outs`` ( that counts
# outputs up to those we care) and the list ``init_l`` which for any
# output we care says the length of its initial state. Note that
# defining ``init_l`` for mit_mot sequences is a bit trickier but
# it is safe to set it to 0
op = node.op
c_outs = op.n_mit_mot + op.n_mit_sot + op.n_sit_sot + op.n_nit_sot
init_l = [ 0 for x in xrange(op.n_mit_mot)]
init_l += [ abs(numpy.min(v)) for v in op.tap_array[op.n_mit_mot:] ]
init_l += [ 0 for x in xrange(op.n_nit_sot)]
# 2. Check the clients of each output and see for how many steps
# does scan need to run
# This comparison checks if there is any uncounted output, which
# can only be an output corresponding to a shared variable
# 2.1 Initialize
# global_nsteps is a dictionary having two fields ( 'real' deals
# with int values, 'sym' with symbolic ones) or None
# given that a scan op has k outputs o_1, .. o_k and each
# output has n_j clients c_1^1, c_1^2, .. c_1^{n_1}, c_2^1, ..,
# global_nsteps is None if any of the clients is different
# from a subtensor or its real and sym field equal to
# max(c_i_j.idx_list[0].stop), meaning store up to which maximal
# index(step) for any output scan actually needs to compute
# In other words n_steps should be equal to this maximal !
# Note: if we have a shared variable that gets updated at every step
# of the loop, reducing the number of steps will affect the the
# value of the shared variable after the loop so we need not to
# change the number of steps in that case. To do this we set
# global_nsteps to None which is seen as a flag that nothing needs
# to be done
if len(node.outputs) > c_outs :
global_nsteps = {'real' :-1, 'sym': []}
else:
global_nsteps = None
# Keeps track of the original slices that each client represent
slices = [ None for o in node.outputs]
# A list for each output indicating how many intermediate values
# should be stored. If negative it means none of the intermediate
# values (i.e. the output can be removed since it is not used
# afterwards in the computations), if 0 it means that all
# intermediate values are required, otherwise is up to that number
# of intermediate values
# Note that for mit_mot outputs and shared outputs we can not change
# the number of intermediate steps stored without affecting the
# result of the op
store_steps = [ 0 for o in xrange(op.n_mit_mot)]
store_steps += [-1 for o in node.outputs[op.n_mit_mot:c_outs]]
# Flag that says if an input has changed and we need to do something
# or not
flag_store = False
# 2.2 Loop over the clients
for i,out in enumerate(node.outputs[:c_outs]):
# look at all its clients
slices[i] = []
for cl,_ in out.clients:
# 2.1 outputs of the function
#=> output needs all its intermediate values
if type(cl) == str:
# if the node is actually an output, then
# we need to store the entire thing
global_nsteps = None
slices[i] = None
break
# 2.2 non-subtensor nodes
#=> output needs all its intermediate values
elif not isinstance(cl.op, tensor.basic.Subtensor):
global_nsteps = None
slices[i] = None
break
# 2.3 subtensor nodes
#=> output might need to store just a subset of its values
else:
# 2.3.1 extract idx list of subtensor
this_slice = tensor.basic.get_idx_list(cl.inputs,
cl.op.idx_list)
if this_slice == None:
# if unable to extract idx_list
#=> outputs needs all its intermediate values
global_nsteps = None
slices[i] = None
break
# 2.3.2 extract the begin/end of the first dimension
if i > op.n_mit_mot:
try:
length = shape_of[out][0]
except:
length = node.inputs[0] + init_l[i]
else:
try:
length = shape_of[out][0]
except:
length = out.shape[0]
cf_slice = tensor.basic.get_canonical_form_slice(
this_slice[0], length)
slices[i] += [(cf_slice,this_slice)]
if ( isinstance(this_slice[0],slice) and
this_slice[0].stop is None ):
global_nsteps = None
break
if isinstance(cf_slice[0], slice):
stop = tensor.basic.extract_constant(cf_slice[0].stop)
else:
stop = tensor.basic.extract_constant(cf_slice[0]) + 1
if stop == sys.maxint or stop == length:
stop = None
else:
# there is a **gotcha** here ! Namely, scan returns an
# array that contains the initial state of the output as
# well. Which means that if have a initial state of
# length 3, and you look for 5 steps you get an output y
# of length 8. If you only use y[:5], this does not mean
# that you only need to loop for 5 steps but actually
# only for 2 steps ( the first 3 are the initial state)
stop = stop - init_l[i]
# 2.3.3 we might get away with less number of steps
if stop is not None and global_nsteps is not None:
# yes if it is a tensor
if isinstance(stop, tensor.Variable):
global_nsteps['sym'] += [stop]
# not if it is maxint
elif (type(stop) is int and stop == sys.maxint):
global_nsteps = None
# yes if it is a int k, 0 < k < maxint
elif (type(stop) is int and global_nsteps['real'] < stop):
global_nsteps['real'] = stop
# yes if it is a int k, 0 < k < maxint
elif (type(stop) is int and stop > 0 ):
pass
# not otherwise
else:
global_nsteps = None
# 2.3. Analyze global_nsteps to figure out for how many steps scan
# needs to iterate
if global_nsteps is not None:
nw_steps = node.inputs[0]
# there are some symbolic tensors that limit the number of
# steps
if len(global_nsteps['sym']) == 0 :
sym_steps = None
else:
sym_steps =global_nsteps['sym'][0]
for c in global_nsteps['sym'][:1]:
sym_steps = tensor.maximum(sym_steps, c)
if global_nsteps['real'] >= 0:
real_steps = global_nsteps['real']
else:
real_steps = None
nw_steps = select_min(select_max(sym_steps, real_steps),
node.inputs[0])
else:
nw_steps = node.inputs[0]
global_nsteps = None
# 2.4 Loop over the clients again now looking just to see how many
# intermediate steps to store
for i,out in enumerate(node.outputs[:c_outs]):
# look at all its clients
for cl,_ in out.clients:
if type(cl) == str:
store_steps[i] = 0
break
elif not isinstance(cl.op, tensor.basic.Subtensor):
store_steps[i] = 0
break
else:
this_slice = tensor.basic.get_idx_list(cl.inputs,
cl.op.idx_list)
if this_slice == None:
store_steps[i] = 0
break
if ( isinstance(this_slice[0],slice) and
this_slice[0].start is None):
store_steps[i] = 0
break
if i > op.n_mit_mot:
length = node.inputs[0] + init_l[i]
else:
try:
length = shape_of[out][0]
except:
length = out.shape[0]
cf_slice = tensor.basic.get_canonical_form_slice(
this_slice[0],length)
if isinstance(cf_slice[0], slice):
start = tensor.basic.extract_constant(cf_slice[0].start)
else:
start = tensor.basic.extract_constant(cf_slice[0])
if start == 0 or store_steps[i] == 0:
store_steps[i] = 0
else:
pval = select_max(nw_steps -start + init_l[i], init_l[i])
if store_steps[i] != -1:
pval = select_max(pval, store_steps[i])
store_steps[i] = pval
flag_store = True
orphane_outs = [ i for i,x in enumerate(store_steps)
if (type(x) is int) and (x<0) ]
flag_store = flag_store or (len(orphane_outs) > 0 )
# 3. is there anything to change ?
if (flag_store or global_nsteps is not None):
# 3.1 initialize inputs for the new scan
old_outputs = []
nw_inputs = list(node.inputs)
nw_inputs[0] = nw_steps
# 3.2 check orphane outputs to see if we can eliminate any
required,not_required = \
scan_utils.scan_can_remove_outs(node.op
, orphane_outs)
# 3.3. compose replace pairs for those nodes that need not
# to store everything in memory ( or ar orphane and required
# by the inner function .. )
replaced_outs = []
offset = 1 + op.n_seqs + op.n_mit_mot
for idx,_val in enumerate(store_steps[op.n_mit_mot:]):
i = idx + op.n_mit_mot
if not( type(_val) is int and _val <=0 and i not in required):
if idx+op.n_mit_mot in required:
val = 1
else:
val = _val
# If the memory for this output has been pre-allocated
# before going into the scan op (by an alloc node)
if idx < op.n_mit_sot + op.n_sit_sot:
_nw_input = nw_inputs[offset+idx].owner.inputs[1]
nw_input = scan_utils.expand( _nw_input, val - init_l[i] )
nw_inputs[offset+idx] = nw_input
replaced_outs.append(op.n_mit_mot + idx)
odx = op.n_mit_mot + idx
old_outputs += [(odx, [x[0].outputs[0] for x in
node.outputs[odx].clients])]
# If there is no memory pre-allocated for this output
elif idx < op.n_mit_sot + op.n_sit_sot + op.n_nit_sot:
pos = ( op.n_mit_mot + idx + op.n_seqs
+ 1 + op.n_shared_outs )
if nw_inputs[pos] == node.inputs[0]:
nw_inputs[pos] = val
odx = op.n_mit_mot + idx
replaced_outs.append(odx)
old_outputs += [(odx, [x[0].outputs[0] for x in
node.outputs[odx].clients])]
# 3.4. Recompute inputs for everything else based on the new
# number of steps
if global_nsteps is not None:
for idx, val in enumerate(store_steps[op.n_mit_mot:]):
if val == 0:
if idx < op.n_mit_sot + op.n_sit_sot:
_nw_input = nw_inputs[offset+idx].owner.inputs[1]
odx = op.n_mit_mot + idx
nw_input = scan_utils.expand(_nw_input, nw_steps)
nw_inputs[offset+idx] = nw_input
elif idx < (op.n_mit_sot + op.n_sit_sot +
+ op.n_nit_sot):
in_idx = offset+idx+op.n_shared_outs
if nw_inputs[in_idx] == node.inputs[0]:
nw_inputs[in_idx] =nw_steps
odx = op.n_mit_mot + idx
# 3.5 Remove unwanted orphane outputs
(inps, outs, info, node_ins, compress_map) = \
scan_utils.compress_outs(op, not_required, nw_inputs)
# 3.6 Compose the new scan
new_outs = scan_op.Scan(inps
, outs
, info).make_node(*node_ins).outputs
old_new = []
# 3.7 Get replace pairs for those outputs that do not change
# the number of intermediate steps stored
for idx,sl in enumerate(slices):
if global_nsteps and sl is not None and store_steps[idx] == 0:
for hdx,cl in enumerate(node.outputs[idx].clients):
cnf_slice, old_slices = sl[hdx]
# Sanitize the nw_slice by converting ints back into
# constants :) I only need to do this for the first
# slice since that is the only slice
if isinstance(cnf_slice[0], slice):
fslice = slice(
sanitize(cnf_slice[0].start),
sanitize(cnf_slice[0].stop),
sanitize(cnf_slice[0].step)
)
else:
fslice = sanitize(cnf_slice[0])
nw_slice = (fslice,) + tuple(old_slices[1:])
nw_pos = compress_map[idx]
nw_out = new_outs[nw_pos]
subtens = tensor.basic.Subtensor(nw_slice)
# slice inputs
sl_ins = tensor.basic.Subtensor.collapse(
nw_slice
, lambda entry: isinstance(entry
, tensor.Variable))
new_o = subtens.make_node(new_outs[nw_pos],
*sl_ins).outputs[0]
if new_o.ndim > 0:
new_o = new_o[::cnf_slice[1]]
replaced_outs.append(idx)
old_new += [(cl[0].outputs[0], new_o)]
# 3.8. Get replace pairs for those outputs that change
# the number of stored intermediate steps
for pos, old_outs in old_outputs:
nw_pos = compress_map[pos]
nw_out = new_outs[nw_pos]
for k,old in enumerate(old_outs):
# Get the correct slice
cnf_slice, old_slices = slices[pos][k]
if type(cnf_slice[0]) is slice:
start = ( cnf_slice[0].start - nw_steps -
init_l[pos] + store_steps[pos] )
if ( cnf_slice[0].stop is not None and
cnf_slice[0].stop != sys.maxint ):
stop = ( cnf_slice[0].stop - nw_steps -
init_l[pos] + store_steps[pos])
else:
stop = None
nw_slice = ( (slice(sanitize(start),
sanitize(stop),
sanitize(cnf_slice[0].step)),) +
tuple(old_slices[1:]) )
else:
position = (cnf_slice[0] - nw_steps -
init_l[pos] + store_steps[pos] )
nw_slice = (sanitize(position),) + tuple(old_slices[1:])
subtens = tensor.basic.Subtensor(nw_slice)
sl_ins = tensor.basic.Subtensor.collapse(
nw_slice
, lambda entry: isinstance(entry
, tensor.Variable))
new_o = subtens.make_node(new_outs[nw_pos],
*sl_ins).outputs[0]
if new_o.ndim > 0:
new_o = new_o[::cnf_slice[1]]
old_new += [(old, new_o)]
# 3.9. Get replace pairs for all other nodes
if flag_store or global_nsteps is not None:
for idx,o in enumerate(node.outputs):
if not (idx in replaced_outs) and not idx in not_required:
nw_pos = compress_map[idx]
old_new += [(o,new_outs[nw_pos])]
env.replace_all_validate(old_new, reason = 'scan_save_mem')
def apply(self, env):
nodelist = list(env.toposort())
old_new = []
for node in nodelist:
op = node.op
if isinstance(op, scan_op.Scan):
self.process_node(env, node)
# Just before specialize to have the other optimization
# like constant folding being applied
# This don't introduce inplace.
optdb.register( 'scanOp_save_mem'
, ScanSaveMem()
, 1.99
, 'fast_run')
# NOTE: the ScanMerge optimizer below is disabled -- it is kept inside a
# string literal because it is unfinished (e.g. it references an
# undefined ``Clisces`` name) and is never registered with the optimizer
# database.  It is preserved here only as a starting point for a future
# implementation.
'''
class ScanMerge(Optimizer):
    """ Graph Optimizer that reduces scan memory consumption """
    def __init__(self):
        Optimizer.__init__(self)
    def add_requirements(self,env):
        env.extend(toolbox.ReplaceValidate())
    def merge(self, A,B):
        # Step 1. Identify common inputs
        equal_ins = []
        for Aidx, Ainp in enumerate(A.inputs):
            if Ainp in B.inputs:
                equal_ins += [ (Aidx, B.inputs.index(Ainp) ) ]
        # Step 2. Get their slices together with taps
        Cslices = {}
        for Aidx,Bidx in equal_ins:
            Aslices = self.get_slice(A, Aidx)
            Bslices = self.get_slice(B, Bidx)
            Cslices = Aslices.copy()
            for tap, var in Bslices.iteritems():
                if tap in Cslices :
                    cvar = Clisces[tap]
                    replace = {var: cvar}
                else:
                    Cslices[tap] = var
        # two outputs are equal if they implement same computations
        # and start from the same inputs
        # Step 2. Get their corresponding slices in the input
        # Step 3.
    def apply(self, env):
        nodelist = list(env.toposort())
        cond_nodes = [ x for x in nodelist if x.op.__class__.__name__=='Cond']
        scan_nodes = [ x for x in nodelist if x.op.__class__.__name__=='Scan']
        # Having lazy ifs in the graph complicates a bit things, and for
        # now I will not treat that case
        if len(cond_nodes) > 0:
            return False
        tomerge_nodes = []
        for try_node in scan_nodes:
            can_merge = False
            for idx in xrange(len(tomerge_nodes)):
                node = tomerge_nodes[idx]
                if scan_utils.equal_computations(
                    node.inputs[0], try_node.inputs[0], strict = True):
                    can_merge = True
                    try:
                        new_node = self.merge(try_node, node)
                        position = idx
                    except NotImplementedError:
                        can_merge = False
            if not can_merge:
                tomerge_nodes += [try_node]
            else:
                tomerge_nodes[position] = new_node
optdb.register( 'scanOp_merge'
              , ScanMerge()
              , 2.39
              , 'fast_run')
'''
from theano.sandbox import cuda
# The GPU-specific scan helpers and optimization below are only defined
# when a CUDA-capable device/runtime is available.
if cuda.cuda_available:
    from theano.sandbox.cuda.basic_ops import *
    from theano.sandbox.cuda.type import CudaNdarrayType
    from theano import sandbox
    from theano.sandbox.cuda.opt import register_opt, local_optimizer
    from theano import config
def safe_to_gpu(x):
if (isinstance(x.type, TensorType) and
x.type.dtype == config.floatX):
return gpu_from_host(x)
else:
return x
def safe_to_cpu(x):
if isinstance(x.type, CudaNdarrayType):
return host_from_gpu(x)
else:
return x
def tensor_to_cuda(x):
if (isinstance(x.type, TensorType) and
x.type.dtype == config.floatX):
y = CudaNdarrayType( broadcastable = x.type.broadcastable)()
if x.name :
y.name = x.name +'[cuda]'
return y
else:
return x
@register_opt()
@local_optimizer([])
def gpuScanOptimization(node):
"""
gpu_from_host(scan) -> GPUscan(gpu_from_host)
scan(host_from_gpu) -> host_from_gpu(GPUscan)
"""
if node.op == gpu_from_host:
host_input = node.inputs[0]
if ( host_input.owner
and host_input.owner.op == scan_op.Scan
and not host_input.owner.op.info['gpu']):
thescan = host_input.owner.op
inputs = host_input.owner.inputs
# I need to cast thescan.inputs to gpuhost stuff
info = thescan.info.copy()
info['gpu'] = True
nw_ins = [ inputs[0]]
e = ( thescan.n_seqs
+ thescan.n_mit_mot
+ thescan.n_mit_sot
+ thescan.n_sit_sot
+ thescan.n_shared_outs)
nw_ins += [safe_to_gpu(x) for x in inputs[1:e] ]
b = e
e = e + thescan.n_nit_sot + thescan.n_other_ignore
nw_ins += inputs[b:e]
nw_ins += [safe_to_gpu(x) for x in inptus[e:] ]
scan_ins = [ tensor_to_cuda(x) for x in thescan.inputs]
scan_outs = [ safe_to_gpu(x) for x in thescan.outputs ]
scan_outs = scan_utils.clone(
scan_outs
, replace = zip(thescan.inputs
,[safe_to_cpu(x) for x in scan_ins]))
nw_op = scan_op.Scan( scan_ins
, scan_outs
, info).make_node(*nw_ins)
_outputs = nw_op.outputs
outputs = [safe_to_cpu(x) for x in _outputs]
return outputs
if (type(node.op) == scan_op.Scan
and not node.op.info['gpu']):
if numpy.any([(i.owner and i.owner.op == host_from_gpu)
for i in node.inputs]):
thescan = node.op
info = thescan.info.copy()
info['gpu'] = True
inputs = node.inputs
nw_ins = [ inputs[0]]
e = ( 1+ thescan.n_seqs
+ thescan.n_mit_mot
+ thescan.n_mit_sot
+ thescan.n_sit_sot
+ thescan.n_shared_outs)
nw_ins += [safe_to_gpu(x) for x in inputs[1:e] ]
b = e
e = e + thescan.n_nit_sot + thescan.n_other_ignore
nw_ins += inputs[b:e]
nw_ins += [safe_to_gpu(x) for x in inputs[e:] ]
scan_ins = [ tensor_to_cuda(x) for x in thescan.inputs]
scan_outs = [ safe_to_gpu(x) for x in thescan.outputs ]
scan_outs = scan_utils.clone(
scan_outs
, replace = zip(thescan.inputs
,[safe_to_cpu(x) for x in scan_ins]))
info['gpu'] = True
_outputs = scan_op.Scan(
scan_ins
, scan_outs
, info).make_node(*nw_ins).outputs
outputs = [safe_to_cpu(x) for x in _outputs]
return outputs
return False
"""
This module provides the Scan Op
Scanning is a general form of recurrence, which can be used for looping.
The idea is that you *scan* a function along some input sequence, producing
an output at each time-step that can be seen (but not modified) by the
function at the next time-step. (Technically, the function can see the
previous K time-steps of your outputs and L time steps (from the past and
future) of your inputs.
So for example, ``sum()`` could be computed by scanning the ``z+x_i``
function over a list, given an initial state of ``z=0``.
Special cases:
* A *reduce* operation can be performed by returning only the last
output of a ``scan``.
* A *map* operation can be performed by applying a function that
ignores previous steps of the outputs.
Often a for-loop can be expressed as a ``scan()`` operation, and ``scan`` is
the closest that theano comes to looping. The advantage of using ``scan``
over for loops is that it allows the number of iterations to be a part of
the symbolic graph.
The Scan Op should typically be used by calling any of the following
functions: ``scan()``, ``map()``, ``reduce()``, ``foldl()``,
``foldr()``.
"""
__docformat__ = 'restructedtext en'
__authors__ = ( "Razvan Pascanu "
"Frederic Bastien "
"James Bergstra "
"Pascal Lamblin " )
__copyright__ = "(c) 2010, Universite de Montreal"
__contact__ = "Razvan Pascanu <r.pascanu@gmail>"
import itertools
import logging
import numpy
from theano.compile import SharedVariable, function
from theano import compile
from theano import gof
from theano.tensor import opt
from theano import tensor
from theano import config
import scan_op
from scan_op import safe_new, safe_to_cpu
import scan_utils
from scan_utils import safe_new, safe_to_cpu, traverse
from theano.sandbox import cuda
# Logging helpers; messages are tagged with the 'theano.scan' logger so
# scan diagnostics can be filtered by users.
_logger = logging.getLogger('theano.scan')
def warning(*msg):
    _logger.warning('WARNING theano.scan: ' + ' '.join(msg))
def info(*msg):
    _logger.info('INFO theano.scan: ' + ' '.join(msg))
def scan( fn
, sequences = None
, outputs_info = None
, non_sequences = None
, n_steps = None
, truncate_gradient = -1
, go_backwards = False
, mode = None
, name = None ):
"""
This function constructs and applies a Scan op to the provided
arguments.
:param fn:
``fn`` is a function that describes the operations involved in one
step of ``scan``. ``fn`` should construct variables describing the
output of one iteration step. It should expect as input theano
variables representing all the time slices of the input sequences
and outputs, and all other arguments given to scan as
``non_sequences``. The order in which scan passes this variables
to ``fn`` is the following :
* all time slices of the first sequence
* all time slices of the second sequence
* ...
* all time slices of the last sequence
* all time slices of the first output
* all time slices of the second otuput
* ...
* all time slices of the last output
* all other arguments (the list given as `non_sequences` to
scan)
The order of the sequences is the same as the one in the list
`sequences` given to scan. The order of the outputs is the sane
as the order of ``output_info``. For any sequence or output the
order of the time slices is the same as the order of the time
taps provided. For example if one writes the following :
.. code-block:: python
scan(fn, sequences = [ dict( Sequence1, taps = [-3,2,-1])
, Sequence2
, dict( Sequence3, taps = 3) ]
, outputs_info = [ dict( Output1, taps = [-3,-5])
, dict( Output2, taps = None)
, Output3 ]
, non_sequences = [ Argument1, Argument 2])
``fn`` should expect the following arguments in this given order:
#. ``Sequence1[t-3]``
#. ``Sequence1[t+2]``
#. ``Sequence1[t-1]``
#. ``Sequence2[t]``
#. ``Sequence3[t+3]``
#. ``Output1[t-3]``
#. ``Output1[t-5]``
#. ``Output3[t-1]``
#. ``Argument1``
#. ``Argument2``
The list of ``non_sequences`` can also contain shared variables
used in the function, though ``scan`` is able to figure those
out on its own so they can be skipped. For the clarity of the
code we recommand though to provide them to scan.
The function is expected to return two things. One is a list of
outputs ordered in the same order as ``outputs_info``, with the
difference that there should be only one output variable per
output initial state (even if no tap value is used). Secondly
`fn` should return an update dictionary ( that tells how to
update any shared variable after each iteration ste). The
dictionary can optionally be given as a list of tuples. There is
no constraint on the order of these two list, ``fn`` can return
either ``(outputs_list, update_dictionary)`` or
``(update_dictionary, outputs_list)`` or just one of the two (in
case the other is empty).
:param sequences:
``sequences`` is the list of Theano variables or dictionaries
describing the sequences ``scan`` has to iterate over. If a
sequence is given as wrapped in a dictionary a set of optional
information can be provided about the sequence. The dictionary
should have the following keys:
* ``input`` (*mandatory*) -- Theano variable representing the
sequence.
* ``taps`` -- Temporal taps of the sequence required by ``fn``.
They are provided as a list of integers, where a value ``k``
impiles that at iteration step ``t`` scan will pass to ``fn``
the slice ``t+k``. Default value is ``[0]``
Any Theano variable in the list ``sequences`` is automatically
wrapped into a dictionary where ``taps`` is set to ``[0]``
:param outputs_info:
``outputs_info`` is the list of Theano variables or dictionaries
describing the initial state of the outputs computed
recurrently. When this initial states are given as dictionary
optional information can be provided about the output corresponding
to these initial states. The dictionary should have the following
keys:
* ``initial`` -- Theano variable that represents the initial
state of a given output. In case the output is not computed
recursively (think of a map) and does not require a initial
state this field can be skiped. Given that only the previous
time step of the output is used by ``fn`` the initial state
should have the same shape as the output. If multiple time
taps are used, the initial state should have one extra
dimension that should cover all the possible taps. For example
if we use ``-5``, ``-2`` and ``-1`` as past taps, at step 0,
``fn`` will require (by an abuse of notation) ``output[-5]``,
``output[-2]`` and ``output[-1]``. This will be given by
the initial state, which in this case should have the shape
(5,)+output.shape. If this variable containing the initial
state is called ``init_y`` then ``init_y[0]`` *corresponds to*
``output[-5]``. ``init_y[1]`` *correponds to* ``output[-4]``,
``init_y[2]`` corresponds to ``output[-3]``, ``init_y[3]``
coresponds to ``output[-2]``, ``init_y[4]`` corresponds to
``output[-1]``. While this order might seem strange, it comes
natural from splitting an array at a given point. Assume that
we have a array ``x``, and we choose ``k`` to be time step
``0``. Then our initial state would be ``x[:k]``, while the
output will be ``x[k:]``. Looking at this split, elements in
``x[:k]`` are ordered exactly like those in ``init_y``.
* ``taps`` -- Temporal taps of the output that will be pass to
``fn``. They are provided as a list of *negative* integers,
where a value ``k`` implies that at iteration step ``t`` scan
will pass to ``fn`` the slice ``t+k``.
* ``return_steps`` -- Integer representing the number of steps
to return for the current steps. For example, if ``k`` is
provided, ``scan`` will return ``output[-k:]``. This is meant
as a hint, based on ``k`` and the past taps of the outputs used,
scan can be smart about the amount of memory it requires to
store intermidiate results. If not given, or ``0``, ``scan``
will return all computed steps.
``scan`` will follow this logic if partial information is given:
* If an output is not wrapped in a dictionary, ``scan`` will wrap
it in one assuming that you use only the last step of the output
(i.e. it makes your tap value list equal to [-1]).
* If you wrap an output in a dictionary and you do not provide any
taps but you provide an initial state it will assume that you are
using only a tap value of -1.
* If you wrap an output in a dictionary but you do not provide any
initial state, it assumes that you are not using any form of
taps.
* If you provide a ``None`` instead of a variable or a dictionary
``scan`` assumes that you will not use any taps for this output
(like for example in case of a map)
If ``outputs_info`` is an empty list or None, ``scan`` assumes
that no tap is used for any of the otuputs. If information is
provided just for a subset of the outputs an exception is
raised (because there is no convention on how scan should map
the provided information to the outputs of ``fn``)
:param non_sequences:
``non_sequences`` is the list of arguments that are passed to
``fn`` at each steps. Once can opt to exclude shared variables
used in ``fn`` from this list.
:param n_steps:
``n_steps`` is the number of steps to iterate given as an int
or Theano scalar. If any of the input sequences do not have
enough elements, scan will produce a warning and run only for
the maximal amount of steps it can. If the *value is 0* the
outputs will have *0 rows*. If the value is negative, ``scan``
run backwards in time. If the ``go_backwards`` flag is already
set and also ``n_steps`` is negative, ``scan`` will run forward
in time. If n stpes is not provided, or evaluates to ``None``,
``inf`` or ``NaN``, ``scan`` will figure out the amount of
steps it should run given its input sequences.
:param truncate_gradient:
``truncate_gradient`` is the number of steps to use in truncated
BPTT. If you compute gradients through a scan op, they are
computed using backpropagation through time. By providing a
different value then -1, you choose to use truncated BPTT instead
of classical BPTT, where you go for only ``truncate_gradient``
number of steps back in time.
:param go_backwards:
``go_backwards`` is a flag indicating if ``scan`` should go
backwards through the sequences. If you think of each sequence
as indexed by time, making this flag True would mean that
``scan`` goes back in time, namely that for any sequence it
starts from the end and goes towards 0.
:param name:
When profiling ``scan`` it is crucial to provide a name for any
instance of ``scan``. The profiler will produce an overall
profile of your code as well as profiles for doing one iteration
step for each instance of ``scan``. The ``name`` of the instance is
how you differentiate between all these profiles.
:param mode:
It is recommended to leave this argument to None, especially
when profiling ``scan`` (otherwise the results are not going to
be accurate). If you prefer the computations of one step os
``scan`` to be done differently then the entire function set
this parameters (see ``theano.function`` for details about
possible values and their meaning).
:rtype: tuple
:return: tuple of the form (outputs, updates); ``outputs`` is either a
Theano variable or a list of Theano variables representing the
outputs of ``scan`` (in the same order as in
``outputs_info``). ``updates`` is a dictionary specifying the
update rules for all shared variables used in the scan
operation. This dictionary should be passed to
``theano.function`` when you compile your function.
"""
# General observation : this code is executed only once, at creation
# of the computational graph, so we don't yet need to be smart about
# anything (to speed things up)
##
### Step 1. Wrap all inputs in dictionaries and add default values
##
# check if inputs are just single variables instead of lists
def wrap_into_list(x):
'''
Wrap the input into a list if it is not already a list
'''
if x is None:
return []
elif not isinstance(x, (list,tuple)):
return [x]
else:
return list(x)
seqs = wrap_into_list(sequences)
outs_info = wrap_into_list(outputs_info)
non_seqs = wrap_into_list(non_sequences)
# If we provided a known number of steps ( before compilation)
# and if that number is 1 or -1, then we can skip the Scan Op,
# and just apply the inner function once
# To do that we check here to see the nature of n_steps
n_fixed_steps = None
if isinstance( n_steps, (float,int)):
n_fixed_steps = int(n_steps)
else:
try :
n_fixed_steps = opt.get_constant_value(n_steps)
except:
n_fixed_steps = None
# Check n_steps is an int
if ( hasattr(n_steps,'dtype') and
str(n_steps.dtype)[:3] not in ('uin','int') ):
raise ValueError(' n_steps must be an int. dtype provided '
'is %s'%n_steps.dtype)
# compute number of sequences and number of outputs
n_seqs = len(seqs)
n_outs = len(outs_info)
return_steps = {}
# wrap sequences in a dictionary if they are not already dictionaries
for i in xrange(n_seqs):
if not isinstance(seqs[i], dict) :
seqs[i] = dict(input=seqs[i], taps=[0])
elif seqs[i].get('taps',None):
seqs[i]['taps'] = wrap_into_list(seqs[i]['taps'])
elif seqs[i].get('taps',True) is None:
# seqs dictionary does not have the ``taps`` key
seqs[i]['taps'] = [0]
# wrap outputs info in a dictionary if they are not already in one
for i in xrange(n_outs):
if outs_info[i]:
if isinstance(outs_info[i], dict):
if outs_info[i].get('return_steps', None):
return_steps[i] = outs_info[i]['return_steps']
if not isinstance(outs_info[i], dict):
# by default any output has a tap value of -1
outs_info[i] = dict(initial=outs_info[i], taps = [-1])
elif (not outs_info[i].get('initial',None) and
outs_info[i].get('taps',None)):
# ^ no initial state but taps provided
raise ValueError( ( 'If you are using slices of an output '
'you need to provide a initial state '
'for it'), outs_info[i] )
elif (outs_info[i].get('initial',None) and
not outs_info[i].get('taps',None)):
# ^ initial state but taps not provided
if outs_info[i].has_key('taps'):
# ^ explicitly provided a None for taps
warning (' Output %s ( index %d) has a initial state '
' but taps is explicitly set to None ' % (
outs_info[i]['initial'].name
, i) )
outs_info[i]['taps'] = [-1]
else:
# if a None is provided as the output info we replace it
# with an empty dict() to simplify handling
outs_info[i] = dict()
##
### Step 2. Generate inputs and outputs of the inner functions
### for compiling a dummy function (Iteration #1)
##
# create theano inputs for the recursive function
# note : this is a first batch of possible inputs that will
# be compiled in a dummy function; we used this dummy
# function to detect shared variables and their updates
# and to construct a new and complete list of inputs and
# outputs
n_seqs = 0
scan_seqs = [] # Variables passed as inputs to the scan op
inner_seqs = [] # Variables passed as inputs to the inner function
inner_slices = [] # Actual slices if scan is removed from the picture
# go through sequences picking up time slices as needed
for i,seq in enumerate(seqs):
# Note that you can have something like no taps for
# a sequence, though is highly unlikely in practice
if 'taps' in seq:
# go through the indicated slice
mintap = numpy.min(seq['taps'])
maxtap = numpy.max(seq['taps'])
for k in seq['taps']:
# create one slice of the input
# Later on, if we decide not to use scan because we are
# going for just one step, it makes things easier if we
# compute the correct outputs here. This way we can use
# the output of the lambda expression directly to replace
# the output of scan.
# If not we need to use copies, that will be replaced at
# each frame by the corresponding slice
nw_slice = seq['input'][0].type()
actual_slice = seq['input'][k-mintap]
# Add names to slices for debugging and pretty printing ..
# that is if the input already has a name
if seq['input'].name:
if k > 0:
nw_name = seq['input'].name + '[t+%d]'%k
elif k == 0:
nw_name = seq['input'].name + '[t]'
else:
nw_name = seq['input'].name + '[t%d]'%k
nw_slice.name = nw_name
# We cut the sequence such that seq[i] to correspond to
# seq[i-k]
if maxtap < 0:
offset = abs(maxtap)
else:
offset = 0
if maxtap == mintap and maxtap != 0:
nw_seq =seq['input'][:abs(maxtap)]
elif maxtap -k != 0 :
nw_seq = seq['input'][offset +k -mintap: -(maxtap -k)]
else:
nw_seq = seq['input'][offset +k -mintap: ]
if go_backwards:
nw_seq = nw_seq[::-1]
scan_seqs.append( nw_seq )
inner_seqs.append( nw_slice )
inner_slices.append( actual_slice )
n_seqs += 1
# Since we've added all sequences now we need to level them up based on
# n_steps or their different shapes
lengths_vec = []
for seq in scan_seqs:
lengths_vec.append( seq.shape[0] )
if not scan_utils.check_NaN_Inf_None(n_steps):
# ^ N_steps should also be considered
lengths_vec.append( tensor.as_tensor(n_steps) )
if len(lengths_vec) == 0 :
# ^ No information about the number of steps
raise ValueError(' No information about the number of steps '
'provided. Either provide a value for '
'n_steps argument of scan or provide an input '
'sequence')
actual_n_steps = lengths_vec[0]
for contestant in lengths_vec[1:]:
actual_n_steps = tensor.minimum(actual_n_steps, contestant)
# Add names -- it helps a lot when debugging
for (nw_seq, seq) in zip(scan_seqs, seqs):
if seq['input'].name:
nw_seq.name = seq['input'].name + '[%d:]'%k
# Conventions :
# mit_mot = multiple input taps, multiple output taps ( only provided
# by the gradient function )
# mit_sot = multiple input taps, single output tap (t + 0)
# sit_sot = single input tap, single output tap (t + 0)
# nit_sot = no input tap, single output tap (t + 0)
# MIT_MOT -- not provided by the user only by the grad function
n_mit_mot = 0
n_mit_mot_outs = 0
mit_mot_scan_inputs = []
mit_mot_inner_inputs = []
mit_mot_inner_outputs = []
mit_mot_out_slices = []
mit_mot_rightOrder = []
# SIT_SOT -- provided by the user
n_mit_sot = 0
mit_sot_scan_inputs = []
mit_sot_inner_inputs = []
mit_sot_inner_slices = []
mit_sot_inner_outputs = []
mit_sot_return_steps = {}
mit_sot_tap_array = []
mit_sot_rightOrder = []
n_sit_sot = 0
sit_sot_scan_inputs = []
sit_sot_inner_inputs = []
sit_sot_inner_slices = []
sit_sot_inner_outputs = []
sit_sot_return_steps = {}
sit_sot_rightOrder = []
# go through outputs picking up time slices as needed
for i,init_out in enumerate(outs_info):
# Note that our convention dictates that if an output uses
# just the previous time step, as a initial state we will only
# provide a tensor of the same dimension as one time step; This
# makes code much cleaner for those who do not use taps. Otherwise
# they would always had to shape_padleft the initial state ..
# which is ugly
if init_out.get('taps', None) == [-1]:
actual_arg = init_out['initial']
arg = safe_new(init_out['initial'])
if init_out['initial'].name:
arg.name = init_out['initial'].name+'[t-1]'
# We need now to allocate space for storing the output and copy
# the initial state over. We do this using the expand function
# defined in scan utils
sit_sot_scan_inputs.append(
scan_utils.expand(
tensor.unbroadcast(
tensor.shape_padleft(actual_arg), 0)
, actual_n_steps
) )
sit_sot_inner_slices.append(actual_arg)
if i in return_steps:
sit_sot_return_steps[n_sit_sot] = return_steps[i]
sit_sot_inner_inputs.append( arg )
sit_sot_rightOrder.append( i )
n_sit_sot += 1
elif init_out.get('taps',None):
if numpy.any(numpy.array(init_out.get('taps',[])) > 0):
# Make sure we do not have requests for future values of a
# sequence we can not provide such values
raise ValueError('Can not use future taps of outputs'
, init_out)
# go through the taps
mintap = abs(numpy.min(init_out['taps']))
mit_sot_tap_array.append( init_out['taps'] )
idx_offset = abs(numpy.min(init_out['taps']))
# Sequence
mit_sot_scan_inputs.append(
scan_utils.expand( init_out['initial'][:mintap]
, actual_n_steps) )
if i in return_steps:
mit_sot_return_steps[n_mit_sot] = return_steps[i]
mit_sot_rightOrder.append( i )
n_mit_sot += 1
for k in init_out['taps']:
# create a new slice
actual_nw_slice = init_out['initial'][k+mintap]
nw_slice = init_out['initial'][0].type()
# give it a name or debugging and pretty printing
if init_out['initial'].name:
if k > 0:
nw_slice.name = ( init_out['initial'].name +
'[t+%d]'%k )
elif k == 0:
nw_slice.name = init_out['initial'].name + '[t]'
else:
nw_slice.name = ( init_out['initial'].name +
'[t%d]'%k )
mit_sot_inner_inputs.append( nw_slice )
mit_sot_inner_slices.append( actual_nw_slice )
#NOTE: there is another case, in which we do not want to provide
# any previous value of the output to the inner function (i.e.
# a map); in that case we do not have to do anything ..
# Re-order args
max_mit_sot = numpy.max( [-1] + mit_sot_rightOrder ) + 1
max_sit_sot = numpy.max( [-1] + sit_sot_rightOrder ) + 1
n_elems = numpy.max( [ max_mit_sot, max_sit_sot ] )
_ordered_args = [[] for x in xrange(n_elems)]
offset = 0
for idx in xrange(n_mit_sot):
n_inputs = len(mit_sot_tap_array[idx])
if n_fixed_steps in [1,-1]:
_ordered_args[mit_sot_rightOrder[idx]] = \
mit_sot_inner_slices[offset:offset+n_inputs]
else:
_ordered_args[mit_sot_rightOrder[idx]] = \
mit_sot_inner_inputs[offset:offset+n_inputs]
offset += n_inputs
for idx in xrange(n_sit_sot):
if n_fixed_steps in [1,-1]:
_ordered_args[sit_sot_rightOrder[idx]] = \
[ sit_sot_inner_slices[idx] ]
else:
_ordered_args[sit_sot_rightOrder[idx]] = \
[ sit_sot_inner_inputs[idx] ]
ordered_args = []
for ls in _ordered_args:
ordered_args += ls
if n_fixed_steps in [1,-1]:
args = (inner_slices +
ordered_args +
non_seqs )
else:
args = ( inner_seqs +
ordered_args +
non_seqs )
# add only the non-shared variables to the arguments of the dummy
# function [ a function should not get shared variables as input ]
# this could happen if for example the initial state of an output is a
# shared variable for which we use only the last step (i.e. no
# subtensort is applied to the shared variable )
dummy_args = [arg for arg in args
if not isinstance(arg, SharedVariable)]
# when we apply the lambda expression we get a mixture of update rules
# and outputs that needs to be separated
outputs, updates = scan_utils.get_updates_and_outputs(fn(*args))
##
### Step 3. Check if we actually need scan and remove it if we don't
##
if n_fixed_steps in [1, -1]:
# We do not need to use the scan op anymore, so we can just return
# the outputs and updates we have
for pos, inner_out in enumerate(outputs):
# we need to see if we need to pad our sequences with an
# unbroadcastable dimension; case example : we return an
# output for which we want all intermediate. If n_steps is 1
# then, if we return the output as given by the innner function
# this will represent only a slice and it will have one
# dimension less.
if ( isinstance(inner_out.type, tensor.TensorType) and
return_steps.get(pos, 0) != 1):
outputs[pos] = tensor.unbroadcast(
tensor.shape_padleft(inner_out),0)
if len(outputs) == 1:
outputs = outputs[0]
return (outputs, updates)
##
### Step 4. Compile the dummy function
##
# We can now compile a dummy function just to see what shared variable
# we have and what are their update rules (note that the user has
# the option not to pass the shared variable to scan, so we need to
# pick them manually and add them to scan)
# make the compilation as fast as possible by not applying any
# optimization or conversion to C [ note this region is not important
# for performance so we can do stuff as unoptimal as we wish ]
# extract still missing inputs (there still might be so) and add them
# as non sequences at the end of our args
all_inputs = itertools.ifilter(
lambda x: ( isinstance(x, gof.Variable) and
not isinstance(x, SharedVariable) and
not isinstance(x, gof.Constant) ),
gof.graph.inputs( outputs) )
extra_inputs = filter( lambda x: x not in args,
all_inputs)
non_seqs += extra_inputs
## Note we do not use all_inputs directly since the order of variables
## in args is quite important
dummy_args += extra_inputs
dummy_f = function( dummy_args
, outputs
, updates = updates
, mode = compile.mode.Mode(linker='py',
optimizer=None) )
##
### Step 5. Re-arange inputs of scan into a more strict order
##
## Step 5.0 Check the outputs of the dummy function to see if they
## match with user provided data
# if the number of outputs to the function does not match the number of
# assumed outputs until now (provided by the user) there can be
# only one explanation: No information is provided for any of the
# outputs (i.e. we are dealing with a map)
if not ( len(dummy_f.maker.outputs) == n_outs or outs_info == []):
raise ValueError('Please provide None as output_info for '
'any output that does not feed back into '
'scan (i.e. it behaves like a map) ')
if outs_info == []:
n_outs = len(dummy_f.maker.outputs)
outs_info = [ dict() for x in xrange(n_outs) ]
## Step 5.1 Outputs with taps different then -1
for i, out in enumerate(outs_info):
if 'taps' in out and out['taps'] != [-1]:
mit_sot_inner_outputs.append( outputs[i])
## Step 5.2 Outputs with tap equal to -1
for i, out in enumerate(outs_info):
if 'taps' in out and out['taps'] == [-1]:
sit_sot_inner_outputs.append( outputs[i] )
## Step 5.3 Outputs that correspond to update rules of shared variables
givens = {}
n_shared_outs = 0
shared_scan_inputs = []
shared_inner_inputs = []
shared_inner_outputs = []
for input in dummy_f.maker.expanded_inputs:
if isinstance(input.variable, SharedVariable) and input.update:
new_var = safe_new(input.variable)
if input.variable.name:
new_var.name = input.variable.name + '_copy'
shared_inner_inputs.append( new_var )
shared_scan_inputs.append( input.variable )
shared_inner_outputs.append( input.update )
givens[input.variable] = new_var
n_shared_outs += 1
## Step 5.4 Outputs with no taps used in the input
n_nit_sot = 0
nit_sot_inner_outputs = []
nit_sot_return_steps = {}
nit_sot_rightOrder = []
for i,out in enumerate(outs_info):
if not 'taps' in out:
nit_sot_inner_outputs.append( outputs[i] )
if i in return_steps:
nit_sot_return_steps[n_nit_sot] = return_steps[i]
nit_sot_rightOrder.append( i )
n_nit_sot += 1
## Step 5.5 Sequences with no taps used
n_other_ignore = 0
ignore_scan_seqs = []
ignore_inner_seqs = []
for i,seq in enumerate(seqs):
if not 'taps' in seq:
ignore_scan_seqs.append(seq['input'])
n_other_ignore += 1
## Step 5.6 all other arguments including extra inputs
other_scan_args = []
other_inner_args = []
other_scan_args += [ arg for arg in non_seqs
if not isinstance(arg, SharedVariable) ]
## Step 5.8 all shared variables with no update rules
def new_variable( v ):
new_v = safe_new(v)
if v.name:
new_v.name = v.name + '_copy'
return new_v
other_inner_args += [ new_variable(arg) for arg in non_seqs
if not isinstance(arg, SharedVariable) ]
givens.update( dict( zip(other_scan_args, other_inner_args) ))
other_shared_scan_args = [ arg.variable for arg
in dummy_f.maker.expanded_inputs
if ( isinstance(arg.variable, SharedVariable) and
not arg.update) ]
other_shared_inner_args = [ new_variable(arg.variable) for arg
in dummy_f.maker.expanded_inputs
if ( isinstance(arg.variable, SharedVariable) and
not arg.update) ]
givens.update( dict( zip( other_shared_scan_args,
other_shared_inner_args) ) )
##
### Step 6. Re-order the outputs and clone them replacing things
### using the givens
##
inner_inputs = ( inner_seqs +
mit_mot_inner_inputs +
mit_sot_inner_inputs +
sit_sot_inner_inputs +
shared_inner_inputs +
ignore_inner_seqs +
other_shared_inner_args +
other_inner_args )
inner_outs = ( mit_mot_inner_outputs +
mit_sot_inner_outputs +
sit_sot_inner_outputs +
nit_sot_inner_outputs +
shared_inner_outputs )
if cuda.cuda_available:
# very often we end up in this situation when we want to
# replace w with w_copy, where w is CudaNdarray
# and w_copy is TensorType. This is caused because shared
# variables are put on GPU right aways >:| ,
new_givens = {}
for w,w_copy in givens.iteritems():
if (isinstance(w.type, cuda.CudaNdarrayType)
and isinstance(w_copy.type, tensor.TensorType)):
for o in inner_outs:
new_givens = traverse(o,w,w_copy, new_givens)
else:
new_givens[w] = w_copy
else:
new_givens = givens
new_outs = scan_utils.clone(inner_outs, replace = new_givens)
##
### Step 7. Create the Scan Op
##
tap_array = mit_sot_tap_array + [[-1] for x in xrange(n_sit_sot)]
info = {}
info['tap_array'] = tap_array
info['n_seqs'] = n_seqs
info['n_mit_mot'] = n_mit_mot
info['n_mit_mot_outs'] = n_mit_mot_outs
info['mit_mot_out_slices'] = mit_mot_out_slices
info['n_mit_sot'] = n_mit_sot
info['n_sit_sot'] = n_sit_sot
info['n_shared_outs'] = n_shared_outs
info['n_nit_sot'] = n_nit_sot
info['n_other_ignore'] = n_other_ignore
info['truncate_gradient'] = truncate_gradient
info['name'] = name
info['mode'] = mode
info['inplace'] = False
info['gpu'] = False
revised_outs = []
for o in new_outs:
if (o in inner_inputs or
isinstance(o, tensor.Constant)):
revised_outs.append( scan_utils.cloneOp(o))
else:
revised_outs.append(o)
local_op = scan_op.Scan( inner_inputs, revised_outs, info )
##
### Step 8. Compute the outputs using the scan op
##
scan_inputs = ( scan_seqs +
mit_mot_scan_inputs +
mit_sot_scan_inputs +
sit_sot_scan_inputs +
shared_scan_inputs +
[ actual_n_steps for x in xrange(n_nit_sot) ] +
ignore_scan_seqs +
other_shared_scan_args +
other_scan_args )
scan_inputs = [safe_to_cpu(x) for x in ([actual_n_steps] + scan_inputs)]
scan_outs = local_op(* scan_inputs )
if type(scan_outs) not in (list,tuple):
scan_outs = [scan_outs]
##
### Step 9. Figure out which outs are update rules for shared variables
### and so on ...
##
update_map = {}
def remove_dimensions( outs, steps_return, offsets = None):
out_ls = []
for idx, out in enumerate(outs):
if idx in steps_return:
if steps_return[idx] > 1:
out_ls.append( out[-steps_return[idx]:] )
else:
out_ls.append( out[-1] )
else:
if offsets is None:
out_ls.append( out )
else:
out_ls.append( out[offsets[idx]:] )
return out_ls
offset = n_mit_mot
offsets = [ abs(numpy.min(x)) for x in mit_sot_tap_array ]
mit_sot_outs = remove_dimensions(
scan_outs[offset:offset+n_mit_sot]
, mit_sot_return_steps
, offsets )
offset += n_mit_sot
offsets = [ 1 for x in xrange(n_sit_sot) ]
sit_sot_outs = remove_dimensions(
scan_outs[offset:offset+n_sit_sot]
, sit_sot_return_steps
, offsets )
offset += n_sit_sot
nit_sot_outs = remove_dimensions(
scan_outs[offset:offset+n_nit_sot]
, nit_sot_return_steps )
offset += n_nit_sot
for idx, update_rule in enumerate(scan_outs[offset:offset+n_shared_outs]):
update_map[shared_scan_inputs[idx]] = update_rule
_scan_out_list = ( mit_sot_outs +
sit_sot_outs +
nit_sot_outs )
# Step 10. I need to reorder the outputs to be in the order expected by
# the user
rightOrder = ( mit_sot_rightOrder +
sit_sot_rightOrder +
nit_sot_rightOrder )
scan_out_list = [None]*len(rightOrder)
for idx,pos in enumerate(rightOrder):
scan_out_list[pos] = _scan_out_list[idx]
if len(scan_out_list) == 1:
scan_out_list = scan_out_list[0]
elif len(scan_out_list) == 0:
scan_out_list = None
return (scan_out_list, update_map)
"""
This module provides the Scan Op
See scan.py for details on scan
"""
__docformat__ = 'restructedtext en'
__authors__ = ( "Razvan Pascanu "
"Frederic Bastien "
"James Bergstra "
"Pascal Lamblin " )
__copyright__ = "(c) 2010, Universite de Montreal"
__contact__ = "Razvan Pascanu <r.pascanu@gmail>"
import copy
import itertools
import logging
import numpy
from theano.compile import SharedVariable, function, Param
from theano import compile
from theano import gradient
from theano.gof.python25 import all
from theano.gof import Op, Apply
from theano import gof
from theano.misc import safe_asarray as safe_asarray
from theano.tensor import TensorType
from theano import tensor
from theano.tensor.opt import Shape_i
import theano
import scan_utils
from scan_utils import safe_new, safe_to_cpu, traverse
# Module-level logger shared by the helpers below; messages are routed
# through the standard ``logging`` machinery under this channel name.
_logger = logging.getLogger('theano.scan_op')


def warning(*msg):
    """Log a scan-related warning, joining all arguments with spaces."""
    text = ' '.join(msg)
    _logger.warning('WARNING theano.scan: ' + text)


def info(*msg):
    """Log a scan-related informational message, joining all arguments."""
    text = ' '.join(msg)
    _logger.info('INFO theano.scan: ' + text)
from theano.sandbox import cuda
class Scan(Op):
#
# OLD DOCUMENTATION CAN BE FOUND NEAR REVISION 2581
#
def __init__(self, inputs, outputs, info):
    """
    Build a Scan op around a compiled inner function.

    :param inputs: inputs of the inner function of scan
    :param outputs: outputs of the inner function of scan
    :param info: dictionary containing different properties of
        the scan op (counts of each output kind, taps, mode, name,
        inplace/gpu flags, ...).  NOTE: this dict is mutated here
        (``output_types``, ``mode_instance`` and a default ``name``
        are added to it).
    """
    # adding properties into self; every key of ``info`` becomes an
    # attribute (n_seqs, n_mit_mot, name, gpu, ...)
    self.info = info
    self.inputs = inputs
    self.outputs = outputs
    self.__dict__.update(info)
    # build a list of output types for any Apply node using this op.
    info['output_types'] = []
    idx = 0
    jdx = 0
    if info['gpu']:
        # mit_mot
        while idx < self.n_mit_mot_outs:
            # Note that for mit_mot there are several output slices per
            # output sequence
            o = outputs[idx]
            info['output_types'].append(
                cuda.CudaNdarrayType(
                    broadcastable=(False,) + o.type.broadcastable))
            idx += len(self.mit_mot_out_slices[jdx])
            jdx += 1
        # mit_sot / sit_sot / nit_sot: one extra leading (time) dimension
        # is prepended to each inner-function output slice
        end = idx + self.n_mit_sot + self.n_sit_sot + self.n_nit_sot
        for o in outputs[idx:end]:
            info['output_types'].append(
                cuda.CudaNdarrayType(broadcastable=(False,) +
                                     o.type.broadcastable))
        # shared outputs keep their own type (no time dimension)
        for o in outputs[end:]:
            if isinstance(o.type, TensorType):
                info['output_types'].append(cuda.CudaNdarrayType(
                    broadcastable=o.type.broadcastable))
            else:
                info['output_types'].append(o.type)
    else:
        while idx < self.n_mit_mot_outs:
            # Note that for mit_mot there are several output slices per
            # output sequence
            o = outputs[idx]
            info['output_types'].append(
                TensorType(
                    broadcastable=(False,) + o.type.broadcastable,
                    dtype=o.type.dtype))
            idx += len(self.mit_mot_out_slices[jdx])
            jdx += 1
        # mit_sot / sit_sot / nit_sot
        end = idx + self.n_mit_sot + self.n_sit_sot + self.n_nit_sot
        for o in outputs[idx:end]:
            info['output_types'].append(
                TensorType(
                    broadcastable=(False,) + o.type.broadcastable,
                    dtype=o.type.dtype))
        # shared outputs; a CudaNdarray ending up on the CPU path is
        # exposed as a TensorType of the configured float dtype
        for o in outputs[end:]:
            if cuda.cuda_available and isinstance(o.type,
                                                  cuda.CudaNdarrayType):
                info['output_types'].append(TensorType(
                    broadcastable=o.type.broadcastable,
                    dtype=theano.config.floatX))
            else:
                info['output_types'].append(o.type)
    # When running inplace, each recurrent output (mit_mot/mit_sot/
    # sit_sot) destroys the buffer holding its initial state, which is
    # scan input number ``idx + 1 + n_seqs`` (offset 1 for n_steps).
    self.destroy_map = {}
    if 'inplace' in info and info['inplace']:
        for idx in xrange(info['n_mit_mot'] + info['n_mit_sot'] +
                          info['n_sit_sot']):
            self.destroy_map[idx] = [idx + 1 + info['n_seqs']]
    # I consider all inputs of the inner function non mutable
    nonmutable = range(len(inputs))
    mode_instance = compile.mode.get_mode(info['mode'])
    # if the default mode is used, and that mode is ProfileMode
    # then we need to copy the mode otherwise the time for a given
    # op will be counted multiple times
    if (info['mode'] is None and
            isinstance(mode_instance, compile.profilemode.ProfileMode)):
        mode_instance = compile.profilemode.ProfileMode(
            optimizer=mode_instance.provided_optimizer,
            linker=mode_instance.provided_linker)
        compile.profilemode.prof_mode_instance_to_print.append(
            mode_instance)
        info['mode_instance'] = mode_instance
        if self.name:
            info['mode_instance'].message = self.name + " sub profile"
        else:
            info['mode_instance'].message = "Scan sub profile"
    else:
        info['mode_instance'] = mode_instance
    if 'name' not in info or info['name'] is None:
        info['name'] = 'scan_fn'
    if isinstance(info['mode_instance'], compile.debugmode.DebugMode):
        # DebugMode can not go through scan_utils.scan_function, so
        # compile a regular theano function and wrap it to present the
        # same storage-based calling interface.
        theano_fn = function(inputs,
                             outputs,
                             mode=info['mode_instance'],
                             name=info['name'])

        def fn_wrapper(ins_storage, outs_storage):
            '''
            Wrap theano_fn to have same interface as scan_utils's
            scan_function
            '''
            outputs = theano_fn(*ins_storage)
            for (out, out_storage) in zip(outputs, outs_storage):
                if out_storage[0] is not None and out_storage[0].shape:
                    out_storage[0][:] = out
                elif out_storage[0] is not None:
                    # 0-d arrays can not be sliced; itemset copies the
                    # scalar value in place
                    out_storage[0].itemset(out)
            return [[o] for o in outputs]
        self.fn = fn_wrapper
        self.fn.maker = scan_utils.EmptyObject()
        self.fn.maker.inputs = inputs
        self.fn.maker.outputs = outputs
        self.fn.maker.env = theano_fn.maker.env
        self.mask = [0 for x in xrange(self.n_shared_outs)]
    else:
        self.mask, self.fn = scan_utils.scan_function(
            inputs,
            outputs,
            nonmutable,
            mode=info['mode_instance'],
            name=info['name'],
            slices=(info['n_mit_mot_outs'] +
                    info['n_mit_sot'] +
                    info['n_sit_sot'] +
                    info['n_nit_sot']))
    for o in self.fn.maker.env.outputs:
        if not o.owner:
            # The original code dropped into a debugger here
            # (``import GPUscan.ipdb; ...set_trace()``), which raises
            # ImportError on any normal install; fail explicitly instead.
            raise ValueError('Every output of the inner function of '
                             'scan must be the result of some '
                             'computation (it has no owner)', o)
    # check for shared variables in the inputs
    assert not numpy.any([isinstance(x, SharedVariable) for x
                          in self.fn.maker.inputs])
    # ``info`` gained new keys above (output_types, mode_instance, name)
    # so re-expose it as attributes
    self.__dict__.update(info)
    self.info = info
    # Pre-computing some values to speed up perform
    self.mintaps = [numpy.min(x) for x in self.tap_array]
    self.mintaps += [0 for x in xrange(self.n_nit_sot)]
    # Offsets into the flat ``args`` list received by perform():
    # [n_steps | seqs | recurrent initial states | shared | nit_sot sizes]
    self.seqs_arg_offset = 1 + self.n_seqs
    self.shared_arg_offset = (self.seqs_arg_offset +
                              self.n_mit_mot +
                              self.n_mit_sot +
                              self.n_sit_sot)
    self.nit_sot_arg_offset = (self.shared_arg_offset +
                               self.n_shared_outs)
    self.n_outs = self.n_mit_mot + self.n_mit_sot + self.n_sit_sot
    self.n_tap_outs = self.n_mit_mot + self.n_mit_sot
def make_node(self, *inputs):
    """
    Build the Apply node connecting the outer-graph inputs (n_steps,
    sequences, initial states, shareds, nit_sot sizes) to this op,
    after checking that every input's dtype matches the corresponding
    inner-function variable.

    :raises ValueError: if any dtype check fails.
    """
    # Every input must be a Theano variable.  Use the builtin ``all``
    # here: ``numpy.all`` on a generator wraps it in a 0-d object array
    # and is therefore always truthy, so the original assert could
    # never fire.
    assert all(isinstance(i, gof.Variable) for i in inputs)
    # assert dtype is consistent
    err_msg1 = ('%s %s (index %d) has dtype %s. Slice %s representing '
                'this input has dtype %s')
    err_msg2 = ('Initial state %s (index %d) has dtype %s. The '
                'corresponding output of the inner function applied '
                'recurrently has dtype %s')
    # Flags that indicate which inputs are vectors
    self.vector_seqs = [seq.ndim == 1 for seq in
                        inputs[1:1 + self.n_seqs]]
    self.vector_outs = [arg.ndim == 1 for arg in
                        inputs[1 + self.n_seqs: (1 + self.n_seqs +
                                                 self.n_outs)]]
    self.vector_outs += [False] * self.n_nit_sot
    # Check if input sequences and variables representing a slice of
    # them have the same dtype
    for idx in xrange(self.n_seqs):
        if inputs[1 + idx].dtype != self.inputs[idx].dtype:
            raise ValueError(err_msg1 % ('Sequence',
                                         inputs[1 + idx].name,
                                         idx,
                                         inputs[1 + idx].dtype,
                                         self.inputs[idx].name,
                                         self.inputs[idx].dtype))
    # Check that these 3 things have the same dtype for mit_mot:
    # - initial state of the output
    # - variable representing an input slice of the output
    # - variable representing an output slice of the output
    # Maybe checking that ndim fits would be good as well !?
    index_i = self.n_seqs
    index_o = 0
    index = 1 + self.n_seqs
    start = index
    end = index + self.n_mit_mot
    while index < end:
        for k in self.tap_array[index - start]:
            if inputs[index].dtype != self.inputs[index_i].dtype:
                # Report ``index`` here: the original code reused the
                # stale loop variable ``idx`` from the sequence loop
                # above (wrong index in the message, or NameError when
                # there are no sequences at all).
                raise ValueError(err_msg1 % ('Initial state',
                                             inputs[index].name,
                                             index,
                                             inputs[index].dtype,
                                             self.inputs[index_i].name,
                                             self.inputs[index_i].dtype))
            index_i += 1
        for k in self.mit_mot_out_slices[index - start]:
            if inputs[index].dtype != self.outputs[index_o].dtype:
                raise ValueError(err_msg2 % (inputs[index].name,
                                             index,
                                             inputs[index].dtype,
                                             self.outputs[index_o].dtype))
            index_o += 1
        index += 1
    # Same checks as above but for outputs of type mit_sot and sit_sot
    end += self.n_mit_sot + self.n_sit_sot
    while index < end:
        for k in self.tap_array[index - start]:
            if inputs[index].dtype != self.inputs[index_i].dtype:
                raise ValueError(err_msg1 % ('Initial state',
                                             inputs[index].name,
                                             index,
                                             inputs[index].dtype,
                                             self.inputs[index_i].name,
                                             self.inputs[index_i].dtype))
            index_i += 1
        if inputs[index].dtype != self.outputs[index_o].dtype:
            raise ValueError(err_msg2 % (inputs[index].name,
                                         index,
                                         inputs[index].dtype,
                                         self.outputs[index_o].dtype))
        index_o += 1
        index += 1
    # Check that the shared variables and their update rules have the
    # same dtype. Maybe even same type ?!
    end += self.n_shared_outs
    index_o += self.n_nit_sot
    while index < end:
        if (hasattr(inputs[index], 'dtype') and
                inputs[index].dtype != self.outputs[index_o].dtype):
            raise ValueError(err_msg2 % (inputs[index].name,
                                         index,
                                         inputs[index].dtype,
                                         self.outputs[index_o].dtype))
        index += 1
        index_o += 1
    for x in inputs[index:index + self.n_nit_sot]:
        # For every nit_sot input we get as input a int/uint that
        # depicts the size in memory for that sequence. This feature is
        # used by truncated BPTT and by scan space optimization
        if (str(x.dtype)[:3] not in ('uin', 'int') or
                x.ndim != 0):
            raise ValueError('For output %d you need to provide a '
                             'scalar int !', x)
    apply_node = Apply(self,
                       inputs,
                       [t() for t in self.info['output_types']])
    return apply_node
def __eq__(self, other):
    """
    Two Scan ops compare equal when they are of the same class, have
    the same number of inner inputs/outputs, every paired inner
    input/output represents the same computation, and their ``info``
    dictionaries match.
    """
    if type(self) is not type(other):
        return False
    if len(self.inputs) != len(other.inputs):
        return False
    if len(self.outputs) != len(other.outputs):
        return False
    # Pairwise structural comparison of the inner graphs.
    for mine, theirs in zip(self.inputs, other.inputs):
        if not scan_utils.equal_computations(mine, theirs):
            return False
    for mine, theirs in zip(self.outputs, other.outputs):
        if not scan_utils.equal_computations(mine, theirs):
            return False
    return self.info == other.info
def __str__(self):
    """Name shown in graph printouts: the user-given name, else 'scan'."""
    return self.name if self.name else 'scan'
def __hash__(self):
    """
    Hash consistent with __eq__: fold together the op class and the
    hashes of the inner inputs, inner outputs and the info dict.
    XOR is associative/commutative, so this accumulator form produces
    exactly the same value as a single chained expression.
    """
    h = hash(type(self))
    h ^= scan_utils.hash_listsDictsTuples(self.inputs)
    h ^= scan_utils.hash_listsDictsTuples(self.outputs)
    h ^= scan_utils.hash_listsDictsTuples(self.info)
    return h
    def perform( self, node, args, outs):
        """
        Execute the scan loop in pure Python (non-C fallback).

        The args are packed like this:

            n_steps
            X sequence inputs x_1, x_2, ... x_<self.n_seqs>
            Y initial states (u_1, u_2, ... u_<self.n_outs>) for our
              outputs. Each must have appropriate length (T_1, T_2, ..., T_Y).
            W other inputs w_1, w_2, ... w_W

        There are at least 1 + self.n_seqs + self.n_outs inputs, and the
        ones above this number are passed to the scanned function as
        non-sequential inputs.

        The outputs are more straightforward:

            Y sequence outputs y_1, y_2, ... y_<self.n_outs>

        Results are written in place into the ``outs`` storage cells.
        """
        # 1. Unzip the number of steps and sequences. If number of steps is
        # negative flip sequences around, and make n_steps positive
        n_steps = args[0]
        if n_steps < 0:
            n_steps = abs(n_steps)
            seqs = [ seq[::-1] for seq in args[1:self.seqs_arg_offset]]
            seqs = zip( seqs, self.vector_seqs )
        else:
            seqs = args[1:self.seqs_arg_offset]
            seqs = zip( seqs, self.vector_seqs )
        # 2. Allocate memory for the outputs. Construct the list:
        #   store_steps -- map containing the buffer length of each output
        #   pos         -- map containing the current write position of
        #                  each output (buffers are used circularly)
        store_steps = [ arg.shape[0] for arg
                           in args[self.seqs_arg_offset:
                                    self.shared_arg_offset] ]
        # nit_sot buffer lengths arrive as scalar arguments, not arrays
        store_steps += [ arg for arg in
                            args[self.nit_sot_arg_offset:
                                    self.nit_sot_arg_offset+self.n_nit_sot]
                       ]
        pos = [ (-self.mintaps[idx])%store_steps[idx] for idx
                         in xrange(self.n_outs+self.n_nit_sot)]
        # 2.1 Create storage space for outputs
        for idx in xrange(self.n_outs):
            if self.inplace:
                # ^ Case 1. Outputs should be computed inplace of their
                # initial state
                outs[idx][0] = args[self.seqs_arg_offset + idx ]
            elif ( outs[idx][0] is not None and
                  outs[idx][0].shape[1:] == args[self.seqs_arg_offset + idx].shape[1:]
                  and outs[idx][0].shape[0] >= store_steps[idx] ):
                # Case 2: a previously allocated buffer is big enough --
                # reuse it and just copy in the initial state values
                outs[idx][0] = outs[idx][0][:store_steps[idx]]
                if idx > self.n_mit_mot:
                    # NOTE(review): ``>`` excludes index self.n_mit_mot
                    # itself; ``>=`` may have been intended -- confirm
                    l = - self.mintaps[idx]
                    outs[idx][0][:l] = args[self.seqs_arg_offset + idx][:l]
                else:
                    outs[idx][0][:] = args[self.seqs_arg_offset + idx]
            else:
                # Case 3: allocate fresh storage seeded with the initial state
                outs[idx][0] = args[self.seqs_arg_offset + idx].copy()
        offset = self.nit_sot_arg_offset + self.n_nit_sot + self.n_other_ignore
        other_args = args[offset:]
        # one entry per (output, tap) pair; j indexes the circular buffer
        zipped_outs = [(outs[idx], self.vector_outs[idx], tap,
                        store_steps[idx], idx) for idx in xrange(self.n_outs)
                       for tap in self.tap_array[idx] ]
        end = self.n_outs + self.n_nit_sot
        sot_outs = zip( outs[self.n_mit_mot:end]
                       , self.vector_outs[self.n_mit_mot:end]
                       , store_steps[self.n_mit_mot:end]
                       , range(self.n_mit_mot, end ))
        ############## THE MAIN LOOP #########################
        for i in xrange(n_steps):
            # sequences over which scan iterates
            # 3. collect input slices
            if i == 1 and self.n_nit_sot > 0 :
                # nit_sot buffers get allocated during iteration 0, so the
                # cached view must be refreshed once before iteration 1
                sot_outs = zip( outs[self.n_mit_mot:end]
                               , self.vector_outs[self.n_mit_mot:end]
                               , store_steps[self.n_mit_mot:end]
                               , range(self.n_mit_mot, end ))
            fn_args = [ seq[i:i+1].reshape(()) if c else seq[i]
                       for seq,c in seqs]
            fn_args += [ out[0][(pos[j]+tap)%sz:
                                (pos[j]+tap)%sz+1].reshape(())
                        if c else out[0][(pos[j]+tap)%sz]
                        for (out, c, tap, sz, j) in zipped_outs ]
            a_offset = self.shared_arg_offset
            o_offset = self.n_outs + self.n_nit_sot
            # shared values come from the arguments on the first step and
            # from the previous step's outputs afterwards
            fn_args += [ args[a_offset+j] if i==0 else outs[o_offset+j][0]
                        for j in xrange(self.n_shared_outs) ]
            fn_args += other_args
            # 4. collecting slices where the output should be stored
            fn_out_storage = [ [None] for x in xrange(self.n_mit_mot_outs)]
            if i == 0 and self.n_nit_sot > 0:
                fn_out_storage += [
                    [None] if store == 1 or c else [out[0][pos[j]]]
                    for out,c,store,j in sot_outs[:-self.n_nit_sot] ]
                fn_out_storage += [[None]]*self.n_nit_sot
            else:
                fn_out_storage += [
                    [ None ] if store == 1 or c else [out[0][pos[j]]]
                    for out,c,store,j in sot_outs ]
            fn_out_storage += [ [None] for x in xrange(self.n_shared_outs) ]
            # 5. compute outputs
            something = self.fn(fn_args, fn_out_storage)
            offset_out = 0
            # 5.1 Copy over the values for mit_mot outputs
            for j in xrange(self.n_mit_mot):
                for k in self.mit_mot_out_slices[j]:
                    outs[j][0][k+pos[j]] = something[offset_out][0]
                    offset_out += 1
            # 5.2 Copy over the values for mit_sot/sit_sot outputs
            begin = self.n_mit_mot
            end = self.n_outs
            offset_out -= self.n_mit_mot
            for j in xrange(begin, end):
                if store_steps[j] == 1 or self.vector_outs[j]:
                    outs[j][0][pos[j]] = something[offset_out+j][0]
            # 5.3 Copy over the values for nit_sot outputs
            begin = end
            end += self.n_nit_sot
            for j in xrange(begin,end):
                if i == 0:
                    # First step: size the buffer from the sample the inner
                    # function produced (shape unknown until now)
                    jout = j+offset_out
                    shape = (store_steps[j],) + something[jout][0].shape
                    if len(something[jout][0].shape) == 0:
                        self.vector_outs[j] = True
                    dtype = something[jout][0].dtype
                    if (outs[j][0] is None or
                        outs[j][0].shape[0] < store_steps[j] or
                        outs[j][0].shape[1:] != shape[1:] or
                        outs[j][0].dtype != dtype ):
                        if self.info['gpu']:
                            outs[j][0] = cuda.cuda_ndarray.cuda_ndarray.CudaNdarray.zeros(shape)
                        else:
                            outs[j][0] = numpy.zeros(shape, dtype)
                    elif outs[j][0].shape[0] != store_steps[j]:
                        outs[j][0] = outs[j][0][:store_steps[j]]
                    outs[j][0][pos[j]] = something[jout][0]
                elif store_steps[j] == 1 or self.vector_outs[j]:
                    outs[j][0][pos[j]] = something[j+offset_out][0]
            # 5.4 Copy over the values for outputs corresponding to shared
            # variables
            begin = end
            end += self.n_shared_outs
            for j in xrange(begin,end):
                jout = j +offset_out
                outs[j][0] = something[jout][0]
            # advance every circular-buffer write position
            pos = [ (idx+1)%store for idx,store in
                           itertools.izip(pos, store_steps)
                  ]
        # 6. Check if you need to re-order output buffers
        begin = self.n_mit_mot
        end = self.n_outs + self.n_nit_sot
        for idx in xrange(begin, end):
            min_tap = self.mintaps[idx]
            if ( store_steps[idx] < n_steps-self.mintaps[idx] and
                pos[idx] < store_steps[idx] ):
                # the circular buffer wrapped: rotate it so entries end up
                # in chronological order
                part_1 = range(pos[idx], store_steps[idx])
                part_2 = range(pos[idx] )
                reordered = part_1 + part_2
                if len(reordered) > 1:
                    if isinstance( outs[idx][0], cuda.CudaNdarray):
                        shape = outs[idx][0].shape
                        tmp = cuda.cuda_ndarray.cuda_ndarray.CudaNdarray.zeros(shape)
                        pdx = pos[idx]
                        tmp[:store_steps[idx]-pdx] = outs[idx][0][pdx:]
                        tmp[store_steps[idx]-pdx:] = outs[idx][0][:pdx]
                        outs[idx][0] = tmp
                    else:
                        outs[idx][0] = outs[idx][0][reordered]
        # Masked outputs may alias inputs/other outputs; force a copy
        for idx,val in enumerate(self.mask):
            if val == 1:
                if hasattr(outs[end+idx][0], 'copy'):
                    outs[end + idx][0] = outs[end+idx][0].copy()
                else:
                    outs[end + idx][0] = copy.deepcopy(outs[end+idx][0])
### Infer Shape
    def infer_shape(self, node, input_shapes):
        """
        Compute symbolic shapes of the scan outputs from the symbolic
        shapes of the node inputs, by running shape inference through the
        inner graph (via ``scan_utils.infer_shape``).

        :param node: the Apply node of this op
        :param input_shapes: one shape tuple per outer input of the node
        :return: list with one shape tuple per output of the node
        """
        # inner sequences see one time-slice, i.e. the leading dim is gone
        seqs_shape = [ x[1:] for x in input_shapes[1:1+self.n_seqs] ]
        n_outs = self.n_mit_mot + self.n_mit_sot + self.n_sit_sot
        outs_shape = []
        # one inner input (slice) per tap of each recurrent output
        for idx in xrange(n_outs):
            for k in self.tap_array[idx]:
                outs_shape += [ input_shapes[idx+self.n_seqs+1][1:] ]
        offset = 1 + self.n_seqs + n_outs
        for idx in xrange(self.n_shared_outs):
            outs_shape += [ input_shapes[idx+offset] ]
        offset += self.n_nit_sot + self.n_other_ignore + self.n_shared_outs
        inner_ins_shapes = seqs_shape + outs_shape + input_shapes[offset:]
        outs_shape = scan_utils.infer_shape(
                        self.outputs
                        , self.inputs
                        , inner_ins_shapes)
        offset = 1 + self.n_seqs
        # recurrent outputs keep the shape of their initial-state input
        scan_outs = [x for x in input_shapes[offset:offset+n_outs]]
        offset += n_outs
        for x in xrange(self.n_nit_sot):
            if outs_shape[n_outs+x] is not None:
                # leading dim is the symbolic length argument of the nit_sot
                scan_outs.append(
                    (node.inputs[offset+self.n_shared_outs+x],) +
                    tuple(outs_shape[n_outs+x]) )
            else:
                # inner inference failed: fall back to Shape_i on the output
                r = node.outputs[n_outs+x]
                shp = (node.inputs[offset+self.n_shared_outs+x],)
                shp += tuple([Shape_i(i)(r) for i in xrange(1,r.ndim)])
                scan_outs.append( shp )
        # NOTE(review): this reuses ``offset`` (already advanced past the
        # recurrent outputs) to slice shared-output shapes -- confirm the
        # indexing against the outer input layout
        scan_outs += [ x for x in
                      input_shapes[offset:offset+self.n_shared_outs] ]
        return scan_outs
### GRAD FUNCTION
    def grad(self, args, g_outs):
        """
        Build the gradient of this scan as another Scan op that iterates
        over the sequences and forward outputs in reverse order
        (backpropagation through time).

        :param args: outer-graph inputs of this Scan node
        :param g_outs: gradients of the cost w.r.t. the node's outputs
        :return: list of gradients w.r.t. ``args`` (``None`` for
            non-differentiable entries: n_steps, shared-variable inits,
            nit_sot lengths, ignored inputs)
        """
        # 1. forward pass - get the outputs after applying scan
        scan_outputs = self(*args)
        # 2. make sure they are given as a list
        if not( type(scan_outputs) in (list,tuple)):
            scan_outputs = [scan_outputs]
        # 3. un-group / unzip the inputs
        seqs = self.inputs[:self.n_seqs]
        offset = self.n_seqs
        n_ins_mit_mot = numpy.sum([0] + [ len(self.tap_array[x]) for x
                                         in xrange(self.n_mit_mot) ])
        outs_mit_mot = self.inputs[offset:offset+n_ins_mit_mot]
        offset += n_ins_mit_mot
        n_ins_mit_sot = numpy.sum([0] + [ len(self.tap_array[x]) for x
                                         in xrange( self.n_mit_mot
                                                   , self.n_mit_mot+self.n_mit_sot)])
        outs_mit_sot = self.inputs[offset:offset+n_ins_mit_sot]
        offset += n_ins_mit_sot
        outs_sit_sot = self.inputs[offset:offset+self.n_sit_sot]
        offset += self.n_sit_sot
        old_scan_shared_ins = self.inputs[offset:offset+self.n_shared_outs]
        out_offset = ( self.n_mit_mot_outs
                      + self.n_mit_sot
                      + self.n_nit_sot
                      + self.n_sit_sot )
        old_scan_shared_outs = self.outputs[out_offset:]
        arg_offset = ( 1
                      + self.n_seqs
                      + self.n_mit_mot
                      + self.n_mit_sot
                      + self.n_sit_sot)
        old_scan_init = args[arg_offset: arg_offset+self.n_shared_outs]
        offset += self.n_shared_outs
        other_args = self.inputs[offset:]
        # 4. Collect (possibly) differentiable inputs
        diff_inputs = ( seqs +
                       outs_mit_mot +
                       outs_mit_sot +
                       outs_sit_sot +
                       other_args )
        # 5. construct the function that computes the gradient (we sum over
        # the gradients with respect to all outputs)
        def compute_gradient(y, g_y):
            # gradient of a single output slice w.r.t. all diff_inputs;
            # entries are None for inputs y does not depend on
            gmp = gradient.grad_sources_inputs(
                        [(y,g_y)], diff_inputs, False )
            return [gmp.get(p, None) for p in diff_inputs ]
        # 6. clean the outputs (i.e. remove update rules)
        end = ( self.n_mit_mot_outs
               + self.n_mit_sot
               + self.n_sit_sot
               + self.n_nit_sot )
        clean_outputs = self.outputs[:end]
        g_outs_no_shared = g_outs[:end]
        # 7.1. empty lists to hold gradients
        # List of slices from outputs (used to compute the gradients)
        inner_g_outs = []
        g_out_slices = []
        # List of outputs of the gradient function
        inner_gfn_outs = []
        # slices of the input
        prev_inner_gfn_outs = []
        zeros_like_diff_ins = []
        pos = ( self.n_seqs + n_ins_mit_mot + n_ins_mit_sot +
               self.n_sit_sot)
        offset = len(args) - len(other_args) - pos
        # 7.2. generate variables to represent previous steps of g_outs
        for idx,diff_in in enumerate(diff_inputs):
            prev_gfn_out = safe_new(diff_in)
            if hasattr(diff_in,'name') and diff_in.name:
                prev_gfn_out.name = 'g_prev_'+diff_in.name
            else:
                prev_gfn_out.name = 'g_prev_'+str(idx)
            prev_inner_gfn_outs.append( prev_gfn_out)
            if idx < pos:
                zeros_like_diff_ins.append(tensor.zeros_like(diff_in))
            else:
                # for ``other_args`` take the zero pattern from the outer
                # argument rather than the inner variable
                zeros_like_diff_ins.append(tensor.zeros_like(args[idx+offset]))
        # 7.3. compute gradients of the inputs given one output
        for dx, out in enumerate(clean_outputs):
            inner_g_out = safe_new(out)
            if g_outs_no_shared[dx]:
                g_out_slices.append(g_outs_no_shared[dx][0])
            else:
                g_out_slices.append(None)
            if out.name:
                inner_g_out.name = 'g_'+out.name
            else:
                inner_g_out.name = 'g_'+str(dx)
            inner_g_outs.append(inner_g_out)
            _g_out = inner_g_out
            grad_outs = compute_gradient(out, _g_out)
            if not inner_gfn_outs:
                # first output: seed accumulators with the "previous step"
                # gradient variables for everything after the sequences
                for idx, gfn_out in enumerate(grad_outs):
                    if idx >= self.n_seqs:
                        inner_gfn_outs.append( prev_inner_gfn_outs[idx] )
                    else:
                        inner_gfn_outs.append( None )
            # 7.4 Sum the gradients
            # safety check, some of this inputs might still not be
            # differentiable, for those we don't add them to the mix
            # (assume their gradient is 0)
            for i,(x,y) in enumerate(zip(grad_outs, inner_gfn_outs)):
                if x and y:
                    inner_gfn_outs[i] = x+y
                elif y:
                    inner_gfn_outs[i] = y
                else:
                    inner_gfn_outs[i] = x
        ## 8. Mask the outputs that are not differentiable
        # backwards pass
        for i in xrange(len(inner_gfn_outs)):
            if inner_gfn_outs[i] == None:
                inner_gfn_outs[i] = tensor.zeros_like(diff_inputs[i])
        ## 9. Mask the g_outs that are Nones :
        for i, out in enumerate(scan_outputs):
            if g_outs[i] is None:
                try:
                    # this try is for catching non ndarray inputs (random
                    # states) it is more of a safety check ( all random
                    # states should be after n_outs_not_shared ...
                    g_outs[i] = tensor.zeros_like(scan_outputs[i])
                except:
                    g_outs[i] = theano.tensor.constant(
                        numpy.array(0, theano.config.floatX))
        ## 10. Get your sequence in order for the scan:
        n_seqs = ( self.n_seqs +
                  n_ins_mit_mot +
                  n_ins_mit_sot +
                  self.n_sit_sot +
                  self.n_nit_sot )
        offset = ( self.n_mit_mot_outs +
                  self.n_mit_sot +
                  self.n_sit_sot )
        inner_seqs = ( seqs +
                      outs_mit_mot +
                      outs_mit_sot +
                      outs_sit_sot +
                      inner_g_outs[offset:offset+self.n_nit_sot])
        # outer sequences are iterated in reverse (backwards in time)
        scan_seqs = [ x[::-1] for x in args[1:self.n_seqs + 1]]
        offset = 0
        for idx in xrange(self.n_mit_mot + self.n_mit_sot):
            mintap = numpy.min(self.tap_array[idx])
            maxtap = numpy.max(self.tap_array[idx])
            seq = scan_outputs[offset+idx][::-1]
            for k in self.tap_array[idx]:
                # We cut the sequence such that seq[i] to correspond to
                # seq[i-k]
                if maxtap < 0:
                    dim_offset = abs(maxtap)
                else:
                    dim_offset = 0
                if maxtap == mintap and maxtap != 0:
                    nw_seq =seq[:abs(maxtap)]
                elif maxtap -k != 0 :
                    nw_seq = seq[dim_offset +k -mintap: -(maxtap -k)]
                else:
                    nw_seq = seq[dim_offset +k -mintap: ]
                if seq.name:
                    nw_seq.name = seq.name + '[%d:]'%k
                scan_seqs.append(nw_seq)
        # NOTE(review): only n_mit_sot is added here although the loop
        # above covered n_mit_mot + n_mit_sot outputs -- confirm intent
        offset += self.n_mit_sot
        for idx in xrange(self.n_sit_sot):
            seq = scan_outputs[offset+idx][:-1]
            scan_seqs.append(seq[::-1])
        offset = ( self.n_mit_mot_outs +
                  self.n_mit_sot +
                  self.n_sit_sot )
        scan_seqs += [ x[::-1] for x in
                      g_outs[offset:offset+self.n_nit_sot]]
        # The gradient scan treats every forward recurrent output as a
        # mit_mot of the backward scan; build its bookkeeping lists
        scan_mit_mot = []
        inner_mit_mot = []
        scan_mit_mot_outs = []
        mit_mot_taps = []
        mit_mot_out_slices = []
        out_pos = 0
        ins_pos = n_seqs
        n_mit_mot_outs = 0
        n_mit_mot_ins = 0
        ins_pos = self.n_seqs
        for idx in xrange(self.n_mit_mot):
            scan_mit_mot.append( g_outs[idx][::-1] )
            mit_mot_taps.append([])
            mit_mot_out_slices.append([])
            for jdx in xrange(len(self.mit_mot_out_slices[idx])):
                inner_mit_mot.append( inner_g_outs[out_pos] )
                mit_mot_taps[idx].append(
                    -self.mit_mot_out_slices[idx][jdx])
                n_mit_mot_ins += 1
                out_pos += 1
            for jdx in xrange(len(self.tap_array[idx])):
                inner_mit_mot.append( prev_inner_gfn_outs[ins_pos] )
                scan_mit_mot_outs.append(
                    inner_gfn_outs[ ins_pos] )
                n_mit_mot_ins += 1
                ins_pos += 1
                n_mit_mot_outs += 1
                mit_mot_taps[idx].append( -self.tap_array[idx][jdx])
                mit_mot_out_slices[idx].append(
                    -self.tap_array[idx][jdx] )
        offset = self.n_mit_mot
        for idx in xrange(self.n_mit_sot):
            mit_mot_taps.append([])
            mit_mot_out_slices.append([])
            scan_mit_mot.append( g_outs[idx + offset][::-1] )
            idx_tap = idx + self.n_mit_mot
            for jdx in xrange(len(self.tap_array[idx_tap])):
                inner_mit_mot.append( prev_inner_gfn_outs[ins_pos] )
                mit_mot_taps[idx+offset].append(
                    -self.tap_array[idx_tap][jdx] )
                mit_mot_out_slices[idx].append(
                    -self.tap_array[idx_tap][jdx] )
                scan_mit_mot_outs.append(inner_gfn_outs[ ins_pos] )
                n_mit_mot_ins += 1
                ins_pos += 1
                n_mit_mot_outs += 1
            inner_mit_mot.append( inner_g_outs[out_pos] )
            out_pos += 1
            n_mit_mot_ins += 1
            mit_mot_taps[idx+offset].append( 0 )
        offset += self.n_mit_sot
        for idx in xrange(self.n_sit_sot):
            mit_mot_taps.append([0,1])
            mit_mot_out_slices.append([1])
            scan_mit_mot.append( g_outs[idx + offset][::-1] )
            scan_mit_mot_outs.append(inner_gfn_outs[ ins_pos ])
            inner_mit_mot += [ inner_g_outs[out_pos]
                              , prev_inner_gfn_outs[ins_pos] ]
            n_mit_mot_outs += 1
            out_pos += 1
            ins_pos += 1
            n_mit_mot_ins += 2
        # gradients w.r.t. the sequences become nit_sot outputs
        n_nit_sot = self.n_seqs
        scan_nit_sot_outs = inner_gfn_outs[:self.n_seqs]
        offset = ( self.n_seqs
                  + n_ins_mit_sot
                  + n_ins_mit_mot
                  + self.n_sit_sot )
        # gradients w.r.t. ``other_args`` are carried as shared outputs
        n_shared_outs = len(prev_inner_gfn_outs[offset:])
        scan_shared_ins = prev_inner_gfn_outs[offset:]
        scan_shared_init = zeros_like_diff_ins[offset:]
        scan_shared_outs = inner_gfn_outs[offset:]
        tap_array = mit_mot_taps
        # assemble the info dictionary of the backward Scan op
        info = {}
        info['n_seqs'] = n_seqs
        info['n_mit_sot'] = 0
        info['tap_array'] = tap_array
        info['gpu'] = False
        n_mit_mot = ( self.n_mit_mot
                     + self.n_mit_sot
                     + self.n_sit_sot )
        info['n_mit_mot'] = n_mit_mot
        info['n_mit_mot_outs'] = n_mit_mot_outs
        info['mit_mot_out_slices'] = mit_mot_out_slices
        info['truncate_gradient'] = self.truncate_gradient
        info['n_sit_sot'] = 0
        info['n_shared_outs'] = n_shared_outs + self.n_shared_outs
        info['n_nit_sot'] = n_nit_sot
        if self.name:
            info['name'] = 'grad_of_' + self.name
        else:
            info['name'] = None
        info['mode'] = self.mode
        info['inplace'] = False
        info['n_other_ignore'] = 0
        n_mit_sot = 0
        n_sit_sot = 0
        n_other_ignore_seqs = 0
        # truncated BPTT: only run the backward scan for a limited number
        # of steps when requested
        if self.truncate_gradient != -1 :
            do_steps = tensor.minimum(args[0], self.truncate_gradient)
        else:
            do_steps = args[0]
        offset = ( 1
                  + self.n_seqs
                  + self.n_mit_mot
                  + self.n_mit_sot
                  + self.n_sit_sot
                  + self.n_nit_sot
                  + self.n_shared_outs
                  + self.n_other_ignore )
        scan_inputs = ( [do_steps] +
                       scan_seqs +
                       scan_mit_mot +
                       scan_shared_init +
                       old_scan_init +
                       [ args[0] for x in xrange(n_nit_sot) ] +
                       args[offset:] )
        offset = ( self.n_seqs
                  + n_ins_mit_mot
                  + n_ins_mit_sot
                  + self.n_sit_sot
                  + self.n_shared_outs )
        inner_other_args = self.inputs[offset:]
        inner_gfn_ins = ( inner_seqs +
                         inner_mit_mot +
                         scan_shared_ins +
                         old_scan_shared_ins +
                         inner_other_args )
        inner_gfn_outs = ( scan_mit_mot_outs +
                          scan_nit_sot_outs +
                          scan_shared_outs +
                          old_scan_shared_outs )
        local_op = Scan( inner_gfn_ins, inner_gfn_outs, info )
        outputs = local_op(*scan_inputs)
        if type(outputs) not in (list, tuple):
            outputs = [ outputs ]
        # Re-order the gradients correctly
        gradients = [None]
        offset = ( self.n_mit_mot
                  + self.n_mit_sot
                  + self.n_sit_sot )
        gradients += [ x[::-1] for x in outputs[offset:offset+self.n_seqs]]
        end = self.n_mit_mot + self.n_mit_sot + self.n_sit_sot
        gradients += [ x[::-1] for x in outputs[:end]]
        gradients += [ None for x in xrange(self.n_shared_outs)]
        gradients += [ None for x in xrange(self.n_nit_sot) ]
        gradients += [ None for x in xrange(self.n_other_ignore) ]
        begin = end + self.n_seqs
        end = begin + n_shared_outs
        gradients += outputs[begin:end]
        return gradients
@theano.compile.profilemode.register_profiler_printer
def profile_printer(fct_name, compile_time, fct_call_time, fct_call,
                    apply_time, op_cimpl, message, outputs_size,
                    other_time):
    """
    ProfileMode hook: print the overhead of every Scan node, comparing
    the total time spent in the Scan op against the time spent in its
    inner function and inner ops.
    """
    # Scan overhead profile
    if any([isinstance(node.op, Scan) for (_,node) in apply_time.keys()]):
        print
        print 'Scan overhead:'
        print '<Scan op time(s)> <sub scan fct time(s)> <sub scan op time(s)> <sub scan fct time(% scan op time)> <sub scan op time(% scan op time)> <node>'
        total_super_scan_time = 0
        total_scan_fct_time = 0
        total_scan_op_time = 0
        for (_,node),v in apply_time.items():
            if isinstance(node.op, Scan):
                # time spent inside the compiled inner function / inner ops
                scan_fct_time = sum(node.op.mode_instance.fct_call_time.values())
                scan_op_time = sum(node.op.mode_instance.local_time)
                total_super_scan_time += v
                total_scan_fct_time += scan_fct_time
                total_scan_op_time += scan_op_time
                # NOTE(review): divides by v (per-node time) -- a node with
                # zero recorded time would raise ZeroDivisionError
                print '    %5.1fs  %5.1fs  %5.1fs  %5.1f%%  %5.1f%%'%(
                    v, scan_fct_time, scan_op_time, scan_fct_time/v*100,
                    scan_op_time/v*100), node
        print '    total %5.1fs  %5.1fs  %5.1fs  %5.1f%%  %5.1f%%'%(
            total_super_scan_time, total_scan_fct_time, total_scan_op_time, total_scan_fct_time/total_super_scan_time*100, total_scan_op_time/total_super_scan_time*100)
"""
This module provides utility functions for the Scan Op
See scan.py for details on scan
"""
__docformat__ = 'restructedtext en'
__authors__ = ( "Razvan Pascanu "
"Frederic Bastien "
"James Bergstra "
"Pascal Lamblin " )
__copyright__ = "(c) 2010, Universite de Montreal"
__contact__ = "Razvan Pascanu <r.pascanu@gmail>"
import copy_reg
import cPickle
import itertools
import logging
import numpy
import sys, time, copy
from theano import config
from theano.gof.python25 import partial
from theano.compile.pfunc import rebuild_collect_shared
from theano import gof
from theano import tensor
from theano.tensor.basic import get_constant_value
from theano.gof import Op, Apply
from theano.compile.io import *
from theano.compile.function_module import Supervisor, view_tree_set, alias_root
from theano.misc.safe_asarray import _asarray
import theano.compile.mode as mode_module
from theano.scalar import Scalar, ScalarVariable, ScalarConstant
from theano.sandbox import cuda
import theano
################ Utility Functions and Classes #######################
# Module-level logger used by the warning/info helpers below; messages go
# out on the 'theano.scan_utils' channel.
_logger = logging.getLogger('theano.scan_utils')
def warning(*msg):
    """Emit a scan-related warning through the module logger."""
    joined = ' '.join(msg)
    _logger.warning('WARNING theano.scan: ' + joined)
def info(*msg):
    """Emit a scan-related info message through the module logger."""
    joined = ' '.join(msg)
    _logger.info('INFO theano.scan: ' + joined)
def safe_new(x):
    """
    Return a fresh variable shaped like ``x``.  GPU variables are mapped
    to host TensorType variables (same broadcastable pattern, floatX
    dtype); everything else gets a new variable of its own type.
    """
    on_gpu = cuda.cuda_available and isinstance(x.type, cuda.CudaNdarrayType)
    if not on_gpu:
        return x.type()
    host_type = tensor.TensorType(broadcastable=x.type.broadcastable,
                                  dtype=config.floatX)
    return host_type()
def safe_to_cpu(x):
    """
    Transfer ``x`` to the host when it lives on the GPU; return it
    unchanged otherwise.
    """
    on_gpu = cuda.cuda_available and isinstance(x.type, cuda.CudaNdarrayType)
    if on_gpu:
        return cuda.basic_ops.host_from_gpu(x)
    return x
def traverse(out, x, x_copy, d):
    '''
    Function used by scan to parse the tree and figure out which nodes
    it needs to replace.  There are two options:

      1) ``x`` and ``x_copy`` are on the host: replace ``x`` with
         ``x_copy`` (wrapped in ``gpu_from_host``);
      2) ``x`` is on the gpu and ``x_copy`` on the host: replace
         ``host_from_gpu(x)`` with ``x_copy``.

    This happens because initially shared variables are on GPU, which is
    fine for the main computational graph but confuses things a bit for
    the inner graph of scan.  Accumulates replacements into ``d`` and
    returns it.
    '''
    if out == x:
        d[out] = cuda.gpu_from_host(x_copy)
    elif out.owner is None:
        # reached a graph leaf that is not x: nothing to do
        pass
    elif (out.owner.op == cuda.host_from_gpu
          and out.owner.inputs == [x]):
        d[out] = x_copy
    else:
        # recurse into the node's inputs
        for inp in out.owner.inputs:
            d = traverse(inp, x, x_copy, d)
    return d
class EmptyObject(object):
    """Bare attribute container used to mimic theano.function's maker."""
    pass
class ScanInnerFunction(object):
    """
    Stripped down, simplified version of the theano.function class that
    has a low overhead at calling a function.

    Data moves through pre-allocated storage cells: ``__call__`` writes the
    inputs into ``input_storage``, runs the compiled thunk ``fn`` (which
    takes no arguments), and reads results back from ``outputs_storage``.
    """
    def __init__( self
                 , fn
                 , input_storage
                 , output_storage
                 , env
                 , inputs
                 , outputs
                 , nonmutable_indices
                 , mode
                 , name
                ):
        # thunk produced by the linker; communicates via the storage cells
        self.fn = fn
        self.input_storage = input_storage
        self.n_ins = len(input_storage)
        self.n_outs = len(output_storage)
        self.outputs_storage = output_storage
        # ``maker`` mimics the attribute layout of theano.function's maker
        self.maker = EmptyObject()
        self.maker.env = env
        self.maker.inputs = inputs
        for i in inputs:
            i.update = None
        self.maker.expanded_inputs = inputs
        self.maker.outputs = outputs
        self.maker.nonmutable_indices = nonmutable_indices
        self.maker.mode = mode
        self.name = name

    def __call__(self, inputs, outputs):
        """
        Run the compiled thunk on ``inputs``, writing results into the
        caller-provided ``outputs`` storage cells; returns the internal
        output storage list.
        """
        t0 = time.time()
        # put data into the storage cells read by the thunk
        for idx in xrange(self.n_ins):
            self.input_storage[idx][0] = inputs[idx]
        for idx in xrange(self.n_outs):
            self.outputs_storage[idx][0] = outputs[idx][0]
        _t0 = time.time()
        self.fn()
        dt_fn = time.time() - _t0
        # copy results back when the thunk allocated fresh storage instead
        # of writing in place into the provided containers
        for idx in xrange(self.n_outs):
            if outputs[idx][0] is not None:
                if outputs[idx][0] is not self.outputs_storage[idx][0]:
                    if outputs[idx][0].shape:
                        outputs[idx][0][:] = self.outputs_storage[idx][0]
                    else:
                        # 0-d arrays cannot be sliced; use itemset
                        outputs[idx][0].itemset(self.outputs_storage[idx][0])
        dt_call = time.time() - t0
        # profiling counters are only present on profiling modes
        if hasattr(self.maker.mode,'fct_call_time'):
            self.maker.mode.fct_call_time[self] += dt_call
            self.maker.mode.fct_call[self] += 1
            self.maker.mode.fn_time += dt_fn
            self.maker.mode.call_time += dt_call
        return self.outputs_storage

    def __getstate__(self):
        """Drop the unpicklable compiled thunk and storage for pickling."""
        state = self.__dict__.copy()
        del state['fn']
        del state['input_storage']
        del state['outputs_storage']
        # BUG FIX: the original deleted ``env`` directly from the shared
        # maker object (state is only a shallow copy), corrupting the live
        # instance; copy the maker before stripping its env.
        state['maker'] = copy.copy(self.maker)
        del state['maker'].env
        return state

    def __setstate__(self, state):
        # BUG FIX: the original signature was ``__setstate__(self)`` and it
        # referenced the undefined name ``state`` (NameError on unpickle).
        self.__dict__ = state
        mode = self.maker.mode
        inputs = self.maker.inputs
        outputs = self.maker.outputs
        nonmutable_indices = self.maker.nonmutable_indices
        # Recompile the inner function: clone the stored graph and rebuild
        # the env, optimize, then link a fresh thunk.
        # BUG FIX: the original passed the misspelled name ``ouputs`` here.
        new_inputs, new_outputs = gof.graph.clone( inputs, outputs )
        env = gof.env.Env(new_inputs, new_outputs)
        nonmutable = []
        for idx in nonmutable_indices :
            nonmutable.append( new_inputs[idx] )
        env.extend(
            Supervisor( inp for inp in nonmutable if
                       not (hasattr(env,'destroyers') and
                            env.destroyers(inp))))
        # If named nodes are replaced, keep the name
        env.extend(gof.toolbox.PreserveNames())
        optimizer, linker = mode.optimizer, copy.copy(mode.linker)
        # optimize the env
        t0 = time.time()
        optimizer(env)
        _logger.debug('Optimizing took %f seconds' %(time.time() - t0))
        if not hasattr(linker, 'accept'):
            raise ValueError( ( "'linker' parameter of FunctionFactory "
                              "should be a Linker with an accept method "
                              "or one of %s") %
                             mode_module.predefined_linkers.keys())
        my_linker = linker.accept ( env )
        input_storage = []
        output_storage = []
        for input in inputs:
            input_storage += [[ None ]]
        for output in outputs:
            output_storage += [[ None ]]
        t0 = time.time()
        _fn, _i,_o = my_linker.make_thunk( input_storage = input_storage,
                                          output_storage = output_storage)
        _logger.debug('Linking took %f seconds' %(time.time() - t0))
        self.fn = _fn
        self.input_storage = input_storage
        self.outputs_storage = output_storage
        self.maker.env = env
        # BUG FIX: the original constructed a new 4-argument
        # ScanInnerFunction here (a TypeError -- __init__ takes 9
        # arguments) and registered that throwaway object with the
        # profiling mode; register the restored ``self`` instead.
        if hasattr(mode, 'fct_call_time'):
            mode.fct_call_time.setdefault(self, 0)
        if hasattr(mode, 'fct_call'):
            # BUG FIX: the original called the non-existent dict method
            # ``set_default``; the correct name is ``setdefault``.
            mode.fct_call.setdefault(self, 0)
def scan_function( inputs
                  , outputs
                  , nonmutable_indices = None
                  , mode = None
                  , name = None
                  , slices = 0
                 ):
    """
    ``Constructor`` of the ScanInnerFunction ( a simplified version of
    theano.function ). This should only be used internally by Scan.

    :param inputs: theano variables that represent the inputs of the function
    :param outputs: theano expressions that represent the outputs of the
                    function
    :param nonmutable_indices: the subset of indices corresponding to
                    nonmutable inputs
    :param mode: compilation mode for the function
    :param name: name of the function
    :param slices: number of leading outputs excluded from the aliasing
                   mask computed below
    :return: pair ``(mask, fn)`` -- ``mask[i] == 1`` marks an output that
             aliases another output or an input (and hence must be copied
             by the caller), ``fn`` is the compiled ScanInnerFunction
    """
    t1 = time.time()
    mode = mode_module.get_mode(mode)
    if isinstance(mode, (list, tuple)): # "mode comparison" semantics
        _logger.warning('Passing multiple modes is deprecated (20091019)')
        if not mode:
            raise ValueError("Please provide at least one mode.")
        else:
            mode = mode[0]
    ## Replacing the Function Maker
    if not isinstance(outputs, (list, tuple)):
        outputs = [outputs]
    if not isinstance(inputs, (list, tuple)):
        inputs = [inputs]
    # clone the graph so optimizations do not touch the caller's variables
    new_inputs, new_outputs = gof.graph.clone( inputs, outputs )
    env = gof.env.Env(new_inputs, new_outputs)
    nonmutable = []
    for idx in nonmutable_indices :
        nonmutable.append( new_inputs[idx] )
    # forbid in-place destruction of the nonmutable inputs
    env.extend(
        Supervisor( inp for inp in nonmutable if
                   not (hasattr(env,'destroyers') and env.destroyers(inp))))
    # If named nodes are replaced, keep the name
    env.extend(gof.toolbox.PreserveNames())
    optimizer, linker = mode.optimizer, copy.copy(mode.linker)
    # optimize the env
    t0 = time.time()
    optimizer(env)
    _logger.debug('Optimizing took %f seconds' %(time.time() - t0))
    mask = [ 0 for x in env.outputs[slices:] ]
    # outputs that are inputs or constants are wrapped in a view Clone so
    # they become distinct variables
    for i,out in enumerate(env.outputs):
        if (out in env.inputs or
            isinstance(out, tensor.Constant)):
            env.change_input('output', i, Clone()(out) )
    for i in xrange(len(env.outputs[slices:])):
        views_of_output_i = set()
        view_tree_set(alias_root(env.outputs[i]), views_of_output_i)
        copied = False
        # do not allow outputs to be aliased
        for j in xrange(i+1, len(env.outputs)):
            if env.outputs[j] in views_of_output_i:
                mask[i] = 1
                copied = True
                break
        if not copied:
            for input_j in env.inputs:
                # do not allow outputs to be aliased to an inputs (j), unless
                # a) that j'th input has been 'destroyed' by e.g. in-place computations
                if hasattr(env,'get_destroyers_of') and env.get_destroyers_of(input_j):
                    continue
                if input_j in views_of_output_i:
                    mask[i] = 1
                    break
    if not hasattr(linker, 'accept'):
        raise ValueError( ( "'linker' parameter of FunctionFactory "
                          "should be a Linker with an accept method "
                          "or one of %s") %
                         mode_module.predefined_linkers.keys())
    my_linker = linker.accept ( env )
    input_storage = []
    output_storage = []
    for input in inputs:
        input_storage += [[ None ]]
    for output in outputs:
        output_storage += [[ None ]]
    t0 = time.time()
    _fn, _i,_o = my_linker.make_thunk( input_storage = input_storage,
                                      output_storage = output_storage)
    _logger.debug('Linking took %f seconds' %(time.time() - t0))
    # register per-node profiling counters when running under ProfileMode
    if hasattr(mode, 'apply_time'):
        for i, node in enumerate(env.toposort()):
            mode.apply_time[(i,node)] = 0.0
            assert len(_fn.thunk_groups[i])==1
            mode.op_cimpl[node.op] = hasattr(_fn.thunk_groups[i][0],'cthunk')
    fn = ScanInnerFunction( _fn
                           , input_storage
                           , output_storage
                           , env
                           , inputs
                           , outputs
                           , nonmutable_indices
                           , mode
                           , name
                          )
    t2 = time.time()
    if hasattr(mode, 'compile_time'):
        mode.compile_time += t2-t1
    if hasattr(mode, 'fct_call_time'):
        mode.fct_call_time.setdefault(fn, 0)
    if hasattr(mode, 'fct_call'):
        mode.fct_call.setdefault(fn,0)
    return mask, fn
# Hashing a dictionary/list/tuple by xoring the hash of each element
def hash_listsDictsTuples(x):
    """
    Recursively combine (xor) the hashes of the elements of nested
    dicts/lists/tuples; unhashable leaves contribute nothing.
    """
    acc = 0
    if isinstance(x, dict):
        for key, value in x.iteritems():
            acc ^= hash_listsDictsTuples(key)
            acc ^= hash_listsDictsTuples(value)
    elif isinstance(x, (list, tuple)):
        for element in x:
            acc ^= hash_listsDictsTuples(element)
    else:
        try:
            acc ^= hash(x)
        except:
            # unhashable leaf: silently skipped, as in theano's original
            pass
    return acc
def clone( output
          , replace = None
          , strict = True
          , copy_inputs = True):
    """
    Clone a computational (sub)graph, optionally substituting some of its
    subgraphs, and return the cloned outputs.

    :type output: Theano Variables ( or Theano expressions)
    :param output: Theano expression that represents the computational
                   graph
    :type replace: dict
    :param replace: dictionary describing which subgraphs should be
                    replaced by what
    :param strict: passed through as ``rebuild_strict`` -- cloned nodes
                   must keep the types of the originals
    :param copy_inputs: passed through as ``copy_inputs_over``; if False
                        the inputs of the graph are cloned as well
    """
    _, outs, _ = rebuild_collect_shared(output,
                                        inputs=[],
                                        replace=replace,
                                        updates=[],
                                        rebuild_strict=strict,
                                        copy_inputs_over=copy_inputs)
    return outs
def get_updates_and_outputs(outputs_updates):
    """
    Separate the list of outputs and the updates dictionary from the
    value(s) returned by a scan inner function.

    The code that follows tries to be as flexible as possible, allowing
    the user to return the outputs and updates in any order, and to give
    the updates either as a dictionary or as a list of pairs.

    :param outputs_updates: whatever the inner function returned: a single
        output, a list/tuple of outputs, an updates dict (or pair list),
        or a 2-element combination of outputs and updates in either order
    :return: tuple ``(outputs, updates)`` where ``outputs`` is a list and
        ``updates`` is a dictionary (or list of pairs, when given so)
    """
    outputs = []
    updates = {}
    # we will try now to separate the outputs from the updates
    if not isinstance(outputs_updates, (list, tuple)):
        if isinstance(outputs_updates, dict):
            # we have just an update dictionary
            updates = outputs_updates
        else:
            outputs = [outputs_updates]
    elif len(outputs_updates) == 0:
        # robustness fix: an empty list/tuple used to raise IndexError in
        # the 2-element branch below; treat it as no outputs, no updates
        pass
    elif len(outputs_updates) == 1:
        if isinstance(outputs_updates[0], (dict, tuple)):
            # BUG FIX: the original read ``otuputs_updates[1]`` -- both a
            # typo (NameError) and an out-of-range index on a length-1
            # sequence; the single element IS the updates container
            updates = dict(outputs_updates[0])
        else:
            outputs = outputs_updates
    else:
        elem0 = outputs_updates[0]
        elem1 = outputs_updates[1]
        t_el0 = type(elem0)
        if (t_el0 == dict or
            (t_el0 in (list, tuple) and
             isinstance(elem0[0], (list, tuple)))):
            # elem0 is the updates dictionary / list
            updates = elem0
            outputs = elem1
            if not isinstance(outputs, (list, tuple)):
                outputs = [outputs]
        elif (isinstance(elem1, dict) or
              (isinstance(elem1, (list, tuple)) and
               isinstance(elem1[0], (list, tuple)))):
            # elem1 is the updates dictionary / list
            updates = elem1
            outputs = elem0
            if not isinstance(outputs, (list, tuple)):
                outputs = [outputs]
        else:
            if (isinstance(outputs_updates, (list, tuple)) and
                    isinstance(outputs_updates[0], (list, tuple))):
                # the whole value looks like a list of update pairs
                outputs = []
                updates = outputs_updates
            else:
                outputs = outputs_updates
                updates = {}
    # in case you return a tuple .. convert it to a list (there are certain
    # operations that are not permitted on tuples, like element assignment)
    outputs = list(outputs)
    # If you return numbers (highly unlikely) this will not go well for
    # theano. We need to convert them to Theano constants:
    for i, out in enumerate(outputs):
        outputs[i] = tensor.as_tensor(out)
    return outputs, updates
def check_NaN_Inf_None(x):
    """
    Return a truthy value when ``x`` is None, NaN, Inf, or a theano
    Constant holding a string; falsy otherwise.  Values numpy cannot
    inspect are probed through ``get_constant_value``.
    """
    is_none = x is None
    try:
        is_nan = numpy.isnan(x)
        is_inf = numpy.isinf(x)
        is_str = isinstance(x, str)
    except:
        # numpy could not interpret x at all
        is_nan = False
        is_inf = False
        is_str = False
    if not is_nan and not is_inf:
        # maybe x is a constant expression wrapping a bad value
        try:
            val = get_constant_value(x)
            is_inf = numpy.isinf(val)
            is_nan = numpy.isnan(val)
        except:
            is_nan = False
            is_inf = False
            is_str = isinstance(x, gof.Constant) and isinstance(x.data, str)
    return is_none or is_nan or is_inf or is_str
def expand( tensor_var, size):
    '''
    Transforms the shape of a tensor from (d1, d2, ...) to
    (d1 + size, d2, ...) by appending ``size`` rows of zeros at the end.
    '''
    # Corner case that I might use in an optimization
    if size == 0:
        return tensor_var
    dims = [tensor_var.shape[d] for d in xrange(tensor_var.ndim)]
    padded_shape = [size + dims[0]] + dims[1:]
    buf = tensor.zeros(padded_shape, dtype=tensor_var.dtype)
    # copy the original values into the leading rows of the zero buffer
    return tensor.set_subtensor(buf[:dims[0]], tensor_var)
class Clone(Op):
    """
    Identity op whose output is declared as a view of its input
    (``view_map``); used to turn inputs/constants that appear directly as
    outputs into distinct variables.
    """
    def __init__(self):
        self.view_map = {0: [0]}

    def __eq__(self, other):
        return type(self) == type(other)

    def __hash__(self):
        return hash(type(self))

    def __str__(self):
        return 'clone[as_view]'

    def make_node(self, *inputs):
        first = inputs[0]
        return Apply(self, inputs, [first.type()])

    def perform(self, node, args, outs):
        # pass the input through untouched
        outs[0][0] = args[0]

    def infer_shape(self, node, input_shapes):
        return input_shapes

    def grad(self, args, g_outs):
        return g_outs

# shared singleton instance
cloneOp = Clone()
def equal_computations(x, y, strict=False):
    '''
    Check whether two theano graphs represent the same computation
    (possibly applied to different inputs).
    '''
    if not x.type == y.type:
        return False
    if not x.owner and not y.owner:
        # both are leaves of the graph
        if not strict:
            return True
        if isinstance(x, tensor.Constant):
            # note: they already have the same type
            return x.data == y.data
        return x == y
    if x.owner and not y.owner:
        return False
    if not x.owner and y.owner:
        return False
    # both have owners from here on
    if not x.owner.op == y.owner.op:
        return False
    if not len(x.owner.inputs) == len(y.owner.inputs):
        return False
    # NOTE(review): the recursion intentionally (?) drops ``strict`` --
    # leaves below the top level are always compared non-strictly
    for x_in, y_in in zip(x.owner.inputs, y.owner.inputs):
        if not equal_computations(x_in, y_in):
            return False
    return True
def infer_shape( outs, inputs, input_shapes):
    '''
    Compute the shape of the outputs given the shape of the inputs
    of a theano graph ( assuming that all ops on the way have infer_shape
    implemented).

    :param outs: list of output variables whose shapes are wanted
    :param inputs: list of input variables of the graph
    :param input_shapes: list of shapes, one per entry of ``inputs``
    :return: list with one shape per output; ``None`` for any output whose
             shape could not be inferred
    '''
    # Map each already-known variable to its shape; seeded with the
    # caller-provided input shapes and grown during graph traversal.
    shape_dict = {}
    for inp, inp_shp in zip(inputs, input_shapes):
        shape_dict[inp] = inp_shp
    def local_traverse(out, shape_dict):
        # Depth-first walk filling shape_dict bottom-up.
        if out in shape_dict:
            return shape_dict
        elif not out.owner:
            # Leaf variable: only constants and shared variables carry a
            # concrete shape we can read off directly.
            if isinstance(out, tensor.TensorConstant):
                shape_dict[out] = out.data.shape
                return shape_dict
            elif isinstance(out, tensor.sharedvar.TensorSharedVariable):
                shape_dict[out] = out.value.shape
                return shape_dict
            else:
                raise ValueError('Could not figure shape of', out)
        else:
            # Make sure every input of the owner node is resolved first.
            for inp in out.owner.inputs:
                if not inp in shape_dict:
                    shape_dict = local_traverse(inp,shape_dict)
            try:
                self = out.owner.op
                node = out.owner
                input_shapes = [ shape_dict[i] for i in out.owner.inputs]
                shapes = self.infer_shape(node, input_shapes)
                out_idx = node.outputs.index(out)
                shape_dict[out] = shapes[out_idx]
            except:
                # Best effort: an op without a (working) infer_shape simply
                # yields None for its outputs instead of aborting.
                shape_dict[out] = None
            return shape_dict
    for out in outs:
        shape_dict = local_traverse(out, shape_dict)
    return [ shape_dict[o] for o in outs]
def scan_can_remove_outs(op, out_idxs):
    '''
    Looks at all outputs defined by indices ``out_idxs`` and checks which
    ones can be removed from the scan op without affecting the rest.

    :return: a pair of lists ``(required_outs, not_required)``: the first
             holds the indices (from ``out_idxs``) that are still needed by
             the remaining outputs and can NOT be removed, the second holds
             the indices that can safely be removed.
    '''
    # Everything the caller did NOT ask about must keep working, so collect
    # all graph inputs those outputs depend on.
    non_removable = [ o for i,o in enumerate(op.outputs) if i not in
                     out_idxs]
    required_inputs = gof.graph.inputs(non_removable)
    # out_ins[i] = the inner-graph inputs (taps) feeding output i.
    out_ins = []
    offset = op.n_seqs
    lim = op.n_mit_mot + op.n_mit_sot + op.n_sit_sot
    for idx in range(lim):
        n_ins = len(op.info['tap_array'][idx])
        out_ins += [op.inputs[offset:offset+n_ins]]
        offset += n_ins
    # nit-sot outputs take no taps; shared outputs take exactly one input.
    out_ins += [ [] for k in xrange(op.n_nit_sot) ]
    out_ins += [ [op.inputs[offset+k]] for k in xrange(op.n_shared_outs)]
    added = True
    # Mask: 1 = still a removal candidate, 0 = proven required.
    out_idxs_mask = [1 for idx in out_idxs]
    # Fixed-point iteration: marking one output as required can make
    # other candidates required too (via their taps), so loop until stable.
    while added:
        added = False
        for pos,idx in enumerate(out_idxs):
            if ( out_idxs_mask[pos] and
                numpy.any([x in required_inputs for x in out_ins[idx]]) ):
                # This output is required ..
                out_idxs_mask[pos] = 0
                required_inputs += gof.graph.inputs([op.outputs[idx]])
                added = True
    required_outs = [x for i,x in enumerate(out_idxs)
                        if out_idxs_mask[i] == 0]
    not_required = [x for i,x in enumerate(out_idxs) if out_idxs_mask[i]==1]
    return (required_outs, not_required)
def compress_outs(op, not_required, inputs):
    '''
    Helpful function that gets a Scan op, a list of indices indicating
    which outputs are not required anymore and should be removed, and
    a list of inputs to the apply node corresponding to the scan op and
    produces the list of inputs and outputs and the info dictionary where
    the indicated outputs are eliminated. Note that eliminating an output
    means removing its inputs from the inner function and from the
    node inputs, and changing the dictionary.

    :return: tuple ``(op_inputs, op_outputs, info, node_inputs,
             map_old_new)`` where ``map_old_new`` maps old output indices
             to their position after compression.
    '''
    # Fresh info dictionary: per-kind output counters are rebuilt below,
    # the rest is copied unchanged from the old op.
    info = {}
    info['tap_array'] = []
    info['n_seqs'] = op.info['n_seqs']
    info['n_mit_mot'] = 0
    info['n_mit_mot_outs'] = 0
    info['mit_mot_out_slices'] = []
    info['n_mit_sot'] = 0
    info['n_sit_sot'] = 0
    info['n_shared_outs'] = 0
    info['n_nit_sot'] = 0
    info['n_other_ignore'] = op.info['n_other_ignore']
    info['truncate_gradient'] = op.info['truncate_gradient']
    info['name'] = op.info['name']
    info['inplace'] = op.info['inplace']
    info['gpu'] = op.info['gpu']
    info['mode'] = op.info['mode']
    op_inputs = op.inputs[:op.n_seqs]
    op_outputs = []
    # NOTE(review): the +1 presumably accounts for the n_steps entry that
    # precedes the sequences in the node inputs -- confirm against the
    # scan node layout.
    node_inputs = inputs[:op.n_seqs + 1]
    map_old_new = {}
    # Cursors into, respectively: output indices (offset), node inputs
    # (ni_offset), inner-graph inputs (i_offset), inner-graph outputs
    # (o_offset), and the next compressed output position (curr_pos).
    offset = 0
    ni_offset = op.n_seqs+1
    i_offset = op.n_seqs
    o_offset = 0
    curr_pos = 0
    # --- mit-mot outputs (multiple input taps, multiple output taps) ---
    for idx in xrange(op.info['n_mit_mot']):
        if offset + idx not in not_required:
            map_old_new[offset+idx] = curr_pos
            curr_pos += 1
            info['n_mit_mot'] += 1
            info['tap_array'] += [op.tap_array[offset+idx]]
            info['mit_mot_out_slices'] += [op.mit_mot_out_slices[offset+idx]]
            # input taps
            for jdx in op.tap_array[offset+idx]:
                op_inputs += [op.inputs[i_offset]]
                i_offset += 1
            # output taps
            for jdx in op.mit_mot_out_slices[offset+idx]:
                op_outputs += [op.outputs[o_offset]]
                o_offset += 1
            # node inputs
            node_inputs += [inputs[ni_offset+idx]]
        else:
            # Dropped output: just advance the cursors past its taps.
            o_offset += len(op.mit_mot_out_slices[offset+idx])
            i_offset += len(op.tap_array[offset+idx])
    info['n_mit_mot_outs'] = len(op_outputs)
    offset += op.n_mit_mot
    ni_offset += op.n_mit_mot
    # --- mit-sot outputs (multiple input taps, single output tap) ---
    for idx in xrange(op.info['n_mit_sot']):
        if offset + idx not in not_required:
            map_old_new[offset+idx] = curr_pos
            curr_pos += 1
            info['n_mit_sot'] += 1
            info['tap_array'] += [op.tap_array[offset+idx]]
            #input taps
            for jdx in op.tap_array[offset+idx]:
                op_inputs += [op.inputs[i_offset]]
                i_offset += 1
            #output taps
            op_outputs += [op.outputs[o_offset]]
            o_offset+=1
            #node inputs
            node_inputs += [inputs[ni_offset+idx]]
        else:
            o_offset+=1
            i_offset+=len(op.tap_array[offset+idx])
    offset += op.n_mit_sot
    ni_offset += op.n_mit_sot
    # --- sit-sot outputs (single input tap, single output tap) ---
    for idx in xrange(op.info['n_sit_sot']):
        if offset + idx not in not_required:
            map_old_new[offset+idx] = curr_pos
            curr_pos += 1
            info['n_sit_sot'] += 1
            info['tap_array'] += [op.tap_array[offset+idx]]
            #input taps
            op_inputs += [op.inputs[i_offset]]
            i_offset += 1
            #output taps
            op_outputs += [op.outputs[o_offset]]
            o_offset+=1
            #node inputs
            node_inputs += [inputs[ni_offset+idx]]
        else:
            o_offset+=1
            i_offset+=1
    offset += op.n_sit_sot
    ni_offset += op.n_sit_sot
    # --- nit-sot outputs (no input taps) ---
    # Their node inputs live AFTER the shared-output node inputs, hence the
    # extra op.n_shared_outs in the index below; collected separately so
    # they can be appended in the right order at the end.
    nit_sot_ins = []
    for idx in xrange(op.info['n_nit_sot']):
        if offset + idx not in not_required:
            map_old_new[offset+idx] = curr_pos
            curr_pos += 1
            info['n_nit_sot'] += 1
            op_outputs += [op.outputs[o_offset]]
            o_offset+=1
            nit_sot_ins += [inputs[ni_offset+idx+op.n_shared_outs]]
        else:
            o_offset += 1
    offset += op.n_nit_sot
    # --- shared outputs (one inner input each) ---
    shared_ins = []
    for idx in xrange(op.info['n_shared_outs']):
        if offset + idx not in not_required:
            map_old_new[offset+idx] = curr_pos
            curr_pos += 1
            info['n_shared_outs'] += 1
            op_outputs += [ op.outputs[o_offset]]
            o_offset +=1
            op_inputs += [ op.inputs[i_offset]]
            i_offset += 1
            shared_ins += [inputs[ni_offset+idx]]
        else:
            o_offset += 1
            i_offset += 1
    # Node inputs keep the order: ... shared, then nit-sot.
    node_inputs += shared_ins
    node_inputs += nit_sot_ins
    # other stuff
    op_inputs += op.inputs[i_offset:]
    node_inputs += inputs[ni_offset+op.n_shared_outs+op.n_nit_sot:]
    return (op_inputs, op_outputs, info, node_inputs, map_old_new)
"""
This module provides syntax shortcut for the Scan Op
See scan.py for details on scan
"""
__docformat__ = 'restructedtext en'
__authors__ = ( "Razvan Pascanu "
"Frederic Bastien "
"James Bergstra "
"Pascal Lamblin " )
__copyright__ = "(c) 2010, Universite de Montreal"
__contact__ = "Razvan Pascanu <r.pascanu@gmail>"
import logging
import scan
# Module logger plus thin helpers for emitting warning / info messages.
_logger = logging.getLogger('theano.scan_views')
def warning(*msg):
    """Join the message fragments and log them at WARNING level."""
    text = ' '.join(msg)
    _logger.warning('WARNING theano.scan: ' + text)
def info(*msg):
    """Join the message fragments and log them at INFO level."""
    text = ' '.join(msg)
    _logger.info('INFO theano.scan: ' + text)
################ Declaration of Views for Scan #######################
# The ``map`` view of Scan Op.
def map( fn
        , sequences
        , non_sequences = None
        , truncate_gradient = -1
        , go_backwards = False
        , mode = None
        , name = None ):
    """
    Similar behaviour as python's map.

    :param fn: function applied at every iteration step (see ``scan``).
    :param sequences: list of sequences iterated over (see ``scan``).
    :param non_sequences: extra arguments passed to ``fn`` that are not
                          iterated over (see ``scan``).
    :param truncate_gradient: See ``scan``.
    :param go_backwards: if True the sequences are parsed from the end
                         towards the beginning, otherwise front to back.
    :param mode: See ``scan``.
    :param name: See ``scan``.
    """
    # ``map`` is just scan with no recurrent outputs.
    return scan.scan(fn=fn,
                     sequences=sequences,
                     outputs_info=[],
                     non_sequences=non_sequences,
                     truncate_gradient=truncate_gradient,
                     go_backwards=go_backwards,
                     mode=mode,
                     name=name)
# The ``reduce`` view of Scan Op.
def reduce( fn
           , sequences
           , outputs_info
           , non_sequences = None
           , go_backwards = False
           , mode = None
           , name = None ):
    """
    Similar behaviour as python's reduce.

    :param fn: function applied at every iteration step (see ``scan``).
    :param sequences: list of sequences iterated over (see ``scan``).
    :param outputs_info: list of dictionaries describing the outputs of
                         reduce (see ``scan``).
    :param non_sequences: extra arguments passed to ``fn`` that are not
                          iterated over (see ``scan``).
    :param go_backwards: if True the sequences are parsed from the end
                         towards the beginning, otherwise front to back.
    :param mode: See ``scan``.
    :param name: See ``scan``.
    """
    # Makes sure the outputs_info is a list.
    if not isinstance(outputs_info, (list,tuple)):
        outs_info = [outputs_info]
    else:
        outs_info = list(outputs_info)
    for i,out_info in enumerate(outs_info):
        if out_info:
            if not isinstance(out_info, dict):
                # Wrap a plain initial state; only the last step is kept.
                outs_info[i] = dict(
                    initial = out_info, return_steps = 1, store_steps = 1)
            else:
                # Specifies that it should return only the last step.
                outs_info[i]['store_steps'] = 1
                outs_info[i]['return_steps'] = 1
        # NOTE : If the user asks for more then the last step,
        # it means he does not understand ``reduce``. We could
        # issue a warning in that case
    # BUG FIX: this used to pass truncate_gradient = 1, which silently
    # truncated backpropagation-through-time to a single step and so
    # produced wrong gradients; reduce must backpropagate through the
    # whole sequence (-1 = no truncation).
    return scan.scan( fn = fn
                     , sequences = sequences
                     , outputs_info = outs_info
                     , non_sequences = non_sequences
                     , go_backwards = go_backwards
                     , truncate_gradient = -1
                     , mode = mode
                     , name = name )
# The ``foldl`` view of Scan Op.
def foldl( fn
          , sequences
          , outputs_info
          , non_sequences = None
          , mode = None
          , name = None ):
    """
    Similar behaviour as haskell's foldl.

    :param fn: function applied at every iteration step (see ``scan``).
    :param sequences: list of sequences iterated over (see ``scan``).
    :param outputs_info: list of dictionaries describing the outputs of
                         reduce (see ``scan``).
    :param non_sequences: extra arguments passed to `fn` that are not
                          iterated over (see ``scan``).
    :param mode: See ``scan``.
    :param name: See ``scan``.
    """
    # foldl is reduce walking the sequences front to back.
    return reduce(fn=fn,
                  sequences=sequences,
                  outputs_info=outputs_info,
                  non_sequences=non_sequences,
                  go_backwards=False,
                  mode=mode,
                  name=name)
# The ``foldr`` view of Scan Op.
def foldr( fn
          , sequences
          , outputs_info
          , non_sequences = None
          , mode = None
          , name = None ):
    """
    Similar behaviour as haskell's foldr.

    :param fn: function applied at every iteration step (see ``scan``).
    :param sequences: list of sequences iterated over (see ``scan``).
    :param outputs_info: list of dictionaries describing the outputs of
                         reduce (see ``scan``).
    :param non_sequences: extra arguments passed to `fn` that are not
                          iterated over (see ``scan``).
    :param mode: See ``scan``.
    :param name: See ``scan``.
    """
    # foldr is reduce walking the sequences back to front.
    return reduce(fn=fn,
                  sequences=sequences,
                  outputs_info=outputs_info,
                  non_sequences=non_sequences,
                  go_backwards=True,
                  mode=mode,
                  name=name)
import unittest
import theano
import numpy
import theano.sandbox.rng_mrg
from theano.tests import unittest_tools as utt
'''
Questions and notes about scan that should be answered :
    * Even though it does not make it publicly known in
the documentation, scan allows you to set both a return_steps
flag and a store_steps flag ( the first one is a soft condition telling
you how many steps to return, the second one determines how much memory
to allocate). There is an optimization as well, that transforms
return_steps to
store_steps. Questions :
- what happens if both flags are set ?
answer: whatever return_steps says is ignored, and store_steps is used
- the optimization works only with return_steps = -1; can it be made
to work with other values ?
    answer: 6 Jul 2010 RP : it is a bit hairy to figure out from the
subtensors what exactly you need
* Scan seems to do copies of every input variable. Is that needed?
answer : probably not, but it doesn't hurt also ( what we copy is
    theano variables, which just carry information about the type / dimension
of the data)
* There is some of scan functionality that is not well documented
'''
class multiple_outputs_numeric_grad:
    """One-sided finite-difference gradient estimator for functions with
    multiple (possibly non-ndarray) inputs; used to verify scan gradients.
    """
    # Default finite-difference step per dtype.
    type_eps = {'float64': 1e-7,
            'float32': 3e-3}
    def __init__(self, f, pt, ndarray_mask = None, eps=None):
        """Return the gradient of f at pt.
        This function computes the gradient by a one-sided finite differences
        of a fixed step size (eps).
        It is assumed that f(...) will return a scalar.
        :param f: callable whose gradient is estimated.
        :param pt: point (single value or list of values) at which the
                   gradient is taken.
        :param ndarray_mask: list of booleans, one per entry of ``pt``;
                   False marks entries (e.g. random states) that must not
                   be perturbed.
        :param eps: the stepsize for the finite differencing. None means
        input dtype-dependent. See `type_eps`.
        """
        def prod(inputs):
            # Product of the entries (used for the flattened element count).
            rval = 1
            for i in inputs:
                rval *= i
            return rval
        packed_pt = False
        if not isinstance(pt, (list, tuple)):
            pt = [pt]
            packed_pt = True
        # This mask tells us if we are dealing with an ndarray input or
        # something else ( a random state ? ) with which we shouldn't really
        # mess up
        if not ndarray_mask:
            ndarray_mask = [True for x in pt ]
        # Use the loosest eps required by any input dtype.
        dtype_eps = multiple_outputs_numeric_grad.type_eps['float64']
        for i,p in enumerate(pt):
            if ndarray_mask[i]:
                pt[i] = numpy.array(p)
                _eps = multiple_outputs_numeric_grad.type_eps[str(
                                            pt[i].dtype)]
                if _eps > dtype_eps:
                    dtype_eps = _eps
        self.ndarray_mask = ndarray_mask
        #'''
        # Compute clean output:
        f_x = f(*pt)
        gx = []
        # now iterate over the elements of x and call f on those + delta x
        for i in xrange(len(pt)):
            if ndarray_mask[i]:
                # It is a ndarray that we can tweak
                if eps:
                    _eps = eps
                else:
                    _eps = dtype_eps
                if pt[i].ndim :
                    _g = []
                    # it has several dimensions:
                    for pos in xrange(prod(pt[i].shape)):
                        # Perturb one flattened element at a time.
                        t = pt[i].copy()
                        t = t.flatten()
                        t[pos] += _eps
                        t = t.reshape(pt[i].shape)
                        f_eps = f(*(pt[:i]+[t]+pt[i+1:]))
                        _g.append(numpy.asarray((f_eps - f_x)/_eps))
                    gx.append(numpy.asarray(_g).reshape(pt[i].shape))
                else:
                    # Scalar (0-d) input: single perturbation.
                    t= numpy.array(pt[i] + _eps)
                    f_eps = f(*(pt[:i]+[t]+pt[i+1:]))
                    gx.append(numpy.asarray((f_eps-f_x)/_eps))
        # Estimated gradients, one entry per perturbable input.
        self.gx = gx
    @staticmethod
    def abs_rel_err(a,b,eps=1.0e-10):
        """Return a small number when a and b are close, relative to how big
        they are"""
        return abs(a-b) / (abs(a)+abs(b)+eps)
    def max_err(self, _g_pt):
        """Return the biggest relative error between g_pt and self.gx"""
        g_pt = []
        for i in xrange(len(_g_pt)):
            if self.ndarray_mask[i]:
                g_pt.append(_g_pt[i])
            elif isinstance(_g_pt[i], numpy.ndarray):
                # Non-perturbable inputs must have an all-zero gradient.
                assert numpy.all( _g_pt[i] == 0)
        if len(g_pt) != len(self.gx):
            raise ValueError('argument has wrong number of elements'
                             , len(g_pt))
        errs = []
        for i, (a,b) in enumerate(zip(g_pt, self.gx)):
            if a.shape != b.shape:
                raise ValueError('argument element %i has wrong shape %s'
                                 %(i,str((a.shape, b.shape))))
            vv = multiple_outputs_numeric_grad.abs_rel_err(a,b)
            errs.append(numpy.max(
                multiple_outputs_numeric_grad.abs_rel_err(a,b)))
        if numpy.all(numpy.isfinite(errs)):
            return numpy.max(errs), numpy.argmax(errs)
        else:
            # Non-finite error somewhere: report infinity.
            return float('inf'), 0
#TODO: Test this function, and if it works,
# use it with the normal verify_grad rather than the
# copy-and-pasted one above.
# Also - add a reference to this technique in the
# verify_grad method so that other ops with multiple outputs can be tested.
# DONE - rp
def scan_project_sum(*args, **kwargs):
    """Run ``theano.scan(*args, **kwargs)`` and collapse its outputs into a
    single scalar: the sum of every output weighted by fixed random factors
    (fixed seed, default updates disabled, so the factors are identical on
    every evaluation). Returns ``(scalar_expression, updates)``."""
    rng = theano.tensor.shared_randomstreams.RandomStreams(123)
    outputs, updates = theano.scan(*args, **kwargs)
    if type(outputs) not in [list, tuple]:
        outputs = [outputs]
    # we should ignore the random-state updates so that
    # the uniform numbers are the same every evaluation and on every call
    rng.add_default_updates = False
    weights = [rng.uniform(size=o.shape, low=0.1, high=0.9)
               for o in outputs]
    projected = sum([(o * w).sum() for o, w in zip(outputs, weights)])
    return (projected, updates)
def asarrayX(value):
    """Return *value* as an ndarray of theano's configured floatX dtype."""
    return theano._asarray(value, dtype=theano.config.floatX)
class T_Scan(unittest.TestCase):
#class T_Scan(object):
    def setUp(self):
        # Seed the shared test RNG so every test run is reproducible.
        utt.seed_rng()
# generator network, only one output , type scalar ; no sequence or
# non sequence arguments
    def test_generator_one_output_scalar(self):
        """Generator scan (no sequences/non-sequences): x_t = 2*x_{t-1},
        checked against the closed form state*2**(k+1)."""
        def f_pow2(x_tm1):
            return 2*x_tm1
        state = theano.tensor.scalar('state')
        n_steps = theano.tensor.iscalar('nsteps')
        output, updates = theano.scan(f_pow2, [],state, [],n_steps = n_steps, truncate_gradient
                = -1, go_backwards = False)
        my_f = theano.function([state,n_steps], output, updates = updates,
                               allow_input_downcast = True)
        rng = numpy.random.RandomState(utt.fetch_seed())
        state = rng.uniform()
        steps = 5
        # Expected values computed directly in numpy.
        numpy_values = numpy.array([ state*(2**(k+1)) for k
                                    in xrange(steps) ])
        theano_values = my_f(state,steps)
        assert numpy.allclose(numpy_values,theano_values)
# simple rnn, one input, one state, weights for each; input/state
# are vectors, weights are scalars
    def test_one_sequence_one_output_weights(self):
        """Simple RNN x_t = u_t*W_in + x_{t-1}*W with symbolic scalar
        weights, checked against a numpy loop."""
        def f_rnn(u_t,x_tm1,W_in, W):
            return u_t*W_in+x_tm1*W
        u = theano.tensor.vector('u')
        x0 = theano.tensor.scalar('x0')
        W_in = theano.tensor.scalar('win')
        W = theano.tensor.scalar('w')
        output, updates = theano.scan(f_rnn, u,x0,[W_in,W]
                                      , n_steps = None
                                      , truncate_gradient = -1
                                      , go_backwards = False)
        f2 = theano.function([u,x0,W_in,W], output, updates = updates,
                             allow_input_downcast = True)
        # get random initial values
        rng = numpy.random.RandomState(utt.fetch_seed())
        v_u = rng.uniform( size = (4,), low = -5., high = 5.)
        v_x0 = rng.uniform()
        W = rng.uniform()
        W_in = rng.uniform()
        # compute the output in numpy
        v_out = numpy.zeros((4,))
        v_out[0] = v_u[0]*W_in + v_x0 * W
        for step in xrange(1,4):
            v_out[step] = v_u[step]*W_in + v_out[step-1] * W
        theano_values = f2(v_u,v_x0, W_in, W)
        assert numpy.allclose(theano_values, v_out)
# simple rnn, one input, one state, weights for each; input/state
# are vectors, weights are scalars; using shared variables
    def test_one_sequence_one_output_weights_shared(self):
        """Same RNN as above but with the weights as shared variables
        passed through non_sequences."""
        rng = numpy.random.RandomState(utt.fetch_seed())
        u = theano.tensor.vector('u')
        x0 = theano.tensor.scalar('x0')
        W_in = theano.shared(asarrayX(rng.uniform()), name = 'w_in')
        W = theano.shared(asarrayX(rng.uniform()), name ='w')
        def f_rnn_shared(u_t,x_tm1, tmp_W_in, tmp_W):
            return u_t*tmp_W_in+x_tm1*tmp_W
        output, updates = theano.scan(f_rnn_shared, u,x0,[W_in, W]
                                      , n_steps =None
                                      , truncate_gradient= -1
                                      , go_backwards = False)
        f3 = theano.function([u,x0], output, updates = updates,
                             allow_input_downcast = True)
        # get random initial values
        v_u = rng.uniform( size = (4,), low = -5., high = 5.)
        v_x0 = rng.uniform()
        # compute the output i numpy
        v_out = numpy.zeros((4,))
        v_out[0] = v_u[0]*W_in.get_value() + v_x0*W.get_value()
        for step in xrange(1,4):
            v_out[step] = v_u[step]*W_in.get_value() + v_out[step-1]*W.get_value()
        theano_values = f3(v_u, v_x0)
        assert numpy.allclose(theano_values, v_out)
# some rnn with multiple outputs and multiple inputs; other
# dimension instead of scalars/vectors
    def test_multiple_inputs_multiple_outputs(self):
        """RNN with two sequences and two recurrent outputs (matrix/vector
        shapes), mixing shared and symbolic weights."""
        rng = numpy.random.RandomState(utt.fetch_seed())
        vW_in2 = asarrayX(rng.uniform(size = (2,), low = -5.,high = 5.))
        vW = asarrayX(rng.uniform(size = (2,2), low = -5.,high = 5.))
        vWout = asarrayX(rng.uniform(size = (2,), low = -5.,high = 5.))
        vW_in1 = asarrayX(rng.uniform(size = (2,2), low = -5.,high = 5.))
        v_u1 = asarrayX(rng.uniform(size = (3,2), low = -5., high = 5.))
        v_u2 = asarrayX(rng.uniform(size = (3,), low = -5.,high = 5.))
        v_x0 = asarrayX(rng.uniform(size = (2,), low = -5.,high = 5.))
        v_y0 = asarrayX(rng.uniform())
        W_in2 = theano.shared(vW_in2, name='win2')
        W = theano.shared(vW, name='w')
        W_out = theano.shared(vWout, name = 'wout')
        W_in1 = theano.tensor.matrix('win')
        u1 = theano.tensor.matrix('u1')
        u2 = theano.tensor.vector('u2')
        x0 = theano.tensor.vector('x0')
        y0 = theano.tensor.scalar('y0')
        def f_rnn_cmpl(u1_t, u2_t, x_tm1, y_tm1, W_in1):
            return [theano.dot(u1_t,W_in1) + u2_t * W_in2 + \
                    theano.dot(x_tm1, W), theano.dot(x_tm1, W_out)]
        outputs, updates = theano.scan(f_rnn_cmpl,[u1,u2],[x0,y0],W_in1
                                       , n_steps = None
                                       , truncate_gradient = -1
                                       , go_backwards = False)
        f4 = theano.function([u1,u2,x0,y0,W_in1], outputs
                             , updates = updates,
                             allow_input_downcast = True)
        # compute the values in numpy
        v_x = numpy.zeros((3,2),dtype=theano.config.floatX)
        v_y = numpy.zeros((3,),dtype=theano.config.floatX)
        v_x[0] = numpy.dot(v_u1[0],vW_in1) + v_u2[0]*vW_in2 + \
                    numpy.dot(v_x0,vW)
        v_y[0] = numpy.dot(v_x0,vWout)
        for i in xrange(1,3):
            v_x[i] = numpy.dot(v_u1[i],vW_in1) + v_u2[i]*vW_in2 + \
                        numpy.dot(v_x[i-1],vW)
            v_y[i] = numpy.dot(v_x[i-1], vWout)
        (theano_x,theano_y) = f4( v_u1, v_u2, v_x0, v_y0, vW_in1)
        assert numpy.allclose(theano_x , v_x)
        assert numpy.allclose(theano_y , v_y)
def test_multiple_outs_taps(self):
l = 5
rng = numpy.random.RandomState(utt.fetch_seed())
vW_in2 = asarrayX(rng.uniform(size = (2,), low = -.2,high = .2))
vW = asarrayX(rng.uniform(size = (2,2), low = -.2,high = .2))
vWout = asarrayX(rng.uniform(size = (2,), low = -.2,high = .2))
vW_in1 = asarrayX(rng.uniform(size = (2,2), low = -.2,high = .2))
v_u1 = asarrayX(rng.uniform(size = (l,2), low = -.2, high = .2))
v_u2 = asarrayX(rng.uniform(size = (l+2,2), low = -.2,high = .2))
v_x0 = asarrayX(rng.uniform(size = (2,), low = -.2,high = .2))
v_y0 = asarrayX(rng.uniform(size = (3,)))
W_in2 = theano.shared(vW_in2, name='win2')
W = theano.shared(vW, name='w')
W_out = theano.shared(vWout, name = 'wout')
W_in1 = theano.tensor.matrix('win')
u1 = theano.tensor.matrix('u1')
u2 = theano.tensor.matrix('u2')
x0 = theano.tensor.vector('x0')
y0 = theano.tensor.vector('y0')
def f_rnn_cmpl(u1_t, u2_tm1, u2_t, u2_tp1
, x_tm1, y_tm1, y_tm3, W_in1):
return [theano.dot(u1_t,W_in1) + (u2_t+u2_tm1*u2_tp1)* W_in2 + \
theano.dot(x_tm1, W), (y_tm1+y_tm3)*theano.dot(x_tm1
, W_out),
theano.dot(u1_t, W_in1)]
outputs, updates = theano.scan(f_rnn_cmpl
, [ u1
, dict(input=u2,taps=[-1,0,1]) ]
, [x0
, dict(initial = y0
, taps=[-1,-3])
, None]
, W_in1
, n_steps = None
, truncate_gradient = -1
, go_backwards = False )
f = theano.function([u1,u2,x0,y0,W_in1], outputs,
updates = updates, allow_input_downcast = True)
theano_out = f( v_u1
, v_u2
, v_x0
, v_y0
, vW_in1)
ny0 = numpy.zeros((5,2))
ny1 = numpy.zeros((5,))
ny2 = numpy.zeros((5,2))
ny0[0] = numpy.dot(v_u1[0], vW_in1) + \
(v_u2[1] + v_u2[0]*v_u2[2])* vW_in2 + numpy.dot(v_x0,vW)
ny1[0] = (v_y0[2]+v_y0[0])* numpy.dot(v_x0, vWout)
ny2[0] = numpy.dot(v_u1[0], vW_in1)
ny0[1] = numpy.dot(v_u1[1], vW_in1) + \
(v_u2[2] + v_u2[1]*v_u2[3])* vW_in2 + numpy.dot(ny0[0],vW)
ny1[1] = (ny1[0]+v_y0[1])* numpy.dot(ny0[0], vWout)
ny2[1] = numpy.dot(v_u1[1], vW_in1)
ny0[2] = numpy.dot(v_u1[2], vW_in1) + \
(v_u2[3] + v_u2[2]*v_u2[4])* vW_in2 +\
numpy.dot(ny0[1],vW)
ny1[2] = (ny1[1]+v_y0[2])* numpy.dot(ny0[1], vWout)
ny2[2] = numpy.dot(v_u1[2], vW_in1)
ny0[3] = numpy.dot(v_u1[3], vW_in1) + \
(v_u2[4] + v_u2[3]*v_u2[5])* vW_in2 +\
numpy.dot(ny0[2],vW)
ny1[3] = (ny1[2]+ny1[0])* numpy.dot(ny0[2], vWout)
ny2[3] = numpy.dot(v_u1[3], vW_in1)
ny0[4] = numpy.dot(v_u1[4], vW_in1) + \
(v_u2[5] + v_u2[4]*v_u2[6])* vW_in2 +\
numpy.dot(ny0[3],vW)
ny1[4] = (ny1[3]+ny1[1])* numpy.dot(ny0[3], vWout)
ny2[4] = numpy.dot(v_u1[4], vW_in1)
# simple rnn, one input, one state, weights for each; input/state are
# vectors, weights are scalars; using shared variables and past
# taps (sequences and outputs)
    def test_using_taps_input_output(self):
        """RNN using a past sequence tap (u at -2) and two past output taps
        (x at -1 and -2), with shared scalar weights."""
        rng = numpy.random.RandomState(utt.fetch_seed())
        vW = asarrayX(rng.uniform())
        vW_in = asarrayX(rng.uniform())
        vu = asarrayX(rng.uniform(size=(4,), low = -5., high = 5.))
        vx0 = asarrayX(rng.uniform(size=(2,), low = -5., high = 5.))
        u = theano.tensor.vector('u')
        x0 = theano.tensor.vector('x0')
        W_in = theano.shared(vW_in, name = 'w_in')
        W = theano.shared(vW, name ='w')
        def f_rnn_shared(u_tm2, x_tm1, x_tm2):
            return u_tm2*W_in+x_tm1*W+x_tm2
        outputs, updates = theano.scan(f_rnn_shared, dict(input=u, taps=-2),
                                       dict(initial = x0, taps = [-1,-2]), []
                                       , n_steps = None
                                       , truncate_gradient = -1
                                       , go_backwards = False)
        f7 = theano.function([u,x0], outputs, updates = updates,
                             allow_input_downcast = True)
        theano_out = f7(vu,vx0)
        # compute output in numpy
        # a bit of explaining:
        # due to the definition of sequences taps in scan, v_0[0] is
        # actually v_0[-2], and v_0[1] is v_0[-1]. The values v_0[2]
        # and v_0[3] do not get used ( because you do not use v_0[t]
        # in scan) which might seem strange, but then again why not use
        # v_0[t] instead of v_0[t-2] in a real application ??
        # also vx0[0] corresponds to vx0[-2], vx0[1] to vx0[-1]
        numpy_out = numpy.zeros((2,))
        numpy_out[0] = vu[0]*vW_in + vx0[1]*vW + vx0[0]
        numpy_out[1] = vu[1]*vW_in + numpy_out[0]*vW + vx0[1]
        assert numpy.allclose(numpy_out , theano_out)
# simple rnn, one input, one state, weights for each; input/state are
# vectors, weights are scalars; using shared variables and past
# taps (sequences and outputs) and future taps for sequences
    def test_past_future_taps_shared(self):
        """RNN mixing a past (-2) and a future (+2) sequence tap with two
        past output taps, using shared scalar weights."""
        rng = numpy.random.RandomState(utt.fetch_seed())
        vW = asarrayX(rng.uniform())
        vW_in = asarrayX(rng.uniform())
        vu = asarrayX(rng.uniform(size=(6,), low = -5., high = 5.))
        vx0 = asarrayX(rng.uniform(size=(2,), low = -5., high = 5.))
        u = theano.tensor.vector('u')
        x0 = theano.tensor.vector('x0')
        W_in = theano.shared(vW_in, name = 'w_in')
        W = theano.shared(vW, name ='w')
        def f_rnn_shared(u_tm2,u_tp2, x_tm1, x_tm2):
            return (u_tm2+u_tp2)*W_in+x_tm1*W+x_tm2
        output,updates = theano.scan(f_rnn_shared
                                     , dict( input = u, taps=[-2,2])
                                     , dict(initial = x0, taps = [-1,-2])
                                     , []
                                     , n_steps = None
                                     , truncate_gradient =-1
                                     , go_backwards = False)
        f8 = theano.function([u,x0], output, updates = updates,
                             allow_input_downcast = True)
        theano_out = f8(vu,vx0)
        # compute output in numpy
        numpy_out = numpy.zeros(2)
        # think of vu[0] as vu[-2], vu[4] as vu[2]
        # and vx0[0] as vx0[-2], vx0[1] as vx0[-1]
        numpy_out[0] = (vu[0]+vu[4])*vW_in + vx0[1]*vW + vx0[0]
        numpy_out[1] = (vu[1]+vu[5])*vW_in + numpy_out[0]*vW + vx0[1]
        assert numpy.allclose(numpy_out , theano_out)
# simple rnn ; compute inplace version 1
    def test_inplace1(self):
        """RNN with two recurrent outputs requested to run inplace over the
        (mutable) input sequences u2 and u1."""
        rng = numpy.random.RandomState(utt.fetch_seed())
        vW = asarrayX(numpy.random.uniform())
        vW_in = asarrayX(numpy.random.uniform())
        vu0 = asarrayX(rng.uniform(size=(3,), low = -5., high = 5.))
        vu1 = asarrayX(rng.uniform(size=(3,), low = -5., high = 5.))
        vu2 = asarrayX(rng.uniform(size=(3,), low = -5., high = 5.))
        vx0 = asarrayX(rng.uniform())
        vx1 = asarrayX(rng.uniform())
        u0 = theano.tensor.vector('u0')
        u1 = theano.tensor.vector('u1')
        u2 = theano.tensor.vector('u2')
        mu0 = theano.Param( u0, mutable = False)
        mu1 = theano.Param( u1, mutable = True)
        mu2 = theano.Param( u2, mutable = True)
        x0 = theano.tensor.scalar('x0')
        x1 = theano.tensor.scalar('y0')
        W_in = theano.shared(vW_in,'Win')
        W = theano.shared(vW,'W')
        mode = theano.compile.mode.get_mode(None).including('inplace')
        def f_rnn_shared(u0_t,u1_t, u2_t, x0_tm1,x1_tm1):
            return [u0_t*W_in + x0_tm1*W + u1_t*u2_t
                    , u0_t*W_in + x1_tm1*W+ u1_t+u2_t ]
        outputs, updates = theano.scan(f_rnn_shared, [u0,u1,u2],
                                       [dict( initial = x0, inplace =u2)
                                        , dict(initial = x1, inplace = u1)]
                                       , []
                                       , n_steps = None
                                       , truncate_gradient = -1
                                       , go_backwards = False
                                       , mode=mode )
        f9 = theano.function([mu0,mu1,mu2,x0,x1]
                             , outputs
                             , updates = updates
                             , mode = mode
                             , allow_input_downcast = True)
        # compute output in numpy
        numpy_x0 = numpy.zeros((3,))
        numpy_x1 = numpy.zeros((3,))
        numpy_x0[0] = vu0[0] * vW_in + vx0 * vW + vu1[0]*vu2[0]
        numpy_x1[0] = vu0[0] * vW_in + vx1 * vW + vu1[0]+vu2[0]
        for i in xrange(1,3):
            numpy_x0[i] = vu0[i]* vW_in + numpy_x0[i-1]*vW + vu1[i]*vu2[i]
            numpy_x1[i] = vu0[i]* vW_in + numpy_x1[i-1]*vW + vu1[i]+vu2[i]
        # note theano computes inplace, so call function after numpy
        # equivalent is done
        (theano_x0, theano_x1) = f9(vu0,vu1,vu2,vx0,vx1)
        # assert that theano does what it should
        assert numpy.allclose( theano_x0 , numpy_x0)
        assert numpy.allclose( theano_x1 , numpy_x1)
        # assert that it was done in place
        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
        # Old way of doing inplace operations is deprecated .. tests don't
        # make sense anymore
        ##assert numpy.allclose( theano_x0 , vu2)
        ## assert numpy.allclose( theano_x1 , vu1)
# simple rnn ; compute inplace version 2
    def test_inplace2(self):
        """RNN with sequence taps on u1 (0,+1) and u2 (-1,0,+1); the taps
        make inplace computation on those inputs invalid."""
        rng = numpy.random.RandomState(utt.fetch_seed())
        vW = asarrayX(numpy.random.uniform())
        vW_in = asarrayX(numpy.random.uniform())
        vu0 = asarrayX(rng.uniform(size=(3,), low = -5., high = 5.))
        vu1 = asarrayX(rng.uniform(size=(4,), low = -5., high = 5.))
        vu2 = asarrayX(rng.uniform(size=(5,), low = -5., high = 5.))
        vx0 = asarrayX(rng.uniform())
        vx1 = asarrayX(rng.uniform())
        u0 = theano.tensor.vector('u0')
        u1 = theano.tensor.vector('u1')
        u2 = theano.tensor.vector('u2')
        mu0 = theano.Param( u0, mutable = True)
        mu1 = theano.Param( u1, mutable = True)
        mu2 = theano.Param( u2, mutable = True)
        x0 = theano.tensor.scalar('x0')
        x1 = theano.tensor.scalar('y0')
        W_in = theano.shared(vW_in,'Win')
        W = theano.shared(vW,'W')
        mode = theano.compile.mode.get_mode(None).including('inplace')
        def f_rnn_shared(u0_t,u1_t,u1_tp1, u2_tm1,u2_t,u2_tp1, x0_tm1,x1_tm1):
            return [u0_t*W_in + x0_tm1*W + u1_t*u1_tp1, \
                    u0_t*W_in + x1_tm1*W+ u2_tm1+u2_t+u2_tp1 ]
        outputs, updates = theano.scan(f_rnn_shared,
                                       [u0,dict(input = u1, taps = [0,1])
                                        ,dict( input = u2, taps= [-1,0,+1])]
                                       , [dict( initial = x0)
                                          , dict(initial = x1)]
                                       , []
                                       , n_steps = None
                                       , truncate_gradient = -1
                                       , go_backwards = False
                                       , mode=mode )
        f9 = theano.function([mu0,mu1,mu2,x0,x1]
                             , outputs
                             , updates = updates
                             , mode = mode
                             , allow_input_downcast = True)
        # compute output in numpy
        numpy_x0 = numpy.zeros((3,))
        numpy_x1 = numpy.zeros((3,))
        numpy_x0[0] = vu0[0] * vW_in + vx0 * vW + vu1[0]*vu1[1]
        numpy_x1[0] = vu0[0] * vW_in + vx1 * vW + vu2[0]+vu2[1]+vu2[2]
        for i in xrange(1,3):
            numpy_x0[i] = vu0[i]* vW_in + numpy_x0[i-1]*vW + vu1[i]*vu1[i+1]
            numpy_x1[i] = vu0[i]* vW_in + numpy_x1[i-1]*vW + \
                    vu2[i]+vu2[i+1]+vu2[i+2]
        # note theano computes inplace, so call function after numpy
        # equivalent is done
        (theano_x0, theano_x1) = f9(vu0,vu1,vu2,vx0,vx1)
        # assert that theano does what it should
        assert numpy.allclose( theano_x0 , numpy_x0)
        assert numpy.allclose( theano_x1 , numpy_x1)
        # assert that it was done in place
        # not that x0 should not be inplace of vu2 because you are using
        # past values of u2, and therefore you are not allowed to work
        # inplace !!
        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
        # Old way of doing inplace operations is deprecated .. tests don't
        # make sense anymore
        #assert not numpy.allclose( theano_x0 , vu2[1:4])
        #assert numpy.allclose( theano_x1 , vu1[0:3])
# Shared variable with updates
    def test_shared_arguments_with_updates(self):
        """Scan whose inner function also returns updates for the shared
        weights W1/W2; checks outputs and the final shared values."""
        rng = numpy.random.RandomState(utt.fetch_seed())
        vW1 = asarrayX(rng.rand(2,3))
        vW2 = asarrayX(rng.rand(3,2))
        vu1 = asarrayX(rng.rand(3,2))
        vu2 = asarrayX(rng.rand(3,3))
        vy0 = asarrayX(rng.rand(3,2))
        vy1 = asarrayX(rng.rand(2))
        vy2 = asarrayX(rng.rand(3))
        # There is a bug when floatX=float32 when we remove this line.
        # The trace back is:
        #Traceback (most recent call last):
        # File "/u/bastienf/repos/Theano/theano/tests/test_scan.py", line 434, in test_shared_arguments_with_updates
        #     theano_y0,theano_y1,theano_y2 = f10(vu2, vy0)
        #   File "/u/bastienf/repos/theano/compile/function_module.py", line 480, in __call__
        #     self.fn()
        #   File "/u/bastienf/repos/theano/compile/profilemode.py", line 59, in profile_f
        #     raise_with_op(node)
        #   File "/u/bastienf/repos/theano/compile/profilemode.py", line 52, in profile_f
        #     th()
        #   File "/u/bastienf/repos/theano/gof/cc.py", line 1141, in <lambda>
        #     thunk = lambda p = p, i = node_input_storage, o = node_output_storage, n = node: p(n, [x[0] for x in i], o)
        #   File "/u/bastienf/repos/theano/scan.py", line 922, in perform
        #     inplace_map)
        #   File "/u/bastienf/repos/theano/scan.py", line 1054, in scan
        #     something = fn(*fn_args)
        #   File "/u/bastienf/repos/theano/compile/function_module.py", line 458, in __call__
        #     s.storage[0] = s.type.filter(arg, strict=s.strict)
        #   File "/u/bastienf/repos/theano/tensor/basic.py", line 415, in filter
        #     data = theano._asarray(data, dtype = self.dtype) #TODO - consider to pad shape with ones
        #   File "/u/bastienf/repos/theano/misc/safe_asarray.py", line 30, in _asarray
        #     rval = numpy.asarray(a, dtype=dtype, order=order)
        #   File "/u/lisa/local/byhost/ceylon.iro.umontreal.ca//lib64/python2.5/site-packages/numpy/core/numeric.py", line 230, in asarray
        #     return array(a, dtype, copy=False, order=order)
        #TypeError: ('__array__() takes no arguments (1 given)', <theano.scan.Scan object at 0x3dbbf90>(?_steps, u1, u2, y0, y1, 0.0, W1, W2), 'Sequence id of Apply node=0')
        #
        # This don't seam to be a theano related bug...
        vu1 = asarrayX(rng.rand(3,2))
        W1 = theano.shared(vW1,'W1')
        W2 = theano.shared(vW2,'W2')
        u1 = theano.shared(vu1,'u1')
        y1 = theano.shared(vy1,'y1')
        def f(u1_t, u2_t, y0_tm3, y0_tm2, y0_tm1, y1_tm1):
            y0_t = theano.dot(theano.dot(u1_t,W1),W2) + 0.1*y0_tm1 + \
                    0.33*y0_tm2 + 0.17*y0_tm3
            y1_t = theano.dot(u2_t, W2) + y1_tm1
            y2_t = theano.dot(u1_t, W1)
            nwW1 = W1 + .1
            nwW2 = W2 + .05
            # return outputs followed by a list of updates
            return ([y0_t, y1_t, y2_t], [( W1,nwW1), (W2, nwW2)])
        u2 = theano.tensor.matrix('u2')
        y0 = theano.tensor.matrix('y0')
        outputs,updates = theano.scan(f, [u1,u2]
                                      , [ dict(initial = y0
                                               , taps = [-3,-2,-1])
                                         , y1
                                         , None]
                                      , []
                                      , n_steps = None
                                      , go_backwards = False
                                      , truncate_gradient = -1)
        f10 = theano.function([u2,y0], outputs, updates = updates,
                              allow_input_downcast = True)
        allstuff = f10(vu2, vy0)
        theano_y0,theano_y1,theano_y2 = allstuff
        # do things in numpy
        numpy_y0 = numpy.zeros((6,2))
        numpy_y1 = numpy.zeros((4,2))
        numpy_y2 = numpy.zeros((3,3))
        numpy_y0[:3] = vy0
        numpy_y1[0] = vy1
        numpy_W1 = vW1.copy()
        numpy_W2 = vW2.copy()
        for idx in xrange(3):
            numpy_y0[idx+3] = numpy.dot( numpy.dot(vu1[idx,:], numpy_W1)
                                        , numpy_W2) + \
                              0.1*numpy_y0[idx+2] + \
                              0.33*numpy_y0[idx+1] + 0.17*numpy_y0[idx]
            numpy_y1[idx+1] = numpy.dot( vu2[idx,:], numpy_W2) +\
                              numpy_y1[idx]
            numpy_y2[idx] = numpy.dot( vu1[idx,:], numpy_W1)
            # the updates fire after each step
            numpy_W1 = numpy_W1 + .1
            numpy_W2 = numpy_W2 + .05
        assert numpy.allclose( theano_y0 , numpy_y0[3:])
        assert numpy.allclose( theano_y1 , numpy_y1[1:])
        assert numpy.allclose( theano_y2 , numpy_y2 )
        assert numpy.allclose( W1.get_value() , numpy_W1 )
        assert numpy.allclose( W2.get_value() , numpy_W2 )
    def test_simple_shared_mrg_random(self):
        """Scan whose inner function draws from an MRG random stream; only
        checks that compilation and two consecutive calls run without
        raising."""
        theano_rng = theano.sandbox.rng_mrg.MRG_RandomStreams(utt.fetch_seed())
        values, updates = theano.scan(lambda : theano_rng.uniform((2,),-1,1)
                                      , []
                                      , []
                                      , []
                                      , n_steps = 5
                                      , truncate_gradient = -1
                                      , go_backwards = False)
        my_f = theano.function([], values, updates = updates,
                               allow_input_downcast = True )
        # Just check for run-time errors
        theano_v = my_f()
        theano_v = my_f()
    def test_simple_shared_random(self):
        """Scan drawing 5 uniform samples per call from RandomStreams;
        two calls must reproduce 10 draws from an identically-seeded numpy
        RandomState (updates keep the stream state between calls)."""
        theano_rng = theano.tensor.shared_randomstreams.RandomStreams(
            utt.fetch_seed())
        values, updates = theano.scan(lambda : theano_rng.uniform((2,),-1,1)
                                      , []
                                      , []
                                      , []
                                      , n_steps = 5
                                      , truncate_gradient = -1
                                      , go_backwards = False)
        my_f = theano.function([], values, updates = updates,
                               allow_input_downcast = True )
        rng_seed = numpy.random.RandomState(utt.fetch_seed()).randint(2**30)
        rng = numpy.random.RandomState(int(rng_seed)) #int() is for 32bit
        numpy_v = numpy.zeros((10,2))
        for i in xrange(10):
            numpy_v[i] = rng.uniform(-1,1,size = (2,))
        theano_v = my_f()
        assert numpy.allclose( theano_v , numpy_v [:5,:])
        theano_v = my_f()
        assert numpy.allclose( theano_v , numpy_v[5:,:])
    def test_gibbs_chain(self):
        """10-step Gibbs sampling chain (RBM-style) written with scan,
        compared against a literal numpy loop; the two RandomStreams draws
        per step are mirrored by two separate numpy RandomStates."""
        rng = numpy.random.RandomState(utt.fetch_seed())
        v_W       = numpy.array(rng.rand(20,30) -.5, dtype = 'float32')
        v_vsample = numpy.array(rng.binomial(1,0.5, size=(3,20), )
                                , dtype = 'float32')
        v_bvis    = numpy.array(rng.rand(20) -.5, dtype='float32')
        v_bhid    = numpy.array(rng.rand(30) -.5, dtype='float32')
        W       = theano.shared(v_W, 'vW')
        bhid    = theano.shared(v_bhid, 'vbhid')
        bvis    = theano.shared(v_bvis, 'vbvis')
        vsample = theano.tensor.matrix(dtype='float32')
        trng = theano.tensor.shared_randomstreams.RandomStreams(
            utt.fetch_seed())
        def f(vsample_tm1):
            # visible -> hidden sample -> visible sample (one Gibbs step)
            hmean_t = theano.tensor.nnet.sigmoid(theano.dot(vsample_tm1,W)
                                                 + bhid)
            hsample_t = theano.tensor.cast(trng.binomial(hmean_t.shape
                                                         , 1
                                                         , hmean_t)
                                           ,dtype='float32')
            vmean_t = theano.tensor.nnet.sigmoid(theano.dot(hsample_t,W.T)
                                                 + bvis)
            return theano.tensor.cast(trng.binomial(vmean_t.shape,1,vmean_t)
                                      , dtype='float32')
        theano_vsamples, updates = theano.scan(f, [], vsample,[]
                                               , n_steps = 10
                                               , truncate_gradient=-1
                                               , go_backwards = False)
        my_f = theano.function([vsample], theano_vsamples[-1]
                               , updates = updates
                               , allow_input_downcast = True)
        # The scan's stream spawns one seeded generator per random op, so
        # the numpy side uses two states drawn from the same master seed.
        _rng = numpy.random.RandomState(utt.fetch_seed())
        rng_seed = _rng.randint(2**30)
        nrng1 = numpy.random.RandomState(int(rng_seed)) # int() is for 32bit
        rng_seed = _rng.randint(2**30)
        nrng2 = numpy.random.RandomState(int(rng_seed)) # int() is for 32bit
        def numpy_implementation(vsample):
            for idx in range(10):
                hmean = 1./(1. + numpy.exp(-(numpy.dot(vsample,v_W)
                                             + v_bhid)))
                hsample = numpy.array(nrng1.binomial(1,hmean
                                                     , size = hmean.shape)
                                      , dtype='float32')
                vmean = 1./(1. + numpy.exp(-(numpy.dot(hsample,v_W.T)
                                             + v_bvis)))
                vsample = numpy.array(nrng2.binomial(1,vmean
                                                     , size = vmean.shape)
                                      ,dtype='float32')
            return vsample
        t_result = my_f(v_vsample)
        n_result = numpy_implementation(v_vsample)
        assert numpy.allclose( t_result , n_result)
    def test_only_shared_no_input_no_output(self):
        """Scan with no sequences/outputs that only doubles a shared scalar
        via its update dictionary, run for a symbolic number of steps."""
        rng = numpy.random.RandomState(utt.fetch_seed())
        v_state = asarrayX(rng.uniform())
        state = theano.shared(v_state,'vstate')
        def f_2():
            return {state: 2*state}
        n_steps = theano.tensor.iscalar('nstep')
        output, updates = theano.scan(f_2,[],[],[]
                                      , n_steps = n_steps
                                      , truncate_gradient = -1
                                      , go_backwards = False)
        this_f = theano.function([n_steps], output, updates = updates,
                                 allow_input_downcast = True)
        # NOTE(review): rebinds `n_steps` from the symbolic scalar to a
        # plain int; harmless here since the symbolic one is no longer
        # needed, but a distinct name would be clearer.
        n_steps = 3
        this_f(n_steps)
        numpy_state = v_state* (2**(n_steps))
        assert numpy.allclose(state.get_value(), numpy_state)
def test_map_functionality(self):
def f_rnn(u_t):
return u_t + 3
u = theano.tensor.vector('u')
outputs, updates = theano.scan(f_rnn, u,[],[]
, n_steps =None
, truncate_gradient = -1
, go_backwards = False)
f2 = theano.function([u], outputs, updates = updates,
allow_input_downcast = True)
rng = numpy.random.RandomState(utt.fetch_seed())
v_u = rng.uniform(size=(5,), low = -5., high = 5.)
numpy_result = v_u + 3
theano_result = f2(v_u)
assert numpy.allclose(theano_result , numpy_result)
def test_map(self):
v = theano.tensor.vector('v')
abs_expr,abs_updates = theano.map(lambda x: abs(x), v,[],
truncate_gradient = -1, go_backwards = False)
f = theano.function([v],abs_expr,updates = abs_updates,
allow_input_downcast = True)
rng = numpy.random.RandomState(utt.fetch_seed())
vals = rng.uniform(size=(10,), low = -5., high = 5.)
abs_vals = abs(vals)
theano_vals = f(vals)
assert numpy.allclose(abs_vals , theano_vals)
    def test_backwards(self):
        """Simple one-tap RNN run with go_backwards=True; the numpy check
        consumes the input sequence in reverse order."""
        def f_rnn(u_t,x_tm1,W_in, W):
            return u_t*W_in+x_tm1*W
        u    = theano.tensor.vector('u')
        x0   = theano.tensor.scalar('x0')
        W_in = theano.tensor.scalar('win')
        W    = theano.tensor.scalar('w')
        output, updates = theano.scan(f_rnn, u,x0,[W_in,W]
                                      , n_steps = None
                                      , truncate_gradient = -1
                                      , go_backwards = True)
        f2 = theano.function([u,x0,W_in,W], output, updates = updates,
                             allow_input_downcast = True)
        # get random initial values
        rng  = numpy.random.RandomState(utt.fetch_seed())
        v_u  = rng.uniform( size = (4,), low = -5., high = 5.)
        v_x0 = rng.uniform()
        W    = rng.uniform()
        W_in = rng.uniform()
        # compute the output in numpy (note the reversed indexing of v_u)
        v_out = numpy.zeros((4,))
        v_out[0] = v_u[3]*W_in + v_x0 * W
        for step in xrange(1,4):
            v_out[step] = v_u[3-step]*W_in + v_out[step-1] * W
        theano_values = f2(v_u,v_x0, W_in, W)
        assert numpy.allclose( theano_values , v_out)
def test_reduce(self):
v = theano.tensor.vector('v')
s = theano.tensor.scalar('s')
result, updates = theano.reduce(lambda x,y: x+y, v,s)
f = theano.function([v,s], result, updates = updates,
allow_input_downcast = True)
rng = numpy.random.RandomState(utt.fetch_seed())
v_v = rng.uniform( size = (5,), low = -5., high = 5.)
assert abs(numpy.sum(v_v) - f(v_v, 0.)) < 1e-3
    def test_grad_one_output(self):
        """Gradient of a single-output RNN built with scan_project_sum;
        the analytic gradient is compared against a numeric one."""
        def f_rnn(u_t,x_tm1,W_in, W):
            return u_t*W_in+x_tm1*W
        u    = theano.tensor.vector('u')
        x0   = theano.tensor.scalar('x0')
        W_in = theano.tensor.scalar('W_in')
        W    = theano.tensor.scalar('W')
        cost, updates = scan_project_sum(f_rnn, u, x0, [W_in,W]
                                         , n_steps = None
                                         , truncate_gradient = -1
                                         , go_backwards = False)
        gu,gx0,gW_in,gW = theano.tensor.grad(cost, [u,x0,W_in, W])
        # no_default_updates keeps the random projection fixed between calls
        grad_fn = theano.function([u,x0,W_in, W], [gu,gx0,gW_in, gW],
                                  updates = updates, no_default_updates = True,
                                  allow_input_downcast = True)
        cost_fn = theano.function([u,x0,W_in, W], cost, updates = updates,
                                  no_default_updates = True,
                                  allow_input_downcast = True)
        # get random initial values
        rng  = numpy.random.RandomState(utt.fetch_seed())
        v_u   = numpy.array(rng.uniform( size = (10,), low = -.5
                                        , high = .5)
                            ,dtype=theano.config.floatX)
        v_x0  = numpy.array(rng.uniform(), dtype= theano.config.floatX)
        W     = numpy.array(rng.uniform(), dtype= theano.config.floatX)
        W_in  = numpy.array(rng.uniform(), dtype= theano.config.floatX)
        analytic_grad = grad_fn(v_u, v_x0, W_in, W)
        num_grad = multiple_outputs_numeric_grad(cost_fn
                                                 , [v_u, v_x0, W_in, W])
        max_err, max_err_pos = num_grad.max_err(analytic_grad)
        if max_err > 1e-2:
            raise Exception(theano.tensor.verify_grad.E_grad,
                            (max_err, 1e-2, max_err_pos))
    def test_grad_multiple_outs(self):
        """Gradient check for a scan with two outputs and a mix of shared
        (W_in2, W, W_out) and explicit (W_in1) weights."""
        rng = numpy.random.RandomState(utt.fetch_seed())
        vW_in2 = asarrayX(rng.uniform(size = (2,), low = -.1,high = .1))
        vW     = asarrayX(rng.uniform(size = (2,2), low = -.1,high = .1))
        vWout  = asarrayX(rng.uniform(size = (2,), low = -.1,high = .1))
        vW_in1 = asarrayX(rng.uniform(size = (2,2), low = -.1,high = .1))
        v_u1   = asarrayX(rng.uniform(size = (7,2), low = -.1, high = .1))
        v_u2   = asarrayX(rng.uniform(size = (7,), low = -.1,high = .1))
        v_x0   = asarrayX(rng.uniform(size = (2,), low = -.1,high = .1))
        v_y0   = asarrayX(rng.uniform())
        W_in2 = theano.shared(vW_in2, name='win2')
        W     = theano.shared(vW, name='w')
        W_out = theano.shared(vWout, name = 'wout')
        W_in1 = theano.tensor.matrix('win')
        u1 = theano.tensor.matrix('u1')
        u2 = theano.tensor.vector('u2')
        x0 = theano.tensor.vector('x0')
        y0 = theano.tensor.scalar('y0')
        def f_rnn_cmpl(u1_t, u2_t, x_tm1, y_tm1, W_in1):
            return [theano.dot(u1_t,W_in1) + u2_t* W_in2 + \
                    theano.dot(x_tm1, W), theano.dot(x_tm1, W_out)]
        cost, updates = scan_project_sum(f_rnn_cmpl, [u1,u2], [x0,y0]
                                         , W_in1
                                         , n_steps = None
                                         , truncate_gradient = -1
                                         , go_backwards = False)
        vparams = [v_u1, v_u2, v_x0, v_y0,vW_in1]
        params = [u1,u2,x0,y0,W_in1 ]
        gparams = theano.tensor.grad(cost, params)
        # no_default_updates keeps the random projection fixed between calls
        grad_fn = theano.function([u1,u2,x0,y0,W_in1], gparams,
                                  updates = updates, no_default_updates = True,
                                  allow_input_downcast = True)
        cost_fn = theano.function([u1,u2,x0,y0,W_in1], cost,
                                  updates = updates, no_default_updates = True,
                                  allow_input_downcast = True)
        num_grad = multiple_outputs_numeric_grad(cost_fn
                                                 , [v_u1
                                                    , v_u2
                                                    , v_x0
                                                    , v_y0
                                                    , vW_in1])
        analytic_grad = grad_fn(v_u1,v_u2, v_x0,v_y0, vW_in1)
        max_err, max_err_pos = num_grad.max_err(analytic_grad)
        if max_err > 1e-2:
            raise Exception(theano.tensor.verify_grad.E_grad,
                            (max_err, 1e-2, max_err_pos))
    def test_grad_multiple_outs_taps(self):
        """Gradient check for a scan with multiple outputs and multiple
        taps: u2 is read at taps [-1,0,1] and y0 at taps [-1,-3]; a third
        output has no recurrence (None)."""
        l = 5
        rng = numpy.random.RandomState(utt.fetch_seed())
        vW_in2 = asarrayX(rng.uniform(size = (2,), low = -.2,high = .2))
        vW     = asarrayX(rng.uniform(size = (2,2), low = -.2,high = .2))
        vWout  = asarrayX(rng.uniform(size = (2,), low = -.2,high = .2))
        vW_in1 = asarrayX(rng.uniform(size = (2,2), low = -.2,high = .2))
        v_u1   = asarrayX(rng.uniform(size = (l,2), low = -.2, high = .2))
        v_u2   = asarrayX(rng.uniform(size = (l+2,2), low = -.2,high = .2))
        v_x0   = asarrayX(rng.uniform(size = (2,), low = -.2,high = .2))
        v_y0   = asarrayX(rng.uniform(size = (3,)))
        W_in2 = theano.shared(vW_in2, name='win2')
        W     = theano.shared(vW, name='w')
        W_out = theano.shared(vWout, name = 'wout')
        W_in1 = theano.tensor.matrix('win')
        u1 = theano.tensor.matrix('u1')
        u2 = theano.tensor.matrix('u2')
        x0 = theano.tensor.vector('x0')
        y0 = theano.tensor.vector('y0')
        def f_rnn_cmpl(u1_t, u2_tm1, u2_t, u2_tp1
                       , x_tm1, y_tm1, y_tm3, W_in1):
            return [theano.dot(u1_t,W_in1) + (u2_t+u2_tm1*u2_tp1)* W_in2 + \
                    theano.dot(x_tm1, W), (y_tm1+y_tm3)*theano.dot(x_tm1
                                                                   , W_out),
                    theano.dot(u1_t, W_in1)]
        cost, updates = scan_project_sum(
            f_rnn_cmpl
            , [ u1
               , dict(input=u2,taps=[-1,0,1]) ]
            , [x0
               , dict(initial = y0
                      , taps=[-1,-3])
               , None]
            , W_in1
            , n_steps = None
            , truncate_gradient = -1
            , go_backwards = False )
        vparams = [v_u1, v_u2, v_x0, v_y0,vW_in1]
        params = [u1,u2,x0,y0,W_in1 ]
        gparams = theano.tensor.grad(cost, params)
        # no_default_updates keeps the random projection fixed between calls
        grad_fn = theano.function([u1,u2,x0,y0,W_in1], gparams,
                                  updates = updates, no_default_updates = True,
                                  allow_input_downcast = True)
        cost_fn = theano.function([u1,u2,x0,y0,W_in1], cost,
                                  updates = updates, no_default_updates = True,
                                  allow_input_downcast = True)
        num_grad = multiple_outputs_numeric_grad(cost_fn
                                                 , [v_u1
                                                    , v_u2
                                                    , v_x0
                                                    , v_y0
                                                    , vW_in1])
        analytic_grad = grad_fn(v_u1,v_u2, v_x0,v_y0, vW_in1)
        max_err, max_err_pos = num_grad.max_err(analytic_grad)
        if max_err > 1e-2:
            raise Exception(theano.tensor.verify_grad.E_grad,
                            (max_err, 1e-2, max_err_pos))
    def test_grad_multiple_outs_taps_backwards(self):
        """Same multi-tap gradient check as test_grad_multiple_outs_taps,
        but with go_backwards=True."""
        l = 5
        rng = numpy.random.RandomState(utt.fetch_seed())
        vW_in2 = asarrayX(rng.uniform(size = (2,), low = -.2,high = .2))
        vW     = asarrayX(rng.uniform(size = (2,2), low = -.2,high = .2))
        vWout  = asarrayX(rng.uniform(size = (2,), low = -.2,high = .2))
        vW_in1 = asarrayX(rng.uniform(size = (2,2), low = -.2,high = .2))
        v_u1   = asarrayX(rng.uniform(size = (l,2), low = -.2, high = .2))
        v_u2   = asarrayX(rng.uniform(size = (l+2,2), low = -.2,high = .2))
        v_x0   = asarrayX(rng.uniform(size = (2,), low = -.2,high = .2))
        v_y0   = asarrayX(rng.uniform(size = (3,)))
        W_in2 = theano.shared(vW_in2, name='win2')
        W     = theano.shared(vW, name='w')
        W_out = theano.shared(vWout, name = 'wout')
        W_in1 = theano.tensor.matrix('win')
        u1 = theano.tensor.matrix('u1')
        u2 = theano.tensor.matrix('u2')
        x0 = theano.tensor.vector('x0')
        y0 = theano.tensor.vector('y0')
        def f_rnn_cmpl(u1_t, u2_tm1, u2_t, u2_tp1, x_tm1
                       , y_tm1, y_tm3, W_in1):
            return [theano.dot(u1_t,W_in1) + (u2_t+u2_tm1*u2_tp1)* W_in2 + \
                    theano.dot(x_tm1, W), (y_tm1+y_tm3)*theano.dot(x_tm1
                                                                   , W_out)]
        cost, updates = scan_project_sum(f_rnn_cmpl,[u1,
            dict(input=u2,taps=[-1,0,1])],[x0,dict(initial=y0,
                taps=[-1,-3])],W_in1, n_steps = None,
                                truncate_gradient = -1, go_backwards = True)
        vparams = [v_u1, v_u2, v_x0, v_y0,vW_in1]
        params = [u1,u2,x0,y0,W_in1 ]
        gparams = theano.tensor.grad(cost, params)
        # no_default_updates keeps the random projection fixed between calls
        grad_fn = theano.function([u1,u2,x0,y0,W_in1], gparams,
                                  updates = updates, no_default_updates = True,
                                  allow_input_downcast = True)
        cost_fn = theano.function([u1,u2,x0,y0,W_in1], cost,
                                  updates = updates, no_default_updates = True,
                                  allow_input_downcast = True)
        num_grad = multiple_outputs_numeric_grad(cost_fn,[ v_u1
                                                          , v_u2
                                                          , v_x0
                                                          , v_y0
                                                          , vW_in1])
        analytic_grad = grad_fn(v_u1,v_u2, v_x0,v_y0, vW_in1)
        max_err, max_err_pos = num_grad.max_err(analytic_grad)
        if max_err > 1e-2:
            raise Exception(theano.tensor.verify_grad.E_grad,
                            (max_err, 1e-2, max_err_pos))
def test_grad_multiple_outs_some_uncomputable(self):
rng = numpy.random.RandomState(utt.fetch_seed())
vW_in = asarrayX(rng.uniform(size = (2,2), low = -3.,high = 3.))
v_u = asarrayX(rng.uniform(size = (5,2), low = -3., high = 3.))
v_u2 = numpy.array([1,3,4,6,8], dtype='int32')
v_x0 = asarrayX(rng.uniform(size = (2,), low = -3.,high = 3.))
W_in = theano.tensor.matrix('win')
u = theano.tensor.matrix('u1')
u2 = theano.tensor.ivector('u2')
x0 = theano.tensor.vector('x0', dtype= theano.config.floatX)
# trng = theano.tensor.shared_randomstreams.RandomStreams(
# utt.fetch_seed())
def f_rnn_cmpl(u_t,u2_t, x_tm1, W_in):
trng1 = theano.tensor.shared_randomstreams.RandomStreams(123)
x_t = theano.tensor.cast(u2_t,theano.config.floatX) +\
theano.dot(u_t, W_in) + x_tm1 + \
trng1.uniform(low=-1.1, high=1.1,
dtype=theano.config.floatX)
return x_t, 2*u2_t
cost, updates = scan_project_sum(f_rnn_cmpl,[u,u2],[x0, None],W_in
, n_steps = None
, truncate_gradient = -1
, go_backwards = False)
vparams = [v_u,v_u2, v_x0,vW_in]
params = [u,u2,x0,W_in ]
gparams = theano.tensor.grad(cost, params)
grad_fn = theano.function([u,u2,x0,W_in], gparams,
updates = updates, no_default_updates = True,
allow_input_downcast = True)
cost_fn = theano.function([u,u2,x0,W_in], cost,
updates = updates, no_default_updates = True,
allow_input_downcast = True)
def reset_rng_fn(fn, *args):
for idx,arg in enumerate(fn.maker.expanded_inputs):
if ( arg.value and type(arg.value.data) ==
type(numpy.random.RandomState(123))):
obj = fn.maker.expanded_inputs[idx].value
obj.data = numpy.random.RandomState(123)
fn.maker.expanded_inputs[idx].value = obj
return fn(*args)
reset_rng_cost_fn = lambda *args : reset_rng_fn(cost_fn, *args)
reset_rng_grad_fn = lambda *args : reset_rng_fn(grad_fn, *args)
num_grad = multiple_outputs_numeric_grad(reset_rng_cost_fn,\
[v_u,v_u2,v_x0,vW_in], ndarray_mask = [True, False, True, True] )
analytic_grad = reset_rng_grad_fn(v_u,v_u2, v_x0, vW_in)
max_err, max_err_pos = num_grad.max_err(analytic_grad)
if max_err > 1e-2:
raise Exception(theano.tensor.verify_grad.E_grad,
(max_err, 1e-2, max_err_pos))
def test_grad_multiple_outs_some_truncate(self):
rng = numpy.random.RandomState(utt.fetch_seed())
vW_in = asarrayX(rng.uniform(size = (2,2), low = -.1,high = .1))
v_u = asarrayX(rng.uniform(size = (5,2), low = -.1, high = .1))
v_x0 = asarrayX(rng.uniform(size = (2,), low = -.1,high = .1))
W_in = theano.tensor.matrix('win')
u = theano.tensor.matrix('u1')
x0 = theano.tensor.vector('x0')
# trng = theano.tensor.shared_randomstreams.RandomStreams(
# utt.fetch_seed())
def f_rnn_cmpl(u_t, x_tm1, W_in):
trng1 = theano.tensor.shared_randomstreams.RandomStreams(123)
x_t = theano.dot(u_t, W_in) + x_tm1 + trng1.uniform(low=-.1
, high=.1)
x_t = theano.tensor.cast(x_t, dtype=theano.config.floatX)
return x_t
cost, updates = scan_project_sum(f_rnn_cmpl,u,x0,W_in
, n_steps = None
, truncate_gradient = 3
, go_backwards = False)
vparams = [v_u, v_x0,vW_in]
params = [u,x0,W_in ]
gparams = theano.tensor.grad(cost, params)
grad_fn = theano.function([u,x0,W_in], gparams,
updates = updates, no_default_updates = True,
allow_input_downcast = True)
cost_fn = theano.function([u,x0,W_in], cost,
updates = updates, no_default_updates = True,
allow_input_downcast = True)
def reset_rng_fn(fn, *args):
for idx,arg in enumerate(fn.maker.expanded_inputs):
if ( arg.value and type(arg.value.data) ==
type(numpy.random.RandomState(123))):
obj = fn.maker.expanded_inputs[idx].value
obj.data = numpy.random.RandomState(123)
fn.maker.expanded_inputs[idx].value = obj
out = fn(*args)
return out
reset_rng_cost_fn = lambda *args : reset_rng_fn(cost_fn, *args)
reset_rng_grad_fn = lambda *args : reset_rng_fn(grad_fn, *args)
num_grad = multiple_outputs_numeric_grad(reset_rng_cost_fn,\
[v_u,v_x0,vW_in] )
analytic_grad = reset_rng_grad_fn(v_u, v_x0, vW_in)
assert numpy.allclose(analytic_grad[0][:2],numpy.zeros((2,2)))
    def test_draw_as_input_to_scan(self):
        """A random draw computed outside scan and passed as a
        non-sequence must be sampled once per function call: both scan
        iterations see the same y, and two calls see different y."""
        trng = theano.tensor.shared_randomstreams.RandomStreams(123)
        x = theano.tensor.matrix('x')
        y = trng.binomial(size = x.shape, p = x)
        z,updates = theano.scan(lambda a:a, non_sequences=y, n_steps=2)
        f = theano.function([x],[y,z], updates = updates,
                            allow_input_downcast = True)
        rng = numpy.random.RandomState(utt.fetch_seed())
        nx = rng.uniform( size = (10,10) )
        ny1,nz1 = f(nx)
        ny2,nz2 = f(nx)
        assert numpy.allclose([ny1,ny1], nz1)
        assert numpy.allclose([ny2,ny2], nz2)
        assert not numpy.allclose(ny1,ny2)
def test_grad_of_shared(self):
x1 = theano.shared(3.)
x1.name = 'x1'
x2 = theano.tensor.vector('x2')
y, updates = theano.scan(
lambda v: theano.tensor.cast(v*x1,
theano.config.floatX)
, sequences = x2)
m = theano.tensor.grad(y.sum(), x1)
f = theano.function([x2], m, allow_input_downcast = True)
assert numpy.allclose(f([2,3]) , 5)
def test_computing_gradient(self):
x1 = theano.tensor.scalar()
x2 = theano.shared(numpy.array([1,2,3,4,5]))
K = x2*x1
out,updates = theano.scan(lambda i,v: theano.tensor.grad(K[i], v),
sequences = theano.tensor.arange(K.shape[0])
, non_sequences=x1)
f = theano.function([x1], out, allow_input_downcast = True)
assert numpy.all( f(3.) != 0. )
def test_shared_updates(self):
X = theano.shared( numpy.array(1))
out,updates = theano.scan( lambda :{X: X+1}
, outputs_info = []
, non_sequences= []
, sequences = []
, n_steps = 10)
f = theano.function([],[], updates = updates)
f()
assert X.get_value() == 11
def test_memory_aliasing_updates(self):
x = theano.shared( numpy.array(1))
y = theano.shared( numpy.array(1))
out,updates = theano.scan( lambda :{x: x+1, y:x}
, outputs_info = []
, non_sequences= []
, sequences = []
, n_steps = 10)
f = theano.function([],[], updates = updates)
f()
assert not numpy.may_share_memory(x.container.storage[0],
y.container.storage[0])
assert x.get_value() != y.get_value()
    def test_scan_output_padding(self):
        """
        Scan outputs are usually lists, whose entries correspond to the
        intermediate result. When n_steps=1, some extra machinery is
        required in order to mimic this interface. Scan thus calls
        tensor.shape_padleft on the inner function outputs.
        However, this is not the proper behavior for:
        * shared variables : these should not be padded in any way
        * when return_steps is explicitly set to 1. Output should NOT be
          a list, but a tensor corresponding to the result of the last
          iteration.
        This unit test addresses the bug fix of changeset ba7157e95cb1.
        """
        a = theano.tensor.vector()
        init_a = theano.tensor.vector()
        b = theano.shared(numpy.random.rand(5,4))
        def inner_func(a):
            return a+1, {b:2*b}
        # return_steps=1: output keeps the input's ndim (no padding)
        out, updates = theano.scan(inner_func,
                                   outputs_info = [{'initial': init_a, 'return_steps': 1}],
                                   n_steps=1)
        assert out.type.ndim == a.type.ndim
        assert updates[b].type.ndim == b.type.ndim
        # default: one extra leading (time) dimension on the output only
        out, updates = theano.scan(inner_func, outputs_info=[init_a]
                                   , n_steps=1)
        assert out.type.ndim == a.type.ndim+1
        assert updates[b].type.ndim == b.type.ndim
    def test_scan_extra_inputs_hessian(self):
        """Hessian computed row-by-row with a scan whose inner function
        takes extra (non-sequence) inputs and closes over shared scalars;
        checked against a precomputed result."""
        x = theano.tensor.vector('x')
        A = theano.tensor.matrix('A')
        fc1 = theano.shared(0.5)
        fc2 = theano.shared(0.9)
        y = fc1*theano.dot(x*x,theano.dot(A,x))
        gy = theano.tensor.grad(y,x)
        hy, updates = theano.scan(
            lambda i, gy, x: theano.tensor.grad(gy[i]*fc2, x),
            sequences = theano.tensor.arange(gy.shape[0]),
            non_sequences = [gy,x])
        f = theano.function([x,A], hy, allow_input_downcast = True)
        vx = numpy.array([1.,1.] , dtype = theano.config.floatX)
        vA = numpy.array([[1.,1.],[1.,0.]], dtype = theano.config.floatX)
        # hand-computed Hessian of y at (vx, vA), scaled by fc1*fc2
        vR = numpy.array([[3.6,1.8],[1.8,0.9]], dtype = theano.config.floatX)
        assert numpy.allclose(f(vx,vA), vR)
def test_cloning_no_replace_strict_copy_inputs(self):
# This has nothing to do with scan, but it refers to the clone
# function that scan uses internally and that pfunc uses now and
# that users might want to use
x = theano.tensor.vector('x')
y = theano.tensor.vector('y')
z = theano.shared(0.25)
f1 = z*(x+y)**2+5
f2 = theano.clone( f1
, replace= None
, strict = True
, copy_inputs = True)
f2_inp = theano.gof.graph.inputs([f2])
assert z in f2_inp
assert x in f2_inp
assert y in f2_inp
def test_cloning_no_replace_strict_not_copy_inputs(self):
# This has nothing to do with scan, but it refers to the clone
# function that scan uses internally and that pfunc uses now and
# that users might want to use
x = theano.tensor.vector('x')
y = theano.tensor.vector('y')
z = theano.shared(0.25)
f1 = z*(x+y)**2+5
f2 = theano.clone( f1
, replace= None
, strict = True
, copy_inputs = False)
f2_inp = theano.gof.graph.inputs([f2])
assert not z in f2_inp
assert not x in f2_inp
assert not y in f2_inp
def test_cloning_replace_strict_copy_inputs(self):
# This has nothing to do with scan, but it refers to the clone
# function that scan uses internally and that pfunc uses now and
# that users might want to use
x = theano.tensor.vector('x')
y = theano.tensor.vector('y')
y2 = theano.tensor.vector('y2')
z = theano.shared(0.25)
f1 = z*(x+y)**2+5
f2 = theano.clone( f1
, replace= {y: y2}
, strict = True
, copy_inputs = True)
f2_inp = theano.gof.graph.inputs([f2])
assert z in f2_inp
assert x in f2_inp
assert y2 in f2_inp
def test_cloning_replace_not_strict_copy_inputs(self):
# This has nothing to do with scan, but it refers to the clone
# function that scan uses internally and that pfunc uses now and
# that users might want to use
x = theano.tensor.vector('x')
y = theano.tensor.fvector('y')
y2 = theano.tensor.dvector('y2')
z = theano.shared(0.25)
f1 = z*(x+y)**2+5
f2 = theano.clone( f1
, replace= {y: y2}
, strict = False
, copy_inputs = True)
f2_inp = theano.gof.graph.inputs([f2])
assert z in f2_inp
assert x in f2_inp
assert y2 in f2_inp
def test_cloning_replace_strict_not_copy_inputs(self):
# This has nothing to do with scan, but it refers to the clone
# function that scan uses internally and that pfunc uses now and
# that users might want to use
x = theano.tensor.vector('x')
y = theano.tensor.vector('y')
y2 = theano.tensor.vector('y2')
z = theano.shared(0.25)
f1 = z*(x+y)**2+5
f2 = theano.clone( f1
, replace= {y: y2}
, strict = True
, copy_inputs = False)
f2_inp = theano.gof.graph.inputs([f2])
assert not z in f2_inp
assert not x in f2_inp
assert not y2 in f2_inp
def test_cloning_replace_not_strict_not_copy_inputs(self):
# This has nothing to do with scan, but it refers to the clone
# function that scan uses internally and that pfunc uses now and
# that users might want to use
x = theano.tensor.vector('x')
y = theano.tensor.fvector('y')
y2 = theano.tensor.dvector('y2')
z = theano.shared(0.25)
f1 = z*(x+y)**2+5
f2 = theano.clone( f1
, replace= {y: y2}
, strict = False
, copy_inputs = False)
f2_inp = theano.gof.graph.inputs([f2])
assert not z in f2_inp
assert not x in f2_inp
assert not y2 in f2_inp
### TEST RE-ordering of inputs
# some rnn with multiple outputs and multiple inputs; other
# dimension instead of scalars/vectors
    def test_reordering(self):
        """Scan where outputs with no recurrence (None) are listed BEFORE
        the recurrent ones, checking that scan re-orders its internals
        correctly; results are compared to a numpy re-implementation."""
        rng = numpy.random.RandomState(utt.fetch_seed())
        vW_in2 = asarrayX(rng.uniform(size = (2,), low = -5.,high = 5.))
        vW     = asarrayX(rng.uniform(size = (2,2), low = -5.,high = 5.))
        vWout  = asarrayX(rng.uniform(size = (2,), low = -5.,high = 5.))
        vW_in1 = asarrayX(rng.uniform(size = (2,2), low = -5.,high = 5.))
        v_u1   = asarrayX(rng.uniform(size = (3,2), low = -5., high = 5.))
        v_u2   = asarrayX(rng.uniform(size = (3,), low = -5.,high = 5.))
        v_x0   = asarrayX(rng.uniform(size = (2,), low = -5.,high = 5.))
        v_y0   = asarrayX(rng.uniform(size = (3,)))
        W_in2 = theano.shared(vW_in2, name='win2')
        W     = theano.shared(vW, name='w')
        W_out = theano.shared(vWout, name = 'wout')
        W_in1 = theano.tensor.matrix('win')
        u1 = theano.tensor.matrix('u1')
        u2 = theano.tensor.vector('u2')
        x0 = theano.tensor.vector('x0')
        y0 = theano.tensor.vector('y0')
        def f_rnn_cmpl(u1_t, u2_t, x_tm1, y_tm1, y_tm3, W_in1):
            # first two outputs are throwaways with no recurrence
            return [y_tm3+1, y_tm3+2, theano.dot(u1_t,W_in1) + u2_t * W_in2 + \
                    theano.dot(x_tm1, W),
                    y_tm1 + theano.dot(x_tm1, W_out)]
        outputs, updates = theano.scan( f_rnn_cmpl
                                       , [ u1
                                          , u2]
                                       , [ None
                                          , None
                                          , x0
                                          , dict(initial=y0, taps=[-1,-3])]
                                       , W_in1
                                       , n_steps = None
                                       , truncate_gradient = -1
                                       , go_backwards = False)
        f4 = theano.function([u1,u2,x0,y0,W_in1], outputs
                             , updates = updates
                             , allow_input_downcast = True)
        # compute the values in numpy
        v_x = numpy.zeros((3,2),dtype=theano.config.floatX)
        v_y = numpy.zeros((3,),dtype=theano.config.floatX)
        v_x[0] = numpy.dot(v_u1[0],vW_in1) + v_u2[0]*vW_in2 + \
                numpy.dot(v_x0,vW)
        v_y[0] = numpy.dot(v_x0,vWout) + v_y0[2]
        for i in xrange(1,3):
            v_x[i] = numpy.dot(v_u1[i],vW_in1) + v_u2[i]*vW_in2 + \
                    numpy.dot(v_x[i-1],vW)
            v_y[i] = numpy.dot(v_x[i-1], vWout) + v_y[i-1]
        (theano_dump1, theano_dump2, theano_x,theano_y) =  f4( v_u1
                                                              , v_u2
                                                              , v_x0
                                                              , v_y0
                                                              , vW_in1)
        assert numpy.allclose(theano_x , v_x)
        assert numpy.allclose(theano_y , v_y)
### TEST store steps / return steps
    def test_return_steps(self):
        """Scan with store_steps/return_steps options: only the requested
        trailing slice of each output is returned; compared against the
        tail of a full numpy simulation."""
        rng = numpy.random.RandomState(utt.fetch_seed())
        vW_in2 = asarrayX(rng.uniform(size = (2,), low = -5.,high = 5.))
        vW     = asarrayX(rng.uniform(size = (2,2), low = -5.,high = 5.))
        vWout  = asarrayX(rng.uniform(size = (2,), low = -5.,high = 5.))
        vW_in1 = asarrayX(rng.uniform(size = (2,2), low = -5.,high = 5.))
        v_u1   = asarrayX(rng.uniform(size = (8,2), low = -5., high = 5.))
        v_u2   = asarrayX(rng.uniform(size = (8,), low = -5.,high = 5.))
        v_x0   = asarrayX(rng.uniform(size = (2,), low = -5.,high = 5.))
        v_y0   = asarrayX(rng.uniform(size = (3,)))
        W_in2 = theano.shared(vW_in2, name='win2')
        W     = theano.shared(vW, name='w')
        W_out = theano.shared(vWout, name = 'wout')
        W_in1 = theano.tensor.matrix('win')
        u1 = theano.tensor.matrix('u1')
        u2 = theano.tensor.vector('u2')
        x0 = theano.tensor.vector('x0')
        y0 = theano.tensor.vector('y0')
        def f_rnn_cmpl(u1_t, u2_t, x_tm1, y_tm1, y_tm3, W_in1):
            return [y_tm3+1, theano.dot(u1_t,W_in1) + u2_t * W_in2 + \
                    theano.dot(x_tm1, W),
                    y_tm1 + theano.dot(x_tm1, W_out)]
        outputs, updates = theano.scan( f_rnn_cmpl
                                       , [ u1
                                          , u2]
                                       , [ dict(store_steps = 3)
                                          , dict(initial = x0, return_steps = 2)
                                          , dict(initial=y0, taps=[-1,-3],
                                                 return_steps = 4)]
                                       , W_in1
                                       , n_steps = None
                                       , truncate_gradient = -1
                                       , go_backwards = False)
        f4 = theano.function([u1,u2,x0,y0,W_in1], outputs
                             , updates = updates
                             , allow_input_downcast = True
                            )
        # compute the values in numpy (all 8 steps; only tails are checked)
        v_x = numpy.zeros((8,2),dtype=theano.config.floatX)
        v_y = numpy.zeros((8,),dtype=theano.config.floatX)
        v_x[0] = numpy.dot(v_u1[0],vW_in1) + v_u2[0]*vW_in2 + \
                numpy.dot(v_x0,vW)
        v_y[0] = numpy.dot(v_x0,vWout) + v_y0[2]
        for i in xrange(1,8):
            v_x[i] = numpy.dot(v_u1[i],vW_in1) + v_u2[i]*vW_in2 + \
                    numpy.dot(v_x[i-1],vW)
            v_y[i] = numpy.dot(v_x[i-1], vWout) + v_y[i-1]
        (theano_dump, theano_x,theano_y) =  f4( v_u1, v_u2, v_x0, v_y0, vW_in1)
        assert numpy.allclose(theano_x , v_x[-2:])
        assert numpy.allclose(theano_y , v_y[-4:])
def test_scan_as_tensor_on_gradients(self):
"""
Bug reported by cityhall on scan when computing the gradients
"""
to_scan = theano.tensor.dvector('to_scan')
seq = theano.tensor.dmatrix('seq')
f1 = theano.tensor.dscalar('f1')
def scanStep(prev, seq, f1):
return prev + f1 * seq
scanned, _ = theano.scan(fn = scanStep, \
sequences = [seq], \
outputs_info = [to_scan], \
non_sequences = [f1])
f_scan = theano.function(inputs=[to_scan, seq, f1], outputs=scanned
, allow_input_downcast = True)
t_grad = theano.tensor.grad(scanned.sum(), wrt=[to_scan, f1],
consider_constant=[seq])
f_grad = theano.function(inputs=[to_scan, seq, f1], outputs=t_grad,
allow_input_downcast = True)
    def test_save_mem(self):
        """Scan with return_steps=1 on every output: only the last step of
        each output is returned (and less memory should be kept); compared
        against the last step of a full numpy simulation."""
        rng = numpy.random.RandomState(utt.fetch_seed())
        vW_in2 = asarrayX(rng.uniform(size = (2,), low = -5.,high = 5.))
        vW     = asarrayX(rng.uniform(size = (2,2), low = -5.,high = 5.))
        vWout  = asarrayX(rng.uniform(size = (2,), low = -5.,high = 5.))
        vW_in1 = asarrayX(rng.uniform(size = (2,2), low = -5.,high = 5.))
        v_u1   = asarrayX(rng.uniform(size = (8,2), low = -5., high = 5.))
        v_u2   = asarrayX(rng.uniform(size = (8,), low = -5.,high = 5.))
        v_x0   = asarrayX(rng.uniform(size = (2,), low = -5.,high = 5.))
        v_y0   = asarrayX(rng.uniform(size = (3,)))
        W_in2 = theano.shared(vW_in2, name='win2')
        W     = theano.shared(vW, name='w')
        W_out = theano.shared(vWout, name = 'wout')
        W_in1 = theano.tensor.matrix('win')
        u1 = theano.tensor.matrix('u1')
        u2 = theano.tensor.vector('u2')
        x0 = theano.tensor.vector('x0')
        y0 = theano.tensor.vector('y0')
        def f_rnn_cmpl(u1_t, u2_t, x_tm1, y_tm1, y_tm3, W_in1):
            return [y_tm3+1, theano.dot(u1_t,W_in1) + u2_t * W_in2 + \
                    theano.dot(x_tm1, W),
                    y_tm1 + theano.dot(x_tm1, W_out)]
        outputs, updates = theano.scan( f_rnn_cmpl
                                       , [ u1
                                          , u2]
                                       , [ dict(return_steps = 1)
                                          , dict(initial = x0
                                                 , return_steps = 1)
                                          , dict(initial=y0, taps=[-1,-3],
                                                 return_steps = 1)]
                                       , W_in1
                                       , n_steps = None
                                       , truncate_gradient = -1
                                       , go_backwards = False)
        f4 = theano.function([u1,u2,x0,y0,W_in1], outputs
                             , updates = updates
                             , allow_input_downcast = True
                            )
        # compute the values in numpy (full run; only last step is checked)
        v_x = numpy.zeros((8,2),dtype=theano.config.floatX)
        v_y = numpy.zeros((8,),dtype=theano.config.floatX)
        v_x[0] = numpy.dot(v_u1[0],vW_in1) + v_u2[0]*vW_in2 + \
                numpy.dot(v_x0,vW)
        v_y[0] = numpy.dot(v_x0,vWout) + v_y0[2]
        for i in xrange(1,8):
            v_x[i] = numpy.dot(v_u1[i],vW_in1) + v_u2[i]*vW_in2 + \
                    numpy.dot(v_x[i-1],vW)
            v_y[i] = numpy.dot(v_x[i-1], vWout) + v_y[i-1]
        (theano_dump, theano_x,theano_y) =  f4( v_u1, v_u2, v_x0, v_y0, vW_in1)
        assert numpy.allclose(theano_x , v_x[-1:])
        assert numpy.allclose(theano_y , v_y[-1:])
    def caching_nsteps_by_scan_op(self):
        """Regression check: a scan's cached n_steps (set by a first
        execution with 5 rows via `givens`) must not corrupt a later
        gradient function called with 10 rows.

        NOTE(review): the name lacks the `test_` prefix, so standard test
        collectors will not run this method — confirm whether that is
        intentional (disabled test) before renaming.
        """
        import theano
        import theano.tensor as T
        import scipy
        W = T.matrix('weights')
        initial = T.vector('initial')
        inpt = T.matrix('inpt')
        def one_step(x_t, h_tm1, W):
            expr = T.dot(h_tm1, W) + x_t
            return expr
        expr, _ = theano.scan(
            fn=one_step,
            sequences=[inpt],
            outputs_info=[initial],
            non_sequences=[W])
        sh = expr.shape[0]
        shapef = theano.function([W], expr,
                                 givens={initial: theano.shared(
                                     scipy.ones(5,
                                                dtype=theano.config.floatX)),
                                         inpt: theano.shared(
                                             scipy.ones((5, 5),
                                                        dtype=theano.config.floatX))})
        # First execution to cache n_steps
        shapef(scipy.ones((5, 5), dtype=theano.config.floatX))
        cost = expr.sum()
        d_cost_wrt_W = T.grad(cost, [W])
        f = theano.function([W, inpt], d_cost_wrt_W,
                            givens={initial: theano.shared(scipy.zeros(5))})
        # precomputed expected gradient for the all-ones 10x5 input
        rval = numpy.asarray([[5187989]*5]*5, dtype = theano.config.floatX)
        assert numpy.allclose( f(scipy.ones((5, 5),
                                            dtype=theano.config.floatX)
                                 , scipy.ones((10, 5),
                                              dtype=theano.config.floatX))
                              ,rval)
def test_save_mem_reduced_number_of_steps(self):
    """Apply a variety of subtensor slices to the outputs of a scan and
    check each slice against the direct numpy computation.  This setup
    is what lets the save-memory optimization reduce how many steps
    scan actually stores.
    """
    def step(u_t):
        return (u_t + 1., u_t + 2., u_t + 3., u_t + 4.,
                u_t + 5, u_t + 6, u_t + 7.)

    u = theano.tensor.vector('u')
    idx = theano.tensor.iscalar('idx')
    jdx = theano.tensor.iscalar('jdx')

    outs, updates = theano.scan(step, u,
                                n_steps=None,
                                truncate_gradient=-1,
                                go_backwards=False)
    x1, x2, x3, x4, x5, x6, x7 = outs

    f2 = theano.function([u, idx, jdx],
                         [x1[:2], x2[4], x3[idx], x4[:idx],
                          x5[-10], x6[-jdx], x7[:-jdx]],
                         updates=updates,
                         allow_input_downcast=True)

    # random input values
    rng = numpy.random.RandomState(utt.fetch_seed())
    v_u = rng.uniform(size=(20,), low=-5., high=5.)

    # evaluate through theano and compare with the numpy equivalent
    tx1, tx2, tx3, tx4, tx5, tx6, tx7 = f2(v_u, 3, 15)
    assert numpy.allclose(tx1, v_u[:2] + 1.)
    assert numpy.allclose(tx2, v_u[4] + 2.)
    assert numpy.allclose(tx3, v_u[3] + 3.)
    assert numpy.allclose(tx4, v_u[:3] + 4.)
    assert numpy.allclose(tx5, v_u[-10] + 5.)
    assert numpy.allclose(tx6, v_u[-15] + 6.)
    assert numpy.allclose(tx7, v_u[:-15] + 7.)

    # Maybe ugly, way to check if the optimization had been applied
    scan_node = f2.maker.env.outputs[0].owner.inputs[0]
def test_save_mem_store_steps(self):
    """Negative-index slices of scan outputs (combined with several
    kinds of initial states and taps) must match the direct numpy
    computation; this exercises how many past steps scan has to keep
    stored in memory.

    Note: the original version declared unused ``idx``/``jdx`` symbolic
    scalars (copy-paste from the sibling test); they are removed here.
    """
    def f_rnn(u_t, x1_tm1, x1_tm3, x2_tm1, x3tm2, x3_tm1, x4_tm1):
        return u_t + 1., u_t + 2., u_t + 3., u_t + 4., u_t + 5, u_t + 6, u_t + 7

    u = theano.tensor.vector('u')
    x10 = theano.tensor.vector('x10')
    x20 = theano.tensor.scalar('x20')
    x30 = theano.tensor.vector('x30')
    x40 = theano.tensor.scalar('x40')

    [x1, x2, x3, x4, x5, x6, x7], updates = theano.scan(
        f_rnn, u,
        [None, None, None,
         dict(initial=x10, taps=[-1, -2]),
         x20,
         dict(initial=x30, taps=[-1, -2]),
         x40],
        n_steps=None,
        truncate_gradient=-1,
        go_backwards=False)

    f2 = theano.function([u, x10, x20, x30, x40],
                         [x1[-7], x2[-3:-1], x3[-6:], x4[-1], x5[-1]],
                         updates=updates,
                         allow_input_downcast=True)

    # get random initial values
    rng = numpy.random.RandomState(utt.fetch_seed())
    v_u = rng.uniform(size=(20,), low=-5., high=5.)

    # compute the output in numpy and compare slice by slice
    tx1, tx2, tx3, tx4, tx5 = f2(v_u, [0, 0], 0, [0, 0], 0)
    assert numpy.allclose(tx1, v_u[-7] + 1.)
    assert numpy.allclose(tx2, v_u[-3:-1] + 2.)
    assert numpy.allclose(tx3, v_u[-6:] + 3.)
    assert numpy.allclose(tx4, v_u[-1] + 4.)
    assert numpy.allclose(tx5, v_u[-1] + 5.)
def test_remove_stuff(self):
x = theano.tensor.vector()
def lm(m):
trng = theano.tensor.shared_randomstreams.RandomStreams(
utt.fetch_seed())
return [ 2*m+ trng.uniform(low =-1.1, high =1.1,
dtype = theano.config.floatX),
m + trng.uniform(size=[3])]
[o1,o2], updates = theano.scan( lm,
sequences = x,
n_steps = None,
truncate_gradient = -1,
go_backwards = False)
go1 = theano.tensor.grad(o1.mean(), wrt = x)
f = theano.function([x],go1, updates = updates,
allow_input_downcast = True)
print f([1,2,3])
if __name__ == '__main__':
    # NOTE: everything below the opening ''' is swallowed by a bare
    # triple-quoted string, so the individual test calls are normally
    # disabled and only the nosetests hint is printed.  Toggling the
    # quote markers (the #''' / #'' lines are the switches) re-enables
    # running the tests one by one with a printed index, which is handy
    # for narrowing down a failure outside of nosetests.
    #'''
    print ' Use nosetests to run these tests '
    '''
    scan_tst = T_Scan()
    #''
    print 1
    scan_tst.test_generator_one_output_scalar()
    #''
    print 2
    scan_tst.test_one_sequence_one_output_weights()
    #''
    print 3
    scan_tst.test_one_sequence_one_output_weights_shared()
    #''
    print 4
    scan_tst.test_multiple_inputs_multiple_outputs()
    #''
    print 5
    scan_tst.test_using_taps_input_output()
    #''
    print 6
    scan_tst.test_past_future_taps_shared()
    #''
    print 7
    scan_tst.test_inplace1()
    #''
    print 8
    scan_tst.test_inplace2()
    #''
    print 9
    scan_tst.test_shared_arguments_with_updates()
    print 10
    scan_tst.test_simple_shared_random()
    print 11
    scan_tst.test_only_shared_no_input_no_output()
    print 12
    scan_tst.test_map_functionality()
    print 13
    scan_tst.test_map()
    #''
    print 14
    scan_tst.test_backwards()
    #''
    print 15
    scan_tst.test_reduce()
    print 15.5
    scan_tst.test_save_mem()
    #''
    print 16
    scan_tst.test_grad_one_output()
    #''
    print 17
    scan_tst.test_grad_multiple_outs()
    #''
    print 17.5
    scan_tst.test_multiple_outs_taps()
    #''
    print 18
    scan_tst.test_grad_multiple_outs_taps()
    #''
    print 19
    scan_tst.test_grad_multiple_outs_taps_backwards()
    #''
    print 20
    scan_tst.test_grad_multiple_outs_some_uncomputable()
    #''
    print 21
    scan_tst.test_grad_multiple_outs_some_truncate()
    #''
    print 22
    scan_tst.test_grad_of_shared()
    #''
    print 23
    scan_tst.test_computing_gradient()
    #''
    print 24
    scan_tst.test_scan_output_padding()
    print 25
    scan_tst.test_scan_extra_inputs_hessian()
    #''
    print 26
    scan_tst.test_cloning_no_replace_strict_copy_inputs()
    print 27
    scan_tst.test_cloning_no_replace_strict_not_copy_inputs()
    print 28
    scan_tst.test_cloning_replace_strict_copy_inputs()
    print 29
    scan_tst.test_cloning_replace_not_strict_copy_inputs()
    print 30
    scan_tst.test_cloning_replace_strict_not_copy_inputs()
    print 31
    scan_tst.test_cloning_replace_not_strict_not_copy_inputs()
    #''
    print 32
    scan_tst.test_draw_as_input_to_scan()
    #''
    print 33
    scan_tst.test_reordering()
    #''
    print 34
    scan_tst.test_return_steps()
    #''
    print 35
    scan_tst.test_scan_as_tensor_on_gradients()
    #''
    print 36
    scan_tst.test_save_mem_reduced_number_of_steps()
    #''
    print 37
    scan_tst.test_save_mem_store_steps()
    #'''
......@@ -2,29 +2,29 @@
import unittest
import theano
import numpy
from theano import config
from theano.tests import unittest_tools as utt
#from theano.scan import stepper
'''
Questions and notes about scan that should be answered :
* Even though it does not make it publically known in
the documentation, scan allows you to set both a return_steps
flag and a store_steps flag ( the first one is a soft condition telling
you how many steps to return, the second one determines how much memory to
allocate). There is an optimization as well, that transforms return_steps to
you how many steps to return, the second one determines how much memory
to allocate). There is an optimization as well, that transforms
return_steps to
store_steps. Questions :
- what happens if both flags are set ?
answer: whatever return_steps says is ignored, and store_steps is used
- the optimization works only with return_steps = -1; can it be made to work
with other values ?
answer: 6 Jul 2010 RP :it is a bit harry to figure out from the subtensors what
exactly you need
- the optimization works only with return_steps = -1; can it be made
to work with other values ?
answer: 6 Jul 2010 RP :it is a bit harry to figure out from the
subtensors what exactly you need
* Scan seems to do copies of every input variable. Is that needed?
answer : probably not, but it doesn't hurt also ( what we copy is theano variables,
which just cary information about the type / dimension of the data)
answer : probably not, but it doesn't hurt also ( what we copy is
theano variables, which just cary information about the type / dimension
of the data)
* There is some of scan functionality that is not well documented
......@@ -39,12 +39,12 @@ class multiple_outputs_numeric_grad:
def __init__(self, f, pt, ndarray_mask = None, eps=None):
"""Return the gradient of f at pt.
This function computes the gradient by a one-sided finite differences of a
fixed step size (eps).
This function computes the gradient by a one-sided finite differences
of a fixed step size (eps).
It is assumed that f(...) will return a scalar.
:param eps: the stepsize for the finite differencing. None means input
dtype-dependent. See `type_eps`.
:param eps: the stepsize for the finite differencing. None means
input dtype-dependent. See `type_eps`.
"""
def prod(inputs):
......@@ -61,18 +61,20 @@ class multiple_outputs_numeric_grad:
# something else ( a random state ? ) with which we shouldn't really
# mess up
if not ndarray_mask:
ndarray_mask = [True for x in pt ]
ndarray_mask = [True for x in pt ]
dtype_eps = multiple_outputs_numeric_grad.type_eps['float64']
for i,p in enumerate(pt):
if ndarray_mask[i]:
pt[i] = numpy.array(p)
_eps = multiple_outputs_numeric_grad.type_eps[str(pt[i].dtype)]
_eps = multiple_outputs_numeric_grad.type_eps[str(
pt[i].dtype)]
if _eps > dtype_eps:
dtype_eps = _eps
dtype_eps = _eps
self.ndarray_mask = ndarray_mask
#'''
# Compute clean output:
f_x = f(*pt)
gx = []
......@@ -103,20 +105,31 @@ class multiple_outputs_numeric_grad:
@staticmethod
def abs_rel_err(a,b,eps=1.0e-10):
"""Return a small number when a and b are close, relative to how big they are"""
"""Return a small number when a and b are close, relative to how big
they are"""
return abs(a-b) / (abs(a)+abs(b)+eps)
def max_err(self, g_pt):
def max_err(self, _g_pt):
"""Return the biggest relative error between g_pt and self.gx"""
g_pt = []
for i in xrange(len(_g_pt)):
if self.ndarray_mask[i]:
g_pt.append(_g_pt[i])
elif isinstance(_g_pt[i], numpy.ndarray):
assert numpy.all( _g_pt[i] == 0)
if len(g_pt) != len(self.gx):
raise ValueError('argument has wrong number of elements', len(g_pt))
raise ValueError('argument has wrong number of elements'
, len(g_pt))
errs = []
for i, (a, b) in enumerate(zip(g_pt, self.gx)):
for i, (a,b) in enumerate(zip(g_pt, self.gx)):
if a.shape != b.shape:
raise ValueError('argument element %i has wrong shape %s' %(i,str((a.shape,
b.shape))))
raise ValueError('argument element %i has wrong shape %s'
%(i,str((a.shape, b.shape))))
vv = multiple_outputs_numeric_grad.abs_rel_err(a,b)
errs.append(numpy.max(multiple_outputs_numeric_grad.abs_rel_err(a,b)))
errs.append(numpy.max(
multiple_outputs_numeric_grad.abs_rel_err(a,b)))
if numpy.all(numpy.isfinite(errs)):
return numpy.max(errs), numpy.argmax(errs)
else:
......@@ -128,7 +141,8 @@ class multiple_outputs_numeric_grad:
# use it with the normal verify_grad rather than the
# copy-and-pasted one above.
# Also - add a reference to this technique in the
# verify_grad method so that other ops with multiple outputs can be tested. DONE - rp
# verify_grad method so that other ops with multiple outputs can be tested.
# DONE - rp
def scan_project_sum(*args, **kwargs):
rng = theano.tensor.shared_randomstreams.RandomStreams(123)
scan_outputs, updates = theano.scan(*args, **kwargs)
......@@ -137,16 +151,18 @@ def scan_project_sum(*args, **kwargs):
# we should ignore the random-state updates so that
# the uniform numbers are the same every evaluation and on every call
rng.add_default_updates = False
factors = [ rng.uniform(size=s.shape, low = 0.1, high = 0.9) for s in scan_outputs ]
factors = [ rng.uniform(size=s.shape, low = 0.1, high = 0.9) for s
in scan_outputs ]
# Random values (?)
return (sum([(s*f).sum() for s,f in zip(scan_outputs,factors)]),updates)
return (sum([(s*f).sum() for s,f in zip(scan_outputs,factors)]), updates)
def asarrayX(value):
    """Cast `value` to an ndarray using theano's configured floatX dtype."""
    target_dtype = theano.config.floatX
    return theano._asarray(value, dtype=target_dtype)
class T_Scan(unittest.TestCase):
#class T_Scan(unittest.TestCase):
class T_Scan(object):
def setUp(self):
utt.seed_rng()
......@@ -157,17 +173,19 @@ class T_Scan(unittest.TestCase):
def f_pow2(x_tm1):
return 2*x_tm1
state = theano.tensor.scalar()
n_steps = theano.tensor.scalar()
state = theano.tensor.scalar('state')
n_steps = theano.tensor.iscalar('nsteps')
output, updates = theano.scan(f_pow2, [],state, [],n_steps = n_steps, truncate_gradient
= -1, go_backwards = False)
my_f = theano.function([state,n_steps], output, updates = updates)
my_f = theano.function([state,n_steps], output, updates = updates,
allow_input_downcast = True)
rng = numpy.random.RandomState(utt.fetch_seed())
state = asarrayX(rng.uniform())
state = rng.uniform()
steps = 5
numpy_values = numpy.array([ state*(2**(k+1)) for k in xrange(steps) ])
numpy_values = numpy.array([ state*(2**(k+1)) for k
in xrange(steps) ])
theano_values = my_f(state,steps)
assert numpy.allclose(numpy_values,theano_values)
......@@ -178,28 +196,30 @@ class T_Scan(unittest.TestCase):
def f_rnn(u_t,x_tm1,W_in, W):
return u_t*W_in+x_tm1*W
u = theano.tensor.vector()
x0 = theano.tensor.scalar()
W_in = theano.tensor.scalar()
W = theano.tensor.scalar()
u = theano.tensor.vector('u')
x0 = theano.tensor.scalar('x0')
W_in = theano.tensor.scalar('win')
W = theano.tensor.scalar('w')
output, updates = theano.scan(f_rnn, u,x0,[W_in,W], n_steps = None, truncate_gradient =
-1, go_backwards = False)
output, updates = theano.scan(f_rnn, u,x0,[W_in,W]
, n_steps = None
, truncate_gradient = -1
, go_backwards = False)
f2 = theano.function([u,x0,W_in,W], output, updates = updates)
f2 = theano.function([u,x0,W_in,W], output, updates = updates,
allow_input_downcast = True)
# get random initial values
rng = numpy.random.RandomState(utt.fetch_seed())
v_u = asarrayX(rng.uniform(size = (4,), low = -5., high = 5.))
v_x0 = asarrayX(rng.uniform())
W = asarrayX(rng.uniform())
W_in = asarrayX(rng.uniform())
v_u = rng.uniform( size = (4,), low = -5., high = 5.)
v_x0 = rng.uniform()
W = rng.uniform()
W_in = rng.uniform()
# compute the output in numpy
v_out = numpy.zeros((4,))
v_out[0] = v_u[0]*W_in + v_x0 * W
for step in xrange(1,4):
v_out[step] = v_u[step]*W_in + v_out[step-1] * W
theano_values = f2(v_u,v_x0, W_in, W)
assert numpy.allclose(theano_values, v_out)
......@@ -208,28 +228,29 @@ class T_Scan(unittest.TestCase):
# are vectors, weights are scalars; using shared variables
def test_one_sequence_one_output_weights_shared(self):
rng = numpy.random.RandomState(utt.fetch_seed())
u = theano.tensor.vector()
x0 = theano.tensor.scalar()
u = theano.tensor.vector('u')
x0 = theano.tensor.scalar('x0')
W_in = theano.shared(asarrayX(rng.uniform()), name = 'w_in')
W = theano.shared(asarrayX(rng.uniform()), name ='w')
def f_rnn_shared(u_t,x_tm1, tmp_W_in, tmp_W):
return u_t*tmp_W_in+x_tm1*tmp_W
output, updates = theano.scan(f_rnn_shared, u,x0,[W_in, W], n_steps =None,
truncate_gradient= -1, go_backwards = False)
f3 = theano.function([u,x0], output, updates = updates)
output, updates = theano.scan(f_rnn_shared, u,x0,[W_in, W]
, n_steps =None
, truncate_gradient= -1
, go_backwards = False)
f3 = theano.function([u,x0], output, updates = updates,
allow_input_downcast = True)
# get random initial values
v_u = asarrayX(rng.uniform(size = (4,), low = -5., high = 5.))
v_x0 = asarrayX(rng.uniform())
v_u = rng.uniform( size = (4,), low = -5., high = 5.)
v_x0 = rng.uniform()
# compute the output i numpy
v_out = numpy.zeros((4,))
v_out[0] = (v_u[0] * W_in.get_value(borrow=True) +
v_x0*W.get_value(borrow=True))
v_out[0] = v_u[0]*W_in.get_value() + v_x0*W.get_value()
for step in xrange(1,4):
v_out[step] = (v_u[step] * W_in.get_value(borrow=True) +
v_out[step-1] * W.get_value(borrow=True))
v_out[step] = v_u[step]*W_in.get_value() + v_out[step-1]*W.get_value()
theano_values = f3(v_u, v_x0)
assert numpy.allclose(theano_values, v_out)
......@@ -258,28 +279,125 @@ class T_Scan(unittest.TestCase):
y0 = theano.tensor.scalar('y0')
def f_rnn_cmpl(u1_t, u2_t, x_tm1, y_tm1, W_in1):
return [theano.dot(u1_t,W_in1) + u2_t* W_in2 + \
return [theano.dot(u1_t,W_in1) + u2_t * W_in2 + \
theano.dot(x_tm1, W), theano.dot(x_tm1, W_out)]
outputs, updates = theano.scan(f_rnn_cmpl,[u1,u2],[x0,y0],W_in1, n_steps = None,
truncate_gradient = -1, go_backwards = False)
outputs, updates = theano.scan(f_rnn_cmpl,[u1,u2],[x0,y0],W_in1
, n_steps = None
, truncate_gradient = -1
, go_backwards = False)
f4 = theano.function([u1,u2,x0,y0,W_in1], outputs
, updates = updates,
allow_input_downcast = True)
f4 = theano.function([u1,u2,x0,y0,W_in1], outputs, updates = updates)
# compute the values in numpy
v_x = numpy.zeros((3,2),dtype=theano.config.floatX)
v_y = numpy.zeros((3,),dtype=theano.config.floatX)
v_x[0] = numpy.dot(v_u1[0],vW_in1) + v_u2[0]*vW_in2 + numpy.dot(v_x0,vW)
v_x[0] = numpy.dot(v_u1[0],vW_in1) + v_u2[0]*vW_in2 + \
numpy.dot(v_x0,vW)
v_y[0] = numpy.dot(v_x0,vWout)
for i in xrange(1,3):
v_x[i] = numpy.dot(v_u1[i],vW_in1) + v_u2[i]*vW_in2 + numpy.dot(v_x[i-1],vW)
v_x[i] = numpy.dot(v_u1[i],vW_in1) + v_u2[i]*vW_in2 + \
numpy.dot(v_x[i-1],vW)
v_y[i] = numpy.dot(v_x[i-1], vWout)
(theano_x,theano_y) = f4( v_u1, v_u2, v_x0, v_y0, vW_in1)
assert numpy.allclose(theano_x , v_x)
assert numpy.allclose(theano_y , v_y)
def test_multiple_outs_taps(self):
l = 5
rng = numpy.random.RandomState(utt.fetch_seed())
vW_in2 = asarrayX(rng.uniform(size = (2,), low = -.2,high = .2))
vW = asarrayX(rng.uniform(size = (2,2), low = -.2,high = .2))
vWout = asarrayX(rng.uniform(size = (2,), low = -.2,high = .2))
vW_in1 = asarrayX(rng.uniform(size = (2,2), low = -.2,high = .2))
v_u1 = asarrayX(rng.uniform(size = (l,2), low = -.2, high = .2))
v_u2 = asarrayX(rng.uniform(size = (l+2,2), low = -.2,high = .2))
v_x0 = asarrayX(rng.uniform(size = (2,), low = -.2,high = .2))
v_y0 = asarrayX(rng.uniform(size = (3,)))
W_in2 = theano.shared(vW_in2, name='win2')
W = theano.shared(vW, name='w')
W_out = theano.shared(vWout, name = 'wout')
W_in1 = theano.tensor.matrix('win')
u1 = theano.tensor.matrix('u1')
u2 = theano.tensor.matrix('u2')
x0 = theano.tensor.vector('x0')
y0 = theano.tensor.vector('y0')
def f_rnn_cmpl(u1_t, u2_tm1, u2_t, u2_tp1
, x_tm1, y_tm1, y_tm3, W_in1):
return [theano.dot(u1_t,W_in1) + (u2_t+u2_tm1*u2_tp1)* W_in2 + \
theano.dot(x_tm1, W), (y_tm1+y_tm3)*theano.dot(x_tm1
, W_out),
theano.dot(u1_t, W_in1)]
outputs, updates = theano.scan(f_rnn_cmpl
, [ u1
, dict(input=u2,taps=[-1,0,1]) ]
, [x0
, dict(initial = y0
, taps=[-1,-3])
, None]
, W_in1
, n_steps = None
, truncate_gradient = -1
, go_backwards = False )
f = theano.function([u1,u2,x0,y0,W_in1], outputs,
updates = updates, allow_input_downcast = True)
theano_out = f( v_u1
, v_u2
, v_x0
, v_y0
, vW_in1)
ny0 = numpy.zeros((5,2))
ny1 = numpy.zeros((5,))
ny2 = numpy.zeros((5,2))
ny0[0] = numpy.dot(v_u1[0], vW_in1) + \
(v_u2[1] + v_u2[0]*v_u2[2])* vW_in2 + numpy.dot(v_x0,vW)
ny1[0] = (v_y0[2]+v_y0[0])* numpy.dot(v_x0, vWout)
ny2[0] = numpy.dot(v_u1[0], vW_in1)
ny0[1] = numpy.dot(v_u1[1], vW_in1) + \
(v_u2[2] + v_u2[1]*v_u2[3])* vW_in2 + numpy.dot(ny0[0],vW)
ny1[1] = (ny1[0]+v_y0[1])* numpy.dot(ny0[0], vWout)
ny2[1] = numpy.dot(v_u1[1], vW_in1)
ny0[2] = numpy.dot(v_u1[2], vW_in1) + \
(v_u2[3] + v_u2[2]*v_u2[4])* vW_in2 +\
numpy.dot(ny0[1],vW)
ny1[2] = (ny1[1]+v_y0[2])* numpy.dot(ny0[1], vWout)
ny2[2] = numpy.dot(v_u1[2], vW_in1)
ny0[3] = numpy.dot(v_u1[3], vW_in1) + \
(v_u2[4] + v_u2[3]*v_u2[5])* vW_in2 +\
numpy.dot(ny0[2],vW)
ny1[3] = (ny1[2]+ny1[0])* numpy.dot(ny0[2], vWout)
ny2[3] = numpy.dot(v_u1[3], vW_in1)
ny0[4] = numpy.dot(v_u1[4], vW_in1) + \
(v_u2[5] + v_u2[4]*v_u2[6])* vW_in2 +\
numpy.dot(ny0[3],vW)
ny1[4] = (ny1[3]+ny1[1])* numpy.dot(ny0[3], vWout)
ny2[4] = numpy.dot(v_u1[4], vW_in1)
#import pdb; pdb.set_trace()
# simple rnn, one input, one state, weights for each; input/state are
# vectors, weights are scalars; using shared variables and past
# taps (sequences and outputs)
......@@ -290,8 +408,8 @@ class T_Scan(unittest.TestCase):
vu = asarrayX(rng.uniform(size=(4,), low = -5., high = 5.))
vx0 = asarrayX(rng.uniform(size=(2,), low = -5., high = 5.))
u = theano.tensor.vector()
x0 = theano.tensor.vector()
u = theano.tensor.vector('u')
x0 = theano.tensor.vector('x0')
W_in = theano.shared(vW_in, name = 'w_in')
W = theano.shared(vW, name ='w')
......@@ -299,23 +417,26 @@ class T_Scan(unittest.TestCase):
return u_tm2*W_in+x_tm1*W+x_tm2
outputs, updates = theano.scan(f_rnn_shared, dict(input=u, taps=-2),
dict(initial = x0, taps = [-1,-2]), [], n_steps = None, truncate_gradient = -1,
go_backwards = False)
dict(initial = x0, taps = [-1,-2]), []
, n_steps = None
, truncate_gradient = -1
, go_backwards = False)
f7 = theano.function([u,x0], outputs, updates = updates)
f7 = theano.function([u,x0], outputs, updates = updates,
allow_input_downcast = True)
theano_out = f7(vu,vx0)
# compute output in numpy
# a bit of explaining:
# due to the definition of sequences taps in scan, v_0[0] is actually v_0[-2],
# and v_0[1] is v_0[-1]. The values v_0[2] and v_0[3] do not get uesd ( because you
# do not use v_0[t] in scan) which might seem strange, but then again why not use
# due to the definition of sequences taps in scan, v_0[0] is
# actually v_0[-2], and v_0[1] is v_0[-1]. The values v_0[2]
# and v_0[3] do not get uesd ( because you do not use v_0[t]
# in scan) which might seem strange, but then again why not use
# v_0[t] instead of v_0[t-2] in a real application ??
# also vx0[0] corresponds to vx0[-2], vx0[1] to vx0[-1]
numpy_out = numpy.zeros((2,))
numpy_out[0] = vu[0]*vW_in + vx0[1]*vW + vx0[0]
numpy_out[1] = vu[1]*vW_in + numpy_out[0]*vW + vx0[1]
assert numpy.allclose(numpy_out , theano_out)
......@@ -330,19 +451,24 @@ class T_Scan(unittest.TestCase):
vu = asarrayX(rng.uniform(size=(6,), low = -5., high = 5.))
vx0 = asarrayX(rng.uniform(size=(2,), low = -5., high = 5.))
u = theano.tensor.vector()
x0 = theano.tensor.vector()
u = theano.tensor.vector('u')
x0 = theano.tensor.vector('x0')
W_in = theano.shared(vW_in, name = 'w_in')
W = theano.shared(vW, name ='w')
def f_rnn_shared(u_tm2,u_tp2, x_tm1, x_tm2):
return (u_tm2+u_tp2)*W_in+x_tm1*W+x_tm2
output,updates = theano.scan(f_rnn_shared, dict( input = u, taps=[-2,2]),\
dict(initial = x0, taps = [-1,-2]), [], n_steps = None, truncate_gradient =-1,
go_backwards = False)
output,updates = theano.scan(f_rnn_shared
, dict( input = u, taps=[-2,2])
, dict(initial = x0, taps = [-1,-2])
, []
, n_steps = None
, truncate_gradient =-1
, go_backwards = False)
f8 = theano.function([u,x0], output, updates = updates)
f8 = theano.function([u,x0], output, updates = updates,
allow_input_downcast = True)
theano_out = f8(vu,vx0)
# compute output in numpy
numpy_out = numpy.zeros(2)
......@@ -350,7 +476,6 @@ class T_Scan(unittest.TestCase):
# and vx0[0] as vx0[-2], vx0[1] as vx0[-1]
numpy_out[0] = (vu[0]+vu[4])*vW_in + vx0[1]*vW + vx0[0]
numpy_out[1] = (vu[1]+vu[5])*vW_in + numpy_out[0]*vW + vx0[1]
assert numpy.allclose(numpy_out , theano_out)
......@@ -377,12 +502,22 @@ class T_Scan(unittest.TestCase):
W = theano.shared(vW,'W')
mode = theano.compile.mode.get_mode(None).including('inplace')
def f_rnn_shared(u0_t,u1_t, u2_t, x0_tm1,x1_tm1):
return [u0_t*W_in + x0_tm1*W + u1_t*u2_t, u0_t*W_in + x1_tm1*W+ u1_t+u2_t ]
return [u0_t*W_in + x0_tm1*W + u1_t*u2_t
, u0_t*W_in + x1_tm1*W+ u1_t+u2_t ]
outputs, updates = theano.scan(f_rnn_shared, [u0,u1,u2],
[dict( initial = x0, inplace =u2), dict(initial = x1, inplace = u1)],
[], n_steps = None, truncate_gradient = -1, go_backwards = False, mode=mode )
f9 = theano.function([mu0,mu1,mu2,x0,x1], outputs , updates = updates, mode = mode)
[dict( initial = x0, inplace =u2)
, dict(initial = x1, inplace = u1)]
, []
, n_steps = None
, truncate_gradient = -1
, go_backwards = False
, mode=mode )
f9 = theano.function([mu0,mu1,mu2,x0,x1]
, outputs
, updates = updates
, mode = mode
, allow_input_downcast = True)
# compute output in numpy
numpy_x0 = numpy.zeros((3,))
......@@ -393,14 +528,20 @@ class T_Scan(unittest.TestCase):
numpy_x0[i] = vu0[i]* vW_in + numpy_x0[i-1]*vW + vu1[i]*vu2[i]
numpy_x1[i] = vu0[i]* vW_in + numpy_x1[i-1]*vW + vu1[i]+vu2[i]
# note theano computes inplace, so call function after numpy equivalent is done
# note theano computes inplace, so call function after numpy
# equivalent is done
(theano_x0, theano_x1) = f9(vu0,vu1,vu2,vx0,vx1)
# assert that theano does what it should
assert numpy.allclose( theano_x0 , numpy_x0)
assert numpy.allclose( theano_x1 , numpy_x1)
# assert that it was done in place
assert numpy.allclose( theano_x0 , vu2)
assert numpy.allclose( theano_x1 , vu1)
# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
# Old way of doing inplace operations is depricated .. tests don't
# make sense anymroe
##assert numpy.allclose( theano_x0 , vu2)
## assert numpy.allclose( theano_x1 , vu1)
# simple rnn ; compute inplace version 2
def test_inplace2(self):
......@@ -429,10 +570,20 @@ class T_Scan(unittest.TestCase):
u0_t*W_in + x1_tm1*W+ u2_tm1+u2_t+u2_tp1 ]
outputs, updates = theano.scan(f_rnn_shared,
[u0,dict(input = u1, taps = [0,1]),dict( input = u2, taps= [-1,0,+1])],
[dict( initial = x0, inplace =u2), dict(initial = x1, inplace = u1)],
[], n_steps = None, truncate_gradient = -1, go_backwards = False, mode=mode )
f9 = theano.function([mu0,mu1,mu2,x0,x1], outputs , updates = updates, mode = mode)
[u0,dict(input = u1, taps = [0,1])
,dict( input = u2, taps= [-1,0,+1])]
, [dict( initial = x0)
, dict(initial = x1)]
, []
, n_steps = None
, truncate_gradient = -1
, go_backwards = False
, mode=mode )
f9 = theano.function([mu0,mu1,mu2,x0,x1]
, outputs
, updates = updates
, mode = mode
, allow_input_downcast = True)
# compute output in numpy
numpy_x0 = numpy.zeros((3,))
......@@ -441,18 +592,25 @@ class T_Scan(unittest.TestCase):
numpy_x1[0] = vu0[0] * vW_in + vx1 * vW + vu2[0]+vu2[1]+vu2[2]
for i in xrange(1,3):
numpy_x0[i] = vu0[i]* vW_in + numpy_x0[i-1]*vW + vu1[i]*vu1[i+1]
numpy_x1[i] = vu0[i]* vW_in + numpy_x1[i-1]*vW + vu2[i]+vu2[i+1]+vu2[i+2]
numpy_x1[i] = vu0[i]* vW_in + numpy_x1[i-1]*vW + \
vu2[i]+vu2[i+1]+vu2[i+2]
# note theano computes inplace, so call function after numpy equivalent is done
# note theano computes inplace, so call function after numpy
# equivalent is done
(theano_x0, theano_x1) = f9(vu0,vu1,vu2,vx0,vx1)
# assert that theano does what it should
assert numpy.allclose( theano_x0 , numpy_x0)
assert numpy.allclose( theano_x1 , numpy_x1)
# assert that it was done in place
# not that x0 should not be inplace of vu2 because you are using past values of u2,
# and therefore you are not allowed to work inplace !!
assert not numpy.allclose( theano_x0 , vu2[1:4])
assert numpy.allclose( theano_x1 , vu1[0:3])
# not that x0 should not be inplace of vu2 because you are using
# past values of u2, and therefore you are not allowed to work
# inplace !!
# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
# Old way of doing inplace operations is depricated .. tests don't
# make sense anymroe
#assert not numpy.allclose( theano_x0 , vu2[1:4])
#assert numpy.allclose( theano_x1 , vu1[0:3])
......@@ -460,13 +618,13 @@ class T_Scan(unittest.TestCase):
def test_shared_arguments_with_updates(self):
rng = numpy.random.RandomState(utt.fetch_seed())
vW1 = asarrayX(rng.rand(20,30))
vW2 = asarrayX(rng.rand(30,20))
vu1 = asarrayX(rng.rand(3,20))
vu2 = asarrayX(rng.rand(3,30))
vy0 = asarrayX(rng.rand(3,20))
vy1 = asarrayX(rng.rand(20))
vy2 = asarrayX(rng.rand(30))
vW1 = asarrayX(rng.rand(2,3))
vW2 = asarrayX(rng.rand(3,2))
vu1 = asarrayX(rng.rand(3,2))
vu2 = asarrayX(rng.rand(3,3))
vy0 = asarrayX(rng.rand(3,2))
vy1 = asarrayX(rng.rand(2))
vy2 = asarrayX(rng.rand(3))
# Their is a bug when floatX=float32 when we remove this line.
# The trace back is:
......@@ -496,8 +654,7 @@ class T_Scan(unittest.TestCase):
#TypeError: ('__array__() takes no arguments (1 given)', <theano.scan.Scan object at 0x3dbbf90>(?_steps, u1, u2, y0, y1, 0.0, W1, W2), 'Sequence id of Apply node=0')
#
# This don't seam to be a theano related bug...
#vu1 = rng.rand(3,20)
vu1 = asarrayX(rng.rand(3,20))
vu1 = asarrayX(rng.rand(3,2))
W1 = theano.shared(vW1,'W1')
W2 = theano.shared(vW2,'W2')
......@@ -516,43 +673,62 @@ class T_Scan(unittest.TestCase):
u2 = theano.tensor.matrix('u2')
y0 = theano.tensor.matrix('y0')
outputs,updates = theano.scan(f, [u1,u2], [ dict(initial = y0, taps = [-3,-2,-1]),y1,
None], [], n_steps = None, go_backwards = False, truncate_gradient = -1)
f10 = theano.function([u2,y0], outputs, updates = updates)
theano_y0,theano_y1,theano_y2 = f10(vu2, vy0)
outputs,updates = theano.scan(f, [u1,u2]
, [ dict(initial = y0
, taps = [-3,-2,-1])
, y1
, None]
, []
, n_steps = None
, go_backwards = False
, truncate_gradient = -1)
f10 = theano.function([u2,y0], outputs, updates = updates,
allow_input_downcast = True)
allstuff = f10(vu2, vy0)
theano_y0,theano_y1,theano_y2 = allstuff
# do things in numpy
numpy_y0 = numpy.zeros((6,20))
numpy_y1 = numpy.zeros((4,20))
numpy_y2 = numpy.zeros((3,30))
numpy_y0 = numpy.zeros((6,2))
numpy_y1 = numpy.zeros((4,2))
numpy_y2 = numpy.zeros((3,3))
numpy_y0[:3] = vy0
numpy_y1[0] = vy1
numpy_W1 = vW1.copy()
numpy_W2 = vW2.copy()
numpy_W2 = vW2.copy()
for idx in xrange(3):
numpy_y0[idx+3] = numpy.dot( numpy.dot(vu1[idx,:], numpy_W1), numpy_W2) + \
0.1*numpy_y0[idx+2] + 0.33*numpy_y0[idx+1] + 0.17*numpy_y0[idx]
numpy_y1[idx+1] = numpy.dot( vu2[idx,:], numpy_W2) + numpy_y1[idx]
numpy_y0[idx+3] = numpy.dot( numpy.dot(vu1[idx,:], numpy_W1)
, numpy_W2) + \
0.1*numpy_y0[idx+2] + \
0.33*numpy_y0[idx+1] + 0.17*numpy_y0[idx]
numpy_y1[idx+1] = numpy.dot( vu2[idx,:], numpy_W2) +\
numpy_y1[idx]
numpy_y2[idx] = numpy.dot( vu1[idx,:], numpy_W1)
numpy_W1 = numpy_W1 + .1
numpy_W2 = numpy_W2 + .05
assert numpy.allclose( theano_y0 , numpy_y0[3:])
assert numpy.allclose( theano_y1 , numpy_y1[1:])
assert numpy.allclose( theano_y2 , numpy_y2 )
assert numpy.allclose(W1.get_value(borrow=True), numpy_W1)
assert numpy.allclose(W2.get_value(borrow=True), numpy_W2)
assert numpy.allclose( W1.get_value() , numpy_W1 )
assert numpy.allclose( W2.get_value() , numpy_W2 )
def test_simple_shared_random(self):
theano_rng = theano.tensor.shared_randomstreams.RandomStreams(utt.fetch_seed())
theano_rng = theano.tensor.shared_randomstreams.RandomStreams(
utt.fetch_seed())
values, updates = theano.scan(lambda : theano_rng.uniform((2,),-1,1), [],[],[],n_steps
= 5, truncate_gradient = -1, go_backwards = False)
my_f = theano.function([], values, updates = updates )
values, updates = theano.scan(lambda : theano_rng.uniform((2,),-1,1)
, []
, []
, []
, n_steps = 5
, truncate_gradient = -1
, go_backwards = False)
my_f = theano.function([], values, updates = updates,
allow_input_downcast = True )
rng_seed = numpy.random.RandomState(utt.fetch_seed()).randint(2**30)
rng = numpy.random.RandomState(int(rng_seed)) #int() is for 32bit
......@@ -571,24 +747,36 @@ class T_Scan(unittest.TestCase):
def test_gibbs_chain(self):
rng = numpy.random.RandomState(utt.fetch_seed())
v_W = numpy.array(rng.rand(20,30) -.5, dtype = 'float32')
v_vsample = numpy.array(rng.binomial(1,0.5, size=(3,20), ), dtype = 'float32')
v_vsample = numpy.array(rng.binomial(1,0.5, size=(3,20), )
, dtype = 'float32')
v_bvis = numpy.array(rng.rand(20) -.5, dtype='float32')
v_bhid = numpy.array(rng.rand(30) -.5, dtype='float32')
W = theano.shared(v_W)
bhid = theano.shared(v_bhid)
bvis = theano.shared(v_bvis)
W = theano.shared(v_W, 'vW')
bhid = theano.shared(v_bhid, 'vbhid')
bvis = theano.shared(v_bvis, 'vbvis')
vsample = theano.tensor.matrix(dtype='float32')
trng = theano.tensor.shared_randomstreams.RandomStreams(utt.fetch_seed())
trng = theano.tensor.shared_randomstreams.RandomStreams(
utt.fetch_seed())
def f(vsample_tm1):
hmean_t = theano.tensor.nnet.sigmoid(theano.dot(vsample_tm1,W)+ bhid)
hsample_t = theano.tensor.cast(trng.binomial(hmean_t.shape,1,hmean_t),dtype='float32')
vmean_t = theano.tensor.nnet.sigmoid(theano.dot(hsample_t,W.T)+ bvis)
return theano.tensor.cast(trng.binomial(vmean_t.shape,1,vmean_t), dtype='float32')
theano_vsamples, updates = theano.scan(f, [], vsample,[], n_steps = 10,
truncate_gradient=-1, go_backwards = False)
my_f = theano.function([vsample], theano_vsamples[-1], updates = updates)
hmean_t = theano.tensor.nnet.sigmoid(theano.dot(vsample_tm1,W)
+ bhid)
hsample_t = theano.tensor.cast(trng.binomial(hmean_t.shape
, 1
, hmean_t)
,dtype='float32')
vmean_t = theano.tensor.nnet.sigmoid(theano.dot(hsample_t,W.T)
+ bvis)
return theano.tensor.cast(trng.binomial(vmean_t.shape,1,vmean_t)
, dtype='float32')
theano_vsamples, updates = theano.scan(f, [], vsample,[]
, n_steps = 10
, truncate_gradient=-1
, go_backwards = False)
my_f = theano.function([vsample], theano_vsamples[-1]
, updates = updates
, allow_input_downcast = True)
_rng = numpy.random.RandomState(utt.fetch_seed())
rng_seed = _rng.randint(2**30)
......@@ -598,10 +786,16 @@ class T_Scan(unittest.TestCase):
nrng2 = numpy.random.RandomState(int(rng_seed)) # int() is for 32bit
def numpy_implementation(vsample):
for idx in range(10):
hmean = 1./(1. + numpy.exp(-(numpy.dot(vsample,v_W) + v_bhid)))
hsample = numpy.array(nrng1.binomial(1,hmean, size = hmean.shape), dtype='float32')
vmean = 1./(1. + numpy.exp(-(numpy.dot(hsample,v_W.T) + v_bvis)))
vsample = numpy.array(nrng2.binomial(1,vmean, size = vmean.shape),dtype='float32')
hmean = 1./(1. + numpy.exp(-(numpy.dot(vsample,v_W)
+ v_bhid)))
hsample = numpy.array(nrng1.binomial(1,hmean
, size = hmean.shape)
, dtype='float32')
vmean = 1./(1. + numpy.exp(-(numpy.dot(hsample,v_W.T)
+ v_bvis)))
vsample = numpy.array(nrng2.binomial(1,vmean
, size = vmean.shape)
,dtype='float32')
return vsample
......@@ -614,44 +808,51 @@ class T_Scan(unittest.TestCase):
def test_only_shared_no_input_no_output(self):
rng = numpy.random.RandomState(utt.fetch_seed())
v_state = asarrayX(rng.uniform())
state = theano.shared(v_state)
state = theano.shared(v_state,'vstate')
def f_2():
return {state: 2*state}
n_steps = theano.tensor.scalar()
output, updates = theano.scan(f_2,[],[],[],n_steps = n_steps, truncate_gradient = -1,
go_backwards = False)
this_f = theano.function([n_steps], output, updates = updates)
n_steps = theano.tensor.iscalar('nstep')
output, updates = theano.scan(f_2,[],[],[]
, n_steps = n_steps
, truncate_gradient = -1
, go_backwards = False)
this_f = theano.function([n_steps], output, updates = updates,
allow_input_downcast = True)
n_steps = 3
this_f(n_steps)
numpy_state = v_state* (2**(n_steps))
assert numpy.allclose(state.get_value(borrow=True), numpy_state)
assert numpy.allclose(state.get_value(), numpy_state)
def test_map_functionality(self):
def f_rnn(u_t):
return u_t + 3
u = theano.tensor.vector()
u = theano.tensor.vector('u')
outputs, updates = theano.scan(f_rnn, u,[],[], n_steps =None , truncate_gradient = -1,
go_backwards = False)
outputs, updates = theano.scan(f_rnn, u,[],[]
, n_steps =None
, truncate_gradient = -1
, go_backwards = False)
f2 = theano.function([u], outputs, updates = updates)
f2 = theano.function([u], outputs, updates = updates,
allow_input_downcast = True)
rng = numpy.random.RandomState(utt.fetch_seed())
v_u = rng.uniform(size=(5,), low = -5., high = 5.).astype(config.floatX)
v_u = rng.uniform(size=(5,), low = -5., high = 5.)
numpy_result = v_u + 3
theano_result = f2(v_u)
assert numpy.allclose(theano_result , numpy_result)
def test_map(self):
v = theano.tensor.vector()
v = theano.tensor.vector('v')
abs_expr,abs_updates = theano.map(lambda x: abs(x), v,[],
truncate_gradient = -1, go_backwards = False)
f = theano.function([v],abs_expr,updates = abs_updates)
f = theano.function([v],abs_expr,updates = abs_updates,
allow_input_downcast = True)
rng = numpy.random.RandomState(utt.fetch_seed())
vals = rng.uniform(size=(10,), low = -5., high = 5.).astype(config.floatX)
vals = rng.uniform(size=(10,), low = -5., high = 5.)
abs_vals = abs(vals)
theano_vals = f(vals)
assert numpy.allclose(abs_vals , theano_vals)
......@@ -660,21 +861,24 @@ class T_Scan(unittest.TestCase):
def f_rnn(u_t,x_tm1,W_in, W):
return u_t*W_in+x_tm1*W
u = theano.tensor.vector()
x0 = theano.tensor.scalar()
W_in = theano.tensor.scalar()
W = theano.tensor.scalar()
u = theano.tensor.vector('u')
x0 = theano.tensor.scalar('x0')
W_in = theano.tensor.scalar('win')
W = theano.tensor.scalar('w')
output, updates = theano.scan(f_rnn, u,x0,[W_in,W], n_steps = None, truncate_gradient =
-1, go_backwards = True)
output, updates = theano.scan(f_rnn, u,x0,[W_in,W]
, n_steps = None
, truncate_gradient = -1
, go_backwards = True)
f2 = theano.function([u,x0,W_in,W], output, updates = updates)
f2 = theano.function([u,x0,W_in,W], output, updates = updates,
allow_input_downcast = True)
# get random initial values
rng = numpy.random.RandomState(utt.fetch_seed())
v_u = asarrayX(rng.uniform(size=(4,), low=-5., high=5.))
v_x0 = asarrayX(rng.uniform())
W = asarrayX(rng.uniform())
W_in = asarrayX(rng.uniform())
v_u = rng.uniform( size = (4,), low = -5., high = 5.)
v_x0 = rng.uniform()
W = rng.uniform()
W_in = rng.uniform()
# compute the output in numpy
v_out = numpy.zeros((4,))
......@@ -686,13 +890,14 @@ class T_Scan(unittest.TestCase):
assert numpy.allclose( theano_values , v_out)
def test_reduce(self):
v = theano.tensor.vector()
s = theano.tensor.scalar()
v = theano.tensor.vector('v')
s = theano.tensor.scalar('s')
result, updates = theano.reduce(lambda x,y: x+y, v,s)
f = theano.function([v,s], result, updates = updates)
f = theano.function([v,s], result, updates = updates,
allow_input_downcast = True)
rng = numpy.random.RandomState(utt.fetch_seed())
v_v = rng.uniform(size = (5,), low = -5., high = 5.).astype(config.floatX)
v_v = rng.uniform( size = (5,), low = -5., high = 5.)
assert abs(numpy.sum(v_v) - f(v_v, 0.)) < 1e-3
......@@ -705,22 +910,31 @@ class T_Scan(unittest.TestCase):
W_in = theano.tensor.scalar('W_in')
W = theano.tensor.scalar('W')
cost, updates = scan_project_sum(f_rnn, u, x0, [W_in,W], n_steps = None,
truncate_gradient = -1, go_backwards = False)
cost, updates = scan_project_sum(f_rnn, u, x0, [W_in,W]
, n_steps = None
, truncate_gradient = -1
, go_backwards = False)
gu,gx0,gW_in,gW = theano.tensor.grad(cost, [u,x0,W_in, W])
#import pdb; pdb.set_trace()
grad_fn = theano.function([u,x0,W_in, W], [gu,gx0,gW_in, gW],
updates = updates, no_default_updates = True)
updates = updates, no_default_updates = True,
allow_input_downcast = True)
cost_fn = theano.function([u,x0,W_in, W], cost, updates = updates,
no_default_updates = True, allow_input_downcast=True)
no_default_updates = True,
allow_input_downcast = True)
# get random initial values
rng = numpy.random.RandomState(utt.fetch_seed())
v_u = numpy.array(rng.uniform( size = (10,), low = -.5, high = .5),dtype=theano.config.floatX)
v_u = numpy.array(rng.uniform( size = (10,), low = -.5
, high = .5)
,dtype=theano.config.floatX)
v_x0 = numpy.array(rng.uniform(), dtype= theano.config.floatX)
W = numpy.array(rng.uniform(), dtype= theano.config.floatX)
W_in = numpy.array(rng.uniform(), dtype= theano.config.floatX)
num_grad = multiple_outputs_numeric_grad(cost_fn, [v_u, v_x0, W_in, W])
analytic_grad = grad_fn(v_u, v_x0, W_in, W)
num_grad = multiple_outputs_numeric_grad(cost_fn
, [v_u, v_x0, W_in, W])
max_err, max_err_pos = num_grad.max_err(analytic_grad)
if max_err > 1e-2:
......@@ -752,18 +966,27 @@ class T_Scan(unittest.TestCase):
return [theano.dot(u1_t,W_in1) + u2_t* W_in2 + \
theano.dot(x_tm1, W), theano.dot(x_tm1, W_out)]
cost, updates = scan_project_sum(f_rnn_cmpl,[u1,u2],[x0,y0],W_in1, n_steps = None,
truncate_gradient = -1, go_backwards = False)
cost, updates = scan_project_sum(f_rnn_cmpl, [u1,u2], [x0,y0]
, W_in1
, n_steps = None
, truncate_gradient = -1
, go_backwards = False)
vparams = [v_u1, v_u2, v_x0, v_y0,vW_in1]
params = [u1,u2,x0,y0,W_in1 ]
gparams = theano.tensor.grad(cost, params)
grad_fn = theano.function([u1,u2,x0,y0,W_in1], gparams,
updates = updates, no_default_updates = True)
updates = updates, no_default_updates = True,
allow_input_downcast = True)
cost_fn = theano.function([u1,u2,x0,y0,W_in1], cost,
updates = updates, no_default_updates = True,
allow_input_downcast=True)
num_grad = multiple_outputs_numeric_grad(cost_fn,[v_u1,v_u2,v_x0,v_y0,vW_in1])
allow_input_downcast = True)
num_grad = multiple_outputs_numeric_grad(cost_fn
, [v_u1
, v_u2
, v_x0
, v_y0
, vW_in1])
analytic_grad = grad_fn(v_u1,v_u2, v_x0,v_y0, vW_in1)
max_err, max_err_pos = num_grad.max_err(analytic_grad)
......@@ -782,7 +1005,7 @@ class T_Scan(unittest.TestCase):
v_u1 = asarrayX(rng.uniform(size = (l,2), low = -.2, high = .2))
v_u2 = asarrayX(rng.uniform(size = (l+2,2), low = -.2,high = .2))
v_x0 = asarrayX(rng.uniform(size = (2,), low = -.2,high = .2))
v_y0 = asarrayX(rng.uniform(size = (4,)))
v_y0 = asarrayX(rng.uniform(size = (3,)))
W_in2 = theano.shared(vW_in2, name='win2')
W = theano.shared(vW, name='w')
......@@ -793,22 +1016,41 @@ class T_Scan(unittest.TestCase):
x0 = theano.tensor.vector('x0')
y0 = theano.tensor.vector('y0')
def f_rnn_cmpl(u1_t, u2_tm1, u2_t, u2_tp1, x_tm1, y_tm1, y_tm3, W_in1):
def f_rnn_cmpl(u1_t, u2_tm1, u2_t, u2_tp1
, x_tm1, y_tm1, y_tm3, W_in1):
return [theano.dot(u1_t,W_in1) + (u2_t+u2_tm1*u2_tp1)* W_in2 + \
theano.dot(x_tm1, W), (y_tm1+y_tm3)*theano.dot(x_tm1, W_out)]
cost, updates = scan_project_sum(f_rnn_cmpl,[u1,
dict(input=u2,taps=[-1,0,1])],[x0,dict(initial=y0,
taps=[-1,-3])],W_in1, n_steps = None,
truncate_gradient = -1, go_backwards = False)
theano.dot(x_tm1, W), (y_tm1+y_tm3)*theano.dot(x_tm1
, W_out),
theano.dot(u1_t, W_in1)]
cost, updates = scan_project_sum(
f_rnn_cmpl
, [ u1
, dict(input=u2,taps=[-1,0,1]) ]
, [x0
, dict(initial = y0
, taps=[-1,-3])
, None]
, W_in1
, n_steps = None
, truncate_gradient = -1
, go_backwards = False )
vparams = [v_u1, v_u2, v_x0, v_y0,vW_in1]
params = [u1,u2,x0,y0,W_in1 ]
gparams = theano.tensor.grad(cost, params)
grad_fn = theano.function([u1,u2,x0,y0,W_in1], gparams,
updates = updates, no_default_updates = True)
cost_fn = theano.function([u1,u2,x0,y0,W_in1], cost,
updates = updates, no_default_updates = True)
updates = updates, no_default_updates = True,
allow_input_downcast = True)
num_grad = multiple_outputs_numeric_grad(cost_fn,[v_u1,v_u2,v_x0,v_y0,vW_in1])
cost_fn = theano.function([u1,u2,x0,y0,W_in1], cost,
updates = updates, no_default_updates = True,
allow_input_downcast = True)
num_grad = multiple_outputs_numeric_grad(cost_fn
, [v_u1
, v_u2
, v_x0
, v_y0
, vW_in1])
analytic_grad = grad_fn(v_u1,v_u2, v_x0,v_y0, vW_in1)
max_err, max_err_pos = num_grad.max_err(analytic_grad)
if max_err > 1e-2:
......@@ -825,7 +1067,7 @@ class T_Scan(unittest.TestCase):
v_u1 = asarrayX(rng.uniform(size = (l,2), low = -.2, high = .2))
v_u2 = asarrayX(rng.uniform(size = (l+2,2), low = -.2,high = .2))
v_x0 = asarrayX(rng.uniform(size = (2,), low = -.2,high = .2))
v_y0 = asarrayX(rng.uniform(size = (4,)))
v_y0 = asarrayX(rng.uniform(size = (3,)))
W_in2 = theano.shared(vW_in2, name='win2')
W = theano.shared(vW, name='w')
......@@ -836,9 +1078,11 @@ class T_Scan(unittest.TestCase):
x0 = theano.tensor.vector('x0')
y0 = theano.tensor.vector('y0')
def f_rnn_cmpl(u1_t, u2_tm1, u2_t, u2_tp1, x_tm1, y_tm1, y_tm3, W_in1):
def f_rnn_cmpl(u1_t, u2_tm1, u2_t, u2_tp1, x_tm1
, y_tm1, y_tm3, W_in1):
return [theano.dot(u1_t,W_in1) + (u2_t+u2_tm1*u2_tp1)* W_in2 + \
theano.dot(x_tm1, W), (y_tm1+y_tm3)*theano.dot(x_tm1, W_out)]
theano.dot(x_tm1, W), (y_tm1+y_tm3)*theano.dot(x_tm1
, W_out)]
cost, updates = scan_project_sum(f_rnn_cmpl,[u1,
dict(input=u2,taps=[-1,0,1])],[x0,dict(initial=y0,
taps=[-1,-3])],W_in1, n_steps = None,
......@@ -847,11 +1091,17 @@ class T_Scan(unittest.TestCase):
params = [u1,u2,x0,y0,W_in1 ]
gparams = theano.tensor.grad(cost, params)
grad_fn = theano.function([u1,u2,x0,y0,W_in1], gparams,
updates = updates, no_default_updates = True)
updates = updates, no_default_updates = True,
allow_input_downcast = True)
cost_fn = theano.function([u1,u2,x0,y0,W_in1], cost,
updates = updates, no_default_updates = True)
updates = updates, no_default_updates = True,
allow_input_downcast = True)
num_grad = multiple_outputs_numeric_grad(cost_fn,[v_u1,v_u2,v_x0,v_y0,vW_in1])
num_grad = multiple_outputs_numeric_grad(cost_fn,[ v_u1
, v_u2
, v_x0
, v_y0
, vW_in1])
analytic_grad = grad_fn(v_u1,v_u2, v_x0,v_y0, vW_in1)
max_err, max_err_pos = num_grad.max_err(analytic_grad)
if max_err > 1e-2:
......@@ -862,44 +1112,53 @@ class T_Scan(unittest.TestCase):
def test_grad_multiple_outs_some_uncomputable(self):
rng = numpy.random.RandomState(utt.fetch_seed())
vW_in = asarrayX(rng.uniform(size = (2,2), low = -.1,high = .1))
v_u = asarrayX(rng.uniform(size = (5,2), low = -.1, high = .1))
v_x0 = asarrayX(rng.uniform(size = (2,), low = -.1,high = .1))
vW_in = asarrayX(rng.uniform(size = (2,2), low = -3.,high = 3.))
v_u = asarrayX(rng.uniform(size = (5,2), low = -3., high = 3.))
v_u2 = numpy.array([1,3,4,6,8], dtype='int32')
v_x0 = asarrayX(rng.uniform(size = (2,), low = -3.,high = 3.))
W_in = theano.tensor.matrix('win')
u = theano.tensor.matrix('u1')
x0 = theano.tensor.vector('x0')
# trng = theano.tensor.shared_randomstreams.RandomStreams(utt.fetch_seed())
u2 = theano.tensor.ivector('u2')
x0 = theano.tensor.vector('x0', dtype= theano.config.floatX)
# trng = theano.tensor.shared_randomstreams.RandomStreams(
# utt.fetch_seed())
def f_rnn_cmpl(u_t, x_tm1, W_in):
def f_rnn_cmpl(u_t,u2_t, x_tm1, W_in):
trng1 = theano.tensor.shared_randomstreams.RandomStreams(123)
x_t = theano.dot(u_t, W_in) + x_tm1 + trng1.uniform(low=-.1, high=.1)
return x_t
cost, updates = scan_project_sum(f_rnn_cmpl,u,x0,W_in, n_steps = None,
truncate_gradient = -1, go_backwards = False)
vparams = [v_u, v_x0,vW_in]
params = [u,x0,W_in ]
x_t = theano.tensor.cast(u2_t,theano.config.floatX) +\
theano.dot(u_t, W_in) + x_tm1 + \
trng1.uniform(low=-1.1, high=1.1,
dtype=theano.config.floatX)
return x_t, 2*u2_t
cost, updates = scan_project_sum(f_rnn_cmpl,[u,u2],[x0, None],W_in
, n_steps = None
, truncate_gradient = -1
, go_backwards = False)
vparams = [v_u,v_u2, v_x0,vW_in]
params = [u,u2,x0,W_in ]
gparams = theano.tensor.grad(cost, params)
grad_fn = theano.function([u,x0,W_in], gparams,
updates = updates, no_default_updates = True)
cost_fn = theano.function([u,x0,W_in], cost,
updates = updates, no_default_updates = True)
def reset_rng_cost_fn(*args):
for idx,arg in enumerate(cost_fn.maker.expanded_inputs):
if arg.value and type(arg.value.data) == type(numpy.random.RandomState(123)):
cost_fn.maker.expanded_inputs[idx].value.data = numpy.random.RandomState(123)
return cost_fn(*args)
def reset_rng_grad_fn(*args):
for idx,arg in enumerate(grad_fn.maker.expanded_inputs):
if arg.value and type(arg.value.data)==type(numpy.random.RandomState(123)):
grad_fn.maker.expanded_inputs[idx].value.data = numpy.random.RandomState(123)
return grad_fn(*args)
grad_fn = theano.function([u,u2,x0,W_in], gparams,
updates = updates, no_default_updates = True,
allow_input_downcast = True)
cost_fn = theano.function([u,u2,x0,W_in], cost,
updates = updates, no_default_updates = True,
allow_input_downcast = True)
def reset_rng_fn(fn, *args):
for idx,arg in enumerate(fn.maker.expanded_inputs):
if ( arg.value and type(arg.value.data) ==
type(numpy.random.RandomState(123))):
obj = fn.maker.expanded_inputs[idx].value
obj.data = numpy.random.RandomState(123)
fn.maker.expanded_inputs[idx].value = obj
return fn(*args)
reset_rng_cost_fn = lambda *args : reset_rng_fn(cost_fn, *args)
reset_rng_grad_fn = lambda *args : reset_rng_fn(grad_fn, *args)
num_grad = multiple_outputs_numeric_grad(reset_rng_cost_fn,\
[v_u,v_x0,vW_in] )
analytic_grad = reset_rng_grad_fn(v_u, v_x0, vW_in)
[v_u,v_u2,v_x0,vW_in], ndarray_mask = [True, False, True, True] )
analytic_grad = reset_rng_grad_fn(v_u,v_u2, v_x0, vW_in)
max_err, max_err_pos = num_grad.max_err(analytic_grad)
if max_err > 1e-2:
......@@ -915,40 +1174,52 @@ class T_Scan(unittest.TestCase):
W_in = theano.tensor.matrix('win')
u = theano.tensor.matrix('u1')
x0 = theano.tensor.vector('x0')
# trng = theano.tensor.shared_randomstreams.RandomStreams(utt.fetch_seed())
# trng = theano.tensor.shared_randomstreams.RandomStreams(
# utt.fetch_seed())
def f_rnn_cmpl(u_t, x_tm1, W_in):
trng1 = theano.tensor.shared_randomstreams.RandomStreams(123)
x_t = theano.dot(u_t, W_in) + x_tm1 + trng1.uniform(low=-.1, high=.1)
x_t = theano.dot(u_t, W_in) + x_tm1 + trng1.uniform(low=-.1
, high=.1)
x_t = theano.tensor.cast(x_t, dtype=theano.config.floatX)
return x_t
cost, updates = scan_project_sum(f_rnn_cmpl,u,x0,W_in, n_steps = None,
truncate_gradient = 3, go_backwards = False)
cost, updates = scan_project_sum(f_rnn_cmpl,u,x0,W_in
, n_steps = None
, truncate_gradient = 3
, go_backwards = False)
vparams = [v_u, v_x0,vW_in]
params = [u,x0,W_in ]
gparams = theano.tensor.grad(cost, params)
grad_fn = theano.function([u,x0,W_in], gparams,
updates = updates, no_default_updates = True)
updates = updates, no_default_updates = True,
allow_input_downcast = True,
mode = 'FAST_RUN_NOGC')
cost_fn = theano.function([u,x0,W_in], cost,
updates = updates, no_default_updates = True)
def reset_rng_cost_fn(*args):
for idx,arg in enumerate(cost_fn.maker.expanded_inputs):
if arg.value and type(arg.value.data) == type(numpy.random.RandomState(123)):
cost_fn.maker.expanded_inputs[idx].value.data = numpy.random.RandomState(123)
return cost_fn(*args)
def reset_rng_grad_fn(*args):
for idx,arg in enumerate(grad_fn.maker.expanded_inputs):
if arg.value and type(arg.value.data)==type(numpy.random.RandomState(123)):
grad_fn.maker.expanded_inputs[idx].value.data = numpy.random.RandomState(123)
return grad_fn(*args)
updates = updates, no_default_updates = True,
allow_input_downcast = True,
mode = 'FAST_RUN_NOGC')
def reset_rng_fn(fn, *args):
for idx,arg in enumerate(fn.maker.expanded_inputs):
if ( arg.value and type(arg.value.data) ==
type(numpy.random.RandomState(123))):
obj = fn.maker.expanded_inputs[idx].value
obj.data = numpy.random.RandomState(123)
fn.maker.expanded_inputs[idx].value = obj
try:
out = fn(*args)
except:
import GPUscan.ipdb; GPUscan.ipdb.set_trace()
out = fn(*args)
return out
reset_rng_cost_fn = lambda *args : reset_rng_fn(cost_fn, *args)
reset_rng_grad_fn = lambda *args : reset_rng_fn(grad_fn, *args)
num_grad = multiple_outputs_numeric_grad(reset_rng_cost_fn,\
[v_u,v_x0,vW_in] )
analytic_grad = reset_rng_grad_fn(v_u, v_x0, vW_in)
assert len(analytic_grad[0]) == 3
assert numpy.allclose(analytic_grad[0][:2],numpy.zeros((2,2)))
def test_draw_as_input_to_scan(self):
......@@ -958,10 +1229,11 @@ class T_Scan(unittest.TestCase):
y = trng.binomial(size = x.shape, p = x)
z,updates = theano.scan(lambda a:a, non_sequences=y, n_steps=2)
f = theano.function([x],[y,z], updates = updates)
f = theano.function([x],[y,z], updates = updates,
allow_input_downcast = True)
rng = numpy.random.RandomState(utt.fetch_seed())
nx = rng.uniform( size = (10,10) ).astype(config.floatX)
nx = rng.uniform( size = (10,10) )
ny1,nz1 = f(nx)
ny2,nz2 = f(nx)
......@@ -975,11 +1247,13 @@ class T_Scan(unittest.TestCase):
x1 = theano.shared(3.)
x1.name = 'x1'
x2 = theano.tensor.vector('x2')
y, updates = theano.scan(lambda v: v*x1, sequences = x2)
y, updates = theano.scan(
lambda v: theano.tensor.cast(v*x1,
theano.config.floatX)
, sequences = x2)
m = theano.tensor.grad(y.sum(), x1)
f = theano.function([x2], m)
print f([2,3])
f = theano.function([x2], m, allow_input_downcast = True)
assert numpy.allclose(f([2,3]) , 5)
def test_computing_gradient(self):
......@@ -988,10 +1262,10 @@ class T_Scan(unittest.TestCase):
K = x2*x1
out,updates = theano.scan(lambda i,v: theano.tensor.grad(K[i], v),
sequences = theano.tensor.arange(K.shape[0]), non_sequences=x1)
f = theano.function([x1], out)
sequences = theano.tensor.arange(K.shape[0])
, non_sequences=x1)
f = theano.function([x1], out, allow_input_downcast = True)
print f(3.)
assert numpy.all( f(3.) != 0. )
......@@ -1000,26 +1274,31 @@ class T_Scan(unittest.TestCase):
def test_shared_updates(self):
X = theano.shared( numpy.array( [[1,2,3],[4,5,6]]))
out,updates = theano.scan( lambda :{X: X+1}, outputs_info = [], non_sequences= [],
sequences = [], n_steps = 10)
out,updates = theano.scan( lambda :{X: X+1}
, outputs_info = []
, non_sequences= []
, sequences = []
, n_steps = 10)
f = theano.function([],[], updates = updates)
f()
print X.get_value(borrow=True)
print X.value
'''
def test_scan_output_padding(self):
"""
Scan outputs are usually lists, whose entries correspond to the intermediate result.
When n_steps=1, some extra machinery is required in order to mimic this interface. Scan
thus calls tensor.shape_padleft on the inner function outputs.
Scan outputs are usually lists, whose entries correspond to the
intermediate result. When n_steps=1, some extra machinery is
required in order to mimic this interface. Scan thus calls
tensor.shape_padleft on the inner function outputs.
However, this is not the proper behavior for:
* shared variables : these should not be padded in any way
* when return_steps is explicitely set to 1. Output should NOT be a list, but a tensor
corresponding to the result of the last iteration.
* when return_steps is explicitely set to 1. Output should NOT be
a list, but a tensor corresponding to the result of the last
iteration.
This unit test addresses the bug fix of changeset ba7157e95cb1.
"""
......@@ -1036,10 +1315,279 @@ class T_Scan(unittest.TestCase):
assert out.type.ndim == a.type.ndim
assert updates[b].type.ndim == b.type.ndim
out, updates = theano.scan(inner_func, outputs_info=[init_a], n_steps=1)
out, updates = theano.scan(inner_func, outputs_info=[init_a]
, n_steps=1)
assert out.type.ndim == a.type.ndim+1
assert updates[b].type.ndim == b.type.ndim
def test_scan_extra_inputs_hessian(self):
x = theano.tensor.vector('x')
A = theano.tensor.matrix('A')
fc1 = theano.shared(0.5)
fc2 = theano.shared(0.9)
y = fc1*theano.dot(x*x,theano.dot(A,x))
gy = theano.tensor.grad(y,x)
hy, updates = theano.scan(
lambda i, gy, x: theano.tensor.grad(gy[i]*fc2, x),
sequences = theano.tensor.arange(gy.shape[0]),
non_sequences = [gy,x])
f = theano.function([x,A], hy, allow_input_downcast = True)
vx = numpy.array([1.,1.] , dtype = theano.config.floatX)
vA = numpy.array([[1.,1.],[1.,0.]], dtype = theano.config.floatX)
vR = numpy.array([[3.6,1.8],[1.8,0.9]], dtype = theano.config.floatX)
assert numpy.allclose(f(vx,vA), vR)
def test_cloning_no_replace_strict_copy_inputs(self):
# This has nothing to do with scan, but it refers to the clone
# function that scan uses internally and that pfunc uses now and
# that users might want to use
x = theano.tensor.vector('x')
y = theano.tensor.vector('y')
z = theano.shared(0.25)
f1 = z*(x+y)**2+5
f2 = theano.clone( f1
, replace= None
, strict = True
, copy_inputs = True)
f2_inp = theano.gof.graph.inputs([f2])
assert z in f2_inp
assert x in f2_inp
assert y in f2_inp
def test_cloning_no_replace_strict_not_copy_inputs(self):
# This has nothing to do with scan, but it refers to the clone
# function that scan uses internally and that pfunc uses now and
# that users might want to use
x = theano.tensor.vector('x')
y = theano.tensor.vector('y')
z = theano.shared(0.25)
f1 = z*(x+y)**2+5
f2 = theano.clone( f1
, replace= None
, strict = True
, copy_inputs = False)
f2_inp = theano.gof.graph.inputs([f2])
assert not z in f2_inp
assert not x in f2_inp
assert not y in f2_inp
def test_cloning_replace_strict_copy_inputs(self):
# This has nothing to do with scan, but it refers to the clone
# function that scan uses internally and that pfunc uses now and
# that users might want to use
x = theano.tensor.vector('x')
y = theano.tensor.vector('y')
y2 = theano.tensor.vector('y2')
z = theano.shared(0.25)
f1 = z*(x+y)**2+5
f2 = theano.clone( f1
, replace= {y: y2}
, strict = True
, copy_inputs = True)
f2_inp = theano.gof.graph.inputs([f2])
assert z in f2_inp
assert x in f2_inp
assert y2 in f2_inp
def test_cloning_replace_not_strict_copy_inputs(self):
# This has nothing to do with scan, but it refers to the clone
# function that scan uses internally and that pfunc uses now and
# that users might want to use
x = theano.tensor.vector('x')
y = theano.tensor.fvector('y')
y2 = theano.tensor.dvector('y2')
z = theano.shared(0.25)
f1 = z*(x+y)**2+5
f2 = theano.clone( f1
, replace= {y: y2}
, strict = False
, copy_inputs = True)
f2_inp = theano.gof.graph.inputs([f2])
assert z in f2_inp
assert x in f2_inp
assert y2 in f2_inp
def test_cloning_replace_strict_not_copy_inputs(self):
# This has nothing to do with scan, but it refers to the clone
# function that scan uses internally and that pfunc uses now and
# that users might want to use
x = theano.tensor.vector('x')
y = theano.tensor.vector('y')
y2 = theano.tensor.vector('y2')
z = theano.shared(0.25)
f1 = z*(x+y)**2+5
f2 = theano.clone( f1
, replace= {y: y2}
, strict = True
, copy_inputs = False)
f2_inp = theano.gof.graph.inputs([f2])
assert not z in f2_inp
assert not x in f2_inp
assert not y2 in f2_inp
def test_cloning_replace_not_strict_not_copy_inputs(self):
# This has nothing to do with scan, but it refers to the clone
# function that scan uses internally and that pfunc uses now and
# that users might want to use
x = theano.tensor.vector('x')
y = theano.tensor.fvector('y')
y2 = theano.tensor.dvector('y2')
z = theano.shared(0.25)
f1 = z*(x+y)**2+5
f2 = theano.clone( f1
, replace= {y: y2}
, strict = False
, copy_inputs = False)
f2_inp = theano.gof.graph.inputs([f2])
assert not z in f2_inp
assert not x in f2_inp
assert not y2 in f2_inp
### TEST RE-ordering of inputs
# some rnn with multiple outputs and multiple inputs; other
# dimension instead of scalars/vectors
def test_reordering(self):
rng = numpy.random.RandomState(utt.fetch_seed())
vW_in2 = asarrayX(rng.uniform(size = (2,), low = -5.,high = 5.))
vW = asarrayX(rng.uniform(size = (2,2), low = -5.,high = 5.))
vWout = asarrayX(rng.uniform(size = (2,), low = -5.,high = 5.))
vW_in1 = asarrayX(rng.uniform(size = (2,2), low = -5.,high = 5.))
v_u1 = asarrayX(rng.uniform(size = (3,2), low = -5., high = 5.))
v_u2 = asarrayX(rng.uniform(size = (3,), low = -5.,high = 5.))
v_x0 = asarrayX(rng.uniform(size = (2,), low = -5.,high = 5.))
v_y0 = asarrayX(rng.uniform(size = (3,)))
W_in2 = theano.shared(vW_in2, name='win2')
W = theano.shared(vW, name='w')
W_out = theano.shared(vWout, name = 'wout')
W_in1 = theano.tensor.matrix('win')
u1 = theano.tensor.matrix('u1')
u2 = theano.tensor.vector('u2')
x0 = theano.tensor.vector('x0')
y0 = theano.tensor.vector('y0')
def f_rnn_cmpl(u1_t, u2_t, x_tm1, y_tm1, y_tm3, W_in1):
return [y_tm3+1, y_tm3+2, theano.dot(u1_t,W_in1) + u2_t * W_in2 + \
theano.dot(x_tm1, W),
y_tm1 + theano.dot(x_tm1, W_out)]
outputs, updates = theano.scan( f_rnn_cmpl
, [ u1
, u2]
, [ None
, None
, x0
, dict(initial=y0, taps=[-1,-3])]
, W_in1
, n_steps = None
, truncate_gradient = -1
, go_backwards = False)
f4 = theano.function([u1,u2,x0,y0,W_in1], outputs
, updates = updates
, allow_input_downcast = True)
# compute the values in numpy
v_x = numpy.zeros((3,2),dtype=theano.config.floatX)
v_y = numpy.zeros((3,),dtype=theano.config.floatX)
v_x[0] = numpy.dot(v_u1[0],vW_in1) + v_u2[0]*vW_in2 + \
numpy.dot(v_x0,vW)
v_y[0] = numpy.dot(v_x0,vWout) + v_y0[2]
for i in xrange(1,3):
v_x[i] = numpy.dot(v_u1[i],vW_in1) + v_u2[i]*vW_in2 + \
numpy.dot(v_x[i-1],vW)
v_y[i] = numpy.dot(v_x[i-1], vWout) + v_y[i-1]
(theano_dump1, theano_dump2, theano_x,theano_y) = f4( v_u1
, v_u2
, v_x0
, v_y0
, vW_in1)
assert numpy.allclose(theano_x , v_x)
assert numpy.allclose(theano_y , v_y)
### TEST store steps / return steps
def test_return_steps(self):
rng = numpy.random.RandomState(utt.fetch_seed())
vW_in2 = asarrayX(rng.uniform(size = (2,), low = -5.,high = 5.))
vW = asarrayX(rng.uniform(size = (2,2), low = -5.,high = 5.))
vWout = asarrayX(rng.uniform(size = (2,), low = -5.,high = 5.))
vW_in1 = asarrayX(rng.uniform(size = (2,2), low = -5.,high = 5.))
v_u1 = asarrayX(rng.uniform(size = (8,2), low = -5., high = 5.))
v_u2 = asarrayX(rng.uniform(size = (8,), low = -5.,high = 5.))
v_x0 = asarrayX(rng.uniform(size = (2,), low = -5.,high = 5.))
v_y0 = asarrayX(rng.uniform(size = (3,)))
W_in2 = theano.shared(vW_in2, name='win2')
W = theano.shared(vW, name='w')
W_out = theano.shared(vWout, name = 'wout')
W_in1 = theano.tensor.matrix('win')
u1 = theano.tensor.matrix('u1')
u2 = theano.tensor.vector('u2')
x0 = theano.tensor.vector('x0')
y0 = theano.tensor.vector('y0')
def f_rnn_cmpl(u1_t, u2_t, x_tm1, y_tm1, y_tm3, W_in1):
return [y_tm3+1, theano.dot(u1_t,W_in1) + u2_t * W_in2 + \
theano.dot(x_tm1, W),
y_tm1 + theano.dot(x_tm1, W_out)]
outputs, updates = theano.scan( f_rnn_cmpl
, [ u1
, u2]
, [ dict(store_steps = 3)
, dict(initial = x0, return_steps = 2)
, dict(initial=y0, taps=[-1,-3],
return_steps = 4)]
, W_in1
, n_steps = None
, truncate_gradient = -1
, go_backwards = False)
f4 = theano.function([u1,u2,x0,y0,W_in1], outputs
, updates = updates
, allow_input_downcast = True
)
# compute the values in numpy
v_x = numpy.zeros((8,2),dtype=theano.config.floatX)
v_y = numpy.zeros((8,),dtype=theano.config.floatX)
v_x[0] = numpy.dot(v_u1[0],vW_in1) + v_u2[0]*vW_in2 + \
numpy.dot(v_x0,vW)
v_y[0] = numpy.dot(v_x0,vWout) + v_y0[2]
for i in xrange(1,8):
v_x[i] = numpy.dot(v_u1[i],vW_in1) + v_u2[i]*vW_in2 + \
numpy.dot(v_x[i-1],vW)
v_y[i] = numpy.dot(v_x[i-1], vWout) + v_y[i-1]
(theano_dump, theano_x,theano_y) = f4( v_u1, v_u2, v_x0, v_y0, vW_in1)
assert numpy.allclose(theano_x , v_x[-2:])
assert numpy.allclose(theano_y , v_y[-4:])
def test_scan_as_tensor_on_gradients(self):
"""
......@@ -1050,32 +1598,97 @@ class T_Scan(unittest.TestCase):
f1 = theano.tensor.dscalar('f1')
def scanStep(prev, seq, f1):
return prev + f1 * seq
return prev + f1 * seq
scanned, _ = theano.scan(fn = scanStep, \
sequences = [seq], \
outputs_info = [to_scan], \
non_sequences = [f1])
f_scan = theano.function(inputs=[to_scan, seq, f1], outputs=scanned)
f_scan([1,2,3], numpy.arange(12).reshape([4,3]), 1.)
f_scan = theano.function(inputs=[to_scan, seq, f1], outputs=scanned
, allow_input_downcast = True)
t_grad = theano.tensor.grad(scanned.sum(), wrt=[to_scan, f1],
consider_constant=[seq])
f_grad = theano.function(inputs=[to_scan, seq, f1], outputs=t_grad)
f_grad = theano.function(inputs=[to_scan, seq, f1], outputs=t_grad,
allow_input_downcast = True)
def test_save_mem(self):
rng = numpy.random.RandomState(utt.fetch_seed())
vW_in2 = asarrayX(rng.uniform(size = (2,), low = -5.,high = 5.))
vW = asarrayX(rng.uniform(size = (2,2), low = -5.,high = 5.))
vWout = asarrayX(rng.uniform(size = (2,), low = -5.,high = 5.))
vW_in1 = asarrayX(rng.uniform(size = (2,2), low = -5.,high = 5.))
v_u1 = asarrayX(rng.uniform(size = (8,2), low = -5., high = 5.))
v_u2 = asarrayX(rng.uniform(size = (8,), low = -5.,high = 5.))
v_x0 = asarrayX(rng.uniform(size = (2,), low = -5.,high = 5.))
v_y0 = asarrayX(rng.uniform(size = (3,)))
W_in2 = theano.shared(vW_in2, name='win2')
W = theano.shared(vW, name='w')
W_out = theano.shared(vWout, name = 'wout')
W_in1 = theano.tensor.matrix('win')
u1 = theano.tensor.matrix('u1')
u2 = theano.tensor.vector('u2')
x0 = theano.tensor.vector('x0')
y0 = theano.tensor.vector('y0')
f_scan([1,2,3], numpy.arange(12).reshape([4,3]), 1.)
f_grad([1,2,3], numpy.arange(12).reshape([4,3]), 1.)
def f_rnn_cmpl(u1_t, u2_t, x_tm1, y_tm1, y_tm3, W_in1):
return [y_tm3+1, theano.dot(u1_t,W_in1) + u2_t * W_in2 + \
theano.dot(x_tm1, W),
y_tm1 + theano.dot(x_tm1, W_out)]
outputs, updates = theano.scan( f_rnn_cmpl
, [ u1
, u2]
, [ dict(return_steps = 1)
, dict(initial = x0
, return_steps = 1)
, dict(initial=y0, taps=[-1,-3],
return_steps = 1)]
, W_in1
, n_steps = None
, truncate_gradient = -1
, go_backwards = False)
f4 = theano.function([u1,u2,x0,y0,W_in1], outputs
, updates = updates
, allow_input_downcast = True
)
# compute the values in numpy
v_x = numpy.zeros((8,2),dtype=theano.config.floatX)
v_y = numpy.zeros((8,),dtype=theano.config.floatX)
v_x[0] = numpy.dot(v_u1[0],vW_in1) + v_u2[0]*vW_in2 + \
numpy.dot(v_x0,vW)
v_y[0] = numpy.dot(v_x0,vWout) + v_y0[2]
for i in xrange(1,8):
v_x[i] = numpy.dot(v_u1[i],vW_in1) + v_u2[i]*vW_in2 + \
numpy.dot(v_x[i-1],vW)
v_y[i] = numpy.dot(v_x[i-1], vWout) + v_y[i-1]
(theano_dump, theano_x,theano_y) = f4( v_u1, v_u2, v_x0, v_y0, vW_in1)
assert numpy.allclose(theano_x , v_x[-1:])
assert numpy.allclose(theano_y , v_y[-1:])
# NOTE(review): this method is corrupted merge/diff residue.  It interleaves
# two revisions of the same test (duplicate symbol definitions, duplicate
# `givens=` clauses, duplicate assertions) and contains a literal diff hunk
# header (the "......@@ ..." line below), so it is NOT valid Python as it
# stands.  Code is reproduced byte-for-byte; only comments were added.
# Purpose (from the surviving logic): build a scan whose number of steps is
# taken from `inpt.shape[0]`, run it once so n_steps gets cached, then check
# that the gradient w.r.t. W is still computed correctly.
def caching_nsteps_by_scan_op(self):
# Revision 1: symbolic inputs via the fully qualified module path.
W = theano.tensor.matrix('weights')
initial = theano.tensor.vector('initial')
inpt = theano.tensor.matrix('inpt')
# Revision 2: re-imports and redefines the same symbols via the T alias.
import theano
import theano.tensor as T
import scipy
W = T.matrix('weights')
initial = T.vector('initial')
inpt = T.matrix('inpt')
# One recurrence step: h_t = dot(h_tm1, W) + x_t.
def one_step(x_t, h_tm1, W):
expr = T.dot(h_tm1, W) + x_t
return expr
# NOTE(review): the two lines below duplicate the step body -- diff residue.
expr = T.dot(h_tm1, W) + x_t
return expr
expr, _ = theano.scan(
fn=one_step,
# NOTE(review): literal diff hunk header leaked from the merge -- invalid Python.
......@@ -1083,51 +1696,263 @@ class T_Scan(unittest.TestCase):
outputs_info=[initial],
non_sequences=[W])
floatX = theano.config.floatX
# `sh` is assigned but never used again in this fragment.
sh = expr.shape[0]
init_val = theano.shared( numpy.ones(5, dtype=floatX))
inpt_val = theano.shared( numpy.ones((5,5), dtype=floatX))
shapef = theano.function([W], expr,
# Revision 1 of the givens clause (pre-built shared numpy values) ...
givens={initial: init_val,
inpt: inpt_val })
# ... immediately followed by revision 2 (inline scipy-built values).
givens={initial: theano.shared(
scipy.ones(5,
dtype=theano.config.floatX)),
inpt: theano.shared(
scipy.ones((5, 5),
dtype=theano.config.floatX))})
# First execution to cache n_steps
val0 = numpy.ones((5,5), dtype = floatX)
shapef(val0)
# Duplicate first-execution call from the other revision.
shapef(scipy.ones((5, 5), dtype=theano.config.floatX))
cost = expr.sum()
d_cost_wrt_W = T.grad(cost, [W])
init_val = theano.shared( numpy.zeros(5, dtype =floatX))
f = theano.function([W, inpt], d_cost_wrt_W,
givens={initial: init_val})
# Expected gradient; 5187989 appears to be a precomputed constant -- TODO confirm.
rval = numpy.asarray([[5187989]*5]*5, dtype = floatX)
x = numpy.ones((5,5), dtype = floatX)
y = numpy.ones((10,5), dtype = floatX)
t_rval = f( x,y)
assert numpy.allclose( t_rval, rval)
# NOTE(review): stray second revision of the same check (orphan givens /
# rval / assert lines) -- diff residue.
givens={initial: theano.shared(scipy.zeros(5))})
rval = numpy.asarray([[5187989]*5]*5, dtype = theano.config.floatX)
assert numpy.allclose( f(scipy.ones((5, 5),
dtype=theano.config.floatX)
, scipy.ones((10, 5),
dtype=theano.config.floatX))
,rval)
# NOTE(review): only the head of this test survives here -- the rest of its
# body (the scan call, the grad and the final assert) was interleaved into
# the following methods by a bad merge.  Code kept byte-identical.
def only_one_output_of_grad_of_scan(self):
initial = theano.tensor.scalar('initial')
floatX = theano.config.floatX
# Step function: add 1 (as a floatX scalar) to the previous hidden value.
def one_step( h_tm1):
return h_tm1 + numpy.asarray(1., dtype=floatX)
# Checks the save-memory optimization: only a reduced number of scan steps
# should be kept when outputs are indexed/sliced.  NOTE(review): this body
# contains stray lines from a neighbouring test, merged in by a bad diff;
# code is kept byte-identical and the stray lines are flagged below.
def test_save_mem_reduced_number_of_steps(self):
# Step returns seven independent outputs derived from the input element.
def f_rnn(u_t):
return u_t+1., u_t+2., u_t+3., u_t+4.,u_t+5, u_t+6, u_t+7.
# NOTE(review): the five lines below use `one_step`/`initial`, which belong
# to only_one_output_of_grad_of_scan -- diff residue, not part of this test.
h, _ = theano.scan(
fn=one_step,
outputs_info=[initial],
n_steps = 3
)
u = theano.tensor.vector('u')
idx = theano.tensor.iscalar('idx')
jdx = theano.tensor.iscalar('jdx')
[x1,x2,x3,x4,x5,x6,x7], updates = theano.scan(f_rnn, u
, n_steps = None
, truncate_gradient = -1
, go_backwards = False)
# Each output is consumed through a different slicing pattern so the
# optimizer can shrink the stored history per output.
f2 = theano.function([u, idx, jdx]
,[ x1[:2],x2[4], x3[idx], x4[:idx],x5[-10],
x6[-jdx], x7[:-jdx]]
, updates = updates,
allow_input_downcast = True)
# get random initial values
rng = numpy.random.RandomState(utt.fetch_seed())
v_u = rng.uniform( size = (20,), low = -5., high = 5.)
# NOTE(review): stray line from only_one_output_of_grad_of_scan (uses h /
# initial defined there) -- diff residue.
gh = TT.grad(h[-1], initial)
# compute the output in numpy
tx1,tx2,tx3,tx4,tx5,tx6,tx7 = f2(v_u,3,15)
# Debug prints left in by the original author.
print tx2
print v_u +2
assert numpy.allclose(tx1, v_u[:2] +1.)
assert numpy.allclose(tx2, v_u[4] +2.)
assert numpy.allclose(tx3, v_u[3] +3.)
assert numpy.allclose(tx4, v_u[:3] +4.)
assert numpy.allclose(tx5, v_u[-10] +5.)
assert numpy.allclose(tx6, v_u[-15] +6.)
assert numpy.allclose(tx7, v_u[:-15]+7.)
# `scan_node` is fetched but no longer verified -- see the note below.
scan_node = f2.maker.env.outputs[0].owner.inputs[0]
## I'm not sure how to check the optimization anymore !!
''' old code checkign the optimization got applied
assertion = False
for inp in scan_node.owner.inputs[0].owner.inputs:
if (isinstance(inp, theano.tensor.Constant) and
inp.value == 5):
assertion = True
assert assertion
'''
# Checks save-memory behaviour when outputs have initial states and taps
# and only tail slices of the results are requested.  NOTE(review): two
# stray lines from a neighbouring test were merged into this body (flagged
# below); code is kept byte-identical.
def test_save_mem_store_steps(self):
# Step with taps on several outputs; returns seven derived values.
def f_rnn(u_t, x1_tm1, x1_tm3, x2_tm1, x3tm2, x3_tm1, x4_tm1 ):
return u_t+1., u_t+2., u_t+3., u_t+4.,u_t+5, u_t+6, u_t+7
# NOTE(review): the two lines below use `initial`/`gh`, which belong to
# only_one_output_of_grad_of_scan -- diff residue, not part of this test.
f = theano.function([initial], gh)
assert numpy.allclose( f(1.), 1.)
u = theano.tensor.vector('u')
idx = theano.tensor.iscalar('idx')
jdx = theano.tensor.iscalar('jdx')
x10 = theano.tensor.vector('x10')
x20 = theano.tensor.scalar('x20')
x30 = theano.tensor.vector('x30')
x40 = theano.tensor.scalar('x40')
[x1,x2,x3,x4,x5,x6,x7], updates = theano.scan(f_rnn, u
, [None, None, None
, dict(initial = x10, taps=[-1,-2])
, x20
, dict(initial = x30, taps=[-1,-2])
, x40]
, n_steps = None
, truncate_gradient = -1
, go_backwards = False)
# Only tail slices are requested, so the histories can be truncated.
f2 = theano.function([u, x10, x20, x30, x40]
,[ x1[-7], x2[-3:-1], x3[-6:]
, x4[-1], x5[-1]]
, updates = updates,
allow_input_downcast = True)
# get random initial values
rng = numpy.random.RandomState(utt.fetch_seed())
v_u = rng.uniform( size = (20,), low = -5., high = 5.)
# compute the output in numpy
tx1,tx2,tx3,tx4,tx5 = f2(v_u,[0,0],0,[0,0],0)
assert numpy.allclose(tx1, v_u[-7] +1.)
assert numpy.allclose(tx2, v_u[-3:-1] +2.)
assert numpy.allclose(tx3, v_u[-6:] +3.)
assert numpy.allclose(tx4, v_u[-1] +4.)
assert numpy.allclose(tx5, v_u[-1] +5.)
assert len(f2.maker.env.outputs) == 5
def test_remove_stuff(self):
x = theano.tensor.vector()
def lm(m):
trng = theano.tensor.shared_randomstreams.RandomStreams(
utt.fetch_seed())
return [ 2*m+ trng.uniform(low =-1.1, high =1.1,
dtype = theano.config.floatX),
m + trng.uniform(size=[3])]
[o1,o2], updates = theano.scan( lm,
sequences = x,
n_steps = None,
truncate_gradient = -1,
go_backwards = False)
go1 = theano.tensor.grad(o1.mean(), wrt = x)
f = theano.function([x],go1, updates = updates,
allow_input_downcast = True)
print f([1,2,3])
# ---------------------------------------------------------------------------
# Legacy manual test driver.  NOTE(review): leading indentation appears to
# have been stripped from this capture, so `unittest.main()` is not actually
# nested under the `if` guard as written; code is kept byte-identical.
# The numbered blocks below are toggled on/off with triple-quoted strings.
# ---------------------------------------------------------------------------
if __name__ == '__main__':
unittest.main()
# Disabled notice: a bare string literal, i.e. a no-op statement.
'''
print ' Use nosetests to run these tests '
'''
# Instantiate the test case so individual tests can be invoked by hand.
scan_tst = T_Scan()
# The triple-quoted string opened below disables the manual calls for tests
# 1 through 19; it is closed by the "#'''" line further down (inside the
# string, the leading '#' is just text, not a comment marker).
'''
print 1
scan_tst.test_generator_one_output_scalar()
#''
print 2
scan_tst.test_one_sequence_one_output_weights()
#''
print 3
scan_tst.test_one_sequence_one_output_weights_shared()
#''
print 4
scan_tst.test_multiple_inputs_multiple_outputs()
#''
print 5
scan_tst.test_using_taps_input_output()
#''
print 6
scan_tst.test_past_future_taps_shared()
#''
print 7
scan_tst.test_inplace1()
#''
print 8
scan_tst.test_inplace2()
#''
print 9
scan_tst.test_shared_arguments_with_updates()
#''
print 10
scan_tst.test_simple_shared_random()
#''
print 11
scan_tst.test_only_shared_no_input_no_output()
print 12
scan_tst.test_map_functionality()
print 13
scan_tst.test_map()
#''
print 14
scan_tst.test_backwards()
#''
print 15
scan_tst.test_reduce()
#''
print 15.5
scan_tst.test_save_mem()
#''
print 16
scan_tst.test_grad_one_output()
#''
print 17
scan_tst.test_grad_multiple_outs()
#''
print 17.5
scan_tst.test_multiple_outs_taps()
#''
print 18
scan_tst.test_grad_multiple_outs_taps()
#''
print 19
scan_tst.test_grad_multiple_outs_taps_backwards()
#'''
#print 19.5
#scan_tst.test_remove_stuff()
#'''
# From here on the calls are live module-level code (outside any string).
print 21
scan_tst.test_grad_multiple_outs_some_truncate()
#'''
print 22
scan_tst.test_grad_of_shared()
#''
print 23
scan_tst.test_computing_gradient()
#''
print 24
scan_tst.test_scan_output_padding()
print 25
scan_tst.test_scan_extra_inputs_hessian()
#''
print 26
scan_tst.test_cloning_no_replace_strict_copy_inputs()
print 27
scan_tst.test_cloning_no_replace_strict_not_copy_inputs()
print 28
scan_tst.test_cloning_replace_strict_copy_inputs()
print 29
scan_tst.test_cloning_replace_not_strict_copy_inputs()
print 30
scan_tst.test_cloning_replace_strict_not_copy_inputs()
print 31
scan_tst.test_cloning_replace_not_strict_not_copy_inputs()
#''
print 32
scan_tst.test_draw_as_input_to_scan()
#''
print 33
scan_tst.test_reordering()
#''
print 34
scan_tst.test_return_steps()
#''
print 35
scan_tst.test_scan_as_tensor_on_gradients()
#''
#''
print 36
scan_tst.test_save_mem_reduced_number_of_steps()
#''
print 37
scan_tst.test_save_mem_store_steps()
#'''
Markdown format
0%
You are adding 0 people to this discussion. Please proceed with caution.
Please finish editing this comment first!
Register or sign in to post a comment