Commit 8a1272bd, authored by nouiz

Merge pull request #1095 from goodfeli/determinism

Ready to merge: Determinism fix
......@@ -88,7 +88,7 @@ from printing import \
import scan_module
from scan_module import scan, map, reduce, foldl, foldr, clone
from updates import Updates
from updates import Updates, OrderedUpdates
import tensor
import scalar
......
......@@ -12,6 +12,8 @@ from function_module import orig_function
from profiling import ProfileStats
from pfunc import pfunc
from numpy import any # to work in python 2.4
import warnings
from theano import gof
def function(inputs, outputs=None, mode=None, updates=None, givens=None,
no_default_updates=False, accept_inplace=False, name=None,
......@@ -30,7 +32,7 @@ def function(inputs, outputs=None, mode=None, updates=None, givens=None,
:type mode: string or `Mode` instance.
:param mode: compilation mode
:type updates: iterable over pairs (shared_variable, new_expression). List, tuple or dict.
:type updates: iterable over pairs (shared_variable, new_expression). List, tuple or OrderedDict.
:param updates: update the values for SharedVariable inputs according to these expressions
:type givens: iterable over pairs (Var1, Var2) of Variables. List, tuple or dict. The Var1
......@@ -128,7 +130,7 @@ def function(inputs, outputs=None, mode=None, updates=None, givens=None,
def opt_log1p(node):
if not isinstance(node.op,Elemwise):
return
if not isinstance(node.op.scalar_op, log,):
if not isinstance(node.op.scalar_op, log):
return
inp = node.inputs[0]
if not inp.owner:
......@@ -159,10 +161,18 @@ def function(inputs, outputs=None, mode=None, updates=None, givens=None,
"""
# Tuples are used in some tests, as we accepted them in the past.
# I prefer to keep allowing them, as they behave the same as lists for what they are used for here.
if updates is None:
updates = []
if isinstance(updates, dict) and \
   not isinstance(updates, gof.python25.OrderedDict):
    # Warn rather than fail: a plain dict is still accepted, but (as the
    # message explains) it makes the result non-deterministic.
    # The ". " after the type is required; without it the type name runs
    # straight into the next sentence ("...got <type 'dict'>Using a ...").
    warnings.warn("Expected OrderedDict, got " + str(type(updates)) + ". Using "
        "a standard dictionary here results in "
        "non-deterministic behavior. You should use an OrderedDict"
        " if you are using python2.7 or use a list of (shared, update)"
        " pairs. Do not just convert your dictionary to this type before"
        " the call as the conversion will still be non-deterministic.")
if givens is None:
givens = []
if not isinstance(inputs, (list, tuple)):
......
......@@ -626,8 +626,15 @@ class Test_pfunc(unittest.TestCase):
# The order of the variables is not determined, so we try
# both shared variables.
f = theano.function([], [], updates={a: a, b: (2 * b)})
g = theano.function([], [], updates={a: (a * 2), b: b})
# TODO: explain the above comment. By "not determined" does
# this mean "not deterministic"?
# This test originally wrote the updates using dictionaries,
# and iterating over the dictionary was not deterministic.
# Is that all the comment above meant, or is the CVM intended
# to add extra non-determinism? Or is the CVM meant to
# deterministically but arbitrarily pick an order for the updates?
f = theano.function([], [], updates=[(a, a), (b, (2 * b))])
g = theano.function([], [], updates=[(a, (a * 2)), (b, b)])
f()
assert a.get_value(borrow=True).shape == (), a.get_value()
......@@ -642,10 +649,10 @@ class Test_pfunc(unittest.TestCase):
a = shared(1., 'a')
b = shared(numpy.ones((2, 3)), 'b')
# The order of the variables is not determined, so we try
# both shared variables.
f = theano.function([], [], updates={a: a, b: (2 * b - b)})
g = theano.function([], [], updates={a: (a * 2 - a), b: b})
# See comment in test_update_same about why we try both
# shared variables.
f = theano.function([], [], updates=[(a, a), (b, (2 * b - b))])
g = theano.function([], [], updates=[(a, (a * 2 - a)), (b, b)])
f()
assert a.get_value(borrow=True).shape == (), a.get_value()
......
......@@ -35,3 +35,6 @@ class NullType(Type):
def __hash__(self, other=None):
    """
    Return a hash that depends only on the class of this object.

    :param other: unused. The original signature required it, which made
        the built-in ``hash(obj)`` fail with a TypeError because the hash
        protocol calls ``__hash__`` with no extra argument. It is kept as
        an optional parameter so explicit two-argument calls still work.
    """
    return hash(type(self))
def __str__(self):
    """Return the constant string 'NullType' (contains no object id)."""
    return 'NullType'
......@@ -162,7 +162,7 @@ else:
if sys.version_info[:2] < (2, 7):
# The following implementation of OrderedDict compatible with python 2.4
# was taked from http://pypi.python.org/pypi/ordereddict/1.1
# was taken from http://pypi.python.org/pypi/ordereddict/1.1
# It is under the MIT license.
# Copyright (c) 2009 Raymond Hettinger
......
......@@ -20,6 +20,7 @@ import theano
from itertools import izip
from theano import gof
from theano.gof import Variable
from theano.gof.python25 import OrderedDict
from theano.gof.python25 import all
import theano.gof.utils
from theano.gof.null_type import NullType
......@@ -144,6 +145,9 @@ class DisconnectedType(theano.gof.type.Type):
" a symbolic placeholder."
))
def __str__(self):
    """Return the constant string 'DisconnectedType' (contains no object id)."""
    return 'DisconnectedType'
########################
# R Operator
......@@ -211,7 +215,7 @@ def Rop(f, wrt, eval_points):
# Tensor, Sparse and CudaNdArray have the ndim attribute
pass
seen_nodes = {}
seen_nodes = OrderedDict()
def _traverse(node):
""" TODO: writeme """
......@@ -432,14 +436,14 @@ def grad(cost, wrt, consider_constant=None,
if known_grads is not None:
outputs.extend(known_grads.keys())
var_to_node_to_idx = _populate_var_to_node_to_idx(
var_to_app_to_idx = _populate_var_to_app_to_idx(
outputs, wrt, consider_constant)
# build a dict mapping var to the gradient of cost with respect to var
grad_dict = {}
grad_dict = OrderedDict()
if known_grads is None:
known_grads = {}
known_grads = OrderedDict()
# The gradient of the cost is 1 unless specified otherwise by known_grads.
if cost is not None:
......@@ -501,10 +505,10 @@ def grad(cost, wrt, consider_constant=None,
# variables that do not influence the cost have zero gradient.
# if wrt is such a variable, populate the grad_dict with this info
# so that wrt not being in var_to_node_to_idx won't cause an error below
# so that wrt not being in var_to_app_to_idx won't cause an error below
# according to the flag, possibly raise an error if wrt is disconnected
for elem in wrt:
if elem not in var_to_node_to_idx and elem is not cost \
if elem not in var_to_app_to_idx and elem is not cost \
and elem not in grad_dict:
handle_disconnected(elem)
grad_dict[elem] = DisconnectedType()()
......@@ -521,7 +525,7 @@ def grad(cost, wrt, consider_constant=None,
if hasattr(g.type, 'dtype'):
assert g.type.dtype in tensor.float_dtypes
rval = _populate_grad_dict(var_to_node_to_idx,
rval = _populate_grad_dict(var_to_app_to_idx,
grad_dict, wrt, cost_name)
for i in xrange(len(rval)):
......@@ -579,7 +583,7 @@ def _node_to_pattern(node):
return connection_pattern
def _populate_var_to_node_to_idx(outputs, wrt, consider_constant):
def _populate_var_to_app_to_idx(outputs, wrt, consider_constant):
"""
Helper function for grad function.
......@@ -638,7 +642,7 @@ def _populate_var_to_node_to_idx(outputs, wrt, consider_constant):
# var_to_app_to_idx[var][node] = [i,j] means node has
# var as input at positions i and j
var_to_app_to_idx = {}
var_to_app_to_idx = OrderedDict()
# Set of variables that have been added to their true parents
# ('true' here means that the elements of the variable are a function
......@@ -676,7 +680,13 @@ def _populate_var_to_node_to_idx(outputs, wrt, consider_constant):
continue
if ipt not in var_to_app_to_idx:
var_to_app_to_idx[ipt] = {}
# This object here *must* be an OrderedDict, because
# we iterate over its keys when adding up the terms of
# the gradient on ipt. If it is a regular dict, the grad
# method will return something that is analytically correct,
# but whose order of doing additions depends on the memory
# location of the apply nodes.
var_to_app_to_idx[ipt] = OrderedDict()
app_to_idx = var_to_app_to_idx[ipt]
if app not in app_to_idx:
app_to_idx[app] = []
......@@ -731,12 +741,12 @@ class DisconnectedInputError(ValueError):
disconnected_inputs='raise'.
"""
def _populate_grad_dict(var_to_node_to_idx,
def _populate_grad_dict(var_to_app_to_idx,
grad_dict, wrt, cost_name=None):
"""
Helper function for grad function.
var_to_node_to_idx: a dictionary mapping a variable to
var_to_app_to_idx: a dictionary mapping a variable to
a second dictionary.
the second dictionary maps apply nodes acting on
this variable to the variable's index in the apply
......@@ -761,7 +771,7 @@ def _populate_grad_dict(var_to_node_to_idx,
"""
# build a dict mapping node to the terms node contributes to each of
# its inputs' gradients
term_dict = {}
term_dict = OrderedDict()
def access_term_cache(node):
""" Populates term_dict[node] and returns it """
......@@ -1001,15 +1011,17 @@ def _populate_grad_dict(var_to_node_to_idx,
#cache the result
term_dict[node] = input_grads
return term_dict[node]
# populate grad_dict[var] and return it
def access_grad_cache(var):
if var not in grad_dict:
# If var is not in grad_dict already, we must compute it
if var in var_to_node_to_idx:
if var in var_to_app_to_idx:
terms = []
node_to_idx = var_to_node_to_idx[var]
node_to_idx = var_to_app_to_idx[var]
for node in node_to_idx:
for idx in node_to_idx[node]:
......
......@@ -8,6 +8,8 @@ import logging
import os
import StringIO
import sys
# Not available on all platforms
hashlib = None
import numpy
......@@ -1069,3 +1071,78 @@ def min_informative_str(obj, indent_level=0,
rval = indent + prefix + name
return rval
def var_descriptor(obj, _prev_obs=None, _tag_generator=None):
    """
    Returns a string, with no newlines, fully specifying
    how a variable is computed. Does not include any memory
    location dependent information such as the id of a node.

    :param obj: the object to describe. Objects exposing `__array__` are
        described by their strides and an md5 digest of their buffer;
        objects with a non-None `name` by that name; objects with a
        non-None `owner` by the owner's op applied to the descriptions
        of the owner's inputs; anything else by `str(obj)`.
    :param _prev_obs: dict mapping `id(object)` to the tag already given
        to that object. Shared with the recursive calls so that an object
        seen again is rendered as a back-reference. A new dict is created
        when None.
    :param _tag_generator: object whose `get_tag()` method returns the
        next tag string. A `_TagGenerator` is created when None.

    :returns: `'<tag>'` if `obj` was already described during this
        traversal, otherwise `'tag=description'`.

    :raises RuntimeError: if hashlib cannot be imported.
    """

    # Import here, unconditionally. Testing a module-level `hashlib`
    # placeholder and then running `import hashlib` in this same function
    # cannot work: the import statement makes `hashlib` a local name, so
    # the test itself raises UnboundLocalError.
    try:
        import hashlib
    except ImportError:
        raise RuntimeError("Can't run var_descriptor because hashlib is not available.")

    if _prev_obs is None:
        _prev_obs = {}

    if id(obj) in _prev_obs:
        # Already described: emit a back-reference to its tag.
        tag = _prev_obs[id(obj)]

        return '<' + tag + '>'

    if _tag_generator is None:
        _tag_generator = _TagGenerator()

    cur_tag = _tag_generator.get_tag()

    # Register the tag before recursing so that inputs which refer to
    # this same object resolve to a back-reference.
    _prev_obs[id(obj)] = cur_tag

    if hasattr(obj, '__array__'):
        # hashlib hashes only the contents of the buffer, but
        # it can have different semantics depending on the strides
        # of the ndarray
        name = '<ndarray:'
        name += 'strides=[' + ','.join(str(stride) for stride in obj.strides) + ']'
        name += ',digest=' + hashlib.md5(obj).hexdigest() + '>'
    elif hasattr(obj, 'name') and obj.name is not None:
        name = obj.name
    elif hasattr(obj, 'owner') and obj.owner is not None:
        name = str(obj.owner.op) + '('
        name += ','.join(var_descriptor(ipt,
            _prev_obs=_prev_obs, _tag_generator=_tag_generator) for ipt
            in obj.owner.inputs)
        name += ')'
    else:
        name = str(obj)
        if ' at 0x' in name:
            # The __str__ method is encoding the object's id in its str
            name = position_independent_str(obj)
            if ' at 0x' in name:
                # Even the fallback description contains an address:
                # show it and fail rather than return it.
                print(name)
                assert False

    prefix = cur_tag + '='

    rval = prefix + name

    return rval
def position_independent_str(obj):
    """
    Build a description of `obj` from its type rather than from `str(obj)`.

    :param obj: the object to describe; only instances of
        `theano.gof.graph.Variable` are handled.

    :returns: the string `'theano_var{type=<str(obj.type)>}'`.

    :raises NotImplementedError: if `obj` is not a
        `theano.gof.graph.Variable`.
    """
    # Reject unsupported objects up front.
    if not isinstance(obj, theano.gof.graph.Variable):
        raise NotImplementedError()

    return 'theano_var' + '{type=' + str(obj.type) + '}'
......@@ -869,5 +869,5 @@ def test_stack_rows_segfault_070312():
out = theano.shared(numpy.random.rand(1, 2, 2, 3).astype('float32'))
op = theano.tensor.nnet.conv.ConvOp(imshp=(80, 96, 96), kshp=(9, 9),
nkern=1, bsize=1)
f = theano.function([], [], updates={out: op(img, kern)})
f = theano.function([], [], updates=[(out, op(img, kern))])
f()
......@@ -106,7 +106,7 @@ def test_alloc_memset_0():
def test_gpuspecifyshape():
x = cuda.shared_constructor(numpy.ones(3,dtype='float32'), 'x')
m = theano.tensor.specify_shape(x + numpy.float32(1), (3,))
f = theano.function([], updates={x:m * numpy.float32(2)},
f = theano.function([], updates=[(x, m * numpy.float32(2))],
mode=mode_with_gpu)
l = f.maker.fgraph.toposort()
assert not numpy.any([isinstance(x.op, cuda.HostFromGpu) for x in l])
......
......@@ -60,11 +60,11 @@ class T_updates(unittest.TestCase):
data = numpy.float32([1, 2, 3, 4])
x = f32sc(data)
y = x ** 2
f = theano.function([], y, updates={x: x + 1})
f = theano.function([], y, updates=[(x, x + 1)])
f()
# Test that we can update with a CudaVariable
f = theano.function([], y, updates={x: cuda.gpu_from_host(x + 1)})
f = theano.function([], y, updates=[(x, cuda.gpu_from_host(x + 1))])
f()
def test_2(self):
......@@ -74,7 +74,7 @@ class T_updates(unittest.TestCase):
value=numpy.zeros((10, 10), 'float32'))
x = tensor.fmatrix('x')
output_updates = {output_var: x ** 2}
output_updates = [(output_var, x ** 2)]
output_givens = {x: data}
output_func = theano.function(inputs=[], outputs=[],
updates=output_updates, givens=output_givens)
......@@ -89,8 +89,8 @@ class T_updates(unittest.TestCase):
# the update_var has type matrix, and the update expression
# is a broadcasted scalar, and that should not be allowed.
self.assertRaises(TypeError, theano.function, inputs=[], outputs=[],
updates={output_var:
output_var.sum()})
updates=[(output_var,
output_var.sum())])
def test_err_broadcast(self):
# Test that we raise a good error message when we don't
......@@ -101,8 +101,8 @@ class T_updates(unittest.TestCase):
# the update_var has type matrix, and the update expression
# is a broadcasted scalar, and that should not be allowed.
self.assertRaises(TypeError, theano.function, inputs=[], outputs=[],
updates={output_var:
output_var.sum().dimshuffle('x', 'x')})
updates=[(output_var,
output_var.sum().dimshuffle('x', 'x'))])
def test_broadcast(self):
# Test that we can rebroadcast
......@@ -111,11 +111,11 @@ class T_updates(unittest.TestCase):
up = tensor.unbroadcast(output_var.sum().dimshuffle('x', 'x'), 0, 1)
output_func = theano.function(inputs=[], outputs=[],
updates={output_var: up})
updates=[(output_var, up)])
output_func()
up = tensor.patternbroadcast(output_var.sum().dimshuffle('x', 'x'),
output_var.type.broadcastable)
output_func = theano.function(inputs=[], outputs=[],
updates={output_var: up})
updates=[(output_var, up)])
output_func()
......@@ -13,14 +13,16 @@ __contact__ = "Razvan Pascanu <r.pascanu@gmail>"
import itertools
import logging
import numpy
import warnings
from theano.compile import SharedVariable, function
from theano import compile
from theano import gof
from theano.gof.python25 import OrderedDict
from theano.tensor import opt
from theano import tensor
from theano import config
from theano.updates import Updates
from theano.updates import OrderedUpdates
from theano.scan_module import scan_op
......@@ -147,7 +149,7 @@ def scan(fn,
n_seqs = len(seqs)
n_outs = len(outs_info)
return_steps = {}
return_steps = OrderedDict()
# wrap outputs info in a dictionary if they are not already in one
for i in xrange(n_outs):
if outs_info[i] is not None:
......@@ -242,7 +244,7 @@ def scan(fn,
mit_sot_inner_inputs = []
mit_sot_inner_slices = []
mit_sot_inner_outputs = []
mit_sot_return_steps = {}
mit_sot_return_steps = OrderedDict()
mit_sot_tap_array = []
mit_sot_rightOrder = []
......@@ -251,7 +253,7 @@ def scan(fn,
sit_sot_inner_inputs = []
sit_sot_inner_slices = []
sit_sot_inner_outputs = []
sit_sot_return_steps = {}
sit_sot_return_steps = OrderedDict()
sit_sot_rightOrder = []
nit_sot_steps = []
# go through outputs picking up time slices as needed
......@@ -398,7 +400,8 @@ def scan(fn,
not isinstance(arg, tensor.Constant))]
# when we apply the lambda expression we get a mixture of update rules
# and outputs that needs to be separated
condition, outputs, updates = scan_utils.get_updates_and_outputs(fn(*args))
lambda_result = fn(*args)
condition, outputs, updates = scan_utils.get_updates_and_outputs(lambda_result)
if condition is not None:
as_while = True
else:
......@@ -464,6 +467,13 @@ def scan(fn,
dummy_outs = outputs
if condition is not None:
dummy_outs.append(condition)
# If we use a regular dict here, the results are non-deterministic
if not isinstance(updates, (list, tuple)):
if isinstance(updates, dict) and \
not isinstance(updates, gof.python25.OrderedDict):
warnings.warn("Using non-deterministic dictionary.")
dummy_f = function(dummy_args,
dummy_outs,
updates=updates,
......@@ -508,7 +518,7 @@ def scan(fn,
sit_sot_inner_outputs.append(outputs[i])
## Step 5.3 Outputs that correspond to update rules of shared variables
givens = {}
givens = OrderedDict()
n_shared_outs = 0
shared_scan_inputs = []
shared_inner_inputs = []
......@@ -527,7 +537,7 @@ def scan(fn,
## Step 5.4 Outputs with no taps used in the input
n_nit_sot = 0
nit_sot_inner_outputs = []
nit_sot_return_steps = {}
nit_sot_return_steps = OrderedDict()
nit_sot_rightOrder = []
for i, out in enumerate(outs_info):
if not 'taps' in out:
......@@ -582,7 +592,7 @@ def scan(fn,
shared_inner_outputs)
if condition is not None:
inner_outs.append(condition)
new_givens = {}
new_givens = OrderedDict()
for w, w_copy in givens.iteritems():
new_givens[w] = w.type.filter_variable(w_copy)
......@@ -593,7 +603,7 @@ def scan(fn,
##
tap_array = mit_sot_tap_array + [[-1] for x in xrange(n_sit_sot)]
info = {}
info = OrderedDict()
info['tap_array'] = tap_array
info['n_seqs'] = n_seqs
......@@ -607,7 +617,7 @@ def scan(fn,
info['truncate_gradient'] = -1
info['name'] = name
info['mode'] = mode
info['destroy_map'] = {}
info['destroy_map'] = OrderedDict()
info['inplace'] = False
info['gpu'] = False
info['as_while'] = as_while
......@@ -641,7 +651,7 @@ def scan(fn,
### and so on ...
##
update_map = Updates()
update_map = OrderedUpdates()
offset = n_mit_mot
offsets = [abs(numpy.min(x)) for x in mit_sot_tap_array]
......@@ -675,4 +685,5 @@ def scan(fn,
elif len(scan_out_list) == 0:
scan_out_list = None
assert isinstance(update_map, dict) and 'Ordered' in str(type(update_map))
return (scan_out_list, update_map)
......@@ -46,17 +46,12 @@ from itertools import izip
import logging
import numpy
from theano.compile import SharedVariable, function
from theano import compile
from theano import gof
from theano.tensor import opt, TensorVariable
from theano.tensor.sharedvar import TensorSharedVariable
from theano import tensor
from theano import config
from theano.updates import Updates
from theano.scalar.sharedvar import shared as scalar_shared
from theano.compile.pfunc import rebuild_collect_shared
import theano
import scan_op
import scan_utils
......
......@@ -52,8 +52,9 @@ from theano import gof
from theano.tensor import opt
from theano import tensor
from theano import config
from theano.updates import Updates
from theano.updates import OrderedUpdates
from theano.compile import ops
from theano.gof.python25 import OrderedDict
import scan_op
......@@ -376,11 +377,11 @@ def scan(fn,
n_seqs = len(seqs)
n_outs = len(outs_info)
return_steps = {}
return_steps = OrderedDict()
# wrap sequences in a dictionary if they are not already dictionaries
for i in xrange(n_seqs):
if not isinstance(seqs[i], dict):
seqs[i] = dict(input=seqs[i], taps=[0])
seqs[i] = OrderedDict([('input', seqs[i]), ('taps', [0])])
elif seqs[i].get('taps', None):
seqs[i]['taps'] = wrap_into_list(seqs[i]['taps'])
elif seqs[i].get('taps', True) is None:
......@@ -402,7 +403,7 @@ def scan(fn,
if not isinstance(outs_info[i], dict):
# by default any output has a tap value of -1
outs_info[i] = dict(initial=outs_info[i], taps=[-1])
outs_info[i] = OrderedDict([('initial', outs_info[i]), ('taps', [-1])])
elif (not outs_info[i].get('initial', None) and
outs_info[i].get('taps', None)):
# ^ no initial state but taps provided
......@@ -421,8 +422,8 @@ def scan(fn,
outs_info[i]['taps'] = [-1]
else:
# if a None is provided as the output info we replace it
# with an empty dict() to simplify handling
outs_info[i] = dict()
# with an empty OrderedDict() to simplify handling
outs_info[i] = OrderedDict()
##
### Step 2. Generate inputs and outputs of the inner functions
......@@ -565,7 +566,7 @@ def scan(fn,
mit_sot_inner_inputs = []
mit_sot_inner_slices = []
mit_sot_inner_outputs = []
mit_sot_return_steps = {}
mit_sot_return_steps = OrderedDict()
mit_sot_tap_array = []
mit_sot_rightOrder = []
......@@ -574,7 +575,7 @@ def scan(fn,
sit_sot_inner_inputs = []
sit_sot_inner_slices = []
sit_sot_inner_outputs = []
sit_sot_return_steps = {}
sit_sot_return_steps = OrderedDict()
sit_sot_rightOrder = []
# go through outputs picking up time slices as needed
......@@ -777,7 +778,7 @@ def scan(fn,
# as non sequences at the end of our args
fake_nonseqs = [x.type() for x in non_seqs]
fake_outputs = scan_utils.clone(outputs,
replace=dict(zip(non_seqs,
replace=OrderedDict(zip(non_seqs,
fake_nonseqs)))
all_inputs = itertools.ifilter(
lambda x: (isinstance(x, gof.Variable) and
......@@ -825,7 +826,7 @@ def scan(fn,
n_outs = len(dummy_f.maker.outputs)
if as_while:
n_outs = n_outs - 1
outs_info = [dict() for x in xrange(n_outs)]
outs_info = [OrderedDict() for x in xrange(n_outs)]
## Step 5.1 Outputs with taps different than -1
......@@ -839,7 +840,7 @@ def scan(fn,
sit_sot_inner_outputs.append(outputs[i])
## Step 5.3 Outputs that correspond to update rules of shared variables
givens = {}
givens = OrderedDict()
n_shared_outs = 0
shared_scan_inputs = []
shared_inner_inputs = []
......@@ -879,7 +880,7 @@ def scan(fn,
## Step 5.4 Outputs with no taps used in the input
n_nit_sot = 0
nit_sot_inner_outputs = []
nit_sot_return_steps = {}
nit_sot_return_steps = OrderedDict()
nit_sot_rightOrder = []
for i, out in enumerate(outs_info):
if not 'taps' in out:
......@@ -902,7 +903,7 @@ def scan(fn,
if (not isinstance(arg, SharedVariable) and
not isinstance(arg, tensor.Constant))]
givens.update(dict(zip(other_scan_args, other_inner_args)))
givens.update(OrderedDict(zip(other_scan_args, other_inner_args)))
other_shared_scan_args = [arg.variable for arg
in dummy_f.maker.expanded_inputs
if (isinstance(arg.variable, SharedVariable) and
......@@ -911,7 +912,7 @@ def scan(fn,
in dummy_f.maker.expanded_inputs
if (isinstance(arg.variable, SharedVariable) and
not arg.update)]
givens.update(dict(zip(other_shared_scan_args,
givens.update(OrderedDict(zip(other_shared_scan_args,
other_shared_inner_args)))
##
......@@ -943,7 +944,7 @@ def scan(fn,
# replace w with w_copy, where w is CudaNdarray
# and w_copy is TensorType. This is caused because shared
# variables are put on GPU right away >:| ,
new_givens = {}
new_givens = OrderedDict()
for w, w_copy in givens.iteritems():
if (isinstance(w.type, cuda.CudaNdarrayType)
......@@ -962,7 +963,7 @@ def scan(fn,
##
tap_array = mit_sot_tap_array + [[-1] for x in xrange(n_sit_sot)]
info = {}
info = OrderedDict()
info['tap_array'] = tap_array
info['n_seqs'] = n_seqs
......@@ -976,7 +977,7 @@ def scan(fn,
info['truncate_gradient'] = truncate_gradient
info['name'] = name
info['mode'] = mode
info['destroy_map'] = {}
info['destroy_map'] = OrderedDict()
info['gpu'] = False
info['as_while'] = as_while
info['profile'] = profile
......@@ -1012,7 +1013,7 @@ def scan(fn,
### and so on ...
##
update_map = Updates()
update_map = OrderedUpdates()
def remove_dimensions(outs, steps_return, offsets=None):
out_ls = []
......
......@@ -18,12 +18,13 @@ import logging
from itertools import izip
import numpy
import warnings
import theano
from theano.compile.pfunc import rebuild_collect_shared
from theano import gof
from theano import tensor, scalar
from theano.gof.python25 import all
from theano.gof.python25 import all, OrderedDict
from theano.tensor.basic import get_constant_value
......@@ -181,12 +182,17 @@ def clone(output,
def get_updates_and_outputs(ls):
"""
This function tries to recognize the updates dictionary, the
This function tries to recognize the updates OrderedDict, the
list of outputs and the stopping condition returned by the
lambda expression and arrange them in a predefined order
WRITEME: what is the type of ls? how is it formatted?
if it's not in the predefined order already, how does
this function know how to put it in that order?
"""
def is_outputs(elem):
if (isinstance(elem, (list, tuple)) and
all([isinstance(x, theano.Variable) for x in elem])):
......@@ -197,6 +203,11 @@ def get_updates_and_outputs(ls):
def is_updates(elem):
if isinstance(elem, dict):
# Make sure the updates will be applied in a deterministic order
if not isinstance(elem, gof.python25.OrderedDict):
warnings.warn("Expected OrderedDict or OrderedUpdates, got "\
+str(type(elem))+". This can make your script non-"
"deterministic.")
return True
# Dictionaries can be given as lists of tuples
if (isinstance(elem, (list, tuple)) and
......@@ -240,12 +251,13 @@ def get_updates_and_outputs(ls):
'variables (or `theano.scan_module.until` objects for '
'conditions). In particular if you need to use constant '
'values, you can use `tensor.constant` to turn them into '
'Theano variables.')
'Theano variables.')
if is_outputs(ls):
return None, _list(ls), {}
return None, _list(ls), OrderedDict()
if is_updates(ls):
return None, [], dict(ls)
return None, [], OrderedDict(ls)
error_msg = ('Scan cannot parse the return value of your lambda '
'expression, which is: %s' % (ls,))
if not isinstance(ls, (list, tuple)):
......@@ -258,16 +270,16 @@ def get_updates_and_outputs(ls):
if len(ls) == 2:
if is_outputs(ls[0]):
if is_updates(ls[1]):
return (None, _list(ls[0]), dict(ls[1]))
return (None, _list(ls[0]), OrderedDict(ls[1]))
elif is_condition(ls[1]):
return (ls[1].condition, _list(ls[0]), {})
return (ls[1].condition, _list(ls[0]), OrderedDict())
else:
raise ValueError(error_msg)
elif is_updates(ls[0]):
if is_outputs(ls[1]):
raise ValueError(deprecation_msg)
elif is_condition(ls[1]):
return (ls[1].condition, [], dict(ls[0]))
return (ls[1].condition, [], OrderedDict(ls[0]))
else:
raise ValueError(error_msg)
else:
......@@ -276,7 +288,7 @@ def get_updates_and_outputs(ls):
if is_outputs(ls[0]):
if is_updates(ls[1]):
if is_condition(ls[2]):
return (ls[2].condition, _list(ls[0]), dict(ls[1]))
return (ls[2].condition, _list(ls[0]), OrderedDict(ls[1]))
else:
raise ValueError(error_msg)
else:
......
......@@ -16,6 +16,7 @@ from theano.compile.pfunc import rebuild_collect_shared
from theano.gof.python25 import any
from theano.tests import unittest_tools as utt
import theano.scalar.sharedvar
from theano.gof.python25 import OrderedDict
from numpy.testing.noseclasses import KnownFailureTest
......@@ -1009,7 +1010,7 @@ class T_Scan(unittest.TestCase):
x0 = theano.tensor.constant(x0)
to_replace = outputs[0].owner.inputs[0].owner.inputs[1]
outputs = theano.clone(outputs,
replace={to_replace: x0})
replace=[(to_replace, x0)])
mode = theano.compile.mode.get_mode(None).including('inplace')
f9 = theano.function([],
outputs,
......@@ -1299,7 +1300,7 @@ class T_Scan(unittest.TestCase):
state = theano.shared(v_state, 'vstate')
def f_2():
return {state: 2 * state}
return OrderedDict([(state, 2 * state)])
n_steps = theano.tensor.iscalar('nstep')
output, updates = theano.scan(f_2,
[],
......@@ -1829,7 +1830,7 @@ class T_Scan(unittest.TestCase):
X = theano.shared(numpy.array(1))
out, updates = theano.scan(
lambda: {X: X + 1},
lambda: OrderedDict([(X, (X + 1))]),
outputs_info=[],
non_sequences=[],
sequences=[],
......@@ -1844,7 +1845,7 @@ class T_Scan(unittest.TestCase):
y = theano.shared(numpy.array(1))
out, updates = theano.scan(
lambda: {x: x + 1, y: x},
lambda: OrderedDict([(x, x + 1), (y, x)]),
outputs_info=[],
non_sequences=[],
sequences=[],
......@@ -1880,11 +1881,11 @@ class T_Scan(unittest.TestCase):
b = theano.shared(numpy.random.rand(5, 4))
def inner_func(a):
return a + 1, {b: 2 * b}
return a + 1, OrderedDict([(b, 2 * b)])
out, updates = theano.scan(
inner_func,
outputs_info=[{'initial': init_a}],
outputs_info=[OrderedDict([('initial', init_a)])],
n_steps=1)
out = out[-1]
assert out.type.ndim == a.type.ndim
......@@ -1967,7 +1968,7 @@ class T_Scan(unittest.TestCase):
f1 = z * (x + y) ** 2 + 5
f2 = theano.clone(f1,
replace={y: y2},
replace=OrderedDict([(y, y2)]),
strict=True,
copy_inputs=True)
f2_inp = theano.gof.graph.inputs([f2])
......@@ -1986,7 +1987,7 @@ class T_Scan(unittest.TestCase):
f1 = z * (x + y) ** 2 + 5
f2 = theano.clone(f1,
replace={y: y2},
replace=OrderedDict([(y, y2)]),
strict=False,
copy_inputs=True)
f2_inp = theano.gof.graph.inputs([f2])
......@@ -2005,7 +2006,7 @@ class T_Scan(unittest.TestCase):
f1 = z * (x + y) ** 2 + 5
f2 = theano.clone(f1,
replace={y: y2},
replace=[(y, y2)],
strict=True,
copy_inputs=False)
f2_inp = theano.gof.graph.inputs([f2])
......@@ -2024,7 +2025,7 @@ class T_Scan(unittest.TestCase):
f1 = z * (x + y) ** 2 + 5
f2 = theano.clone(f1,
replace={y: y2},
replace=[(y, y2)],
strict=False,
copy_inputs=False)
f2_inp = theano.gof.graph.inputs([f2])
......@@ -2204,15 +2205,15 @@ class T_Scan(unittest.TestCase):
v2 = theano.shared(numpy.ones((5, 5), dtype=theano.config.floatX))
shapef = theano.function([W],
expr,
givens={initial: v1,
inpt: v2})
givens=OrderedDict([(initial, v1),
(inpt, v2)]))
# First execution to cache n_steps
shapef(numpy.ones((5, 5), dtype=theano.config.floatX))
cost = expr.sum()
d_cost_wrt_W = tensor.grad(cost, [W])
f = theano.function([W, inpt], d_cost_wrt_W,
givens={initial: theano.shared(numpy.zeros(5))})
givens=OrderedDict([(initial, theano.shared(numpy.zeros(5)))]))
rval = numpy.asarray([[5187989] * 5] * 5, dtype=theano.config.floatX)
arg1 = numpy.ones((5, 5), dtype=theano.config.floatX)
......@@ -3166,7 +3167,7 @@ class T_Scan(unittest.TestCase):
shared_var = theano.shared(numpy.float32(1.))
def inner_fn():
return [], {shared_var: shared_var + numpy.float32(1.)}
return [], OrderedDict([(shared_var, shared_var + numpy.float32(1.))])
_, updates = theano.scan(inner_fn,
n_steps=10,
truncate_gradient=-1,
......@@ -3239,7 +3240,7 @@ class T_Scan(unittest.TestCase):
seq = tensor.matrix()
initial_value = theano.shared(numpy.zeros((4, 1),
dtype=theano.config.floatX))
outputs_info = [{'initial': initial_value, 'taps': [-4]}, None]
outputs_info = [OrderedDict([('initial', initial_value), ('taps', [-4])]), None]
results, updates = theano.scan(fn=onestep,
sequences=seq,
outputs_info=outputs_info)
......@@ -3259,13 +3260,13 @@ class T_Scan(unittest.TestCase):
seq = tensor.matrix()
initial_value = theano.shared(numpy.zeros((4, 1),
dtype=theano.config.floatX))
outputs_info = [{'initial': initial_value, 'taps': [-4]}, None]
outputs_info = [OrderedDict([('initial', initial_value), ('taps', [-4])]), None]
results, _ = theano.scan(fn=onestep,
sequences=seq,
outputs_info=outputs_info)
sharedvar = theano.shared(numpy.zeros((1, 1),
dtype=theano.config.floatX))
updates = {sharedvar: results[0][-1:]}
updates = OrderedDict([(sharedvar, results[0][-1:])])
f = theano.function([seq], results[1], updates=updates)
assert numpy.all(exp_out == f(inp))
......@@ -3354,9 +3355,9 @@ def test_speed():
theano.printing.debugprint(s_rinc)
f = theano.function([],
[],
updates={
s_i: s_i + 1,
shared_r: s_rinc},
updates=OrderedDict([
(s_i, s_i + 1),
(shared_r, s_rinc)]),
mode=theano.Mode(linker='cvm'))
f._check_for_aliased_inputs = False
t2 = time.time()
......@@ -3430,9 +3431,9 @@ def test_speed_rnn():
w)),
tolerate_inplace_aliasing=True)
f = theano.function([], [],
updates={
s_i: s_i + 1,
shared_r: s_rinc},
updates=OrderedDict([
(s_i, s_i + 1),
(shared_r, s_rinc)]),
mode=theano.Mode(linker='cvm'))
#theano.printing.debugprint(f)
f_fn = f.fn
......@@ -3495,9 +3496,9 @@ def test_speed_batchrnn():
tolerate_inplace_aliasing=True)
f = theano.function([],
[],
updates={
s_i: s_i + 1,
shared_r: s_rinc},
updates=[
(s_i, s_i + 1),
(shared_r, s_rinc)],
mode=theano.Mode(linker='cvm'))
#theano.printing.debugprint(f)
f_fn = f.fn
......
......@@ -1219,7 +1219,7 @@ class UsmmTests(unittest.TestCase):
mode = theano.compile.mode.get_default_mode().excluding('fusion')
if inplace:
updates = {z: z - a * theano.sparse.dot(x, y)}
updates = [(z, z - a * theano.sparse.dot(x, y))]
f_a = theano.function([a, x, y], [],
updates=updates,
mode=mode)
......
......@@ -9,7 +9,7 @@ import numpy
import theano
import theano.tensor as T
from theano.gof.python25 import any
from theano.gof.python25 import any, OrderedDict
def gen_data():
......@@ -293,7 +293,7 @@ def test_mlp():
# TODO: refine that and include only those
mode = theano.compile.get_default_mode().including('fast_run')
updates2 = {}
updates2 = OrderedDict()
updates2[classifier.hiddenLayer.params[0]]=T.grad(cost,classifier.hiddenLayer.params[0])
train_model =theano.function( inputs = [index],
......
......@@ -185,8 +185,8 @@ class t_gemm(TestCase):
l2_reg = T.constant(0.0001).astype(config.floatX)
#test constant merge with gemm
f = theano.function([a, b], updates={s: lr1 * T.dot(a, b) +
l2_reg * lr2 * s},
f = theano.function([a, b], updates=[(s, lr1 * T.dot(a, b) +
l2_reg * lr2 * s)],
mode=mode_not_fast_compile).maker.fgraph.toposort()
#[Gemm{inplace}(<TensorType(float64, matrix)>, 0.01,
# <TensorType(float64, matrix)>, <TensorType(float64, matrix)>,
......@@ -195,8 +195,8 @@ class t_gemm(TestCase):
assert f[0].op == gemm_inplace
#test factored scalar with merge
f = theano.function([a, b], updates={s: lr1 * (T.dot(a, b) -
l2_reg * s)},
f = theano.function([a, b], updates=[(s, lr1 * (T.dot(a, b) -
l2_reg * s))],
mode=mode_not_fast_compile).maker.fgraph.toposort()
#[Gemm{inplace}(<TensorType(float64, matrix)>, 0.01,
# <TensorType(float64, matrix)>, <TensorType(float64, matrix)>,
......@@ -206,7 +206,7 @@ class t_gemm(TestCase):
#test factored scalar with merge and neg
f = theano.function([a, b],
updates={s: s - lr1 * (s * .0002 + T.dot(a, b))},
updates=[(s, s - lr1 * (s * .0002 + T.dot(a, b)))],
mode=mode_not_fast_compile).maker.fgraph.toposort()
#[Gemm{inplace}(<TensorType(float64, matrix)>, -0.01,
# <TensorType(float64, matrix)>, <TensorType(float64, matrix)>,
......@@ -368,7 +368,7 @@ class t_gemm(TestCase):
tz_i = gemm_no_inplace(tz[:, :, i], ta, tx[
:, :, i], ty[:, :, i], tb)
g_i = theano.function([], tz_i,
updates={tz: T.set_subtensor(tz[:, :, i], tz_i)},
updates=[(tz, T.set_subtensor(tz[:, :, i], tz_i))],
mode=compile.Mode(optimizer=None, linker=l))
for j in xrange(3):
g_i()
......@@ -801,7 +801,7 @@ def test_gemm_unrolled():
cur_V = update_V(cur_H)
cur_H = update_H(cur_V)
unrolled_theano = theano.function([], updates={V: cur_V, H: cur_H},
unrolled_theano = theano.function([], updates=[(V, cur_V), (H, cur_H)],
name='unrolled_theano')
nb_dot = sum([1 for node in unrolled_theano.maker.fgraph.toposort()
if isinstance(node.op, (theano.tensor.Dot,
......@@ -1032,7 +1032,7 @@ def test_dot_w_self():
p = T.dot(A, A) * B
grad = T.grad(T.mean(p), A)
f = theano.function([B], p, updates={A: A - grad})
f = theano.function([B], p, updates=[(A, A - grad)])
# tests correctness in debugmode
f(numpy.asarray([[0, 1], [2, 3]], dtype=config.floatX))
......@@ -1119,7 +1119,7 @@ class TestGemv(TestCase, unittest_tools.TestOptimizationMixin):
assert topo[0].op.inplace == False
#test the inplace version
g = theano.function([], [], updates={v2: v2 + theano.dot(m, v1)},
g = theano.function([], [], updates=[(v2, v2 + theano.dot(m, v1))],
mode=mode_blas_opt)
# Assert they produce the same output
......@@ -1169,7 +1169,7 @@ class TestGemv(TestCase, unittest_tools.TestOptimizationMixin):
assert topo[-1].op.inplace == False
#test the inplace version
g = theano.function([], [], updates={v2: v2 + theano.dot(v1, m)},
g = theano.function([], [], updates=[(v2, v2 + theano.dot(v1, m))],
mode=mode_blas_opt)
# Assert they produce the same output
......@@ -1575,7 +1575,7 @@ class TestGer(TestCase, unittest_tools.TestOptimizationMixin):
def function(self, inputs, outputs, updates=None):
if updates is None:
updates = {}
updates = []
return theano.function(inputs, outputs, self.mode, updates=updates)
def b(self, bval):
......@@ -1691,8 +1691,8 @@ class TestGer(TestCase, unittest_tools.TestOptimizationMixin):
def test_inplace(self):
A = self.shared(numpy.random.rand(4, 5).astype(self.dtype))
f = self.function([self.x, self.y], [],
updates={A: A + T.constant(0.1, dtype=self.dtype) *
T.outer(self.x, self.y)})
updates=[(A, A + T.constant(0.1, dtype=self.dtype) *
T.outer(self.x, self.y))])
self.assertFunctionContains(f, self.ger_destructive)
f(numpy.random.rand(4).astype(self.dtype),
numpy.random.rand(5).astype(self.dtype))
......@@ -1731,15 +1731,15 @@ class TestBlasStrides(TestCase):
bt_dev = b_t.get_value(borrow=False, return_internal_type=True)
ct_dev = c_t.get_value(borrow=False, return_internal_type=True)
f_nn = theano.function([], [], updates={a: tensor.dot(b, c)},
f_nn = theano.function([], [], updates=[(a, tensor.dot(b, c))],
mode=self.mode)
#print 'class name:', self.__class__.__name__
#theano.printing.debugprint(f_nn)
f_nt = theano.function([], [], updates={a: tensor.dot(b, c_t.T)},
f_nt = theano.function([], [], updates=[(a, tensor.dot(b, c_t.T))],
mode=self.mode)
f_tn = theano.function([], [], updates={a: tensor.dot(b_t.T, c)},
f_tn = theano.function([], [], updates=[(a, tensor.dot(b_t.T, c))],
mode=self.mode)
f_tt = theano.function([], [], updates={a: tensor.dot(b_t.T, c_t.T)},
f_tt = theano.function([], [], updates=[(a, tensor.dot(b_t.T, c_t.T))],
mode=self.mode)
# Try with all stride patterns, and all transposed pattern
......@@ -1802,14 +1802,14 @@ class TestBlasStrides(TestCase):
bt_dev = b_t.get_value(borrow=False, return_internal_type=True)
ct_dev = c_t.get_value(borrow=False, return_internal_type=True)
f_nn = theano.function([], [], updates={a: l * tensor.dot(b, c)},
f_nn = theano.function([], [], updates=[(a, l * tensor.dot(b, c))],
mode=self.mode)
f_nt = theano.function([], [], updates={a: l * tensor.dot(b, c_t.T)},
f_nt = theano.function([], [], updates=[(a, l * tensor.dot(b, c_t.T))],
mode=self.mode)
f_tn = theano.function([], [], updates={a: l * tensor.dot(b_t.T, c)},
f_tn = theano.function([], [], updates=[(a, l * tensor.dot(b_t.T, c))],
mode=self.mode)
f_tt = theano.function([], [],
updates={a: l * tensor.dot(b_t.T, c_t.T)},
updates=[(a, l * tensor.dot(b_t.T, c_t.T))],
mode=self.mode)
# Try with all stride patterns, and all transposed pattern
......@@ -1875,28 +1875,28 @@ class TestBlasStrides(TestCase):
ct_dev = c_t.get_value(borrow=False, return_internal_type=True)
f_nnn = theano.function([], [],
updates={a: (l * a + tensor.dot(b, c))},
updates=[(a, (l * a + tensor.dot(b, c)))],
mode=self.mode)
f_nnt = theano.function([], [],
updates={a: (l * a + tensor.dot(b, c_t.T))},
updates=[(a, (l * a + tensor.dot(b, c_t.T)))],
mode=self.mode)
f_ntn = theano.function([], [],
updates={a: (l * a + tensor.dot(b_t.T, c))},
updates=[(a, (l * a + tensor.dot(b_t.T, c)))],
mode=self.mode)
f_ntt = theano.function([], [],
updates={a: (l * a + tensor.dot(b_t.T, c_t.T))},
updates=[(a, (l * a + tensor.dot(b_t.T, c_t.T)))],
mode=self.mode)
f_tnn = theano.function([], [],
updates={a_t: (l * a_t + tensor.dot(b, c).T)},
updates=[(a_t, (l * a_t + tensor.dot(b, c).T))],
mode=self.mode)
f_tnt = theano.function([], [],
updates={a_t: (l * a_t + tensor.dot(b, c_t.T).T)},
updates=[(a_t, (l * a_t + tensor.dot(b, c_t.T).T))],
mode=self.mode)
f_ttn = theano.function([], [],
updates={a_t: (l * a_t + tensor.dot(b_t.T, c).T)},
updates=[(a_t, (l * a_t + tensor.dot(b_t.T, c).T))],
mode=self.mode)
f_ttt = theano.function([], [],
updates={a_t: (l * a_t + tensor.dot(b_t.T, c_t.T).T)},
updates=[(a_t, (l * a_t + tensor.dot(b_t.T, c_t.T).T))],
mode=self.mode)
# Try with all stride patterns, and all transposed pattern
......@@ -1985,11 +1985,11 @@ class TestBlasStrides(TestCase):
b_dev = b.get_value(borrow=False, return_internal_type=True)
c_dev = c.get_value(borrow=False, return_internal_type=True)
f_n = theano.function([], [], updates={a: (a + l * tensor.dot(b, c))},
f_n = theano.function([], [], updates=[(a, (a + l * tensor.dot(b, c)))],
mode=self.mode)
f_t = theano.function([], [],
updates={a: (a + l * tensor.dot(b_t.T, c))},
updates=[(a, (a + l * tensor.dot(b_t.T, c)))],
mode=self.mode)
# Try with all stride patterns, and all transposed pattern
......@@ -2041,11 +2041,11 @@ class TestBlasStrides(TestCase):
c_dev = c.get_value(borrow=False, return_internal_type=True)
f_n = theano.function([], [],
updates={a: (a + l * tensor.outer(b, c))},
updates=[(a, (a + l * tensor.outer(b, c)))],
mode=self.mode)
f_t = theano.function([], [],
updates={a_t: (a_t + l * tensor.outer(b, c).T)},
updates=[(a_t, (a_t + l * tensor.outer(b, c).T))],
mode=self.mode)
# Try with all stride patterns, and all transposed patterns
......
......@@ -185,7 +185,7 @@ class TestCGemv(TestCase, TestOptimizationMixin):
#test the inplace version
g = theano.function([], [],
updates={v2: v2 + theano.dot(m, v1)},
updates=[(v2, v2 + theano.dot(m, v1))],
mode=self.mode)
# Assert they produce the same output
......
......@@ -526,8 +526,8 @@ def makeSharedTester(shared_constructor_,
s = self.cast_value(s)
s_shared = self.shared_constructor(s)
f = theano.function([],
updates={s_shared:theano.dot(a_shared,b_shared)
+s_shared})
updates=[(s_shared, theano.dot(a_shared,b_shared)
+s_shared)])
topo=f.maker.fgraph.toposort()
f()
#[Gemm{inplace}(<TensorType(float64, matrix)>, 0.01, <TensorType(float64, matrix)>, <TensorType(float64, matrix)>, 2e-06)]
......@@ -541,8 +541,8 @@ def makeSharedTester(shared_constructor_,
#now test with the specify shape op in the output
f = theano.function([], s_shared.shape,
updates={s_shared:theano.dot(a_shared,b_shared)
+s_shared_specify})
updates=[(s_shared, theano.dot(a_shared,b_shared)
+s_shared_specify)])
topo=f.maker.fgraph.toposort()
shp=f()
assert numpy.all(shp == (40,40))
......@@ -557,8 +557,8 @@ def makeSharedTester(shared_constructor_,
b_shared.get_value(borrow=True).shape)
f = theano.function([], s_shared.shape,
updates={s_shared:theano.dot(a_shared,b_shared)
+s_shared_specify})
updates=[(s_shared, theano.dot(a_shared,b_shared)
+s_shared_specify)])
topo=f.maker.fgraph.toposort()
shp=f()
assert numpy.all(shp == (40,40))
......
......@@ -8,8 +8,8 @@ import numpy.random
from theano.tests import unittest_tools as utt
'''
Different tests that are not connected to any particular Op, or functionality of
Theano. Here will go for example code that we will publish in papers, that we
Different tests that are not connected to any particular Op, or functionality of
Theano. Here will go for example code that we will publish in papers, that we
should ensure that it will remain operational
'''
......@@ -55,7 +55,7 @@ class T_scipy(unittest.TestCase):
train = function(
inputs=[x,y],
outputs=[prediction, xent],
updates={w:w-0.1*gw, b:b-0.1*gb})
updates=[(w, w-0.1*gw), (b, b-0.1*gb)])
predict = function(inputs=[x], outputs=prediction)
N = 4
......
import unittest
import theano
from theano.updates import Updates
from theano.updates import OrderedUpdates
import theano.tensor as T
class test_ifelse(unittest.TestCase):
def test_updates_init(self):
self.assertRaises(TypeError, Updates, dict(d=3))
self.assertRaises(TypeError, OrderedUpdates, dict(d=3))
sv = theano.shared('asdf')
Updates({sv:3})
OrderedUpdates({sv:3})
def test_updates_setitem(self):
ok = True
up = Updates()
up = OrderedUpdates()
sv = theano.shared('asdf')
# keys have to be SharedVariables
......@@ -27,8 +27,8 @@ class test_ifelse(unittest.TestCase):
def test_updates_add(self):
up1 = Updates()
up2 = Updates()
up1 = OrderedUpdates()
up2 = OrderedUpdates()
a = theano.shared('a')
b = theano.shared('b')
......
......@@ -8,23 +8,27 @@ __contact__ = "theano-dev <theano-dev@googlegroups.com>"
__docformat__ = "restructuredtext en"
from theano.gof.python25 import OrderedDict
from theano.compile.sharedvalue import SharedVariable
import logging
logger = logging.getLogger('theano.updates')
import warnings
class Updates(dict):
# Must be an OrderedDict or updates will be applied in a non-deterministic order
class OrderedUpdates(OrderedDict):
"""
Dict-like mapping from SharedVariable keys to their new values.
This mapping supports the use of the "+" operator for the union of updates.
"""
def __init__(self, *key, **kwargs):
ret = super(Updates, self).__init__(*key, **kwargs)
ret = super(OrderedUpdates, self).__init__(*key, **kwargs)
for key in self:
if not isinstance(key, SharedVariable):
raise TypeError(
'Updates keys must inherit from SharedVariable',
'OrderedUpdates keys must inherit from SharedVariable',
key)
return ret
......@@ -38,12 +42,14 @@ class Updates(dict):
# value. Should it be cast to a GPU value right away? Should
# literals be transformed into constants immediately?
return super(Updates, self).__setitem__(key, value)
return super(OrderedUpdates, self).__setitem__(key, value)
else:
raise TypeError('Updates keys must inherit from SharedVariable',
raise TypeError('OrderedUpdates keys must inherit from SharedVariable',
key)
def update(self, other):
def update(self, other=None):
if other is None:
return
for key, val in dict(other).iteritems():
if key in self:
if self[key] == val:
......@@ -52,13 +58,17 @@ class Updates(dict):
self[key] = val # __setitem__ does type-checking
def __add__(self, other):
rval = Updates()
rval = OrderedUpdates()
rval.update(self)
rval.update(other)
return rval
def __radd__(other, self):
rval = Updates()
rval = OrderedUpdates()
rval.update(other)
rval.update(self)
return rval
def Updates(*key, **kwargs):
    """
    Deprecated alias for `OrderedUpdates`.

    Emits a warning and then forwards all positional and keyword
    arguments unchanged to `OrderedUpdates`, returning its result.
    """
    # stacklevel=2 attributes the warning to the caller's line rather than
    # to this wrapper, so users can locate the code that needs migrating.
    warnings.warn("Updates is deprecated. Switch to OrderedUpdates.",
                  stacklevel=2)
    return OrderedUpdates(*key, **kwargs)
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论