提交 f68f06ce authored 作者: Rami Al-Rfou's avatar Rami Al-Rfou

Merge branch 'master' into grad_advinc_subtensor

...@@ -133,8 +133,8 @@ This makes it safe to pass a very long arange, which we need to do for generalit ...@@ -133,8 +133,8 @@ This makes it safe to pass a very long arange, which we need to do for generalit
arange must have its length specified at creation time. arange must have its length specified at creation time.
Simple accumulation into a scalar, ditching lamba Simple accumulation into a scalar, ditching lambda
------------------------------------------------- --------------------------------------------------
Although this example would seem almost self-explanatory, it stresses a Although this example would seem almost self-explanatory, it stresses a
pitfall to be careful of: the initial output state that is supplied, that is pitfall to be careful of: the initial output state that is supplied, that is
......
...@@ -88,7 +88,7 @@ from printing import \ ...@@ -88,7 +88,7 @@ from printing import \
import scan_module import scan_module
from scan_module import scan, map, reduce, foldl, foldr, clone from scan_module import scan, map, reduce, foldl, foldr, clone
from updates import Updates from updates import Updates, OrderedUpdates
import tensor import tensor
import scalar import scalar
......
...@@ -172,7 +172,9 @@ class BadThunkOutput(DebugModeError): ...@@ -172,7 +172,9 @@ class BadThunkOutput(DebugModeError):
print >> sio, "BadThunkOutput" print >> sio, "BadThunkOutput"
print >> sio, " variable :", self.r print >> sio, " variable :", self.r
print >> sio, " Outputs Type:", self.r.type print >> sio, " Outputs Type:", self.r.type
print >> sio, " Inputs Type :", [i.type for i in self.r.owner.inputs], print >> sio, " Outputs Shape:", getattr(self.val1, 'shape', None)
print >> sio, " Outputs Strides:", getattr(self.val1, 'strides', None)
print >> sio, " Inputs Type :", [i.type for i in self.r.owner.inputs]
print >> sio, " Inputs Shape:", [getattr(val, 'shape', None) print >> sio, " Inputs Shape:", [getattr(val, 'shape', None)
for val in self.inputs_val] for val in self.inputs_val]
print >> sio, " Inputs Strides:", [getattr(val, 'strides', None) print >> sio, " Inputs Strides:", [getattr(val, 'strides', None)
...@@ -1336,7 +1338,7 @@ def _check_preallocated_output(node, thunk, prealloc_modes, def_val, ...@@ -1336,7 +1338,7 @@ def _check_preallocated_output(node, thunk, prealloc_modes, def_val,
for r in node.outputs: for r in node.outputs:
if not r.type.values_eq_approx(r_vals[r], storage_map[r][0]): if not r.type.values_eq_approx(r_vals[r], storage_map[r][0]):
# TODO: indicate it is not a C/Py problem # TODO: indicate it is not a C/Py problem
inputs_val = [storage_map[inp] for inp in r.owner.inputs] inputs_val = [storage_map[inp][0] for inp in r.owner.inputs]
raise BadThunkOutput(r, raise BadThunkOutput(r,
thunk1='Reference value', val1=r_vals[r], thunk1='Reference value', val1=r_vals[r],
thunk2=thunk_name, val2=storage_map[r][0], thunk2=thunk_name, val2=storage_map[r][0],
...@@ -1918,7 +1920,7 @@ class _Linker(gof.link.LocalLinker): ...@@ -1918,7 +1920,7 @@ class _Linker(gof.link.LocalLinker):
if not r.type.values_eq_approx(r_vals[r], storage_map[r][0]): if not r.type.values_eq_approx(r_vals[r], storage_map[r][0]):
#import pdb; pdb.set_trace() #import pdb; pdb.set_trace()
#r.type.values_eq_approx(r_vals[r], storage_map[r][0]) #r.type.values_eq_approx(r_vals[r], storage_map[r][0])
inputs_val = [storage_map[inp] for inp in r.owner.inputs] inputs_val = [storage_map[inp][0] for inp in r.owner.inputs]
raise BadThunkOutput(r, raise BadThunkOutput(r,
thunk1='perform', val1=r_vals[r], thunk1='perform', val1=r_vals[r],
thunk2='c_code', val2=storage_map[r][0], thunk2='c_code', val2=storage_map[r][0],
......
...@@ -12,6 +12,8 @@ from function_module import orig_function ...@@ -12,6 +12,8 @@ from function_module import orig_function
from profiling import ProfileStats from profiling import ProfileStats
from pfunc import pfunc from pfunc import pfunc
from numpy import any # to work in python 2.4 from numpy import any # to work in python 2.4
import warnings
from theano import gof
def function(inputs, outputs=None, mode=None, updates=None, givens=None, def function(inputs, outputs=None, mode=None, updates=None, givens=None,
no_default_updates=False, accept_inplace=False, name=None, no_default_updates=False, accept_inplace=False, name=None,
...@@ -30,7 +32,7 @@ def function(inputs, outputs=None, mode=None, updates=None, givens=None, ...@@ -30,7 +32,7 @@ def function(inputs, outputs=None, mode=None, updates=None, givens=None,
:type mode: string or `Mode` instance. :type mode: string or `Mode` instance.
:param mode: compilation mode :param mode: compilation mode
:type updates: iterable over pairs (shared_variable, new_expression). List, tuple or dict. :type updates: iterable over pairs (shared_variable, new_expression). List, tuple or OrderedDict.
:param updates: update the values for SharedVariable inputs according to these expressions :param updates: update the values for SharedVariable inputs according to these expressions
:type givens: iterable over pairs (Var1, Var2) of Variables. List, tuple or dict. The Var1 :type givens: iterable over pairs (Var1, Var2) of Variables. List, tuple or dict. The Var1
...@@ -128,7 +130,7 @@ def function(inputs, outputs=None, mode=None, updates=None, givens=None, ...@@ -128,7 +130,7 @@ def function(inputs, outputs=None, mode=None, updates=None, givens=None,
def opt_log1p(node): def opt_log1p(node):
if not isinstance(node.op,Elemwise): if not isinstance(node.op,Elemwise):
return return
if not isinstance(node.op.scalar_op, log,): if not isinstance(node.op.scalar_op, log):
return return
inp = node.inputs[0] inp = node.inputs[0]
if not inp.owner: if not inp.owner:
...@@ -159,10 +161,18 @@ def function(inputs, outputs=None, mode=None, updates=None, givens=None, ...@@ -159,10 +161,18 @@ def function(inputs, outputs=None, mode=None, updates=None, givens=None,
""" """
#tuple are used in some tests, as we accepted them in the past
#I prefer to allow it as they act the same as list for what they are used.
if updates is None: if updates is None:
updates = [] updates = []
if isinstance(updates, dict) and \
not isinstance(updates, gof.python25.OrderedDict):
warnings.warn("Expected OrderedDict, got "+str(type(updates))+ "Using "
"a standard dictionary here results in "
"non-deterministic behavior. You should use an OrderedDict"
" if you are using python2.7 or use a list of (shared, update)"
" pairs. Do not just convert your dictionary to this type before"
" the call as the conversion will still be non-deterministic.")
if givens is None: if givens is None:
givens = [] givens = []
if not isinstance(inputs, (list, tuple)): if not isinstance(inputs, (list, tuple)):
......
...@@ -1337,6 +1337,7 @@ def orig_function(inputs, outputs, mode=None, accept_inplace=False, ...@@ -1337,6 +1337,7 @@ def orig_function(inputs, outputs, mode=None, accept_inplace=False,
profile.compile_time += t2 - t1 profile.compile_time += t2 - t1
fn.name = name fn.name = name
fn.maker.fgraph.name = name
return fn return fn
......
...@@ -626,8 +626,15 @@ class Test_pfunc(unittest.TestCase): ...@@ -626,8 +626,15 @@ class Test_pfunc(unittest.TestCase):
# The order of the variables is not determined, so we try # The order of the variables is not determined, so we try
# both shared variables. # both shared variables.
f = theano.function([], [], updates={a: a, b: (2 * b)}) # TODO: explain the above comment. By "not determined" does
g = theano.function([], [], updates={a: (a * 2), b: b}) # this mean "not deterministic"?
# This test originally wrote the updates using dictionaries,
# and iterating over the dictionary was not deterministic.
# Is that all the comment above meant, or is the CVM intended
# to add extra non-determinism? Or is the CVM meant to
# deterministically but arbitrarily pick an order for the updates?
f = theano.function([], [], updates=[(a, a), (b, (2 * b))])
g = theano.function([], [], updates=[(a, (a * 2)), (b, b)])
f() f()
assert a.get_value(borrow=True).shape == (), a.get_value() assert a.get_value(borrow=True).shape == (), a.get_value()
...@@ -642,10 +649,10 @@ class Test_pfunc(unittest.TestCase): ...@@ -642,10 +649,10 @@ class Test_pfunc(unittest.TestCase):
a = shared(1., 'a') a = shared(1., 'a')
b = shared(numpy.ones((2, 3)), 'b') b = shared(numpy.ones((2, 3)), 'b')
# The order of the variables is not determined, so we try # See comment in test_update_same about why we try both
# both shared variables. # shared variables.
f = theano.function([], [], updates={a: a, b: (2 * b - b)}) f = theano.function([], [], updates=[(a, a), (b, (2 * b - b))])
g = theano.function([], [], updates={a: (a * 2 - a), b: b}) g = theano.function([], [], updates=[(a, (a * 2 - a)), (b, b)])
f() f()
assert a.get_value(borrow=True).shape == (), a.get_value() assert a.get_value(borrow=True).shape == (), a.get_value()
......
...@@ -35,3 +35,6 @@ class NullType(Type): ...@@ -35,3 +35,6 @@ class NullType(Type):
def __hash__(self, other): def __hash__(self, other):
return hash(type(self)) return hash(type(self))
def __str__(self):
return 'NullType'
...@@ -162,7 +162,7 @@ else: ...@@ -162,7 +162,7 @@ else:
if sys.version_info[:2] < (2, 7): if sys.version_info[:2] < (2, 7):
# The following implementation of OrderedDict compatible with python 2.4 # The following implementation of OrderedDict compatible with python 2.4
# was taked from http://pypi.python.org/pypi/ordereddict/1.1 # was taken from http://pypi.python.org/pypi/ordereddict/1.1
# It is under the MIT license. # It is under the MIT license.
# Copyright (c) 2009 Raymond Hettinger # Copyright (c) 2009 Raymond Hettinger
......
...@@ -20,6 +20,7 @@ import theano ...@@ -20,6 +20,7 @@ import theano
from itertools import izip from itertools import izip
from theano import gof from theano import gof
from theano.gof import Variable from theano.gof import Variable
from theano.gof.python25 import OrderedDict
from theano.gof.python25 import all from theano.gof.python25 import all
import theano.gof.utils import theano.gof.utils
from theano.gof.null_type import NullType from theano.gof.null_type import NullType
...@@ -144,6 +145,9 @@ class DisconnectedType(theano.gof.type.Type): ...@@ -144,6 +145,9 @@ class DisconnectedType(theano.gof.type.Type):
" a symbolic placeholder." " a symbolic placeholder."
)) ))
def __str__(self):
return 'DisconnectedType'
######################## ########################
# R Operator # R Operator
...@@ -211,7 +215,7 @@ def Rop(f, wrt, eval_points): ...@@ -211,7 +215,7 @@ def Rop(f, wrt, eval_points):
# Tensor, Sparse and CudaNdArray have the ndim attribute # Tensor, Sparse and CudaNdArray have the ndim attribute
pass pass
seen_nodes = {} seen_nodes = OrderedDict()
def _traverse(node): def _traverse(node):
""" TODO: writeme """ """ TODO: writeme """
...@@ -432,14 +436,14 @@ def grad(cost, wrt, consider_constant=None, ...@@ -432,14 +436,14 @@ def grad(cost, wrt, consider_constant=None,
if known_grads is not None: if known_grads is not None:
outputs.extend(known_grads.keys()) outputs.extend(known_grads.keys())
var_to_node_to_idx = _populate_var_to_node_to_idx( var_to_app_to_idx = _populate_var_to_app_to_idx(
outputs, wrt, consider_constant) outputs, wrt, consider_constant)
# build a dict mapping var to the gradient of cost with respect to var # build a dict mapping var to the gradient of cost with respect to var
grad_dict = {} grad_dict = OrderedDict()
if known_grads is None: if known_grads is None:
known_grads = {} known_grads = OrderedDict()
# The gradient of the cost is 1 unless specified otherwise by known_grads. # The gradient of the cost is 1 unless specified otherwise by known_grads.
if cost is not None: if cost is not None:
...@@ -501,10 +505,10 @@ def grad(cost, wrt, consider_constant=None, ...@@ -501,10 +505,10 @@ def grad(cost, wrt, consider_constant=None,
# variables that do not influence the cost have zero gradient. # variables that do not influence the cost have zero gradient.
# if wrt is such a variable, populate the grad_dict with this info # if wrt is such a variable, populate the grad_dict with this info
# so that wrt not being in var_to_node_to_idx won't cause an error below # so that wrt not being in var_to_app_to_idx won't cause an error below
# according to the flag, possibly raise an error if wrt is disconnected # according to the flag, possibly raise an error if wrt is disconnected
for elem in wrt: for elem in wrt:
if elem not in var_to_node_to_idx and elem is not cost \ if elem not in var_to_app_to_idx and elem is not cost \
and elem not in grad_dict: and elem not in grad_dict:
handle_disconnected(elem) handle_disconnected(elem)
grad_dict[elem] = DisconnectedType()() grad_dict[elem] = DisconnectedType()()
...@@ -521,7 +525,7 @@ def grad(cost, wrt, consider_constant=None, ...@@ -521,7 +525,7 @@ def grad(cost, wrt, consider_constant=None,
if hasattr(g.type, 'dtype'): if hasattr(g.type, 'dtype'):
assert g.type.dtype in tensor.float_dtypes assert g.type.dtype in tensor.float_dtypes
rval = _populate_grad_dict(var_to_node_to_idx, rval = _populate_grad_dict(var_to_app_to_idx,
grad_dict, wrt, cost_name) grad_dict, wrt, cost_name)
for i in xrange(len(rval)): for i in xrange(len(rval)):
...@@ -579,7 +583,7 @@ def _node_to_pattern(node): ...@@ -579,7 +583,7 @@ def _node_to_pattern(node):
return connection_pattern return connection_pattern
def _populate_var_to_node_to_idx(outputs, wrt, consider_constant): def _populate_var_to_app_to_idx(outputs, wrt, consider_constant):
""" """
Helper function for grad function. Helper function for grad function.
...@@ -638,7 +642,7 @@ def _populate_var_to_node_to_idx(outputs, wrt, consider_constant): ...@@ -638,7 +642,7 @@ def _populate_var_to_node_to_idx(outputs, wrt, consider_constant):
# var_to_app_to_idx[var][node] = [i,j] means node has # var_to_app_to_idx[var][node] = [i,j] means node has
# var as input at positions i and j # var as input at positions i and j
var_to_app_to_idx = {} var_to_app_to_idx = OrderedDict()
# Set of variables that have been added to their true parents # Set of variables that have been added to their true parents
# ('true' here means that the elements of the variable are a function # ('true' here means that the elements of the variable are a function
...@@ -676,7 +680,13 @@ def _populate_var_to_node_to_idx(outputs, wrt, consider_constant): ...@@ -676,7 +680,13 @@ def _populate_var_to_node_to_idx(outputs, wrt, consider_constant):
continue continue
if ipt not in var_to_app_to_idx: if ipt not in var_to_app_to_idx:
var_to_app_to_idx[ipt] = {} # This object here *must* be an OrderedDict, because
# we iterate over its keys when adding up the terms of
# the gradient on ipt. If it is a regular dict, the grad
# method will return something that is analytically correct,
# but whose order of doing additions depends on the memory
# location of the apply nodes.
var_to_app_to_idx[ipt] = OrderedDict()
app_to_idx = var_to_app_to_idx[ipt] app_to_idx = var_to_app_to_idx[ipt]
if app not in app_to_idx: if app not in app_to_idx:
app_to_idx[app] = [] app_to_idx[app] = []
...@@ -731,12 +741,12 @@ class DisconnectedInputError(ValueError): ...@@ -731,12 +741,12 @@ class DisconnectedInputError(ValueError):
disconnected_inputs='raise'. disconnected_inputs='raise'.
""" """
def _populate_grad_dict(var_to_node_to_idx, def _populate_grad_dict(var_to_app_to_idx,
grad_dict, wrt, cost_name=None): grad_dict, wrt, cost_name=None):
""" """
Helper function for grad function. Helper function for grad function.
var_to_node_to_idx: a dictionary mapping a variable to var_to_app_to_idx: a dictionary mapping a variable to
a second dictionary. a second dictionary.
the second dictionary maps apply nodes acting on the second dictionary maps apply nodes acting on
this variable to the variable's index in the apply this variable to the variable's index in the apply
...@@ -761,7 +771,7 @@ def _populate_grad_dict(var_to_node_to_idx, ...@@ -761,7 +771,7 @@ def _populate_grad_dict(var_to_node_to_idx,
""" """
# build a dict mapping node to the terms node contributes to each of # build a dict mapping node to the terms node contributes to each of
# its inputs' gradients # its inputs' gradients
term_dict = {} term_dict = OrderedDict()
def access_term_cache(node): def access_term_cache(node):
""" Populates term_dict[node] and returns it """ """ Populates term_dict[node] and returns it """
...@@ -1001,15 +1011,17 @@ def _populate_grad_dict(var_to_node_to_idx, ...@@ -1001,15 +1011,17 @@ def _populate_grad_dict(var_to_node_to_idx,
#cache the result #cache the result
term_dict[node] = input_grads term_dict[node] = input_grads
return term_dict[node] return term_dict[node]
# populate grad_dict[var] and return it # populate grad_dict[var] and return it
def access_grad_cache(var): def access_grad_cache(var):
if var not in grad_dict: if var not in grad_dict:
# If var is not in grad_dict already, we must compute it # If var is not in grad_dict already, we must compute it
if var in var_to_node_to_idx: if var in var_to_app_to_idx:
terms = [] terms = []
node_to_idx = var_to_node_to_idx[var] node_to_idx = var_to_app_to_idx[var]
for node in node_to_idx: for node in node_to_idx:
for idx in node_to_idx[node]: for idx in node_to_idx[node]:
......
...@@ -8,6 +8,8 @@ import logging ...@@ -8,6 +8,8 @@ import logging
import os import os
import StringIO import StringIO
import sys import sys
# Not available on all platforms
hashlib = None
import numpy import numpy
...@@ -1069,3 +1071,78 @@ def min_informative_str(obj, indent_level=0, ...@@ -1069,3 +1071,78 @@ def min_informative_str(obj, indent_level=0,
rval = indent + prefix + name rval = indent + prefix + name
return rval return rval
def var_descriptor(obj, _prev_obs=None, _tag_generator=None):
    """
    Return a string, with no line breaks, fully specifying how a
    variable is computed.

    The result does not include any memory location dependent
    information such as the id of a node.

    :param obj: the object to describe. Objects exposing ``__array__``
        are described by their strides and an md5 digest of their
        buffer; objects with a non-None ``name`` by that name; objects
        with a non-None ``owner`` by ``str(owner.op)`` applied to the
        descriptors of ``owner.inputs``; anything else by ``str(obj)``.
    :param _prev_obs: internal. Dictionary mapping ``id(obj)`` to the
        tag of every object already described during this traversal.
        An object met a second time is rendered as ``'<tag>'`` only.
    :param _tag_generator: internal. Object whose ``get_tag()`` method
        returns a fresh tag on every call. Defaults to a new
        ``_TagGenerator()``.

    :returns: ``'<tag>=<description>'`` for an object seen for the first
        time, or ``'<<tag>>'`` style back-reference (``'<' + tag + '>'``)
        for an object already present in ``_prev_obs``.

    :raises RuntimeError: if hashlib cannot be imported.
    :raises AssertionError: if no description free of ``' at 0x'``
        could be produced for ``obj``.
    """
    # Import unconditionally. A function-level ``import hashlib`` binds
    # the name as a local, so testing ``hashlib is None`` before the
    # import would raise UnboundLocalError instead of consulting any
    # module-level placeholder.
    try:
        import hashlib
    except ImportError:
        raise RuntimeError("Can't run var_descriptor because hashlib is not available.")

    if _prev_obs is None:
        _prev_obs = {}

    if id(obj) in _prev_obs:
        # Already described earlier in this traversal: emit a
        # back-reference instead of repeating the description.
        tag = _prev_obs[id(obj)]
        return '<' + tag + '>'

    if _tag_generator is None:
        _tag_generator = _TagGenerator()

    cur_tag = _tag_generator.get_tag()
    _prev_obs[id(obj)] = cur_tag

    if hasattr(obj, '__array__'):
        # hashlib hashes only the contents of the buffer, but
        # it can have different semantics depending on the strides
        # of the ndarray
        name = '<ndarray:'
        name += 'strides=[' + ','.join(str(stride)
                                       for stride in obj.strides) + ']'
        name += ',digest=' + hashlib.md5(obj).hexdigest() + '>'
    elif hasattr(obj, 'name') and obj.name is not None:
        name = obj.name
    elif hasattr(obj, 'owner') and obj.owner is not None:
        # Share _prev_obs and _tag_generator with the recursive calls so
        # that tags stay unique and repeated inputs become references.
        name = str(obj.owner.op) + '('
        name += ','.join(var_descriptor(ipt,
                                        _prev_obs=_prev_obs,
                                        _tag_generator=_tag_generator)
                         for ipt in obj.owner.inputs)
        name += ')'
    else:
        name = str(obj)
        if ' at 0x' in name:
            # The __str__ method is encoding the object's id in its str
            name = position_independent_str(obj)
            if ' at 0x' in name:
                # Raise explicitly rather than ``assert False`` so the
                # check is not stripped when running with -O.
                raise AssertionError(name)

    prefix = cur_tag + '='
    rval = prefix + name
    return rval
def position_independent_str(obj):
    """
    Build a description of ``obj`` from its type alone.

    Only ``theano.gof.graph.Variable`` instances are supported; the
    result is ``'theano_var{type=<str(obj.type)>}'``.

    :raises NotImplementedError: if ``obj`` is not a
        ``theano.gof.graph.Variable``.
    """
    if not isinstance(obj, theano.gof.graph.Variable):
        raise NotImplementedError()
    return 'theano_var' + '{type=' + str(obj.type) + '}'
...@@ -4666,6 +4666,33 @@ int fprint_CudaNdarray(FILE * fd, const CudaNdarray *self) ...@@ -4666,6 +4666,33 @@ int fprint_CudaNdarray(FILE * fd, const CudaNdarray *self)
return 0; return 0;
} }
// Make *arr point to a CudaNdarray with storage for the given dimensions.
//
// arr:  in/out pointer; *arr may be NULL, in which case a new object is
//       created here.
// nd:   number of dimensions.
// dims: array of nd dimension sizes.
//
// Returns 0 on success and -1 on failure. On failure, an object that was
// created by this call is released and *arr is reset to NULL; an object
// that was passed in by the caller is left in place.
int CudaNdarray_prep_output(CudaNdarray ** arr, int nd,
                            const int * dims)
{
    // Remember whether this call creates the object, so that only an
    // object created here is released on failure.
    const bool created_here = (*arr == NULL);

    if (created_here)
    {
        // This allocates the metadata but not the data
        *arr = (CudaNdarray *) CudaNdarray_new_nd(nd);
        if (*arr == NULL)
        {
            return -1;
        }
    }

    if (CudaNdarray_alloc_contiguous(*arr, nd, dims) == 0)
    {
        return 0;
    }

    // Allocation failed: drop the object only if it was created above.
    if (created_here)
    {
        Py_DECREF(*arr);
        *arr = NULL;
    }
    return -1;
}
/* /*
Local Variables: Local Variables:
mode:c++ mode:c++
......
...@@ -149,11 +149,11 @@ DllExport int ...@@ -149,11 +149,11 @@ DllExport int
CudaNdarray_Equal(CudaNdarray *cnda1, CudaNdarray *cnda2); CudaNdarray_Equal(CudaNdarray *cnda1, CudaNdarray *cnda2);
/**** /****
* Set the idx'th dimension to value d. * Set the dimension[idx] to value d.
* *
* Updates the log2dim shadow array. * Updates the log2dim shadow array.
* *
* Does not sync structure to host. * Does not sync structure to device.
*/ */
DllExport inline void __attribute__((always_inline)) DllExport inline void __attribute__((always_inline))
CudaNdarray_set_dim(CudaNdarray * self, int idx, int d) CudaNdarray_set_dim(CudaNdarray * self, int idx, int d)
...@@ -229,7 +229,8 @@ DllExport PyObject * CudaNdarray_new_nd(const int nd); ...@@ -229,7 +229,8 @@ DllExport PyObject * CudaNdarray_new_nd(const int nd);
/** /**
* [Re]allocate a CudaNdarray with access to 'nd' dimensions. * [Re]allocate a CudaNdarray with access to 'nd' dimensions.
* *
* Note: This does not allocate storage for data. * Note: This does not allocate storage for data, or free
* pre-existing storage.
*/ */
DllExport inline int __attribute__((always_inline)) DllExport inline int __attribute__((always_inline))
CudaNdarray_set_nd(CudaNdarray * self, const int nd) CudaNdarray_set_nd(CudaNdarray * self, const int nd)
...@@ -276,6 +277,7 @@ CudaNdarray_set_nd(CudaNdarray * self, const int nd) ...@@ -276,6 +277,7 @@ CudaNdarray_set_nd(CudaNdarray * self, const int nd)
* CudaNdarray_alloc_contiguous * CudaNdarray_alloc_contiguous
* *
* Allocate storage space for a tensor of rank 'nd' and given dimensions. * Allocate storage space for a tensor of rank 'nd' and given dimensions.
* (No-op if self already has a contiguous tensor of the right dimensions)
* *
* Note: CudaNdarray_alloc_contiguous is templated to work for both int dimensions and npy_intp dimensions * Note: CudaNdarray_alloc_contiguous is templated to work for both int dimensions and npy_intp dimensions
*/ */
...@@ -286,13 +288,13 @@ static int CudaNdarray_alloc_contiguous(CudaNdarray *self, const int nd, const i ...@@ -286,13 +288,13 @@ static int CudaNdarray_alloc_contiguous(CudaNdarray *self, const int nd, const i
// return 0 on success // return 0 on success
int size = 1; //set up the strides for contiguous tensor int size = 1; //set up the strides for contiguous tensor
assert (nd >= 0); assert (nd >= 0);
// Here we modify the host structure to have the desired shape and
// strides. This does not cause the storage to be freed or reallocated.
if (CudaNdarray_set_nd(self, nd)) if (CudaNdarray_set_nd(self, nd))
{ {
return -1; return -1;
} }
//TODO: check if by any chance our current dims are correct,
// and strides already contiguous
// in that case we can return right here.
for (int i = nd-1; i >= 0; --i) for (int i = nd-1; i >= 0; --i)
{ {
CudaNdarray_set_stride(self, i, (dim[i] == 1) ? 0 : size); CudaNdarray_set_stride(self, i, (dim[i] == 1) ? 0 : size);
...@@ -300,7 +302,11 @@ static int CudaNdarray_alloc_contiguous(CudaNdarray *self, const int nd, const i ...@@ -300,7 +302,11 @@ static int CudaNdarray_alloc_contiguous(CudaNdarray *self, const int nd, const i
size = size * dim[i]; size = size * dim[i];
} }
if ((self->data_allocated == size) && CudaNdarray_is_c_contiguous(self)) // If the allocated buffer is already of the right size, we don't need to
// do anything else.
// Note: self->data_allocated is 0 for a view, so views will fail this
// check and be turned into independent arrays below.
if (self->data_allocated == size)
{ {
return 0; return 0;
} }
...@@ -468,6 +474,15 @@ PyObject * CudaNdarray_View(const CudaNdarray * self); ...@@ -468,6 +474,15 @@ PyObject * CudaNdarray_View(const CudaNdarray * self);
PyObject * CudaNdarray_inplace_add(PyObject* py_self, PyObject * py_other); PyObject * CudaNdarray_inplace_add(PyObject* py_self, PyObject * py_other);
// Ensures that *arr is a pointer to a contiguous ndarray of the specified
// dimensions.
// *arr may initially be NULL, a pointer to an ndarray of the wrong size,
// or a pointer to an ndarray of the right size. In the last case it will
// not change.
int CudaNdarray_prep_output(CudaNdarray ** arr, int nd,
const int * dims);
#endif #endif
/* /*
Local Variables: Local Variables:
......
...@@ -869,5 +869,5 @@ def test_stack_rows_segfault_070312(): ...@@ -869,5 +869,5 @@ def test_stack_rows_segfault_070312():
out = theano.shared(numpy.random.rand(1, 2, 2, 3).astype('float32')) out = theano.shared(numpy.random.rand(1, 2, 2, 3).astype('float32'))
op = theano.tensor.nnet.conv.ConvOp(imshp=(80, 96, 96), kshp=(9, 9), op = theano.tensor.nnet.conv.ConvOp(imshp=(80, 96, 96), kshp=(9, 9),
nkern=1, bsize=1) nkern=1, bsize=1)
f = theano.function([], [], updates={out: op(img, kern)}) f = theano.function([], [], updates=[(out, op(img, kern))])
f() f()
...@@ -106,7 +106,7 @@ def test_alloc_memset_0(): ...@@ -106,7 +106,7 @@ def test_alloc_memset_0():
def test_gpuspecifyshape(): def test_gpuspecifyshape():
x = cuda.shared_constructor(numpy.ones(3,dtype='float32'), 'x') x = cuda.shared_constructor(numpy.ones(3,dtype='float32'), 'x')
m = theano.tensor.specify_shape(x + numpy.float32(1), (3,)) m = theano.tensor.specify_shape(x + numpy.float32(1), (3,))
f = theano.function([], updates={x:m * numpy.float32(2)}, f = theano.function([], updates=[(x, m * numpy.float32(2))],
mode=mode_with_gpu) mode=mode_with_gpu)
l = f.maker.fgraph.toposort() l = f.maker.fgraph.toposort()
assert not numpy.any([isinstance(x.op, cuda.HostFromGpu) for x in l]) assert not numpy.any([isinstance(x.op, cuda.HostFromGpu) for x in l])
......
...@@ -60,11 +60,11 @@ class T_updates(unittest.TestCase): ...@@ -60,11 +60,11 @@ class T_updates(unittest.TestCase):
data = numpy.float32([1, 2, 3, 4]) data = numpy.float32([1, 2, 3, 4])
x = f32sc(data) x = f32sc(data)
y = x ** 2 y = x ** 2
f = theano.function([], y, updates={x: x + 1}) f = theano.function([], y, updates=[(x, x + 1)])
f() f()
# Test that we can update with a CudaVariable # Test that we can update with a CudaVariable
f = theano.function([], y, updates={x: cuda.gpu_from_host(x + 1)}) f = theano.function([], y, updates=[(x, cuda.gpu_from_host(x + 1))])
f() f()
def test_2(self): def test_2(self):
...@@ -74,7 +74,7 @@ class T_updates(unittest.TestCase): ...@@ -74,7 +74,7 @@ class T_updates(unittest.TestCase):
value=numpy.zeros((10, 10), 'float32')) value=numpy.zeros((10, 10), 'float32'))
x = tensor.fmatrix('x') x = tensor.fmatrix('x')
output_updates = {output_var: x ** 2} output_updates = [(output_var, x ** 2)]
output_givens = {x: data} output_givens = {x: data}
output_func = theano.function(inputs=[], outputs=[], output_func = theano.function(inputs=[], outputs=[],
updates=output_updates, givens=output_givens) updates=output_updates, givens=output_givens)
...@@ -89,8 +89,8 @@ class T_updates(unittest.TestCase): ...@@ -89,8 +89,8 @@ class T_updates(unittest.TestCase):
# the update_var has type matrix, and the update expression # the update_var has type matrix, and the update expression
# is a broadcasted scalar, and that should not be allowed. # is a broadcasted scalar, and that should not be allowed.
self.assertRaises(TypeError, theano.function, inputs=[], outputs=[], self.assertRaises(TypeError, theano.function, inputs=[], outputs=[],
updates={output_var: updates=[(output_var,
output_var.sum()}) output_var.sum())])
def test_err_broadcast(self): def test_err_broadcast(self):
# Test that we raise a good error message when we don't # Test that we raise a good error message when we don't
...@@ -101,8 +101,8 @@ class T_updates(unittest.TestCase): ...@@ -101,8 +101,8 @@ class T_updates(unittest.TestCase):
# the update_var has type matrix, and the update expression # the update_var has type matrix, and the update expression
# is a broadcasted scalar, and that should not be allowed. # is a broadcasted scalar, and that should not be allowed.
self.assertRaises(TypeError, theano.function, inputs=[], outputs=[], self.assertRaises(TypeError, theano.function, inputs=[], outputs=[],
updates={output_var: updates=[(output_var,
output_var.sum().dimshuffle('x', 'x')}) output_var.sum().dimshuffle('x', 'x'))])
def test_broadcast(self): def test_broadcast(self):
# Test that we can rebroadcast # Test that we can rebroadcast
...@@ -111,11 +111,11 @@ class T_updates(unittest.TestCase): ...@@ -111,11 +111,11 @@ class T_updates(unittest.TestCase):
up = tensor.unbroadcast(output_var.sum().dimshuffle('x', 'x'), 0, 1) up = tensor.unbroadcast(output_var.sum().dimshuffle('x', 'x'), 0, 1)
output_func = theano.function(inputs=[], outputs=[], output_func = theano.function(inputs=[], outputs=[],
updates={output_var: up}) updates=[(output_var, up)])
output_func() output_func()
up = tensor.patternbroadcast(output_var.sum().dimshuffle('x', 'x'), up = tensor.patternbroadcast(output_var.sum().dimshuffle('x', 'x'),
output_var.type.broadcastable) output_var.type.broadcastable)
output_func = theano.function(inputs=[], outputs=[], output_func = theano.function(inputs=[], outputs=[],
updates={output_var: up}) updates=[(output_var, up)])
output_func() output_func()
from ops import (cholesky, matrix_inverse, solve, from ops import (cholesky, matrix_inverse, solve,
diag, extract_diag, alloc_diag, diag, extract_diag, alloc_diag,
det, psd, eig, det, psd, eig, eigh,
trace, spectral_radius_bound) trace, spectral_radius_bound)
...@@ -12,6 +12,7 @@ from theano.tensor.opt import (register_stabilize, ...@@ -12,6 +12,7 @@ from theano.tensor.opt import (register_stabilize,
register_specialize, register_canonicalize) register_specialize, register_canonicalize)
from theano.gof import local_optimizer from theano.gof import local_optimizer
from theano.gof.opt import Optimizer from theano.gof.opt import Optimizer
from theano.gradient import grad_not_implemented, DisconnectedType
try: try:
import scipy.linalg import scipy.linalg
...@@ -395,6 +396,8 @@ cholesky = Cholesky() ...@@ -395,6 +396,8 @@ cholesky = Cholesky()
class CholeskyGrad(Op): class CholeskyGrad(Op):
"""
"""
def __init__(self, lower=True): def __init__(self, lower=True):
self.lower = lower self.lower = lower
self.destructive = False self.destructive = False
...@@ -487,7 +490,7 @@ class MatrixPinv(Op): ...@@ -487,7 +490,7 @@ class MatrixPinv(Op):
This method is not faster than `matrix_inverse`. Its strength comes from This method is not faster than `matrix_inverse`. Its strength comes from
that it works for non-square matrices. that it works for non-square matrices.
If you have a square matrix though, `matrix_inverse` can be both more If you have a square matrix though, `matrix_inverse` can be both more
exact and faster to compute. Aslo this op does not get optimized into a exact and faster to compute. Also this op does not get optimized into a
solve op. solve op.
""" """
def __init__(self): def __init__(self):
...@@ -880,9 +883,7 @@ class Eig(Op): ...@@ -880,9 +883,7 @@ class Eig(Op):
"""Compute the eigenvalues and right eigenvectors of a square array. """Compute the eigenvalues and right eigenvectors of a square array.
""" """
_numop = staticmethod(numpy.linalg.eig)
def __init__(self):
pass
def props(self): def props(self):
"""Function exposing different properties of each instance of the """Function exposing different properties of each instance of the
...@@ -900,15 +901,17 @@ class Eig(Op): ...@@ -900,15 +901,17 @@ class Eig(Op):
def make_node(self, x): def make_node(self, x):
x = as_tensor_variable(x) x = as_tensor_variable(x)
assert x.ndim == 2
w = theano.tensor.vector(dtype=x.dtype) w = theano.tensor.vector(dtype=x.dtype)
v = theano.tensor.matrix(dtype=x.dtype) v = theano.tensor.matrix(dtype=x.dtype)
return Apply(self, [x], [w, v]) return Apply(self, [x], [w, v])
def perform(self, node, (x,), (w, v)): def perform(self, node, (x,), (w, v)):
try: try:
w[0], v[0] = [z.astype(x.dtype) for z in numpy.linalg.eig(x)] w[0], v[0] = [z.astype(x.dtype) for z in self._numop(x)]
except numpy.linalg.LinAlgError: except numpy.linalg.LinAlgError:
logger.debug('Failed to find eig of %s' % str(node.inputs[0])) logger.debug('Failed to find %s of %s' % (self._numop.__name__,
node.inputs[0]))
raise raise
def infer_shape(self, node, shapes): def infer_shape(self, node, shapes):
...@@ -916,6 +919,138 @@ class Eig(Op): ...@@ -916,6 +919,138 @@ class Eig(Op):
return [(n,), (n,n)] return [(n,), (n,n)]
def __str__(self): def __str__(self):
return "Eig" return self._numop.__name__.capitalize()
eig = Eig() eig = Eig()
def _zero_disconnected(outputs, grads):
    """Replace disconnected gradients by zeros shaped like their output.

    ``outputs`` and ``grads`` are walked in lockstep.  Whenever the type
    of a gradient is a ``DisconnectedType``, ``zeros_like()`` of the
    matching output is used in its place; every other gradient is passed
    through untouched.  Returns a new list.
    """
    cleaned = []
    for out, grad in zip(outputs, grads):
        if isinstance(grad.type, DisconnectedType):
            cleaned.append(out.zeros_like())
        else:
            cleaned.append(grad)
    return cleaned
class Eigh(Eig):
    """
    Return the eigenvalues and eigenvectors of a Hermitian or symmetric
    matrix.

    Thin wrapper around ``numpy.linalg.eigh``.  ``UPLO`` is forwarded to
    numpy unchanged; with 'L' ('U') the result is a function of the
    lower (upper) triangle of the input only.
    """
    _numop = staticmethod(numpy.linalg.eigh)

    def __init__(self, UPLO='L'):
        self.UPLO = UPLO

    def __str__(self):
        return 'Eigh{%s}' % self.UPLO

    def props(self):
        """Properties that distinguish one instance from another."""
        return (self.UPLO,)

    def make_node(self, x):
        """Build the Apply node: one matrix in, (eigenvalues, eigenvectors)
        out."""
        x = as_tensor_variable(x)
        assert x.ndim == 2
        # Numpy's linalg.eigh may return either double or single
        # precision eigenvalues depending on installed version of
        # LAPACK. Rather than trying to reproduce the (rather
        # involved) logic, we just probe linalg.eigh with a trivial
        # 1x1 input of the same dtype and read the dtype it returns.
        probe = [[numpy.dtype(x.dtype).type()]]
        w_dtype = self._numop(probe)[0].dtype.name
        w = theano.tensor.vector(dtype=w_dtype)
        v = theano.tensor.matrix(dtype=x.dtype)
        return Apply(self, [x], [w, v])

    def perform(self, node, inputs, outputs):
        """Compute the eigensystem of ``inputs[0]`` into the output cells.

        ``inputs`` holds the single matrix; ``outputs`` holds the two
        storage cells for the eigenvalues and the eigenvectors.  A
        ``numpy.linalg.LinAlgError`` is logged and re-raised.
        """
        # Plain parameters instead of Python-2-only tuple parameters,
        # matching EighGrad.perform.
        (x,) = inputs
        (w, v) = outputs
        try:
            w[0], v[0] = self._numop(x, self.UPLO)
        except numpy.linalg.LinAlgError:
            logger.debug('Failed to find %s of %s',
                         self._numop.__name__, node.inputs[0])
            raise

    def grad(self, inputs, g_outputs):
        r"""The gradient function should return

           .. math:: \sum_n\left(W_n\frac{\partial\,w_n}
                           {\partial a_{ij}} +
                     \sum_k V_{kn}\frac{\partial\,v_{kn}}
                           {\partial a_{ij}}\right),

        where [:math:`W`, :math:`V`] corresponds to ``g_outputs``,
        :math:`a` to ``inputs``, and :math:`(w, v)=\mbox{eig}(a)`.

        Analytic formulae for eigensystem gradients are well-known in
        perturbation theory:

           .. math:: \frac{\partial\,w_n}
                          {\partial a_{ij}} = v_{in}\,v_{jn}

           .. math:: \frac{\partial\,v_{kn}}
                          {\partial a_{ij}} =
                \sum_{m\ne n}\frac{v_{km}\,v_{im}\,v_{jn}}{w_n-w_m}
        """
        x, = inputs
        w, v = self(x)
        # Replace gradients wrt disconnected variables with
        # zeros. This is a work-around for issue #1063.
        gw, gv = _zero_disconnected([w, v], g_outputs)
        return [EighGrad(self.UPLO)(x, w, v, gw, gv)]
def eigh(a, UPLO='L'):
    """Apply an ``Eigh`` op configured with ``UPLO`` to ``a``.

    Returns whatever calling the op on ``a`` returns.
    """
    op = Eigh(UPLO)
    return op(a)
class EighGrad(Op):
    """Gradient of an eigensystem of a Hermitian matrix.

    ``UPLO`` selects which triangle of the result carries the gradient:
    'L' keeps the lower triangle, any other value keeps the upper one.
    """
    def __init__(self, UPLO='L'):
        self.UPLO = UPLO
        # tri0 keeps the triangle (with diagonal) that eigh reads;
        # tri1 extracts the strictly opposite triangle.
        if UPLO == 'L':
            self.tri0 = numpy.tril
            self.tri1 = lambda a: numpy.triu(a, 1)
        else:
            self.tri0 = numpy.triu
            self.tri1 = lambda a: numpy.tril(a, -1)

    def props(self):
        # UPLO changes what perform() computes, so it must take part in
        # equality and hashing; otherwise the 'L' and 'U' variants are
        # indistinguishable to anything that compares or hashes ops.
        return (self.UPLO,)

    def __hash__(self):
        return hash((type(self), self.props()))

    def __eq__(self, other):
        return (type(self) == type(other) and self.props() == other.props())

    def __str__(self):
        return 'EighGrad{%s}' % self.UPLO

    def make_node(self, x, w, v, gw, gv):
        """Build the Apply node; the single output has the type of ``x``."""
        x, w, v, gw, gv = map(as_tensor_variable, (x, w, v, gw, gv))
        return Apply(self, [x, w, v, gw, gv], [x.type()])

    def perform(self, node, inputs, outputs):
        r"""
        Implements the "reverse-mode" gradient for the eigensystem of
        a square matrix.

        ``inputs`` is ``(x, w, v, W, V)``: the matrix, its eigenvalues
        and eigenvectors, and the gradients with respect to the
        eigenvalues and eigenvectors.  The result is stored in
        ``outputs[0][0]``.
        """
        x, w, v, W, V = inputs
        N = x.shape[0]
        outer = numpy.outer

        def G(n):
            # Contribution of the eigenvector gradient for eigenpair n,
            # summed over every other eigenpair m.
            return sum(v[:, m] * V.T[n].dot(v[:, m]) / (w[n] - w[m])
                       for m in xrange(N) if m != n)

        g = sum(outer(v[:, n], v[:, n] * W[n] + G(n))
                for n in xrange(N))

        # Numpy's eigh(a, 'L') (eigh(a, 'U')) is a function of tril(a)
        # (triu(a)) only.  This means that partial derivative of
        # eigh(a, 'L') (eigh(a, 'U')) with respect to a[i,j] is zero
        # for i < j (i > j).  At the same time, non-zero components of
        # the gradient must account for the fact that variation of the
        # opposite triangle contributes to variation of two elements
        # of Hermitian (symmetric) matrix. The following line
        # implements the necessary logic.
        outputs[0][0] = self.tri0(g) + self.tri1(g).T

    def infer_shape(self, node, shapes):
        """The gradient has the same shape as the first input."""
        return [shapes[0]]
...@@ -29,7 +29,7 @@ from theano.sandbox.linalg.ops import (cholesky, ...@@ -29,7 +29,7 @@ from theano.sandbox.linalg.ops import (cholesky,
imported_scipy, imported_scipy,
Eig, Eig,
) )
from theano.sandbox.linalg import eig, eigh
from nose.plugins.skip import SkipTest from nose.plugins.skip import SkipTest
...@@ -471,29 +471,51 @@ class test_Solve(utt.InferShapeTester): ...@@ -471,29 +471,51 @@ class test_Solve(utt.InferShapeTester):
self.op_class) self.op_class)
class test_Eig(utt.InferShapeTester): class test_Eig(utt.InferShapeTester):
op_class = Eig
op = eig
dtype = 'float64'
def setUp(self): def setUp(self):
super(test_Eig, self).setUp() super(test_Eig, self).setUp()
self.op_class = Eig self.rng = numpy.random.RandomState(utt.fetch_seed())
self.op = Eig() self.A = theano.tensor.matrix(dtype=self.dtype)
X = numpy.asarray(self.rng.rand(5, 5),
dtype=self.dtype)
self.S = X.dot(X.T)
def test_infer_shape(self): def test_infer_shape(self):
rng = numpy.random.RandomState(utt.fetch_seed()) A = self.A
A = theano.tensor.matrix() S = self.S
X = numpy.asarray(rng.rand(5, 5),
dtype=config.floatX)
self._compile_and_check([A], # theano.function inputs self._compile_and_check([A], # theano.function inputs
self.op(A), # theano.function outputs self.op(A), # theano.function outputs
# A must be square # S must be square
[X.dot(X.T)], [S],
self.op_class) self.op_class)
def test_eval(self): def test_eval(self):
import math import math
A = theano.tensor.matrix() A = theano.tensor.matrix(dtype=self.dtype)
self.assertEquals([e.eval({A: [[1]]}) for e in self.op(A)], self.assertEquals([e.eval({A: [[1]]}) for e in self.op(A)],
[[1.0], [[1.0]]]) [[1.0], [[1.0]]])
x = [[0, 1], [1, 0]]
w, v = [e.eval({A: [[0, 1], [1, 0]]}) w, v = [e.eval({A: x}) for e in self.op(A)]
for e in self.op(A)] assert_array_almost_equal(numpy.dot(x,v), w * v)
assert_array_almost_equal(w, [1, -1])
x = math.sqrt(2)/2 class test_Eigh(test_Eig):
assert_array_almost_equal(v, [[x, -x], [x, x]]) op = staticmethod(eigh)
def test_uplo(self):
S = self.S
a = theano.tensor.matrix()
wu, vu = [out.eval({a: S}) for out in self.op(a, 'U')]
wl, vl = [out.eval({a: S}) for out in self.op(a, 'L')]
assert_array_almost_equal(wu, wl)
assert_array_almost_equal(vu*numpy.sign(vu[0,:]),
vl*numpy.sign(vl[0,:]))
def test_grad(self):
S = self.S
utt.verify_grad(lambda x: self.op(x)[0], [S], rng=self.rng)
utt.verify_grad(lambda x: self.op(x)[1], [S], rng=self.rng)
utt.verify_grad(lambda x: self.op(x, 'U')[0], [S], rng=self.rng)
utt.verify_grad(lambda x: self.op(x, 'U')[1], [S], rng=self.rng)
class test_Eigh_float32(test_Eigh):
dtype = 'float32'
...@@ -13,14 +13,16 @@ __contact__ = "Razvan Pascanu <r.pascanu@gmail>" ...@@ -13,14 +13,16 @@ __contact__ = "Razvan Pascanu <r.pascanu@gmail>"
import itertools import itertools
import logging import logging
import numpy import numpy
import warnings
from theano.compile import SharedVariable, function from theano.compile import SharedVariable, function
from theano import compile from theano import compile
from theano import gof from theano import gof
from theano.gof.python25 import OrderedDict
from theano.tensor import opt from theano.tensor import opt
from theano import tensor from theano import tensor
from theano import config from theano import config
from theano.updates import Updates from theano.updates import OrderedUpdates
from theano.scan_module import scan_op from theano.scan_module import scan_op
...@@ -147,7 +149,7 @@ def scan(fn, ...@@ -147,7 +149,7 @@ def scan(fn,
n_seqs = len(seqs) n_seqs = len(seqs)
n_outs = len(outs_info) n_outs = len(outs_info)
return_steps = {} return_steps = OrderedDict()
# wrap outputs info in a dictionary if they are not already in one # wrap outputs info in a dictionary if they are not already in one
for i in xrange(n_outs): for i in xrange(n_outs):
if outs_info[i] is not None: if outs_info[i] is not None:
...@@ -242,7 +244,7 @@ def scan(fn, ...@@ -242,7 +244,7 @@ def scan(fn,
mit_sot_inner_inputs = [] mit_sot_inner_inputs = []
mit_sot_inner_slices = [] mit_sot_inner_slices = []
mit_sot_inner_outputs = [] mit_sot_inner_outputs = []
mit_sot_return_steps = {} mit_sot_return_steps = OrderedDict()
mit_sot_tap_array = [] mit_sot_tap_array = []
mit_sot_rightOrder = [] mit_sot_rightOrder = []
...@@ -251,7 +253,7 @@ def scan(fn, ...@@ -251,7 +253,7 @@ def scan(fn,
sit_sot_inner_inputs = [] sit_sot_inner_inputs = []
sit_sot_inner_slices = [] sit_sot_inner_slices = []
sit_sot_inner_outputs = [] sit_sot_inner_outputs = []
sit_sot_return_steps = {} sit_sot_return_steps = OrderedDict()
sit_sot_rightOrder = [] sit_sot_rightOrder = []
nit_sot_steps = [] nit_sot_steps = []
# go through outputs picking up time slices as needed # go through outputs picking up time slices as needed
...@@ -398,7 +400,8 @@ def scan(fn, ...@@ -398,7 +400,8 @@ def scan(fn,
not isinstance(arg, tensor.Constant))] not isinstance(arg, tensor.Constant))]
# when we apply the lambda expression we get a mixture of update rules # when we apply the lambda expression we get a mixture of update rules
# and outputs that needs to be separated # and outputs that needs to be separated
condition, outputs, updates = scan_utils.get_updates_and_outputs(fn(*args)) lambda_result = fn(*args)
condition, outputs, updates = scan_utils.get_updates_and_outputs(lambda_result)
if condition is not None: if condition is not None:
as_while = True as_while = True
else: else:
...@@ -464,6 +467,13 @@ def scan(fn, ...@@ -464,6 +467,13 @@ def scan(fn,
dummy_outs = outputs dummy_outs = outputs
if condition is not None: if condition is not None:
dummy_outs.append(condition) dummy_outs.append(condition)
# If we use a regular dict here, the results are non-deterministic
if not isinstance(updates, (list, tuple)):
if isinstance(updates, dict) and \
not isinstance(updates, gof.python25.OrderedDict):
warnings.warn("Using non-deterministic dictionary.")
dummy_f = function(dummy_args, dummy_f = function(dummy_args,
dummy_outs, dummy_outs,
updates=updates, updates=updates,
...@@ -508,7 +518,7 @@ def scan(fn, ...@@ -508,7 +518,7 @@ def scan(fn,
sit_sot_inner_outputs.append(outputs[i]) sit_sot_inner_outputs.append(outputs[i])
## Step 5.3 Outputs that correspond to update rules of shared variables ## Step 5.3 Outputs that correspond to update rules of shared variables
givens = {} givens = OrderedDict()
n_shared_outs = 0 n_shared_outs = 0
shared_scan_inputs = [] shared_scan_inputs = []
shared_inner_inputs = [] shared_inner_inputs = []
...@@ -527,7 +537,7 @@ def scan(fn, ...@@ -527,7 +537,7 @@ def scan(fn,
## Step 5.4 Outputs with no taps used in the input ## Step 5.4 Outputs with no taps used in the input
n_nit_sot = 0 n_nit_sot = 0
nit_sot_inner_outputs = [] nit_sot_inner_outputs = []
nit_sot_return_steps = {} nit_sot_return_steps = OrderedDict()
nit_sot_rightOrder = [] nit_sot_rightOrder = []
for i, out in enumerate(outs_info): for i, out in enumerate(outs_info):
if not 'taps' in out: if not 'taps' in out:
...@@ -582,7 +592,7 @@ def scan(fn, ...@@ -582,7 +592,7 @@ def scan(fn,
shared_inner_outputs) shared_inner_outputs)
if condition is not None: if condition is not None:
inner_outs.append(condition) inner_outs.append(condition)
new_givens = {} new_givens = OrderedDict()
for w, w_copy in givens.iteritems(): for w, w_copy in givens.iteritems():
new_givens[w] = w.type.filter_variable(w_copy) new_givens[w] = w.type.filter_variable(w_copy)
...@@ -593,7 +603,7 @@ def scan(fn, ...@@ -593,7 +603,7 @@ def scan(fn,
## ##
tap_array = mit_sot_tap_array + [[-1] for x in xrange(n_sit_sot)] tap_array = mit_sot_tap_array + [[-1] for x in xrange(n_sit_sot)]
info = {} info = OrderedDict()
info['tap_array'] = tap_array info['tap_array'] = tap_array
info['n_seqs'] = n_seqs info['n_seqs'] = n_seqs
...@@ -607,7 +617,7 @@ def scan(fn, ...@@ -607,7 +617,7 @@ def scan(fn,
info['truncate_gradient'] = -1 info['truncate_gradient'] = -1
info['name'] = name info['name'] = name
info['mode'] = mode info['mode'] = mode
info['destroy_map'] = {} info['destroy_map'] = OrderedDict()
info['inplace'] = False info['inplace'] = False
info['gpu'] = False info['gpu'] = False
info['as_while'] = as_while info['as_while'] = as_while
...@@ -641,7 +651,7 @@ def scan(fn, ...@@ -641,7 +651,7 @@ def scan(fn,
### and so on ... ### and so on ...
## ##
update_map = Updates() update_map = OrderedUpdates()
offset = n_mit_mot offset = n_mit_mot
offsets = [abs(numpy.min(x)) for x in mit_sot_tap_array] offsets = [abs(numpy.min(x)) for x in mit_sot_tap_array]
...@@ -675,4 +685,5 @@ def scan(fn, ...@@ -675,4 +685,5 @@ def scan(fn,
elif len(scan_out_list) == 0: elif len(scan_out_list) == 0:
scan_out_list = None scan_out_list = None
assert isinstance(update_map, dict) and 'Ordered' in str(type(update_map))
return (scan_out_list, update_map) return (scan_out_list, update_map)
...@@ -46,17 +46,12 @@ from itertools import izip ...@@ -46,17 +46,12 @@ from itertools import izip
import logging import logging
import numpy import numpy
from theano.compile import SharedVariable, function
from theano import compile
from theano import gof from theano import gof
from theano.tensor import opt, TensorVariable from theano.tensor import opt, TensorVariable
from theano.tensor.sharedvar import TensorSharedVariable from theano.tensor.sharedvar import TensorSharedVariable
from theano import tensor from theano import tensor
from theano import config
from theano.updates import Updates
from theano.scalar.sharedvar import shared as scalar_shared from theano.scalar.sharedvar import shared as scalar_shared
from theano.compile.pfunc import rebuild_collect_shared from theano.compile.pfunc import rebuild_collect_shared
import theano
import scan_op import scan_op
import scan_utils import scan_utils
......
...@@ -52,8 +52,9 @@ from theano import gof ...@@ -52,8 +52,9 @@ from theano import gof
from theano.tensor import opt from theano.tensor import opt
from theano import tensor from theano import tensor
from theano import config from theano import config
from theano.updates import Updates from theano.updates import OrderedUpdates
from theano.compile import ops from theano.compile import ops
from theano.gof.python25 import OrderedDict
import scan_op import scan_op
...@@ -112,7 +113,7 @@ def scan(fn, ...@@ -112,7 +113,7 @@ def scan(fn,
, outputs_info = [ dict(initial = Output1, taps = [-3,-5]) , outputs_info = [ dict(initial = Output1, taps = [-3,-5])
, dict(initial = Output2, taps = None) , dict(initial = Output2, taps = None)
, Output3 ] , Output3 ]
, non_sequences = [ Argument1, Argument 2]) , non_sequences = [ Argument1, Argument2])
``fn`` should expect the following arguments in this given order: ``fn`` should expect the following arguments in this given order:
...@@ -376,11 +377,11 @@ def scan(fn, ...@@ -376,11 +377,11 @@ def scan(fn,
n_seqs = len(seqs) n_seqs = len(seqs)
n_outs = len(outs_info) n_outs = len(outs_info)
return_steps = {} return_steps = OrderedDict()
# wrap sequences in a dictionary if they are not already dictionaries # wrap sequences in a dictionary if they are not already dictionaries
for i in xrange(n_seqs): for i in xrange(n_seqs):
if not isinstance(seqs[i], dict): if not isinstance(seqs[i], dict):
seqs[i] = dict(input=seqs[i], taps=[0]) seqs[i] = OrderedDict([('input', seqs[i]), ('taps', [0])])
elif seqs[i].get('taps', None): elif seqs[i].get('taps', None):
seqs[i]['taps'] = wrap_into_list(seqs[i]['taps']) seqs[i]['taps'] = wrap_into_list(seqs[i]['taps'])
elif seqs[i].get('taps', True) is None: elif seqs[i].get('taps', True) is None:
...@@ -402,7 +403,7 @@ def scan(fn, ...@@ -402,7 +403,7 @@ def scan(fn,
if not isinstance(outs_info[i], dict): if not isinstance(outs_info[i], dict):
# by default any output has a tap value of -1 # by default any output has a tap value of -1
outs_info[i] = dict(initial=outs_info[i], taps=[-1]) outs_info[i] = OrderedDict([('initial', outs_info[i]), ('taps', [-1])])
elif (not outs_info[i].get('initial', None) and elif (not outs_info[i].get('initial', None) and
outs_info[i].get('taps', None)): outs_info[i].get('taps', None)):
# ^ no initial state but taps provided # ^ no initial state but taps provided
...@@ -421,8 +422,8 @@ def scan(fn, ...@@ -421,8 +422,8 @@ def scan(fn,
outs_info[i]['taps'] = [-1] outs_info[i]['taps'] = [-1]
else: else:
# if a None is provided as the output info we replace it # if a None is provided as the output info we replace it
# with an empty dict() to simplify handling # with an empty OrderedDict() to simplify handling
outs_info[i] = dict() outs_info[i] = OrderedDict()
## ##
### Step 2. Generate inputs and outputs of the inner functions ### Step 2. Generate inputs and outputs of the inner functions
...@@ -565,7 +566,7 @@ def scan(fn, ...@@ -565,7 +566,7 @@ def scan(fn,
mit_sot_inner_inputs = [] mit_sot_inner_inputs = []
mit_sot_inner_slices = [] mit_sot_inner_slices = []
mit_sot_inner_outputs = [] mit_sot_inner_outputs = []
mit_sot_return_steps = {} mit_sot_return_steps = OrderedDict()
mit_sot_tap_array = [] mit_sot_tap_array = []
mit_sot_rightOrder = [] mit_sot_rightOrder = []
...@@ -574,7 +575,7 @@ def scan(fn, ...@@ -574,7 +575,7 @@ def scan(fn,
sit_sot_inner_inputs = [] sit_sot_inner_inputs = []
sit_sot_inner_slices = [] sit_sot_inner_slices = []
sit_sot_inner_outputs = [] sit_sot_inner_outputs = []
sit_sot_return_steps = {} sit_sot_return_steps = OrderedDict()
sit_sot_rightOrder = [] sit_sot_rightOrder = []
# go through outputs picking up time slices as needed # go through outputs picking up time slices as needed
...@@ -777,7 +778,7 @@ def scan(fn, ...@@ -777,7 +778,7 @@ def scan(fn,
# as non sequences at the end of our args # as non sequences at the end of our args
fake_nonseqs = [x.type() for x in non_seqs] fake_nonseqs = [x.type() for x in non_seqs]
fake_outputs = scan_utils.clone(outputs, fake_outputs = scan_utils.clone(outputs,
replace=dict(zip(non_seqs, replace=OrderedDict(zip(non_seqs,
fake_nonseqs))) fake_nonseqs)))
all_inputs = itertools.ifilter( all_inputs = itertools.ifilter(
lambda x: (isinstance(x, gof.Variable) and lambda x: (isinstance(x, gof.Variable) and
...@@ -825,7 +826,7 @@ def scan(fn, ...@@ -825,7 +826,7 @@ def scan(fn,
n_outs = len(dummy_f.maker.outputs) n_outs = len(dummy_f.maker.outputs)
if as_while: if as_while:
n_outs = n_outs - 1 n_outs = n_outs - 1
outs_info = [dict() for x in xrange(n_outs)] outs_info = [OrderedDict() for x in xrange(n_outs)]
## Step 5.1 Outputs with taps different than -1 ## Step 5.1 Outputs with taps different than -1
...@@ -839,7 +840,7 @@ def scan(fn, ...@@ -839,7 +840,7 @@ def scan(fn,
sit_sot_inner_outputs.append(outputs[i]) sit_sot_inner_outputs.append(outputs[i])
## Step 5.3 Outputs that correspond to update rules of shared variables ## Step 5.3 Outputs that correspond to update rules of shared variables
givens = {} givens = OrderedDict()
n_shared_outs = 0 n_shared_outs = 0
shared_scan_inputs = [] shared_scan_inputs = []
shared_inner_inputs = [] shared_inner_inputs = []
...@@ -879,7 +880,7 @@ def scan(fn, ...@@ -879,7 +880,7 @@ def scan(fn,
## Step 5.4 Outputs with no taps used in the input ## Step 5.4 Outputs with no taps used in the input
n_nit_sot = 0 n_nit_sot = 0
nit_sot_inner_outputs = [] nit_sot_inner_outputs = []
nit_sot_return_steps = {} nit_sot_return_steps = OrderedDict()
nit_sot_rightOrder = [] nit_sot_rightOrder = []
for i, out in enumerate(outs_info): for i, out in enumerate(outs_info):
if not 'taps' in out: if not 'taps' in out:
...@@ -902,7 +903,7 @@ def scan(fn, ...@@ -902,7 +903,7 @@ def scan(fn,
if (not isinstance(arg, SharedVariable) and if (not isinstance(arg, SharedVariable) and
not isinstance(arg, tensor.Constant))] not isinstance(arg, tensor.Constant))]
givens.update(dict(zip(other_scan_args, other_inner_args))) givens.update(OrderedDict(zip(other_scan_args, other_inner_args)))
other_shared_scan_args = [arg.variable for arg other_shared_scan_args = [arg.variable for arg
in dummy_f.maker.expanded_inputs in dummy_f.maker.expanded_inputs
if (isinstance(arg.variable, SharedVariable) and if (isinstance(arg.variable, SharedVariable) and
...@@ -911,7 +912,7 @@ def scan(fn, ...@@ -911,7 +912,7 @@ def scan(fn,
in dummy_f.maker.expanded_inputs in dummy_f.maker.expanded_inputs
if (isinstance(arg.variable, SharedVariable) and if (isinstance(arg.variable, SharedVariable) and
not arg.update)] not arg.update)]
givens.update(dict(zip(other_shared_scan_args, givens.update(OrderedDict(zip(other_shared_scan_args,
other_shared_inner_args))) other_shared_inner_args)))
## ##
...@@ -943,7 +944,7 @@ def scan(fn, ...@@ -943,7 +944,7 @@ def scan(fn,
# replace w with w_copy, where w is CudaNdarray # replace w with w_copy, where w is CudaNdarray
# and w_copy is TensorType. This is caused because shared # and w_copy is TensorType. This is caused because shared
# variables are put on GPU right aways >:| , # variables are put on GPU right aways >:| ,
new_givens = {} new_givens = OrderedDict()
for w, w_copy in givens.iteritems(): for w, w_copy in givens.iteritems():
if (isinstance(w.type, cuda.CudaNdarrayType) if (isinstance(w.type, cuda.CudaNdarrayType)
...@@ -962,7 +963,7 @@ def scan(fn, ...@@ -962,7 +963,7 @@ def scan(fn,
## ##
tap_array = mit_sot_tap_array + [[-1] for x in xrange(n_sit_sot)] tap_array = mit_sot_tap_array + [[-1] for x in xrange(n_sit_sot)]
info = {} info = OrderedDict()
info['tap_array'] = tap_array info['tap_array'] = tap_array
info['n_seqs'] = n_seqs info['n_seqs'] = n_seqs
...@@ -976,7 +977,7 @@ def scan(fn, ...@@ -976,7 +977,7 @@ def scan(fn,
info['truncate_gradient'] = truncate_gradient info['truncate_gradient'] = truncate_gradient
info['name'] = name info['name'] = name
info['mode'] = mode info['mode'] = mode
info['destroy_map'] = {} info['destroy_map'] = OrderedDict()
info['gpu'] = False info['gpu'] = False
info['as_while'] = as_while info['as_while'] = as_while
info['profile'] = profile info['profile'] = profile
...@@ -1012,7 +1013,7 @@ def scan(fn, ...@@ -1012,7 +1013,7 @@ def scan(fn,
### and so on ... ### and so on ...
## ##
update_map = Updates() update_map = OrderedUpdates()
def remove_dimensions(outs, steps_return, offsets=None): def remove_dimensions(outs, steps_return, offsets=None):
out_ls = [] out_ls = []
......
...@@ -18,12 +18,13 @@ import logging ...@@ -18,12 +18,13 @@ import logging
from itertools import izip from itertools import izip
import numpy import numpy
import warnings
import theano import theano
from theano.compile.pfunc import rebuild_collect_shared from theano.compile.pfunc import rebuild_collect_shared
from theano import gof from theano import gof
from theano import tensor, scalar from theano import tensor, scalar
from theano.gof.python25 import all from theano.gof.python25 import all, OrderedDict
from theano.tensor.basic import get_constant_value from theano.tensor.basic import get_constant_value
...@@ -181,12 +182,17 @@ def clone(output, ...@@ -181,12 +182,17 @@ def clone(output,
def get_updates_and_outputs(ls): def get_updates_and_outputs(ls):
""" """
This function tries to recognize the updates dictionary, the This function tries to recognize the updates OrderedDict, the
list of outputs and the stopping condition returned by the list of outputs and the stopping condition returned by the
lambda expression and arrange them in a predefined order lambda expression and arrange them in a predefined order
WRITEME: what is the type of ls? how is it formatted?
if it's not in the predefined order already, how does
this function know how to put it in that order?
""" """
def is_outputs(elem): def is_outputs(elem):
if (isinstance(elem, (list, tuple)) and if (isinstance(elem, (list, tuple)) and
all([isinstance(x, theano.Variable) for x in elem])): all([isinstance(x, theano.Variable) for x in elem])):
...@@ -197,6 +203,11 @@ def get_updates_and_outputs(ls): ...@@ -197,6 +203,11 @@ def get_updates_and_outputs(ls):
def is_updates(elem): def is_updates(elem):
if isinstance(elem, dict): if isinstance(elem, dict):
# Make sure the updates will be applied in a deterministic order
if not isinstance(elem, gof.python25.OrderedDict):
warnings.warn("Expected OrderedDict or OrderedUpdates, got "\
+str(type(elem))+". This can make your script non-"
"deterministic.")
return True return True
# Dictionaries can be given as lists of tuples # Dictionaries can be given as lists of tuples
if (isinstance(elem, (list, tuple)) and if (isinstance(elem, (list, tuple)) and
...@@ -242,10 +253,11 @@ def get_updates_and_outputs(ls): ...@@ -242,10 +253,11 @@ def get_updates_and_outputs(ls):
'values, you can use `tensor.constant` to turn them into ' 'values, you can use `tensor.constant` to turn them into '
'Theano variables.') 'Theano variables.')
if is_outputs(ls): if is_outputs(ls):
return None, _list(ls), {} return None, _list(ls), OrderedDict()
if is_updates(ls): if is_updates(ls):
return None, [], dict(ls) return None, [], OrderedDict(ls)
error_msg = ('Scan cannot parse the return value of your lambda ' error_msg = ('Scan cannot parse the return value of your lambda '
'expression, which is: %s' % (ls,)) 'expression, which is: %s' % (ls,))
if not isinstance(ls, (list, tuple)): if not isinstance(ls, (list, tuple)):
...@@ -258,16 +270,16 @@ def get_updates_and_outputs(ls): ...@@ -258,16 +270,16 @@ def get_updates_and_outputs(ls):
if len(ls) == 2: if len(ls) == 2:
if is_outputs(ls[0]): if is_outputs(ls[0]):
if is_updates(ls[1]): if is_updates(ls[1]):
return (None, _list(ls[0]), dict(ls[1])) return (None, _list(ls[0]), OrderedDict(ls[1]))
elif is_condition(ls[1]): elif is_condition(ls[1]):
return (ls[1].condition, _list(ls[0]), {}) return (ls[1].condition, _list(ls[0]), OrderedDict())
else: else:
raise ValueError(error_msg) raise ValueError(error_msg)
elif is_updates(ls[0]): elif is_updates(ls[0]):
if is_outputs(ls[1]): if is_outputs(ls[1]):
raise ValueError(deprecation_msg) raise ValueError(deprecation_msg)
elif is_condition(ls[1]): elif is_condition(ls[1]):
return (ls[1].condition, [], dict(ls[0])) return (ls[1].condition, [], OrderedDict(ls[0]))
else: else:
raise ValueError(error_msg) raise ValueError(error_msg)
else: else:
...@@ -276,7 +288,7 @@ def get_updates_and_outputs(ls): ...@@ -276,7 +288,7 @@ def get_updates_and_outputs(ls):
if is_outputs(ls[0]): if is_outputs(ls[0]):
if is_updates(ls[1]): if is_updates(ls[1]):
if is_condition(ls[2]): if is_condition(ls[2]):
return (ls[2].condition, _list(ls[0]), dict(ls[1])) return (ls[2].condition, _list(ls[0]), OrderedDict(ls[1]))
else: else:
raise ValueError(error_msg) raise ValueError(error_msg)
else: else:
......
...@@ -16,6 +16,7 @@ from theano.compile.pfunc import rebuild_collect_shared ...@@ -16,6 +16,7 @@ from theano.compile.pfunc import rebuild_collect_shared
from theano.gof.python25 import any from theano.gof.python25 import any
from theano.tests import unittest_tools as utt from theano.tests import unittest_tools as utt
import theano.scalar.sharedvar import theano.scalar.sharedvar
from theano.gof.python25 import OrderedDict
from numpy.testing.noseclasses import KnownFailureTest from numpy.testing.noseclasses import KnownFailureTest
...@@ -1009,7 +1010,7 @@ class T_Scan(unittest.TestCase): ...@@ -1009,7 +1010,7 @@ class T_Scan(unittest.TestCase):
x0 = theano.tensor.constant(x0) x0 = theano.tensor.constant(x0)
to_replace = outputs[0].owner.inputs[0].owner.inputs[1] to_replace = outputs[0].owner.inputs[0].owner.inputs[1]
outputs = theano.clone(outputs, outputs = theano.clone(outputs,
replace={to_replace: x0}) replace=[(to_replace, x0)])
mode = theano.compile.mode.get_mode(None).including('inplace') mode = theano.compile.mode.get_mode(None).including('inplace')
f9 = theano.function([], f9 = theano.function([],
outputs, outputs,
...@@ -1299,7 +1300,7 @@ class T_Scan(unittest.TestCase): ...@@ -1299,7 +1300,7 @@ class T_Scan(unittest.TestCase):
state = theano.shared(v_state, 'vstate') state = theano.shared(v_state, 'vstate')
def f_2(): def f_2():
return {state: 2 * state} return OrderedDict([(state, 2 * state)])
n_steps = theano.tensor.iscalar('nstep') n_steps = theano.tensor.iscalar('nstep')
output, updates = theano.scan(f_2, output, updates = theano.scan(f_2,
[], [],
...@@ -1829,7 +1830,7 @@ class T_Scan(unittest.TestCase): ...@@ -1829,7 +1830,7 @@ class T_Scan(unittest.TestCase):
X = theano.shared(numpy.array(1)) X = theano.shared(numpy.array(1))
out, updates = theano.scan( out, updates = theano.scan(
lambda: {X: X + 1}, lambda: OrderedDict([(X, (X + 1))]),
outputs_info=[], outputs_info=[],
non_sequences=[], non_sequences=[],
sequences=[], sequences=[],
...@@ -1844,7 +1845,7 @@ class T_Scan(unittest.TestCase): ...@@ -1844,7 +1845,7 @@ class T_Scan(unittest.TestCase):
y = theano.shared(numpy.array(1)) y = theano.shared(numpy.array(1))
out, updates = theano.scan( out, updates = theano.scan(
lambda: {x: x + 1, y: x}, lambda: OrderedDict([(x, x + 1), (y, x)]),
outputs_info=[], outputs_info=[],
non_sequences=[], non_sequences=[],
sequences=[], sequences=[],
...@@ -1880,11 +1881,11 @@ class T_Scan(unittest.TestCase): ...@@ -1880,11 +1881,11 @@ class T_Scan(unittest.TestCase):
b = theano.shared(numpy.random.rand(5, 4)) b = theano.shared(numpy.random.rand(5, 4))
def inner_func(a): def inner_func(a):
return a + 1, {b: 2 * b} return a + 1, OrderedDict([(b, 2 * b)])
out, updates = theano.scan( out, updates = theano.scan(
inner_func, inner_func,
outputs_info=[{'initial': init_a}], outputs_info=[OrderedDict([('initial', init_a)])],
n_steps=1) n_steps=1)
out = out[-1] out = out[-1]
assert out.type.ndim == a.type.ndim assert out.type.ndim == a.type.ndim
...@@ -1967,7 +1968,7 @@ class T_Scan(unittest.TestCase): ...@@ -1967,7 +1968,7 @@ class T_Scan(unittest.TestCase):
f1 = z * (x + y) ** 2 + 5 f1 = z * (x + y) ** 2 + 5
f2 = theano.clone(f1, f2 = theano.clone(f1,
replace={y: y2}, replace=OrderedDict([(y, y2)]),
strict=True, strict=True,
copy_inputs=True) copy_inputs=True)
f2_inp = theano.gof.graph.inputs([f2]) f2_inp = theano.gof.graph.inputs([f2])
...@@ -1986,7 +1987,7 @@ class T_Scan(unittest.TestCase): ...@@ -1986,7 +1987,7 @@ class T_Scan(unittest.TestCase):
f1 = z * (x + y) ** 2 + 5 f1 = z * (x + y) ** 2 + 5
f2 = theano.clone(f1, f2 = theano.clone(f1,
replace={y: y2}, replace=OrderedDict([(y, y2)]),
strict=False, strict=False,
copy_inputs=True) copy_inputs=True)
f2_inp = theano.gof.graph.inputs([f2]) f2_inp = theano.gof.graph.inputs([f2])
...@@ -2005,7 +2006,7 @@ class T_Scan(unittest.TestCase): ...@@ -2005,7 +2006,7 @@ class T_Scan(unittest.TestCase):
f1 = z * (x + y) ** 2 + 5 f1 = z * (x + y) ** 2 + 5
f2 = theano.clone(f1, f2 = theano.clone(f1,
replace={y: y2}, replace=[(y, y2)],
strict=True, strict=True,
copy_inputs=False) copy_inputs=False)
f2_inp = theano.gof.graph.inputs([f2]) f2_inp = theano.gof.graph.inputs([f2])
...@@ -2024,7 +2025,7 @@ class T_Scan(unittest.TestCase): ...@@ -2024,7 +2025,7 @@ class T_Scan(unittest.TestCase):
f1 = z * (x + y) ** 2 + 5 f1 = z * (x + y) ** 2 + 5
f2 = theano.clone(f1, f2 = theano.clone(f1,
replace={y: y2}, replace=[(y, y2)],
strict=False, strict=False,
copy_inputs=False) copy_inputs=False)
f2_inp = theano.gof.graph.inputs([f2]) f2_inp = theano.gof.graph.inputs([f2])
...@@ -2204,15 +2205,15 @@ class T_Scan(unittest.TestCase): ...@@ -2204,15 +2205,15 @@ class T_Scan(unittest.TestCase):
v2 = theano.shared(numpy.ones((5, 5), dtype=theano.config.floatX)) v2 = theano.shared(numpy.ones((5, 5), dtype=theano.config.floatX))
shapef = theano.function([W], shapef = theano.function([W],
expr, expr,
givens={initial: v1, givens=OrderedDict([(initial, v1),
inpt: v2}) (inpt, v2)]))
# First execution to cache n_steps # First execution to cache n_steps
shapef(numpy.ones((5, 5), dtype=theano.config.floatX)) shapef(numpy.ones((5, 5), dtype=theano.config.floatX))
cost = expr.sum() cost = expr.sum()
d_cost_wrt_W = tensor.grad(cost, [W]) d_cost_wrt_W = tensor.grad(cost, [W])
f = theano.function([W, inpt], d_cost_wrt_W, f = theano.function([W, inpt], d_cost_wrt_W,
givens={initial: theano.shared(numpy.zeros(5))}) givens=OrderedDict([(initial, theano.shared(numpy.zeros(5)))]))
rval = numpy.asarray([[5187989] * 5] * 5, dtype=theano.config.floatX) rval = numpy.asarray([[5187989] * 5] * 5, dtype=theano.config.floatX)
arg1 = numpy.ones((5, 5), dtype=theano.config.floatX) arg1 = numpy.ones((5, 5), dtype=theano.config.floatX)
...@@ -3166,7 +3167,7 @@ class T_Scan(unittest.TestCase): ...@@ -3166,7 +3167,7 @@ class T_Scan(unittest.TestCase):
shared_var = theano.shared(numpy.float32(1.)) shared_var = theano.shared(numpy.float32(1.))
def inner_fn(): def inner_fn():
return [], {shared_var: shared_var + numpy.float32(1.)} return [], OrderedDict([(shared_var, shared_var + numpy.float32(1.))])
_, updates = theano.scan(inner_fn, _, updates = theano.scan(inner_fn,
n_steps=10, n_steps=10,
truncate_gradient=-1, truncate_gradient=-1,
...@@ -3239,7 +3240,7 @@ class T_Scan(unittest.TestCase): ...@@ -3239,7 +3240,7 @@ class T_Scan(unittest.TestCase):
seq = tensor.matrix() seq = tensor.matrix()
initial_value = theano.shared(numpy.zeros((4, 1), initial_value = theano.shared(numpy.zeros((4, 1),
dtype=theano.config.floatX)) dtype=theano.config.floatX))
outputs_info = [{'initial': initial_value, 'taps': [-4]}, None] outputs_info = [OrderedDict([('initial', initial_value), ('taps', [-4])]), None]
results, updates = theano.scan(fn=onestep, results, updates = theano.scan(fn=onestep,
sequences=seq, sequences=seq,
outputs_info=outputs_info) outputs_info=outputs_info)
...@@ -3259,13 +3260,13 @@ class T_Scan(unittest.TestCase): ...@@ -3259,13 +3260,13 @@ class T_Scan(unittest.TestCase):
seq = tensor.matrix() seq = tensor.matrix()
initial_value = theano.shared(numpy.zeros((4, 1), initial_value = theano.shared(numpy.zeros((4, 1),
dtype=theano.config.floatX)) dtype=theano.config.floatX))
outputs_info = [{'initial': initial_value, 'taps': [-4]}, None] outputs_info = [OrderedDict([('initial', initial_value), ('taps', [-4])]), None]
results, _ = theano.scan(fn=onestep, results, _ = theano.scan(fn=onestep,
sequences=seq, sequences=seq,
outputs_info=outputs_info) outputs_info=outputs_info)
sharedvar = theano.shared(numpy.zeros((1, 1), sharedvar = theano.shared(numpy.zeros((1, 1),
dtype=theano.config.floatX)) dtype=theano.config.floatX))
updates = {sharedvar: results[0][-1:]} updates = OrderedDict([(sharedvar, results[0][-1:])])
f = theano.function([seq], results[1], updates=updates) f = theano.function([seq], results[1], updates=updates)
assert numpy.all(exp_out == f(inp)) assert numpy.all(exp_out == f(inp))
...@@ -3354,9 +3355,9 @@ def test_speed(): ...@@ -3354,9 +3355,9 @@ def test_speed():
theano.printing.debugprint(s_rinc) theano.printing.debugprint(s_rinc)
f = theano.function([], f = theano.function([],
[], [],
updates={ updates=OrderedDict([
s_i: s_i + 1, (s_i, s_i + 1),
shared_r: s_rinc}, (shared_r, s_rinc)]),
mode=theano.Mode(linker='cvm')) mode=theano.Mode(linker='cvm'))
f._check_for_aliased_inputs = False f._check_for_aliased_inputs = False
t2 = time.time() t2 = time.time()
...@@ -3430,9 +3431,9 @@ def test_speed_rnn(): ...@@ -3430,9 +3431,9 @@ def test_speed_rnn():
w)), w)),
tolerate_inplace_aliasing=True) tolerate_inplace_aliasing=True)
f = theano.function([], [], f = theano.function([], [],
updates={ updates=OrderedDict([
s_i: s_i + 1, (s_i, s_i + 1),
shared_r: s_rinc}, (shared_r, s_rinc)]),
mode=theano.Mode(linker='cvm')) mode=theano.Mode(linker='cvm'))
#theano.printing.debugprint(f) #theano.printing.debugprint(f)
f_fn = f.fn f_fn = f.fn
...@@ -3495,9 +3496,9 @@ def test_speed_batchrnn(): ...@@ -3495,9 +3496,9 @@ def test_speed_batchrnn():
tolerate_inplace_aliasing=True) tolerate_inplace_aliasing=True)
f = theano.function([], f = theano.function([],
[], [],
updates={ updates=[
s_i: s_i + 1, (s_i, s_i + 1),
shared_r: s_rinc}, (shared_r, s_rinc)],
mode=theano.Mode(linker='cvm')) mode=theano.Mode(linker='cvm'))
#theano.printing.debugprint(f) #theano.printing.debugprint(f)
f_fn = f.fn f_fn = f.fn
......
...@@ -1219,7 +1219,7 @@ class UsmmTests(unittest.TestCase): ...@@ -1219,7 +1219,7 @@ class UsmmTests(unittest.TestCase):
mode = theano.compile.mode.get_default_mode().excluding('fusion') mode = theano.compile.mode.get_default_mode().excluding('fusion')
if inplace: if inplace:
updates = {z: z - a * theano.sparse.dot(x, y)} updates = [(z, z - a * theano.sparse.dot(x, y))]
f_a = theano.function([a, x, y], [], f_a = theano.function([a, x, y], [],
updates=updates, updates=updates,
mode=mode) mode=mode)
......
...@@ -27,6 +27,7 @@ from theano.tensor.utils import hash_from_ndarray ...@@ -27,6 +27,7 @@ from theano.tensor.utils import hash_from_ndarray
from theano.scalar import ComplexError, IntegerDivisionError from theano.scalar import ComplexError, IntegerDivisionError
import theano.scalar.sharedvar import theano.scalar.sharedvar
from theano.gradient import grad_undefined from theano.gradient import grad_undefined
from theano.gradient import grad_not_implemented
from theano.gradient import DisconnectedType from theano.gradient import DisconnectedType
### set up the external interface ### set up the external interface
...@@ -1639,6 +1640,9 @@ class _tensor_py_operators: ...@@ -1639,6 +1640,9 @@ class _tensor_py_operators:
def ravel(self): def ravel(self):
return flatten(self) return flatten(self)
def diagonal(self, offset=0, axis1=0, axis2=1):
    """Delegate to the module-level `diagonal` function with this variable."""
    return diagonal(self, offset=offset, axis1=axis1, axis2=axis2)
# CASTING # CASTING
def astype(self, dtype): def astype(self, dtype):
return cast(self, dtype) return cast(self, dtype)
...@@ -1796,6 +1800,8 @@ class _tensor_py_operators: ...@@ -1796,6 +1800,8 @@ class _tensor_py_operators:
"""See `theano.tensor.conj`""" """See `theano.tensor.conj`"""
return conj(self) return conj(self)
conjugate = conj
def repeat(self, repeats, axis=None): def repeat(self, repeats, axis=None):
"""See `theano.tensor.repeat`""" """See `theano.tensor.repeat`"""
from theano.tensor.extra_ops import repeat from theano.tensor.extra_ops import repeat
...@@ -7310,3 +7316,96 @@ def all(x, axis=None, keepdims=False): ...@@ -7310,3 +7316,96 @@ def all(x, axis=None, keepdims=False):
if keepdims: if keepdims:
out = makeKeepDims(x, out, axis) out = makeKeepDims(x, out, axis)
return out return out
class Diagonal(Op):
    """Return specified diagonals.

    The Op is parametrized by `offset`, `axis1` and `axis2`, which are
    forwarded unchanged to the `diagonal` method of the input value when
    the Op is executed.

    :param x: A tensor variable with x.ndim >= 2.

    :return: A tensor variable with one dimension fewer than `x`; for a
        matrix input this is a vector holding the diagonal elements.
    """

    def __init__(self, offset=0, axis1=0, axis2=1):
        self.offset = offset
        self.axis1 = axis1
        self.axis2 = axis2

    def __eq__(self, other):
        # The parameters are part of the Op's identity: Diagonal(1) and
        # Diagonal(0) compute different results and must not compare equal.
        return (type(self) == type(other) and
                self.offset == other.offset and
                self.axis1 == other.axis1 and
                self.axis2 == other.axis2)

    def __hash__(self):
        # Kept consistent with __eq__: equal Ops hash equally.
        return hash((type(self), self.offset, self.axis1, self.axis2))

    def make_node(self, x):
        x = as_tensor_variable(x)
        # Two axes are consumed to produce one, so at least 2 dims are needed.
        assert x.ndim >= 2
        return Apply(self, [x], [tensor(dtype=x.dtype,
                                        broadcastable=[False] * (x.ndim - 1))])

    def perform(self, node, inputs, output_storage):
        x, = inputs
        z, = output_storage
        z[0] = x.diagonal(self.offset, self.axis1, self.axis2)

    def grad(self, inputs, output_grads):
        x, = inputs
        return [grad_not_implemented(self, 0, x)]

    def infer_shape(self, node, shapes):
        in_shape, = shapes
        in_shape = tuple(in_shape)
        ndim = len(in_shape)

        # Normalize negative axes so that they can be compared with the
        # positional indices below; otherwise the two consumed axes would
        # not be removed from the output shape.
        axis1 = self.axis1
        if axis1 < 0:
            axis1 += ndim
        axis2 = self.axis2
        if axis2 < 0:
            axis2 += ndim

        dim1 = in_shape[axis1]
        dim2 = in_shape[axis2]
        out_shape = [d for i, d in enumerate(in_shape)
                     if i not in (axis1, axis2)]

        # The following logic is inspired by C code of PyArray_Diagonal().
        offset = self.offset
        if offset > 0:
            diag_size = clip(dim2 - offset, 0, dim1)
        elif offset < 0:
            diag_size = clip(dim1 + offset, 0, dim2)
        else:
            diag_size = minimum(dim1, dim2)
        # The diagonal becomes the last dimension of the output.
        out_shape.append(diag_size)
        return [tuple(out_shape)]

    def __str__(self):
        return self.__class__.__name__
def diagonal(a, offset=0, axis1=0, axis2=1):
    """Build the graph extracting a diagonal of `a`.

    With the default arguments (main diagonal over axes 0 and 1) the call
    is routed to `theano.sandbox.linalg.extract_diag`; any other
    combination goes through the `Diagonal` Op.
    """
    uses_defaults = (offset, axis1, axis2) == (0, 0, 1)
    if not uses_defaults:
        return Diagonal(offset, axis1, axis2)(a)
    # Imported lazily, only on the path that needs it.
    from theano.sandbox.linalg import extract_diag
    return extract_diag(a)
class Diag(Op):
def __eq__(self, other):
return type(self) == type(other)
def __hash__(self):
return hash(type(self))
def make_node(self, diag):
diag = as_tensor_variable(diag)
if diag.type.ndim != 1:
raise TypeError('data argument must be a vector', diag.type)
return Apply(self, [diag], [matrix(dtype=diag.dtype)])
def perform(self, node, inputs, (z,)):
z[0] = numpy.diag(inputs[0])
def grad(self, inputs, (gz,)):
return [diagonal(gz)]
def infer_shape(self, nodes, shapes):
return [(shapes[0][0],) * 2]
def __str__(self):
return self.__class__.__name__
def diag(v, k=0):
    """Dispatch on the number of dimensions of `v`.

    * 1-d input: build a matrix through the `Diag` Op (only `k == 0` is
      supported, enforced with an assertion).
    * 2-d input: extract the `k`-th diagonal through `diagonal`.
    * anything else: raise ValueError.
    """
    rank = v.ndim
    if rank == 2:
        return diagonal(v, k)
    if rank != 1:
        raise ValueError("Input must be 1- or 2-d.")
    assert k == 0, "diagonals other than main are not implemented"
    return Diag()(v)
...@@ -9,7 +9,7 @@ import numpy ...@@ -9,7 +9,7 @@ import numpy
import theano import theano
import theano.tensor as T import theano.tensor as T
from theano.gof.python25 import any from theano.gof.python25 import any, OrderedDict
def gen_data(): def gen_data():
...@@ -293,7 +293,7 @@ def test_mlp(): ...@@ -293,7 +293,7 @@ def test_mlp():
# TODO: refine that and include only those # TODO: refine that and include only those
mode = theano.compile.get_default_mode().including('fast_run') mode = theano.compile.get_default_mode().including('fast_run')
updates2 = {} updates2 = OrderedDict()
updates2[classifier.hiddenLayer.params[0]]=T.grad(cost,classifier.hiddenLayer.params[0]) updates2[classifier.hiddenLayer.params[0]]=T.grad(cost,classifier.hiddenLayer.params[0])
train_model =theano.function( inputs = [index], train_model =theano.function( inputs = [index],
......
...@@ -40,7 +40,7 @@ from theano.tensor import (_shared, wvector, bvector, autocast_float_as, ...@@ -40,7 +40,7 @@ from theano.tensor import (_shared, wvector, bvector, autocast_float_as,
tile, patternbroadcast, Eye, Shape, Default, Dot, PermuteRowElements, tile, patternbroadcast, Eye, Shape, Default, Dot, PermuteRowElements,
ScalarFromTensor, TensorFromScalar, dtensor4, Rebroadcast, Alloc, ScalarFromTensor, TensorFromScalar, dtensor4, Rebroadcast, Alloc,
dtensor3, SpecifyShape, Mean, IncSubtensor, AdvancedIncSubtensor1, dtensor3, SpecifyShape, Mean, IncSubtensor, AdvancedIncSubtensor1,
itensor3, Tile, AdvancedIncSubtensor, switch) itensor3, Tile, AdvancedIncSubtensor, switch, Diagonal, Diag)
from theano.tests import unittest_tools as utt from theano.tests import unittest_tools as utt
from theano.printing import debugprint from theano.printing import debugprint
...@@ -6590,6 +6590,34 @@ class TestInferShape(utt.InferShapeTester): ...@@ -6590,6 +6590,34 @@ class TestInferShape(utt.InferShapeTester):
[Eye()(aiscal, biscal, ciscal)], [Eye()(aiscal, biscal, ciscal)],
[3, 5, 0], Eye) [3, 5, 0], Eye)
# Diagonal
atens3 = tensor3()
atens3_val = rand(4, 5, 3)
atens3_diag = Diagonal()(atens3)
self._compile_and_check([atens3], [atens3_diag],
[atens3_val], Diagonal)
atens3_diag = Diagonal(1)(atens3)
self._compile_and_check([atens3], [atens3_diag],
[atens3_val], Diagonal)
atens3_diag = Diagonal(-1)(atens3)
self._compile_and_check([atens3], [atens3_diag],
[atens3_val], Diagonal)
atens3_diag = Diagonal(1,0,2)(atens3)
self._compile_and_check([atens3], [atens3_diag],
[atens3_val], Diagonal)
atens3_diag = Diagonal(1,1,2)(atens3)
self._compile_and_check([atens3], [atens3_diag],
[atens3_val], Diagonal)
atens3_diag = Diagonal(1,2,0)(atens3)
self._compile_and_check([atens3], [atens3_diag],
[atens3_val], Diagonal)
# Diag
advec = dvector()
advec_val = rand(4)
self._compile_and_check([advec], [Diag()(advec)],
[advec_val], Diag)
# Shape # Shape
# 'opt.Makevector' precludes optimizer from disentangling # 'opt.Makevector' precludes optimizer from disentangling
# elements of shape # elements of shape
...@@ -7070,7 +7098,7 @@ class TestTensorInstanceMethods(unittest.TestCase): ...@@ -7070,7 +7098,7 @@ class TestTensorInstanceMethods(unittest.TestCase):
assert_array_equal(X.argsort().eval({X: x}), x.argsort()) assert_array_equal(X.argsort().eval({X: x}), x.argsort())
assert_array_equal(X.argsort(1).eval({X: x}), x.argsort(1)) assert_array_equal(X.argsort(1).eval({X: x}), x.argsort(1))
def test_dot(self): def test_clip(self):
X, Y = self.vars X, Y = self.vars
x, y = self.vals x, y = self.vals
Z = X.clip(0.5 - Y, 0.5 + Y) Z = X.clip(0.5 - Y, 0.5 + Y)
...@@ -7099,6 +7127,7 @@ class TestTensorInstanceMethods(unittest.TestCase): ...@@ -7099,6 +7127,7 @@ class TestTensorInstanceMethods(unittest.TestCase):
Z = X + Y * 1j Z = X + Y * 1j
z = x + y * 1j z = x + y * 1j
assert_array_equal(Z.conj().eval({Z: z}), z.conj()) assert_array_equal(Z.conj().eval({Z: z}), z.conj())
assert_array_equal(Z.conjugate().eval({Z: z}), z.conj())
def test_round(self): def test_round(self):
X, _ = self.vars X, _ = self.vars
...@@ -7128,6 +7157,16 @@ class TestTensorInstanceMethods(unittest.TestCase): ...@@ -7128,6 +7157,16 @@ class TestTensorInstanceMethods(unittest.TestCase):
x, _ = self.vals x, _ = self.vals
assert_array_equal(X.ravel().eval({X: x}), x.ravel()) assert_array_equal(X.ravel().eval({X: x}), x.ravel())
def test_diagonal(self):
    """Compare `X.diagonal(...)` with the `diagonal` method of the test value."""
    X, _ = self.vars
    x, _ = self.vals
    # Same argument combinations, in the same order, as one table.
    arg_sets = [(), (1,), (-1,),
                (1, 0, 1), (-1, 0, 1), (0, 1, 0), (-2, 1, 0)]
    for args in arg_sets:
        assert_array_equal(X.diagonal(*args).eval({X: x}),
                           x.diagonal(*args))
if __name__ == '__main__': if __name__ == '__main__':
......
...@@ -185,8 +185,8 @@ class t_gemm(TestCase): ...@@ -185,8 +185,8 @@ class t_gemm(TestCase):
l2_reg = T.constant(0.0001).astype(config.floatX) l2_reg = T.constant(0.0001).astype(config.floatX)
#test constant merge with gemm #test constant merge with gemm
f = theano.function([a, b], updates={s: lr1 * T.dot(a, b) + f = theano.function([a, b], updates=[(s, lr1 * T.dot(a, b) +
l2_reg * lr2 * s}, l2_reg * lr2 * s)],
mode=mode_not_fast_compile).maker.fgraph.toposort() mode=mode_not_fast_compile).maker.fgraph.toposort()
#[Gemm{inplace}(<TensorType(float64, matrix)>, 0.01, #[Gemm{inplace}(<TensorType(float64, matrix)>, 0.01,
# <TensorType(float64, matrix)>, <TensorType(float64, matrix)>, # <TensorType(float64, matrix)>, <TensorType(float64, matrix)>,
...@@ -195,8 +195,8 @@ class t_gemm(TestCase): ...@@ -195,8 +195,8 @@ class t_gemm(TestCase):
assert f[0].op == gemm_inplace assert f[0].op == gemm_inplace
#test factored scalar with merge #test factored scalar with merge
f = theano.function([a, b], updates={s: lr1 * (T.dot(a, b) - f = theano.function([a, b], updates=[(s, lr1 * (T.dot(a, b) -
l2_reg * s)}, l2_reg * s))],
mode=mode_not_fast_compile).maker.fgraph.toposort() mode=mode_not_fast_compile).maker.fgraph.toposort()
#[Gemm{inplace}(<TensorType(float64, matrix)>, 0.01, #[Gemm{inplace}(<TensorType(float64, matrix)>, 0.01,
# <TensorType(float64, matrix)>, <TensorType(float64, matrix)>, # <TensorType(float64, matrix)>, <TensorType(float64, matrix)>,
...@@ -206,7 +206,7 @@ class t_gemm(TestCase): ...@@ -206,7 +206,7 @@ class t_gemm(TestCase):
#test factored scalar with merge and neg #test factored scalar with merge and neg
f = theano.function([a, b], f = theano.function([a, b],
updates={s: s - lr1 * (s * .0002 + T.dot(a, b))}, updates=[(s, s - lr1 * (s * .0002 + T.dot(a, b)))],
mode=mode_not_fast_compile).maker.fgraph.toposort() mode=mode_not_fast_compile).maker.fgraph.toposort()
#[Gemm{inplace}(<TensorType(float64, matrix)>, -0.01, #[Gemm{inplace}(<TensorType(float64, matrix)>, -0.01,
# <TensorType(float64, matrix)>, <TensorType(float64, matrix)>, # <TensorType(float64, matrix)>, <TensorType(float64, matrix)>,
...@@ -368,7 +368,7 @@ class t_gemm(TestCase): ...@@ -368,7 +368,7 @@ class t_gemm(TestCase):
tz_i = gemm_no_inplace(tz[:, :, i], ta, tx[ tz_i = gemm_no_inplace(tz[:, :, i], ta, tx[
:, :, i], ty[:, :, i], tb) :, :, i], ty[:, :, i], tb)
g_i = theano.function([], tz_i, g_i = theano.function([], tz_i,
updates={tz: T.set_subtensor(tz[:, :, i], tz_i)}, updates=[(tz, T.set_subtensor(tz[:, :, i], tz_i))],
mode=compile.Mode(optimizer=None, linker=l)) mode=compile.Mode(optimizer=None, linker=l))
for j in xrange(3): for j in xrange(3):
g_i() g_i()
...@@ -801,7 +801,7 @@ def test_gemm_unrolled(): ...@@ -801,7 +801,7 @@ def test_gemm_unrolled():
cur_V = update_V(cur_H) cur_V = update_V(cur_H)
cur_H = update_H(cur_V) cur_H = update_H(cur_V)
unrolled_theano = theano.function([], updates={V: cur_V, H: cur_H}, unrolled_theano = theano.function([], updates=[(V, cur_V), (H, cur_H)],
name='unrolled_theano') name='unrolled_theano')
nb_dot = sum([1 for node in unrolled_theano.maker.fgraph.toposort() nb_dot = sum([1 for node in unrolled_theano.maker.fgraph.toposort()
if isinstance(node.op, (theano.tensor.Dot, if isinstance(node.op, (theano.tensor.Dot,
...@@ -1032,7 +1032,7 @@ def test_dot_w_self(): ...@@ -1032,7 +1032,7 @@ def test_dot_w_self():
p = T.dot(A, A) * B p = T.dot(A, A) * B
grad = T.grad(T.mean(p), A) grad = T.grad(T.mean(p), A)
f = theano.function([B], p, updates={A: A - grad}) f = theano.function([B], p, updates=[(A, A - grad)])
# tests correctness in debugmode # tests correctness in debugmode
f(numpy.asarray([[0, 1], [2, 3]], dtype=config.floatX)) f(numpy.asarray([[0, 1], [2, 3]], dtype=config.floatX))
...@@ -1119,7 +1119,7 @@ class TestGemv(TestCase, unittest_tools.TestOptimizationMixin): ...@@ -1119,7 +1119,7 @@ class TestGemv(TestCase, unittest_tools.TestOptimizationMixin):
assert topo[0].op.inplace == False assert topo[0].op.inplace == False
#test the inplace version #test the inplace version
g = theano.function([], [], updates={v2: v2 + theano.dot(m, v1)}, g = theano.function([], [], updates=[(v2, v2 + theano.dot(m, v1))],
mode=mode_blas_opt) mode=mode_blas_opt)
# Assert they produce the same output # Assert they produce the same output
...@@ -1169,7 +1169,7 @@ class TestGemv(TestCase, unittest_tools.TestOptimizationMixin): ...@@ -1169,7 +1169,7 @@ class TestGemv(TestCase, unittest_tools.TestOptimizationMixin):
assert topo[-1].op.inplace == False assert topo[-1].op.inplace == False
#test the inplace version #test the inplace version
g = theano.function([], [], updates={v2: v2 + theano.dot(v1, m)}, g = theano.function([], [], updates=[(v2, v2 + theano.dot(v1, m))],
mode=mode_blas_opt) mode=mode_blas_opt)
# Assert they produce the same output # Assert they produce the same output
...@@ -1575,7 +1575,7 @@ class TestGer(TestCase, unittest_tools.TestOptimizationMixin): ...@@ -1575,7 +1575,7 @@ class TestGer(TestCase, unittest_tools.TestOptimizationMixin):
def function(self, inputs, outputs, updates=None): def function(self, inputs, outputs, updates=None):
if updates is None: if updates is None:
updates = {} updates = []
return theano.function(inputs, outputs, self.mode, updates=updates) return theano.function(inputs, outputs, self.mode, updates=updates)
def b(self, bval): def b(self, bval):
...@@ -1691,8 +1691,8 @@ class TestGer(TestCase, unittest_tools.TestOptimizationMixin): ...@@ -1691,8 +1691,8 @@ class TestGer(TestCase, unittest_tools.TestOptimizationMixin):
def test_inplace(self): def test_inplace(self):
A = self.shared(numpy.random.rand(4, 5).astype(self.dtype)) A = self.shared(numpy.random.rand(4, 5).astype(self.dtype))
f = self.function([self.x, self.y], [], f = self.function([self.x, self.y], [],
updates={A: A + T.constant(0.1, dtype=self.dtype) * updates=[(A, A + T.constant(0.1, dtype=self.dtype) *
T.outer(self.x, self.y)}) T.outer(self.x, self.y))])
self.assertFunctionContains(f, self.ger_destructive) self.assertFunctionContains(f, self.ger_destructive)
f(numpy.random.rand(4).astype(self.dtype), f(numpy.random.rand(4).astype(self.dtype),
numpy.random.rand(5).astype(self.dtype)) numpy.random.rand(5).astype(self.dtype))
...@@ -1731,15 +1731,15 @@ class TestBlasStrides(TestCase): ...@@ -1731,15 +1731,15 @@ class TestBlasStrides(TestCase):
bt_dev = b_t.get_value(borrow=False, return_internal_type=True) bt_dev = b_t.get_value(borrow=False, return_internal_type=True)
ct_dev = c_t.get_value(borrow=False, return_internal_type=True) ct_dev = c_t.get_value(borrow=False, return_internal_type=True)
f_nn = theano.function([], [], updates={a: tensor.dot(b, c)}, f_nn = theano.function([], [], updates=[(a, tensor.dot(b, c))],
mode=self.mode) mode=self.mode)
#print 'class name:', self.__class__.__name__ #print 'class name:', self.__class__.__name__
#theano.printing.debugprint(f_nn) #theano.printing.debugprint(f_nn)
f_nt = theano.function([], [], updates={a: tensor.dot(b, c_t.T)}, f_nt = theano.function([], [], updates=[(a, tensor.dot(b, c_t.T))],
mode=self.mode) mode=self.mode)
f_tn = theano.function([], [], updates={a: tensor.dot(b_t.T, c)}, f_tn = theano.function([], [], updates=[(a, tensor.dot(b_t.T, c))],
mode=self.mode) mode=self.mode)
f_tt = theano.function([], [], updates={a: tensor.dot(b_t.T, c_t.T)}, f_tt = theano.function([], [], updates=[(a, tensor.dot(b_t.T, c_t.T))],
mode=self.mode) mode=self.mode)
# Try with all stride patterns, and all transposed pattern # Try with all stride patterns, and all transposed pattern
...@@ -1802,14 +1802,14 @@ class TestBlasStrides(TestCase): ...@@ -1802,14 +1802,14 @@ class TestBlasStrides(TestCase):
bt_dev = b_t.get_value(borrow=False, return_internal_type=True) bt_dev = b_t.get_value(borrow=False, return_internal_type=True)
ct_dev = c_t.get_value(borrow=False, return_internal_type=True) ct_dev = c_t.get_value(borrow=False, return_internal_type=True)
f_nn = theano.function([], [], updates={a: l * tensor.dot(b, c)}, f_nn = theano.function([], [], updates=[(a, l * tensor.dot(b, c))],
mode=self.mode) mode=self.mode)
f_nt = theano.function([], [], updates={a: l * tensor.dot(b, c_t.T)}, f_nt = theano.function([], [], updates=[(a, l * tensor.dot(b, c_t.T))],
mode=self.mode) mode=self.mode)
f_tn = theano.function([], [], updates={a: l * tensor.dot(b_t.T, c)}, f_tn = theano.function([], [], updates=[(a, l * tensor.dot(b_t.T, c))],
mode=self.mode) mode=self.mode)
f_tt = theano.function([], [], f_tt = theano.function([], [],
updates={a: l * tensor.dot(b_t.T, c_t.T)}, updates=[(a, l * tensor.dot(b_t.T, c_t.T))],
mode=self.mode) mode=self.mode)
# Try with all stride patterns, and all transposed pattern # Try with all stride patterns, and all transposed pattern
...@@ -1875,28 +1875,28 @@ class TestBlasStrides(TestCase): ...@@ -1875,28 +1875,28 @@ class TestBlasStrides(TestCase):
ct_dev = c_t.get_value(borrow=False, return_internal_type=True) ct_dev = c_t.get_value(borrow=False, return_internal_type=True)
f_nnn = theano.function([], [], f_nnn = theano.function([], [],
updates={a: (l * a + tensor.dot(b, c))}, updates=[(a, (l * a + tensor.dot(b, c)))],
mode=self.mode) mode=self.mode)
f_nnt = theano.function([], [], f_nnt = theano.function([], [],
updates={a: (l * a + tensor.dot(b, c_t.T))}, updates=[(a, (l * a + tensor.dot(b, c_t.T)))],
mode=self.mode) mode=self.mode)
f_ntn = theano.function([], [], f_ntn = theano.function([], [],
updates={a: (l * a + tensor.dot(b_t.T, c))}, updates=[(a, (l * a + tensor.dot(b_t.T, c)))],
mode=self.mode) mode=self.mode)
f_ntt = theano.function([], [], f_ntt = theano.function([], [],
updates={a: (l * a + tensor.dot(b_t.T, c_t.T))}, updates=[(a, (l * a + tensor.dot(b_t.T, c_t.T)))],
mode=self.mode) mode=self.mode)
f_tnn = theano.function([], [], f_tnn = theano.function([], [],
updates={a_t: (l * a_t + tensor.dot(b, c).T)}, updates=[(a_t, (l * a_t + tensor.dot(b, c).T))],
mode=self.mode) mode=self.mode)
f_tnt = theano.function([], [], f_tnt = theano.function([], [],
updates={a_t: (l * a_t + tensor.dot(b, c_t.T).T)}, updates=[(a_t, (l * a_t + tensor.dot(b, c_t.T).T))],
mode=self.mode) mode=self.mode)
f_ttn = theano.function([], [], f_ttn = theano.function([], [],
updates={a_t: (l * a_t + tensor.dot(b_t.T, c).T)}, updates=[(a_t, (l * a_t + tensor.dot(b_t.T, c).T))],
mode=self.mode) mode=self.mode)
f_ttt = theano.function([], [], f_ttt = theano.function([], [],
updates={a_t: (l * a_t + tensor.dot(b_t.T, c_t.T).T)}, updates=[(a_t, (l * a_t + tensor.dot(b_t.T, c_t.T).T))],
mode=self.mode) mode=self.mode)
# Try with all stride patterns, and all transposed pattern # Try with all stride patterns, and all transposed pattern
...@@ -1985,11 +1985,11 @@ class TestBlasStrides(TestCase): ...@@ -1985,11 +1985,11 @@ class TestBlasStrides(TestCase):
b_dev = b.get_value(borrow=False, return_internal_type=True) b_dev = b.get_value(borrow=False, return_internal_type=True)
c_dev = c.get_value(borrow=False, return_internal_type=True) c_dev = c.get_value(borrow=False, return_internal_type=True)
f_n = theano.function([], [], updates={a: (a + l * tensor.dot(b, c))}, f_n = theano.function([], [], updates=[(a, (a + l * tensor.dot(b, c)))],
mode=self.mode) mode=self.mode)
f_t = theano.function([], [], f_t = theano.function([], [],
updates={a: (a + l * tensor.dot(b_t.T, c))}, updates=[(a, (a + l * tensor.dot(b_t.T, c)))],
mode=self.mode) mode=self.mode)
# Try with all stride patterns, and all transposed pattern # Try with all stride patterns, and all transposed pattern
...@@ -2041,11 +2041,11 @@ class TestBlasStrides(TestCase): ...@@ -2041,11 +2041,11 @@ class TestBlasStrides(TestCase):
c_dev = c.get_value(borrow=False, return_internal_type=True) c_dev = c.get_value(borrow=False, return_internal_type=True)
f_n = theano.function([], [], f_n = theano.function([], [],
updates={a: (a + l * tensor.outer(b, c))}, updates=[(a, (a + l * tensor.outer(b, c)))],
mode=self.mode) mode=self.mode)
f_t = theano.function([], [], f_t = theano.function([], [],
updates={a_t: (a_t + l * tensor.outer(b, c).T)}, updates=[(a_t, (a_t + l * tensor.outer(b, c).T))],
mode=self.mode) mode=self.mode)
# Try with all stride patterns, and all transposed patterns # Try with all stride patterns, and all transposed patterns
......
...@@ -185,7 +185,7 @@ class TestCGemv(TestCase, TestOptimizationMixin): ...@@ -185,7 +185,7 @@ class TestCGemv(TestCase, TestOptimizationMixin):
#test the inplace version #test the inplace version
g = theano.function([], [], g = theano.function([], [],
updates={v2: v2 + theano.dot(m, v1)}, updates=[(v2, v2 + theano.dot(m, v1))],
mode=self.mode) mode=self.mode)
# Assert they produce the same output # Assert they produce the same output
......
...@@ -526,8 +526,8 @@ def makeSharedTester(shared_constructor_, ...@@ -526,8 +526,8 @@ def makeSharedTester(shared_constructor_,
s = self.cast_value(s) s = self.cast_value(s)
s_shared = self.shared_constructor(s) s_shared = self.shared_constructor(s)
f = theano.function([], f = theano.function([],
updates={s_shared:theano.dot(a_shared,b_shared) updates=[(s_shared, theano.dot(a_shared,b_shared)
+s_shared}) +s_shared)])
topo=f.maker.fgraph.toposort() topo=f.maker.fgraph.toposort()
f() f()
#[Gemm{inplace}(<TensorType(float64, matrix)>, 0.01, <TensorType(float64, matrix)>, <TensorType(float64, matrix)>, 2e-06)] #[Gemm{inplace}(<TensorType(float64, matrix)>, 0.01, <TensorType(float64, matrix)>, <TensorType(float64, matrix)>, 2e-06)]
...@@ -541,8 +541,8 @@ def makeSharedTester(shared_constructor_, ...@@ -541,8 +541,8 @@ def makeSharedTester(shared_constructor_,
#now test with the specify shape op in the output #now test with the specify shape op in the output
f = theano.function([], s_shared.shape, f = theano.function([], s_shared.shape,
updates={s_shared:theano.dot(a_shared,b_shared) updates=[(s_shared, theano.dot(a_shared,b_shared)
+s_shared_specify}) +s_shared_specify)])
topo=f.maker.fgraph.toposort() topo=f.maker.fgraph.toposort()
shp=f() shp=f()
assert numpy.all(shp == (40,40)) assert numpy.all(shp == (40,40))
...@@ -557,8 +557,8 @@ def makeSharedTester(shared_constructor_, ...@@ -557,8 +557,8 @@ def makeSharedTester(shared_constructor_,
b_shared.get_value(borrow=True).shape) b_shared.get_value(borrow=True).shape)
f = theano.function([], s_shared.shape, f = theano.function([], s_shared.shape,
updates={s_shared:theano.dot(a_shared,b_shared) updates=[(s_shared, theano.dot(a_shared,b_shared)
+s_shared_specify}) +s_shared_specify)])
topo=f.maker.fgraph.toposort() topo=f.maker.fgraph.toposort()
shp=f() shp=f()
assert numpy.all(shp == (40,40)) assert numpy.all(shp == (40,40))
......
...@@ -55,7 +55,7 @@ class T_scipy(unittest.TestCase): ...@@ -55,7 +55,7 @@ class T_scipy(unittest.TestCase):
train = function( train = function(
inputs=[x,y], inputs=[x,y],
outputs=[prediction, xent], outputs=[prediction, xent],
updates={w:w-0.1*gw, b:b-0.1*gb}) updates=[(w, w-0.1*gw), (b, b-0.1*gb)])
predict = function(inputs=[x], outputs=prediction) predict = function(inputs=[x], outputs=prediction)
N = 4 N = 4
......
import unittest import unittest
import theano import theano
from theano.updates import Updates from theano.updates import OrderedUpdates
import theano.tensor as T import theano.tensor as T
class test_ifelse(unittest.TestCase): class test_ifelse(unittest.TestCase):
def test_updates_init(self): def test_updates_init(self):
self.assertRaises(TypeError, Updates, dict(d=3)) self.assertRaises(TypeError, OrderedUpdates, dict(d=3))
sv = theano.shared('asdf') sv = theano.shared('asdf')
Updates({sv:3}) OrderedUpdates({sv:3})
def test_updates_setitem(self): def test_updates_setitem(self):
ok = True ok = True
up = Updates() up = OrderedUpdates()
sv = theano.shared('asdf') sv = theano.shared('asdf')
# keys have to be SharedVariables # keys have to be SharedVariables
...@@ -27,8 +27,8 @@ class test_ifelse(unittest.TestCase): ...@@ -27,8 +27,8 @@ class test_ifelse(unittest.TestCase):
def test_updates_add(self): def test_updates_add(self):
up1 = Updates() up1 = OrderedUpdates()
up2 = Updates() up2 = OrderedUpdates()
a = theano.shared('a') a = theano.shared('a')
b = theano.shared('b') b = theano.shared('b')
......
...@@ -8,23 +8,27 @@ __contact__ = "theano-dev <theano-dev@googlegroups.com>" ...@@ -8,23 +8,27 @@ __contact__ = "theano-dev <theano-dev@googlegroups.com>"
__docformat__ = "restructuredtext en" __docformat__ = "restructuredtext en"
from theano.gof.python25 import OrderedDict
from theano.compile.sharedvalue import SharedVariable from theano.compile.sharedvalue import SharedVariable
import logging import logging
logger = logging.getLogger('theano.updates') logger = logging.getLogger('theano.updates')
import warnings
class Updates(dict): # Must be an OrderedDict or updates will be applied in a non-deterministic order
class OrderedUpdates(OrderedDict):
""" """
Dict-like mapping from SharedVariable keys to their new values. Dict-like mapping from SharedVariable keys to their new values.
This mapping supports the use of the "+" operator for the union of updates. This mapping supports the use of the "+" operator for the union of updates.
""" """
def __init__(self, *key, **kwargs): def __init__(self, *key, **kwargs):
ret = super(Updates, self).__init__(*key, **kwargs) ret = super(OrderedUpdates, self).__init__(*key, **kwargs)
for key in self: for key in self:
if not isinstance(key, SharedVariable): if not isinstance(key, SharedVariable):
raise TypeError( raise TypeError(
'Updates keys must inherit from SharedVariable', 'OrderedUpdates keys must inherit from SharedVariable',
key) key)
return ret return ret
...@@ -38,12 +42,14 @@ class Updates(dict): ...@@ -38,12 +42,14 @@ class Updates(dict):
# value. Should it be cast to a GPU value right away? Should # value. Should it be cast to a GPU value right away? Should
# literals be transformed into constants immediately? # literals be transformed into constants immediately?
return super(Updates, self).__setitem__(key, value) return super(OrderedUpdates, self).__setitem__(key, value)
else: else:
raise TypeError('Updates keys must inherit from SharedVariable', raise TypeError('OrderedUpdates keys must inherit from SharedVariable',
key) key)
def update(self, other): def update(self, other=None):
if other is None:
return
for key, val in dict(other).iteritems(): for key, val in dict(other).iteritems():
if key in self: if key in self:
if self[key] == val: if self[key] == val:
...@@ -52,13 +58,17 @@ class Updates(dict): ...@@ -52,13 +58,17 @@ class Updates(dict):
self[key] = val # __setitem__ does type-checking self[key] = val # __setitem__ does type-checking
def __add__(self, other): def __add__(self, other):
rval = Updates() rval = OrderedUpdates()
rval.update(self) rval.update(self)
rval.update(other) rval.update(other)
return rval return rval
def __radd__(other, self): def __radd__(other, self):
rval = Updates() rval = OrderedUpdates()
rval.update(other) rval.update(other)
rval.update(self) rval.update(self)
return rval return rval
def Updates(*key, **kwargs):
    """Deprecated shim that builds an `OrderedUpdates`.

    Emits a warning, then forwards all positional and keyword arguments
    unchanged to `OrderedUpdates` and returns the resulting object.
    """
    # stacklevel=2 attributes the warning to the code calling Updates()
    # instead of this shim, so users can locate what needs migrating.
    warnings.warn("Updates is deprecated. Switch to OrderedUpdates.",
                  stacklevel=2)
    return OrderedUpdates(*key, **kwargs)
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论