Merge branch 'master' into grad_advinc_subtensor

f68f06ce · Rami Al-Rfou · 32a9a89a · 8a1272bd · f68f06ce · f68f06ce
--- a/doc/library/scan.txt
+++ b/doc/library/scan.txt
@@ -133,8 +133,8 @@ This makes it safe to pass a very long arange, which we need to do for generalit
 arange must have its length specified at creation time.


-Simple accumulation into a scalar, ditching lamba
-------------------------------------------------
+Simple accumulation into a scalar, ditching lambda
+--------------------------------------------------

 Although this example would seem almost self-explanatory, it stresses a
 pitfall to be careful of: the initial output state that is supplied, that is 

--- a/theano/__init__.py
+++ b/theano/__init__.py
@@ -88,7 +88,7 @@ from printing import \
 import scan_module
 from scan_module import scan, map, reduce, foldl, foldr, clone

-from updates import Updates
+from updates import Updates, OrderedUpdates

 import tensor
 import scalar

--- a/theano/compile/debugmode.py
+++ b/theano/compile/debugmode.py
@@ -172,7 +172,9 @@ class BadThunkOutput(DebugModeError):
        print >> sio, "BadThunkOutput"
        print >> sio, "  variable    :", self.r
        print >> sio, "  Outputs Type:", self.r.type
-        print >> sio, "  Inputs Type :", [i.type for i in self.r.owner.inputs],
+        print >> sio, "  Outputs Shape:", getattr(self.val1, 'shape', None)
+        print >> sio, "  Outputs Strides:", getattr(self.val1, 'strides', None)
+        print >> sio, "  Inputs Type :", [i.type for i in self.r.owner.inputs]
        print >> sio, "  Inputs Shape:", [getattr(val, 'shape', None)
                                          for val in self.inputs_val]
        print >> sio, "  Inputs Strides:", [getattr(val, 'strides', None)
@@ -1336,7 +1338,7 @@ def _check_preallocated_output(node, thunk, prealloc_modes, def_val,
            for r in node.outputs:
                if not r.type.values_eq_approx(r_vals[r], storage_map[r][0]):
                    # TODO: indicate it is not a C/Py problem
-                    inputs_val = [storage_map[inp] for inp in r.owner.inputs]
+                    inputs_val = [storage_map[inp][0] for inp in r.owner.inputs]
                    raise BadThunkOutput(r,
                            thunk1='Reference value', val1=r_vals[r],
                            thunk2=thunk_name, val2=storage_map[r][0],
@@ -1918,7 +1920,7 @@ class _Linker(gof.link.LocalLinker):
                                if not r.type.values_eq_approx(r_vals[r], storage_map[r][0]):
                                    #import pdb; pdb.set_trace()
                                    #r.type.values_eq_approx(r_vals[r], storage_map[r][0])
-                                    inputs_val = [storage_map[inp] for inp in r.owner.inputs]
+                                    inputs_val = [storage_map[inp][0] for inp in r.owner.inputs]
                                    raise BadThunkOutput(r,
                                            thunk1='perform', val1=r_vals[r],
                                            thunk2='c_code', val2=storage_map[r][0],

--- a/theano/compile/function.py
+++ b/theano/compile/function.py
@@ -12,6 +12,8 @@ from function_module import orig_function
 from profiling import ProfileStats
 from pfunc import pfunc
 from numpy import any  # to work in python 2.4
+import warnings
+from theano import gof

 def function(inputs, outputs=None, mode=None, updates=None, givens=None,
             no_default_updates=False, accept_inplace=False, name=None,
@@ -30,7 +32,7 @@ def function(inputs, outputs=None, mode=None, updates=None, givens=None,
    :type mode: string or `Mode` instance.
    :param mode: compilation mode

-    :type updates: iterable over pairs (shared_variable, new_expression). List, tuple or dict.
+    :type updates: iterable over pairs (shared_variable, new_expression). List, tuple or OrderedDict.
    :param updates: update the values for SharedVariable inputs according to these expressions

    :type givens: iterable over pairs (Var1, Var2) of Variables. List, tuple or dict.  The Var1
@@ -128,7 +130,7 @@ def function(inputs, outputs=None, mode=None, updates=None, givens=None,
                 def opt_log1p(node):
                    if not isinstance(node.op,Elemwise):
                       return
-                    if not isinstance(node.op.scalar_op, log,):
+                    if not isinstance(node.op.scalar_op, log):
                       return
                    inp = node.inputs[0]
                    if not inp.owner:
@@ -159,10 +161,18 @@ def function(inputs, outputs=None, mode=None, updates=None, givens=None,


    """
-    #tuple are used in some tests, as we accepted them in the past
-    #I prefer to allow it as they act the same as list for what they are used.
    if updates is None:
        updates = []
+
+    if isinstance(updates, dict) and \
+            not isinstance(updates, gof.python25.OrderedDict):
+        warnings.warn("Expected OrderedDict, got " + str(type(updates)) +
+            ". Using a standard dictionary here results in "
+            "non-deterministic behavior. You should use an OrderedDict"
+            " if you are using python2.7 or use a list of (shared, update)"
+            " pairs. Do not just convert your dictionary to this type before"
+            " the call as the conversion will still be non-deterministic.")
+
    if givens is None:
        givens = []
    if not isinstance(inputs, (list, tuple)):

--- a/theano/compile/function_module.py
+++ b/theano/compile/function_module.py
@@ -1337,6 +1337,7 @@ def orig_function(inputs, outputs, mode=None, accept_inplace=False,
        profile.compile_time += t2 - t1

    fn.name = name
+    fn.maker.fgraph.name = name
    return fn



--- a/theano/compile/tests/test_pfunc.py
+++ b/theano/compile/tests/test_pfunc.py
@@ -626,8 +626,15 @@ class Test_pfunc(unittest.TestCase):

        # The order of the variables is not determined, so we try
        # both shared variables.
-        f = theano.function([], [], updates={a: a, b: (2 * b)})
-        g = theano.function([], [], updates={a: (a * 2), b: b})
+        # TODO: explain the above comment. By "not determined" does
+        # this mean "not deterministic"?
+        # This test originally wrote the updates using dictionaries,
+        # and iterating over the dictionary was not deterministic.
+        # Is that all the comment above meant, or is the CVM intended
+        # to add extra non-determinism? Or is the CVM meant to
+        # deterministically but arbitrarily pick an order for the updates?
+        f = theano.function([], [], updates=[(a, a), (b, (2 * b))])
+        g = theano.function([], [], updates=[(a, (a * 2)), (b, b)])

        f()
        assert a.get_value(borrow=True).shape == (), a.get_value()
@@ -642,10 +649,10 @@ class Test_pfunc(unittest.TestCase):
        a = shared(1., 'a')
        b = shared(numpy.ones((2, 3)), 'b')

-        # The order of the variables is not determined, so we try
-        # both shared variables.
-        f = theano.function([], [], updates={a: a, b: (2 * b - b)})
-        g = theano.function([], [], updates={a: (a * 2 - a), b: b})
+        # See comment in test_update_same about why we try both
+        # shared variables.
+        f = theano.function([], [], updates=[(a, a), (b, (2 * b - b))])
+        g = theano.function([], [], updates=[(a, (a * 2 - a)), (b, b)])

        f()
        assert a.get_value(borrow=True).shape == (), a.get_value()

--- a/theano/gof/null_type.py
+++ b/theano/gof/null_type.py
@@ -35,3 +35,6 @@ class NullType(Type):

    def __hash__(self, other):
        return hash(type(self))
+
+    def __str__(self):  # constant label; no per-instance data is included
+        return 'NullType'
--- a/theano/gof/python25.py
+++ b/theano/gof/python25.py
@@ -162,7 +162,7 @@ else:

 if sys.version_info[:2] < (2, 7):
    # The following implementation of OrderedDict compatible with python 2.4
-    # was taked from http://pypi.python.org/pypi/ordereddict/1.1
+    # was taken from http://pypi.python.org/pypi/ordereddict/1.1
    # It is under the MIT license.

    # Copyright (c) 2009 Raymond Hettinger

--- a/theano/gradient.py
+++ b/theano/gradient.py
@@ -20,6 +20,7 @@ import theano
 from itertools import izip
 from theano import gof
 from theano.gof import Variable
+from theano.gof.python25 import OrderedDict
 from theano.gof.python25 import all
 import theano.gof.utils
 from theano.gof.null_type import NullType
@@ -144,6 +145,9 @@ class DisconnectedType(theano.gof.type.Type):
                " a symbolic placeholder."
            ))

+    def __str__(self):  # constant label; no per-instance data is included
+        return 'DisconnectedType'
+

 ########################
 # R Operator
@@ -211,7 +215,7 @@ def Rop(f, wrt, eval_points):
            # Tensor, Sparse and CudaNdArray have the ndim attribute
            pass

-    seen_nodes = {}
+    seen_nodes = OrderedDict()

    def _traverse(node):
        """ TODO: writeme """
@@ -432,14 +436,14 @@ def grad(cost, wrt, consider_constant=None,
    if known_grads is not None:
        outputs.extend(known_grads.keys())

-    var_to_node_to_idx = _populate_var_to_node_to_idx(
+    var_to_app_to_idx = _populate_var_to_app_to_idx(
            outputs, wrt, consider_constant)

    # build a dict mapping var to the gradient of cost with respect to var
-    grad_dict = {}
+    grad_dict = OrderedDict()

    if known_grads is None:
-        known_grads = {}
+        known_grads = OrderedDict()

    # The gradient of the cost is 1 unless specified otherwise by known_grads.
    if cost is not None:
@@ -501,10 +505,10 @@ def grad(cost, wrt, consider_constant=None,

    # variables that do not influence the cost have zero gradient.
    # if wrt is such a variable, populate the grad_dict with this info
-    # so that wrt not being in var_to_node_to_idx won't cause an error below
+    # so that wrt not being in var_to_app_to_idx won't cause an error below
    # according to the flag, possibly raise an error if wrt is disconnected
    for elem in wrt:
-        if elem not in var_to_node_to_idx and elem is not cost \
+        if elem not in var_to_app_to_idx and elem is not cost \
                and elem not in grad_dict:
            handle_disconnected(elem)
            grad_dict[elem] = DisconnectedType()()
@@ -521,7 +525,7 @@ def grad(cost, wrt, consider_constant=None,
        if hasattr(g.type, 'dtype'):
            assert g.type.dtype in tensor.float_dtypes

-    rval = _populate_grad_dict(var_to_node_to_idx,
+    rval = _populate_grad_dict(var_to_app_to_idx,
            grad_dict, wrt, cost_name)

    for i in xrange(len(rval)):
@@ -579,7 +583,7 @@ def _node_to_pattern(node):
    return connection_pattern


-def _populate_var_to_node_to_idx(outputs, wrt, consider_constant):
+def _populate_var_to_app_to_idx(outputs, wrt, consider_constant):
    """
    Helper function for grad function.

@@ -638,7 +642,7 @@ def _populate_var_to_node_to_idx(outputs, wrt, consider_constant):

    # var_to_app_to_idx[var][node] = [i,j] means node has
    # var as input at positions i and j
-    var_to_app_to_idx = {}
+    var_to_app_to_idx = OrderedDict()

    # Set of variables that have been added to their true parents
    # ('true' here means that the elements of the variable are a function
@@ -676,7 +680,13 @@ def _populate_var_to_node_to_idx(outputs, wrt, consider_constant):
                    continue

                if ipt not in var_to_app_to_idx:
-                    var_to_app_to_idx[ipt] = {}
+                    # This object here *must* be an OrderedDict, because
+                    # we iterate over its keys when adding up the terms of
+                    # the gradient on ipt. If it is a regular dict, the grad
+                    # method will return something that is analytically correct,
+                    # but whose order of doing additions depends on the memory
+                    # location of the apply nodes.
+                    var_to_app_to_idx[ipt] = OrderedDict()
                app_to_idx = var_to_app_to_idx[ipt]
                if app not in app_to_idx:
                    app_to_idx[app] = []
@@ -731,12 +741,12 @@ class DisconnectedInputError(ValueError):
    disconnected_inputs='raise'.
    """

-def _populate_grad_dict(var_to_node_to_idx,
+def _populate_grad_dict(var_to_app_to_idx,
        grad_dict, wrt, cost_name=None):
    """
        Helper function for grad function.

-        var_to_node_to_idx: a dictionary mapping a variable to
+        var_to_app_to_idx: a dictionary mapping a variable to
                a second dictionary.
                the second dictionary maps apply nodes acting on
                this variable to the variable's index in the apply
@@ -761,7 +771,7 @@ def _populate_grad_dict(var_to_node_to_idx,
    """
    # build a dict mapping node to the terms node contributes to each of
    # its inputs' gradients
-    term_dict = {}
+    term_dict = OrderedDict()

    def access_term_cache(node):
        """ Populates term_dict[node] and returns it """
@@ -1001,15 +1011,17 @@ def _populate_grad_dict(var_to_node_to_idx,
            #cache the result
            term_dict[node] = input_grads

+
+
        return term_dict[node]

    # populate grad_dict[var] and return it
    def access_grad_cache(var):
        if var not in grad_dict:
            # If var is not in grad_dict already, we must compute it
-            if var in var_to_node_to_idx:
+            if var in var_to_app_to_idx:
                terms = []
-                node_to_idx = var_to_node_to_idx[var]
+                node_to_idx = var_to_app_to_idx[var]
                for node in node_to_idx:
                    for idx in node_to_idx[node]:


--- a/theano/printing.py
+++ b/theano/printing.py
@@ -8,6 +8,8 @@ import logging
 import os
 import StringIO
 import sys
+# Not available on all platforms
+hashlib = None

 import numpy

@@ -1069,3 +1071,78 @@ def min_informative_str(obj, indent_level=0,
    rval = indent + prefix + name

    return rval
+
+
+
+
+def var_descriptor(obj, _prev_obs=None, _tag_generator=None):
+    """
+    Returns a string, with no newlines, fully specifying
+    how a variable is computed. Does not include any memory
+    location dependent information such as the id of a node.
+
+    _prev_obs (id(obj) -> tag) and _tag_generator are shared with the
+    recursive calls so that a repeated object is written as '<tag>'.
+    """
+    # The import below binds `hashlib`; without this declaration the name
+    # would be function-local and the test would raise UnboundLocalError.
+    global hashlib
+    if hashlib is None:
+        try:
+            import hashlib
+        except ImportError:
+            raise RuntimeError("Can't run var_descriptor because hashlib is not available.")
+
+    if _prev_obs is None:
+        _prev_obs = {}
+
+    if id(obj) in _prev_obs:
+        tag = _prev_obs[id(obj)]
+        return '<' + tag + '>'
+
+    if _tag_generator is None:
+        _tag_generator = _TagGenerator()
+
+    cur_tag = _tag_generator.get_tag()
+    _prev_obs[id(obj)] = cur_tag
+
+    if hasattr(obj, '__array__'):
+        # hashlib hashes only the contents of the buffer, but
+        # it can have different semantics depending on the strides
+        # of the ndarray
+        name = '<ndarray:'
+        name += 'strides=['+','.join(str(stride) for stride in obj.strides)+']'
+        name += ',digest='+hashlib.md5(obj).hexdigest()+'>'
+    elif hasattr(obj, 'name') and obj.name is not None:
+        name = obj.name
+    elif hasattr(obj, 'owner') and obj.owner is not None:
+        name = str(obj.owner.op) + '('
+        name += ','.join(var_descriptor(ipt,
+                    _prev_obs=_prev_obs, _tag_generator=_tag_generator) for ipt
+                    in obj.owner.inputs)
+        name += ')'
+    else:
+        name = str(obj)
+        if ' at 0x' in name:
+            # The __str__ method is encoding the object's id in its str
+            name = position_independent_str(obj)
+            if ' at 0x' in name:
+                print name
+                assert False
+
+    # The description is always prefixed with this object's own tag.
+    return cur_tag + '=' + name
+
+def position_independent_str(obj):
+    """Describe `obj` with a fixed label plus str(obj.type).
+
+    Only theano Variables are handled; anything else is rejected.
+    """
+    if not isinstance(obj, theano.gof.graph.Variable):
+        raise NotImplementedError()
+    return 'theano_var' + '{type=' + str(obj.type) + '}'
+
+
+
+
+
--- a/theano/sandbox/cuda/cuda_ndarray.cu
+++ b/theano/sandbox/cuda/cuda_ndarray.cu
@@ -4666,6 +4666,33 @@ int fprint_CudaNdarray(FILE * fd, const CudaNdarray *self)
    return 0;
 }

+
+int CudaNdarray_prep_output(CudaNdarray ** arr, int nd,
+        const int * dims)
+{
+    bool allocated = false;  // true only if *arr was created in this call
+    if (*arr == NULL)
+    {
+        // This allocates the metadata but not the data
+        *arr = (CudaNdarray *) CudaNdarray_new_nd(nd);
+        if (*arr == NULL)
+            return -1;
+        allocated = true;
+    }
+    // A non-zero return from the allocator is treated as failure.
+    if (CudaNdarray_alloc_contiguous(*arr, nd, dims))
+    {
+        if (allocated)
+        {
+            Py_DECREF(*arr);  // release only the array created above
+            *arr = NULL;
+        }
+        return -1;
+    }
+    return 0;
+}
+
+
 /*
  Local Variables:
  mode:c++

--- a/theano/sandbox/cuda/cuda_ndarray.cuh
+++ b/theano/sandbox/cuda/cuda_ndarray.cuh
@@ -149,11 +149,11 @@ DllExport int
 CudaNdarray_Equal(CudaNdarray *cnda1, CudaNdarray *cnda2);

 /****
- *  Set the idx'th dimension to value d.
+ *  Set the dimension[idx] to value d.
 *
 *  Updates the log2dim shadow array.
 *
- *  Does not sync structure to host.
+ *  Does not sync structure to device.
 */
 DllExport inline void __attribute__((always_inline))
 CudaNdarray_set_dim(CudaNdarray * self, int idx, int d) 
@@ -229,7 +229,8 @@ DllExport PyObject * CudaNdarray_new_nd(const int nd);
 /**
 * [Re]allocate a CudaNdarray with access to 'nd' dimensions.
 *
- * Note: This does not allocate storage for data.
+ * Note: This does not allocate storage for data, or free
+ *       pre-existing storage.
 */
 DllExport inline int __attribute__((always_inline))
 CudaNdarray_set_nd(CudaNdarray * self, const int nd)
@@ -276,6 +277,7 @@ CudaNdarray_set_nd(CudaNdarray * self, const int nd)
 * CudaNdarray_alloc_contiguous
 *
 * Allocate storage space for a tensor of rank 'nd' and given dimensions.
+ * (No-op if self already has a contiguous tensor of the right dimensions)
 *
 * Note: CudaNdarray_alloc_contiguous is templated to work for both int dimensions and npy_intp dimensions
 */
@@ -286,13 +288,13 @@ static int CudaNdarray_alloc_contiguous(CudaNdarray *self, const int nd, const i
    // return 0 on success
    int size = 1; //set up the strides for contiguous tensor
    assert (nd >= 0);
+
+    // Here we modify the host structure to have the desired shape and
+    // strides. This does not cause the storage to be freed or reallocated.
    if (CudaNdarray_set_nd(self, nd))
    {
        return -1;
    }
-    //TODO: check if by any chance our current dims are correct,
-    //      and strides already contiguous
-    //      in that case we can return right here.
    for (int i = nd-1; i >= 0; --i)
    {
        CudaNdarray_set_stride(self, i, (dim[i] == 1) ? 0 : size);
@@ -300,7 +302,11 @@ static int CudaNdarray_alloc_contiguous(CudaNdarray *self, const int nd, const i
        size = size * dim[i];
    }

-    if ((self->data_allocated == size) && CudaNdarray_is_c_contiguous(self))
+    // If the allocated buffer is already of the right size, we don't need to
+    // do anything else.
+    // Note: self->data_allocated is 0 for a view, so views will fail this
+    // check and be turned into independent arrays below.
+    if (self->data_allocated == size)
    {
        return 0;
    }
@@ -468,6 +474,15 @@ PyObject * CudaNdarray_View(const CudaNdarray * self);
 PyObject * CudaNdarray_inplace_add(PyObject* py_self, PyObject * py_other);


+
+// Ensures that *arr is a pointer to a contiguous ndarray of the specified
+// dimensions.
+// *arr may initially be NULL, a pointer to an ndarray of the wrong size,
+// or a pointer to an ndarray of the right size. In the last case it will
+// not change.
+int CudaNdarray_prep_output(CudaNdarray ** arr, int nd,
+        const int * dims);
+
 #endif
 /*
  Local Variables:

--- a/theano/sandbox/cuda/tests/test_conv_cuda_ndarray.py
+++ b/theano/sandbox/cuda/tests/test_conv_cuda_ndarray.py
@@ -869,5 +869,5 @@ def test_stack_rows_segfault_070312():
    out = theano.shared(numpy.random.rand(1, 2, 2, 3).astype('float32'))
    op = theano.tensor.nnet.conv.ConvOp(imshp=(80, 96, 96), kshp=(9, 9),
            nkern=1, bsize=1)
-    f = theano.function([], [], updates={out: op(img, kern)})
+    f = theano.function([], [], updates=[(out, op(img, kern))])
    f()
--- a/theano/sandbox/cuda/tests/test_opt.py
+++ b/theano/sandbox/cuda/tests/test_opt.py
@@ -106,7 +106,7 @@ def test_alloc_memset_0():
 def test_gpuspecifyshape():
    x = cuda.shared_constructor(numpy.ones(3,dtype='float32'), 'x')
    m = theano.tensor.specify_shape(x + numpy.float32(1), (3,))
-    f = theano.function([], updates={x:m * numpy.float32(2)},
+    f = theano.function([], updates=[(x, m * numpy.float32(2))],
                        mode=mode_with_gpu)
    l = f.maker.fgraph.toposort()
    assert not numpy.any([isinstance(x.op, cuda.HostFromGpu) for x in l])

--- a/theano/sandbox/cuda/tests/test_var.py
+++ b/theano/sandbox/cuda/tests/test_var.py
@@ -60,11 +60,11 @@ class T_updates(unittest.TestCase):
        data = numpy.float32([1, 2, 3, 4])
        x = f32sc(data)
        y = x ** 2
-        f = theano.function([], y, updates={x: x + 1})
+        f = theano.function([], y, updates=[(x, x + 1)])
        f()

        # Test that we can update with a CudaVariable
-        f = theano.function([], y, updates={x: cuda.gpu_from_host(x + 1)})
+        f = theano.function([], y, updates=[(x, cuda.gpu_from_host(x + 1))])
        f()

    def test_2(self):
@@ -74,7 +74,7 @@ class T_updates(unittest.TestCase):
                value=numpy.zeros((10, 10), 'float32'))

        x = tensor.fmatrix('x')
-        output_updates = {output_var: x ** 2}
+        output_updates = [(output_var, x ** 2)]
        output_givens = {x: data}
        output_func = theano.function(inputs=[], outputs=[],
                updates=output_updates, givens=output_givens)
@@ -89,8 +89,8 @@ class T_updates(unittest.TestCase):
        # the update_var has type matrix, and the update expression
        # is a broadcasted scalar, and that should not be allowed.
        self.assertRaises(TypeError, theano.function, inputs=[], outputs=[],
-                          updates={output_var:
-                                   output_var.sum()})
+                          updates=[(output_var,
+                                   output_var.sum())])

    def test_err_broadcast(self):
        # Test that we raise a good error message when we don't
@@ -101,8 +101,8 @@ class T_updates(unittest.TestCase):
        # the update_var has type matrix, and the update expression
        # is a broadcasted scalar, and that should not be allowed.
        self.assertRaises(TypeError, theano.function, inputs=[], outputs=[],
-                          updates={output_var:
-                                   output_var.sum().dimshuffle('x', 'x')})
+                          updates=[(output_var,
+                                   output_var.sum().dimshuffle('x', 'x'))])

    def test_broadcast(self):
        # Test that we can rebroadcast
@@ -111,11 +111,11 @@ class T_updates(unittest.TestCase):

        up = tensor.unbroadcast(output_var.sum().dimshuffle('x', 'x'), 0, 1)
        output_func = theano.function(inputs=[], outputs=[],
-                                      updates={output_var: up})
+                                      updates=[(output_var, up)])
        output_func()

        up = tensor.patternbroadcast(output_var.sum().dimshuffle('x', 'x'),
                                     output_var.type.broadcastable)
        output_func = theano.function(inputs=[], outputs=[],
-                                      updates={output_var: up})
+                                      updates=[(output_var, up)])
        output_func()
--- a/theano/sandbox/linalg/__init__.py
+++ b/theano/sandbox/linalg/__init__.py

 from ops import (cholesky, matrix_inverse, solve,
        diag, extract_diag, alloc_diag,
-        det, psd, eig,
+        det, psd, eig, eigh,
        trace, spectral_radius_bound)
--- a/theano/sandbox/linalg/ops.py
+++ b/theano/sandbox/linalg/ops.py
@@ -12,6 +12,7 @@ from theano.tensor.opt import (register_stabilize,
        register_specialize, register_canonicalize)
 from theano.gof import local_optimizer
 from theano.gof.opt import Optimizer
+from theano.gradient import grad_not_implemented, DisconnectedType

 try:
    import scipy.linalg
@@ -395,6 +396,8 @@ cholesky = Cholesky()


 class CholeskyGrad(Op):
+    """
+    """
    def __init__(self, lower=True):
        self.lower = lower
        self.destructive = False
@@ -487,7 +490,7 @@ class MatrixPinv(Op):
    This method is not faster then `matrix_inverse`. Its strength comes from
    that it works for non-square matrices.
    If you have a square matrix though, `matrix_inverse` can be both more
-    exact and faster to compute. Aslo this op does not get optimized into a
+    exact and faster to compute. Also this op does not get optimized into a
    solve op.
    """
    def __init__(self):
@@ -880,9 +883,7 @@ class Eig(Op):
    """Compute the eigenvalues and right eigenvectors of a square array.

    """
-
-    def __init__(self):
-        pass
+    _numop = staticmethod(numpy.linalg.eig)

    def props(self):
        """Function exposing different properties of each instance of the
@@ -900,15 +901,17 @@ class Eig(Op):

    def make_node(self, x):
        x = as_tensor_variable(x)
+        assert x.ndim == 2
        w = theano.tensor.vector(dtype=x.dtype)
        v = theano.tensor.matrix(dtype=x.dtype)
        return Apply(self, [x], [w, v])

    def perform(self, node, (x,), (w, v)):
        try:
-            w[0], v[0] = [z.astype(x.dtype) for z in numpy.linalg.eig(x)]
+            w[0], v[0] = [z.astype(x.dtype) for z in self._numop(x)]
        except numpy.linalg.LinAlgError:
-            logger.debug('Failed to find eig of %s' % str(node.inputs[0]))
+            logger.debug('Failed to find %s of %s' % (self._numop.__name__,
+                                                      node.inputs[0]))
            raise

    def infer_shape(self, node, shapes):
@@ -916,6 +919,138 @@ class Eig(Op):
        return [(n,), (n,n)]

    def __str__(self):
-        return "Eig"
+        return self._numop.__name__.capitalize()

 eig = Eig()
+
+def _zero_disconnected(outputs, grads):
+    # Replace every DisconnectedType gradient by zeros_like of its output.
+    return [grad if not isinstance(grad.type, DisconnectedType)
+            else out.zeros_like() for out, grad in zip(outputs, grads)]
+
+class Eigh(Eig):
+    """
+    Return the eigenvalues and eigenvectors of a Hermitian or symmetric matrix.
+    UPLO is handed to numpy.linalg.eigh by perform and to EighGrad by grad.
+    """
+    _numop = staticmethod(numpy.linalg.eigh)
+    def __init__(self, UPLO='L'):
+        self.UPLO = UPLO
+
+    def __str__(self):
+        return 'Eigh{%s}' % self.UPLO
+
+    def props(self):
+        return self.UPLO,
+
+    def make_node(self, x):
+        x = as_tensor_variable(x)
+        assert x.ndim == 2
+        # Numpy's linalg.eigh may return either double or single
+        # precision eigenvalues depending on installed version of
+        # LAPACK.  Rather than trying to reproduce the (rather
+        # involved) logic, we just probe linalg.eigh with a trivial
+        # input.
+        w_dtype = self._numop([[numpy.dtype(x.dtype).type()]])[0].dtype.name
+        w = theano.tensor.vector(dtype=w_dtype)
+        v = theano.tensor.matrix(dtype=x.dtype)
+        return Apply(self, [x], [w, v])
+
+    def perform(self, node, (x,), (w, v)):
+        try:
+            w[0], v[0] = self._numop(x, self.UPLO)
+        except numpy.linalg.LinAlgError:
+            logger.debug('Failed to find %s of %s' % (self._numop.__name__,
+                                                      node.inputs[0]))
+            # Logged for debugging only; the error is re-raised unchanged.
+            raise
+
+    def grad(self, inputs, g_outputs):
+        r"""The gradient function should return
+
+           .. math:: \sum_n\left(W_n\frac{\partial\,w_n}
+                           {\partial a_{ij}} +
+                     \sum_k V_{kn}\frac{\partial\,v_{kn}}
+                           {\partial a_{ij}}\right),
+
+        where [:math:`W`, :math:`V`] corresponds to ``g_outputs``,
+        :math:`a` to ``inputs``, and  :math:`(w, v)=\mbox{eig}(a)`.
+
+        Analytic formulae for eigensystem gradients are well-known in
+        perturbation theory:
+
+           .. math:: \frac{\partial\,w_n}
+                          {\partial a_{ij}} = v_{in}\,v_{jn}
+
+
+           .. math:: \frac{\partial\,v_{kn}}
+                          {\partial a_{ij}} =
+                \sum_{m\ne n}\frac{v_{km}v_{im}v_{jn}}{w_n-w_m}
+        """
+        x, = inputs
+        w, v = self(x)
+        # Replace gradients wrt disconnected variables with
+        # zeros. This is a work-around for issue #1063.
+        gw, gv = _zero_disconnected([w, v], g_outputs)
+        return [EighGrad(self.UPLO)(x, w, v, gw, gv)]
+
+def eigh(a, UPLO='L'):  # shorthand: build an Eigh op for UPLO and apply it
+    return Eigh(UPLO)(a)
+
+class EighGrad(Op):
+    """Gradient of an eigensystem of a Hermitian matrix.
+    The result is folded into the triangle selected by UPLO (see perform).
+    """
+    def __init__(self, UPLO='L'):
+        self.UPLO = UPLO
+        if UPLO == 'L':
+            self.tri0 = numpy.tril
+            self.tri1 = lambda a: numpy.triu(a, 1)
+        else:
+            self.tri0 = numpy.triu
+            self.tri1 = lambda a: numpy.tril(a, -1)
+
+    def props(self):
+        # Include UPLO so that 'L' and 'U' ops do not compare/hash equal.
+        return (self.UPLO,)
+
+    def __hash__(self):
+        return hash((type(self), self.props()))
+
+    def __eq__(self, other):
+        return (type(self) == type(other) and self.props() == other.props())
+
+    def __str__(self):
+        return 'EighGrad{%s}' % self.UPLO
+
+    def make_node(self, x, w, v, gw, gv):
+        x, w, v, gw, gv = map(as_tensor_variable, (x, w, v, gw, gv))
+        return Apply(self, [x, w, v, gw, gv], [x.type()])
+
+    def perform(self, node, inputs, outputs):
+        r"""
+        Implements the "reverse-mode" gradient for the eigensystem of
+        a square matrix.
+        """
+        x, w, v, W, V = inputs
+        N = x.shape[0]
+        outer = numpy.outer
+
+        G = lambda n: sum(v[:,m]*V.T[n].dot(v[:,m])/(w[n]-w[m])
+                          for m in xrange(N) if m != n)
+        g = sum(outer(v[:,n], v[:,n]*W[n] + G(n))
+                for n in xrange(N))
+
+        # Numpy's eigh(a, 'L') (eigh(a, 'U')) is a function of tril(a)
+        # (triu(a)) only.  This means that partial derivative of
+        # eigh(a, 'L') (eigh(a, 'U')) with respect to a[i,j] is zero
+        # for i < j (i > j).  At the same time, non-zero components of
+        # the gradient must account for the fact that variation of the
+        # opposite triangle contributes to variation of two elements
+        # of Hermitian (symmetric) matrix. The following line
+        # implements the necessary logic.
+        outputs[0][0] = self.tri0(g) + self.tri1(g).T
+
+    def infer_shape(self, node, shapes):
+        return [shapes[0]]
+
--- a/theano/sandbox/linalg/tests/test_linalg.py
+++ b/theano/sandbox/linalg/tests/test_linalg.py
@@ -29,7 +29,7 @@ from theano.sandbox.linalg.ops import (cholesky,
                                       imported_scipy,
                                       Eig,
                                       )
-
+from theano.sandbox.linalg import eig, eigh
 from nose.plugins.skip import SkipTest


@@ -471,29 +471,51 @@ class test_Solve(utt.InferShapeTester):
                                self.op_class)

 class test_Eig(utt.InferShapeTester):
+    op_class = Eig
+    op = eig
+    dtype = 'float64'
    def setUp(self):
        super(test_Eig, self).setUp()
-        self.op_class = Eig
-        self.op = Eig()
+        self.rng = numpy.random.RandomState(utt.fetch_seed())
+        self.A = theano.tensor.matrix(dtype=self.dtype)
+        X = numpy.asarray(self.rng.rand(5, 5),
+                          dtype=self.dtype)
+        self.S = X.dot(X.T)
        
    def test_infer_shape(self):
-        rng = numpy.random.RandomState(utt.fetch_seed())
-        A = theano.tensor.matrix()
-        X = numpy.asarray(rng.rand(5, 5),
-                          dtype=config.floatX)
+        A = self.A
+        S = self.S
        self._compile_and_check([A],  # theano.function inputs
                                self.op(A),  # theano.function outputs
-                                # A must be square
-                                [X.dot(X.T)],
+                                # S must be square
+                                [S],
                                self.op_class)
    def test_eval(self):
        import math
-        A = theano.tensor.matrix()
+        A = theano.tensor.matrix(dtype=self.dtype)
        self.assertEquals([e.eval({A: [[1]]}) for e in self.op(A)],
                          [[1.0], [[1.0]]])
-
-        w, v = [e.eval({A: [[0, 1], [1, 0]]}) 
-                for e in self.op(A)]
-        assert_array_almost_equal(w, [1, -1])
-        x = math.sqrt(2)/2
-        assert_array_almost_equal(v, [[x, -x], [x, x]])
+        x = [[0, 1], [1, 0]] 
+        w, v = [e.eval({A: x}) for e in self.op(A)]
+        assert_array_almost_equal(numpy.dot(x,v), w * v)
+
+class test_Eigh(test_Eig):
+    op = staticmethod(eigh)
+    def test_uplo(self):
+        """Both UPLO settings must agree (up to eigenvector sign) on self.S."""
+        a = theano.tensor.matrix(dtype=self.dtype)
+        wu, vu = [out.eval({a: self.S}) for out in self.op(a, 'U')]
+        wl, vl = [out.eval({a: self.S}) for out in self.op(a, 'L')]
+        assert_array_almost_equal(wu, wl)
+        assert_array_almost_equal(vu*numpy.sign(vu[0,:]),
+                                  vl*numpy.sign(vl[0,:]))
+            
+    def test_grad(self):
+        S = self.S
+        utt.verify_grad(lambda x: self.op(x)[0], [S], rng=self.rng)
+        utt.verify_grad(lambda x: self.op(x)[1], [S], rng=self.rng)
+        utt.verify_grad(lambda x: self.op(x, 'U')[0], [S], rng=self.rng)
+        utt.verify_grad(lambda x: self.op(x, 'U')[1], [S], rng=self.rng)
+
+class test_Eigh_float32(test_Eigh):
+    dtype = 'float32'
--- a/theano/sandbox/scan.py
+++ b/theano/sandbox/scan.py
@@ -13,14 +13,16 @@ __contact__ = "Razvan Pascanu <r.pascanu@gmail>"
 import itertools
 import logging
 import numpy
+import warnings

 from theano.compile import SharedVariable, function
 from theano import compile
 from theano import gof
+from theano.gof.python25 import OrderedDict
 from theano.tensor import opt
 from theano import tensor
 from theano import config
-from theano.updates import Updates
+from theano.updates import OrderedUpdates


 from theano.scan_module import scan_op
@@ -147,7 +149,7 @@ def scan(fn,
    n_seqs = len(seqs)
    n_outs = len(outs_info)

-    return_steps = {}
+    return_steps = OrderedDict()
    # wrap outputs info in a dictionary if they are not already in one
    for i in xrange(n_outs):
        if outs_info[i] is not None:
@@ -242,7 +244,7 @@ def scan(fn,
    mit_sot_inner_inputs = []
    mit_sot_inner_slices = []
    mit_sot_inner_outputs = []
-    mit_sot_return_steps = {}
+    mit_sot_return_steps = OrderedDict()
    mit_sot_tap_array = []
    mit_sot_rightOrder = []

@@ -251,7 +253,7 @@ def scan(fn,
    sit_sot_inner_inputs = []
    sit_sot_inner_slices = []
    sit_sot_inner_outputs = []
-    sit_sot_return_steps = {}
+    sit_sot_return_steps = OrderedDict()
    sit_sot_rightOrder = []
    nit_sot_steps = []
    # go through outputs picking up time slices as needed
@@ -398,7 +400,8 @@ def scan(fn,
                      not isinstance(arg, tensor.Constant))]
    # when we apply the lambda expression we get a mixture of update rules
    # and outputs that needs to be separated
-    condition, outputs, updates = scan_utils.get_updates_and_outputs(fn(*args))
+    lambda_result = fn(*args)
+    condition, outputs, updates = scan_utils.get_updates_and_outputs(lambda_result)
    if condition is not None:
        as_while = True
    else:
@@ -464,6 +467,13 @@ def scan(fn,
    dummy_outs = outputs
    if condition is not None:
        dummy_outs.append(condition)
+
+    # If we use a regular dict here, the results are non-deterministic
+    if not isinstance(updates, (list, tuple)):
+        if isinstance(updates, dict) and \
+            not isinstance(updates, gof.python25.OrderedDict):
+                warnings.warn("Using non-deterministic dictionary.")
+
    dummy_f = function(dummy_args,
                       dummy_outs,
                       updates=updates,
@@ -508,7 +518,7 @@ def scan(fn,
            sit_sot_inner_outputs.append(outputs[i])

    ## Step 5.3 Outputs that correspond to update rules of shared variables
-    givens = {}
+    givens = OrderedDict()
    n_shared_outs = 0
    shared_scan_inputs = []
    shared_inner_inputs = []
@@ -527,7 +537,7 @@ def scan(fn,
    ## Step 5.4 Outputs with no taps used in the input
    n_nit_sot = 0
    nit_sot_inner_outputs = []
-    nit_sot_return_steps = {}
+    nit_sot_return_steps = OrderedDict()
    nit_sot_rightOrder = []
    for i, out in enumerate(outs_info):
        if not 'taps' in out:
@@ -582,7 +592,7 @@ def scan(fn,
                  shared_inner_outputs)
    if condition is not None:
        inner_outs.append(condition)
-    new_givens = {}
+    new_givens = OrderedDict()
    for w, w_copy in givens.iteritems():
        new_givens[w] = w.type.filter_variable(w_copy)

@@ -593,7 +603,7 @@ def scan(fn,
    ##

    tap_array = mit_sot_tap_array + [[-1] for x in xrange(n_sit_sot)]
-    info = {}
+    info = OrderedDict()

    info['tap_array'] = tap_array
    info['n_seqs'] = n_seqs
@@ -607,7 +617,7 @@ def scan(fn,
    info['truncate_gradient'] = -1
    info['name'] = name
    info['mode'] = mode
-    info['destroy_map'] = {}
+    info['destroy_map'] = OrderedDict()
    info['inplace'] = False
    info['gpu'] = False
    info['as_while'] = as_while
@@ -641,7 +651,7 @@ def scan(fn,
    ###         and so on ...
    ##

-    update_map = Updates()
+    update_map = OrderedUpdates()

    offset = n_mit_mot
    offsets = [abs(numpy.min(x)) for x in mit_sot_tap_array]
@@ -675,4 +685,5 @@ def scan(fn,
    elif len(scan_out_list) == 0:
        scan_out_list = None

+    assert isinstance(update_map, dict) and 'Ordered' in str(type(update_map))
    return (scan_out_list, update_map)
--- a/theano/sandbox/scan_module/scan.py
+++ b/theano/sandbox/scan_module/scan.py
@@ -46,17 +46,12 @@ from itertools import izip
 import logging
 import numpy

-from theano.compile import SharedVariable, function
-from theano import compile
 from theano import gof
 from theano.tensor import opt, TensorVariable
 from theano.tensor.sharedvar import TensorSharedVariable
 from theano import tensor
-from theano import config
-from theano.updates import Updates
 from theano.scalar.sharedvar import shared as scalar_shared
 from theano.compile.pfunc import rebuild_collect_shared
-import theano

 import scan_op
 import scan_utils

--- a/theano/scan_module/scan.py
+++ b/theano/scan_module/scan.py
@@ -52,8 +52,9 @@ from theano import gof
 from theano.tensor import opt
 from theano import tensor
 from theano import config
-from theano.updates import Updates
+from theano.updates import OrderedUpdates
 from theano.compile import ops
+from theano.gof.python25 import OrderedDict


 import scan_op
@@ -112,7 +113,7 @@ def scan(fn,
                   , outputs_info = [ dict(initial =  Output1, taps = [-3,-5])
                                    , dict(initial = Output2, taps = None)
                                    , Output3 ]
-                   , non_sequences = [ Argument1, Argument 2])
+                   , non_sequences = [ Argument1, Argument2])

        ``fn`` should expect the following arguments in this given order:

@@ -376,11 +377,11 @@ def scan(fn,
    n_seqs = len(seqs)
    n_outs = len(outs_info)

-    return_steps = {}
+    return_steps = OrderedDict()
    # wrap sequences in a dictionary if they are not already dictionaries
    for i in xrange(n_seqs):
        if not isinstance(seqs[i], dict):
-            seqs[i] = dict(input=seqs[i], taps=[0])
+            seqs[i] = OrderedDict([('input', seqs[i]), ('taps', [0])])
        elif seqs[i].get('taps', None):
            seqs[i]['taps'] = wrap_into_list(seqs[i]['taps'])
        elif seqs[i].get('taps', True) is None:
@@ -402,7 +403,7 @@ def scan(fn,

            if not isinstance(outs_info[i], dict):
                # by default any output has a tap value of -1
-                outs_info[i] = dict(initial=outs_info[i], taps=[-1])
+                outs_info[i] = OrderedDict([('initial', outs_info[i]), ('taps', [-1])])
            elif (not outs_info[i].get('initial', None) and
                    outs_info[i].get('taps', None)):
                # ^ no initial state but taps provided
@@ -421,8 +422,8 @@ def scan(fn,
                outs_info[i]['taps'] = [-1]
        else:
            # if a None is provided as the output info we replace it
-            # with an empty dict() to simplify handling
-            outs_info[i] = dict()
+            # with an empty OrderedDict() to simplify handling
+            outs_info[i] = OrderedDict()

    ##
    ###   Step 2. Generate inputs and outputs of the inner functions
@@ -565,7 +566,7 @@ def scan(fn,
    mit_sot_inner_inputs = []
    mit_sot_inner_slices = []
    mit_sot_inner_outputs = []
-    mit_sot_return_steps = {}
+    mit_sot_return_steps = OrderedDict()
    mit_sot_tap_array = []
    mit_sot_rightOrder = []

@@ -574,7 +575,7 @@ def scan(fn,
    sit_sot_inner_inputs = []
    sit_sot_inner_slices = []
    sit_sot_inner_outputs = []
-    sit_sot_return_steps = {}
+    sit_sot_return_steps = OrderedDict()
    sit_sot_rightOrder = []

    # go through outputs picking up time slices as needed
@@ -777,7 +778,7 @@ def scan(fn,
    # as non sequences at the end of our args
    fake_nonseqs = [x.type() for x in non_seqs]
    fake_outputs = scan_utils.clone(outputs,
-                                    replace=dict(zip(non_seqs,
+                                    replace=OrderedDict(zip(non_seqs,
                                                     fake_nonseqs)))
    all_inputs = itertools.ifilter(
        lambda x: (isinstance(x, gof.Variable) and
@@ -825,7 +826,7 @@ def scan(fn,
        n_outs = len(dummy_f.maker.outputs)
        if as_while:
            n_outs = n_outs - 1
-        outs_info = [dict() for x in xrange(n_outs)]
+        outs_info = [OrderedDict() for x in xrange(n_outs)]

    ## Step 5.1 Outputs with taps different then -1

@@ -839,7 +840,7 @@ def scan(fn,
            sit_sot_inner_outputs.append(outputs[i])

    ## Step 5.3 Outputs that correspond to update rules of shared variables
-    givens = {}
+    givens = OrderedDict()
    n_shared_outs = 0
    shared_scan_inputs = []
    shared_inner_inputs = []
@@ -879,7 +880,7 @@ def scan(fn,
    ## Step 5.4 Outputs with no taps used in the input
    n_nit_sot = 0
    nit_sot_inner_outputs = []
-    nit_sot_return_steps = {}
+    nit_sot_return_steps = OrderedDict()
    nit_sot_rightOrder = []
    for i, out in enumerate(outs_info):
        if not 'taps' in out:
@@ -902,7 +903,7 @@ def scan(fn,
                         if (not isinstance(arg, SharedVariable) and
                             not isinstance(arg, tensor.Constant))]

-    givens.update(dict(zip(other_scan_args, other_inner_args)))
+    givens.update(OrderedDict(zip(other_scan_args, other_inner_args)))
    other_shared_scan_args = [arg.variable for arg
                        in dummy_f.maker.expanded_inputs
                        if (isinstance(arg.variable, SharedVariable) and
@@ -911,7 +912,7 @@ def scan(fn,
                        in dummy_f.maker.expanded_inputs
                        if (isinstance(arg.variable, SharedVariable) and
                            not arg.update)]
-    givens.update(dict(zip(other_shared_scan_args,
+    givens.update(OrderedDict(zip(other_shared_scan_args,
                           other_shared_inner_args)))

    ##
@@ -943,7 +944,7 @@ def scan(fn,
        # replace w with w_copy, where w is CudaNdarray
        # and w_copy is TensorType. This is caused because shared
        # variables are put on GPU right aways >:| ,
-        new_givens = {}
+        new_givens = OrderedDict()

        for w, w_copy in givens.iteritems():
            if (isinstance(w.type, cuda.CudaNdarrayType)
@@ -962,7 +963,7 @@ def scan(fn,
    ##

    tap_array = mit_sot_tap_array + [[-1] for x in xrange(n_sit_sot)]
-    info = {}
+    info = OrderedDict()

    info['tap_array'] = tap_array
    info['n_seqs'] = n_seqs
@@ -976,7 +977,7 @@ def scan(fn,
    info['truncate_gradient'] = truncate_gradient
    info['name'] = name
    info['mode'] = mode
-    info['destroy_map'] = {}
+    info['destroy_map'] = OrderedDict()
    info['gpu'] = False
    info['as_while'] = as_while
    info['profile'] = profile
@@ -1012,7 +1013,7 @@ def scan(fn,
    ###         and so on ...
    ##

-    update_map = Updates()
+    update_map = OrderedUpdates()

    def remove_dimensions(outs, steps_return, offsets=None):
        out_ls = []

--- a/theano/scan_module/scan_utils.py
+++ b/theano/scan_module/scan_utils.py
@@ -18,12 +18,13 @@ import logging
 from itertools import izip

 import numpy
+import warnings

 import theano
 from theano.compile.pfunc import rebuild_collect_shared
 from theano import gof
 from theano import tensor, scalar
-from theano.gof.python25 import all
+from theano.gof.python25 import all, OrderedDict
 from theano.tensor.basic import get_constant_value


@@ -181,12 +182,17 @@ def clone(output,

 def get_updates_and_outputs(ls):
    """
-    This function tries to recognize the updates dictionary, the
+    This function tries to recognize the updates OrderedDict, the
    list of outputs and the stopping condition returned by the
    lambda expression and arrange them in a predefined order

+    WRITEME: what is the type of ls? how is it formatted?
+            if it's not in the predefined order already, how does
+            this function know how to put it in that order?

    """
+
+
    def is_outputs(elem):
        if (isinstance(elem, (list, tuple)) and
            all([isinstance(x, theano.Variable) for x in elem])):
@@ -197,6 +203,11 @@ def get_updates_and_outputs(ls):

    def is_updates(elem):
        if isinstance(elem, dict):
+            # Make sure the updates will be applied in a deterministic order
+            if not isinstance(elem, gof.python25.OrderedDict):
+                warnings.warn("Expected OrderedDict or OrderedUpdates, got "\
+                        +str(type(elem))+". This can make your script non-"
+                        "deterministic.")
            return True
        # Dictionaries can be given as lists of tuples
        if (isinstance(elem, (list, tuple)) and
@@ -242,10 +253,11 @@ def get_updates_and_outputs(ls):
                'values, you can use `tensor.constant` to turn them into '
                 'Theano variables.')

+
    if is_outputs(ls):
-        return None, _list(ls), {}
+        return None, _list(ls), OrderedDict()
    if is_updates(ls):
-        return None, [], dict(ls)
+        return None, [], OrderedDict(ls)
    error_msg = ('Scan cannot parse the return value of your lambda '
                 'expression, which is: %s' % (ls,))
    if not isinstance(ls, (list, tuple)):
@@ -258,16 +270,16 @@ def get_updates_and_outputs(ls):
    if len(ls) == 2:
        if is_outputs(ls[0]):
            if is_updates(ls[1]):
-                return (None, _list(ls[0]), dict(ls[1]))
+                return (None, _list(ls[0]), OrderedDict(ls[1]))
            elif is_condition(ls[1]):
-                return (ls[1].condition, _list(ls[0]), {})
+                return (ls[1].condition, _list(ls[0]), OrderedDict())
            else:
                raise ValueError(error_msg)
        elif is_updates(ls[0]):
            if is_outputs(ls[1]):
                raise ValueError(deprecation_msg)
            elif is_condition(ls[1]):
-                return (ls[1].condition, [], dict(ls[0]))
+                return (ls[1].condition, [], OrderedDict(ls[0]))
            else:
                raise ValueError(error_msg)
        else:
@@ -276,7 +288,7 @@ def get_updates_and_outputs(ls):
        if is_outputs(ls[0]):
            if is_updates(ls[1]):
                if is_condition(ls[2]):
-                    return (ls[2].condition, _list(ls[0]), dict(ls[1]))
+                    return (ls[2].condition, _list(ls[0]), OrderedDict(ls[1]))
                else:
                    raise ValueError(error_msg)
            else:

--- a/theano/scan_module/tests/test_scan.py
+++ b/theano/scan_module/tests/test_scan.py
@@ -16,6 +16,7 @@ from theano.compile.pfunc import rebuild_collect_shared
 from theano.gof.python25 import any
 from theano.tests  import unittest_tools as utt
 import theano.scalar.sharedvar
+from theano.gof.python25 import OrderedDict

 from numpy.testing.noseclasses import KnownFailureTest

@@ -1009,7 +1010,7 @@ class T_Scan(unittest.TestCase):
        x0 = theano.tensor.constant(x0)
        to_replace = outputs[0].owner.inputs[0].owner.inputs[1]
        outputs = theano.clone(outputs,
-                               replace={to_replace: x0})
+                               replace=[(to_replace, x0)])
        mode = theano.compile.mode.get_mode(None).including('inplace')
        f9 = theano.function([],
                             outputs,
@@ -1299,7 +1300,7 @@ class T_Scan(unittest.TestCase):
        state = theano.shared(v_state, 'vstate')

        def f_2():
-            return {state: 2 * state}
+            return OrderedDict([(state, 2 * state)])
        n_steps = theano.tensor.iscalar('nstep')
        output, updates = theano.scan(f_2,
                                      [],
@@ -1829,7 +1830,7 @@ class T_Scan(unittest.TestCase):
        X = theano.shared(numpy.array(1))

        out, updates = theano.scan(
-            lambda: {X: X + 1},
+            lambda: OrderedDict([(X, (X + 1))]),
            outputs_info=[],
            non_sequences=[],
            sequences=[],
@@ -1844,7 +1845,7 @@ class T_Scan(unittest.TestCase):
        y = theano.shared(numpy.array(1))

        out, updates = theano.scan(
-            lambda: {x: x + 1, y: x},
+            lambda: OrderedDict([(x, x + 1), (y, x)]),
            outputs_info=[],
            non_sequences=[],
            sequences=[],
@@ -1880,11 +1881,11 @@ class T_Scan(unittest.TestCase):
        b = theano.shared(numpy.random.rand(5, 4))

        def inner_func(a):
-            return a + 1, {b: 2 * b}
+            return a + 1, OrderedDict([(b, 2 * b)])

        out, updates = theano.scan(
            inner_func,
-            outputs_info=[{'initial': init_a}],
+            outputs_info=[OrderedDict([('initial', init_a)])],
            n_steps=1)
        out = out[-1]
        assert out.type.ndim == a.type.ndim
@@ -1967,7 +1968,7 @@ class T_Scan(unittest.TestCase):

        f1 = z * (x + y) ** 2 + 5
        f2 = theano.clone(f1,
-                          replace={y: y2},
+                          replace=OrderedDict([(y, y2)]),
                          strict=True,
                          copy_inputs=True)
        f2_inp = theano.gof.graph.inputs([f2])
@@ -1986,7 +1987,7 @@ class T_Scan(unittest.TestCase):

        f1 = z * (x + y) ** 2 + 5
        f2 = theano.clone(f1,
-                          replace={y: y2},
+                          replace=OrderedDict([(y, y2)]),
                          strict=False,
                          copy_inputs=True)
        f2_inp = theano.gof.graph.inputs([f2])
@@ -2005,7 +2006,7 @@ class T_Scan(unittest.TestCase):

        f1 = z * (x + y) ** 2 + 5
        f2 = theano.clone(f1,
-                          replace={y: y2},
+                          replace=[(y, y2)],
                          strict=True,
                          copy_inputs=False)
        f2_inp = theano.gof.graph.inputs([f2])
@@ -2024,7 +2025,7 @@ class T_Scan(unittest.TestCase):

        f1 = z * (x + y) ** 2 + 5
        f2 = theano.clone(f1,
-                          replace={y: y2},
+                          replace=[(y, y2)],
                          strict=False,
                          copy_inputs=False)
        f2_inp = theano.gof.graph.inputs([f2])
@@ -2204,15 +2205,15 @@ class T_Scan(unittest.TestCase):
        v2 = theano.shared(numpy.ones((5, 5), dtype=theano.config.floatX))
        shapef = theano.function([W],
                                 expr,
-                                 givens={initial: v1,
-                                         inpt: v2})
+                                 givens=OrderedDict([(initial, v1),
+                                         (inpt, v2)]))
        # First execution to cache n_steps
        shapef(numpy.ones((5, 5), dtype=theano.config.floatX))

        cost = expr.sum()
        d_cost_wrt_W = tensor.grad(cost, [W])
        f = theano.function([W, inpt], d_cost_wrt_W,
-                             givens={initial: theano.shared(numpy.zeros(5))})
+                             givens=OrderedDict([(initial, theano.shared(numpy.zeros(5)))]))

        rval = numpy.asarray([[5187989] * 5] * 5, dtype=theano.config.floatX)
        arg1 = numpy.ones((5, 5), dtype=theano.config.floatX)
@@ -3166,7 +3167,7 @@ class T_Scan(unittest.TestCase):
        shared_var = theano.shared(numpy.float32(1.))

        def inner_fn():
-            return [], {shared_var: shared_var + numpy.float32(1.)}
+            return [], OrderedDict([(shared_var, shared_var + numpy.float32(1.))])
        _, updates = theano.scan(inner_fn,
                                 n_steps=10,
                                 truncate_gradient=-1,
@@ -3239,7 +3240,7 @@ class T_Scan(unittest.TestCase):
        seq = tensor.matrix()
        initial_value = theano.shared(numpy.zeros((4, 1),
                                                  dtype=theano.config.floatX))
-        outputs_info = [{'initial': initial_value, 'taps': [-4]}, None]
+        outputs_info = [OrderedDict([('initial', initial_value), ('taps', [-4])]), None]
        results, updates = theano.scan(fn=onestep,
                                       sequences=seq,
                                       outputs_info=outputs_info)
@@ -3259,13 +3260,13 @@ class T_Scan(unittest.TestCase):
        seq = tensor.matrix()
        initial_value = theano.shared(numpy.zeros((4, 1),
                                                  dtype=theano.config.floatX))
-        outputs_info = [{'initial': initial_value, 'taps': [-4]}, None]
+        outputs_info = [OrderedDict([('initial', initial_value), ('taps', [-4])]), None]
        results, _ = theano.scan(fn=onestep,
                                       sequences=seq,
                                       outputs_info=outputs_info)
        sharedvar = theano.shared(numpy.zeros((1, 1),
                                              dtype=theano.config.floatX))
-        updates = {sharedvar: results[0][-1:]}
+        updates = OrderedDict([(sharedvar, results[0][-1:])])

        f = theano.function([seq], results[1], updates=updates)
        assert numpy.all(exp_out == f(inp))
@@ -3354,9 +3355,9 @@ def test_speed():
        theano.printing.debugprint(s_rinc)
        f = theano.function([],
                            [],
-                            updates={
-                                s_i: s_i + 1,
-                                shared_r: s_rinc},
+                            updates=OrderedDict([
+                                (s_i, s_i + 1),
+                                (shared_r, s_rinc)]),
                           mode=theano.Mode(linker='cvm'))
        f._check_for_aliased_inputs = False
        t2 = time.time()
@@ -3430,9 +3431,9 @@ def test_speed_rnn():
                        w)),
                tolerate_inplace_aliasing=True)
        f = theano.function([], [],
-                updates={
-                    s_i: s_i + 1,
-                    shared_r: s_rinc},
+                updates=OrderedDict([
+                    (s_i, s_i + 1),
+                    (shared_r, s_rinc)]),
                mode=theano.Mode(linker='cvm'))
        #theano.printing.debugprint(f)
        f_fn = f.fn
@@ -3495,9 +3496,9 @@ def test_speed_batchrnn():
                tolerate_inplace_aliasing=True)
        f = theano.function([],
                            [],
-                            updates={
-                                s_i: s_i + 1,
-                                shared_r: s_rinc},
+                            updates=[
+                                (s_i, s_i + 1),
+                                (shared_r, s_rinc)],
                mode=theano.Mode(linker='cvm'))
        #theano.printing.debugprint(f)
        f_fn = f.fn

--- a/theano/sparse/tests/test_basic.py
+++ b/theano/sparse/tests/test_basic.py
@@ -1219,7 +1219,7 @@ class UsmmTests(unittest.TestCase):
            mode = theano.compile.mode.get_default_mode().excluding('fusion')

            if inplace:
-                updates = {z: z - a * theano.sparse.dot(x, y)}
+                updates = [(z, z - a * theano.sparse.dot(x, y))]
                f_a = theano.function([a, x, y], [],
                                      updates=updates,
                                      mode=mode)

--- a/theano/tensor/basic.py
+++ b/theano/tensor/basic.py
@@ -27,6 +27,7 @@ from theano.tensor.utils import hash_from_ndarray
 from theano.scalar import ComplexError, IntegerDivisionError
 import theano.scalar.sharedvar
 from theano.gradient import grad_undefined
+from theano.gradient import grad_not_implemented
 from theano.gradient import DisconnectedType

 ### set up the external interface
@@ -1639,6 +1640,9 @@ class _tensor_py_operators:
    def ravel(self):
        return flatten(self)

+    def diagonal(self, offset=0, axis1=0, axis2=1):
+        return diagonal(self, offset, axis1, axis2)
+
    # CASTING
    def astype(self, dtype):
        return cast(self, dtype)
@@ -1796,6 +1800,8 @@ class _tensor_py_operators:
        """See `theano.tensor.conj`"""
        return conj(self)

+    conjugate = conj
+    
    def repeat(self, repeats, axis=None):
        """See `theano.tensor.repeat`"""
        from theano.tensor.extra_ops import repeat
@@ -7310,3 +7316,128 @@ def all(x, axis=None, keepdims=False):
     if keepdims:
         out = makeKeepDims(x, out, axis)
     return out
+
+class Diagonal(Op):
+    """Return specified diagonals.
+
+    ``perform`` delegates to the ``diagonal`` method of the input value.
+
+    :param offset: Passed through as the ``offset`` argument of that
+                   method (which diagonal to take).
+    :param axis1: Passed through as its ``axis1`` argument.
+    :param axis2: Passed through as its ``axis2`` argument.
+    :param x: A tensor variable with x.ndim >= 2.
+
+    :return: A tensor with x.ndim - 1 dimensions: axis1 and axis2 are
+             dropped and the diagonal becomes the last dimension.
+    """
+
+    def __init__(self, offset=0, axis1=0, axis2=1):
+        self.offset = offset
+        self.axis1 = axis1
+        self.axis2 = axis2
+
+    def props(self):
+        """Parameters that distinguish one instance from another."""
+        return (self.offset, self.axis1, self.axis2)
+
+    def __eq__(self, other):
+        # Instances built with different parameters extract different
+        # diagonals, so they must not compare (or hash) as the same op.
+        return type(self) == type(other) and self.props() == other.props()
+
+    def __hash__(self):
+        return hash((type(self), self.props()))
+
+    def make_node(self, x):
+        x = as_tensor_variable(x)
+        assert x.ndim >= 2
+        # Two input dimensions collapse into one, hence x.ndim - 1.
+        return Apply(self, [x], [tensor(dtype=x.dtype,
+                                        broadcastable=[False] * (x.ndim -1))])
+
+    def perform(self, node, (x,), (z,)):
+        z[0] = x.diagonal(self.offset, self.axis1, self.axis2)
+
+    def grad(self, (x,), (gz,)):
+        return [grad_not_implemented(self, 0, x)]
+
+    def infer_shape(self, node, shapes):
+        # NOTE: the axis filtering below compares against axis1/axis2
+        # literally, so it assumes they are given as non-negative indices.
+        in_shape, = shapes
+        dim1 = in_shape[self.axis1]
+        dim2 = in_shape[self.axis2]
+        out_shape = [d for i,d in enumerate(in_shape)
+                     if i not in (self.axis1, self.axis2)]
+        # The following logic is inspired by C code of PyArray_Diagonal().
+        offset = self.offset
+        if offset > 0:
+            diag_size = clip(dim2 - offset, 0, dim1)
+        elif offset < 0:
+            diag_size = clip(dim1 + offset, 0, dim2)
+        else:
+            diag_size = minimum(dim1, dim2)
+        out_shape.append(diag_size)
+        return [tuple(out_shape)]
+
+    def __str__(self):
+        return self.__class__.__name__
+
+def diagonal(a, offset=0, axis1=0, axis2=1):
+    """Return the specified diagonals of `a`.
+
+    The default (offset, axis1, axis2) == (0, 0, 1) is routed to
+    `theano.sandbox.linalg.extract_diag`; anything else uses `Diagonal`.
+    """
+    if (offset, axis1, axis2) == (0, 0, 1):
+        # Function-level import; presumably avoids an import cycle with
+        # theano.sandbox.linalg -- TODO confirm.
+        from theano.sandbox.linalg import extract_diag
+        return extract_diag(a)
+    return Diagonal(offset, axis1, axis2)(a)
+
+class Diag(Op):
+    """Build a matrix from a vector via ``numpy.diag``."""
+
+    def __eq__(self, other):
+        return type(self) == type(other)
+
+    def __hash__(self):
+        return hash(type(self))
+
+    def make_node(self, diag):
+        diag = as_tensor_variable(diag)
+        if diag.type.ndim != 1:
+            raise TypeError('data argument must be a vector', diag.type)
+
+        return Apply(self, [diag], [matrix(dtype=diag.dtype)])
+
+    def perform(self, node, inputs, (z,)):
+        z[0] = numpy.diag(inputs[0])
+
+    def grad(self, inputs, (gz,)):
+        return [diagonal(gz)]
+
+    def infer_shape(self, nodes, shapes):
+        # Output is square: (len(v), len(v)).
+        return [(shapes[0][0],) * 2]
+
+    def __str__(self):
+        return self.__class__.__name__
+
+def diag(v, k=0):
+    """Build a matrix from a vector, or extract a diagonal from a matrix.
+
+    :param v: A tensor variable with v.ndim equal to 1 or 2.
+    :param k: Diagonal offset; only 0 is supported when v is a vector.
+
+    :raises ValueError: if v.ndim is neither 1 nor 2.
+    """
+    if v.ndim == 1:
+        assert k == 0, "diagonals other than main are not implemented"
+        return Diag()(v)
+    elif v.ndim == 2:
+        return diagonal(v, k)
+    else:
+        raise ValueError("Input must be 1- or 2-d.")
--- a/theano/tensor/tests/mlp_test.py
+++ b/theano/tensor/tests/mlp_test.py
@@ -9,7 +9,7 @@ import numpy

 import theano
 import theano.tensor as T
-from theano.gof.python25 import any
+from theano.gof.python25 import any, OrderedDict

 def gen_data():

@@ -293,7 +293,7 @@ def test_mlp():
    # TODO: refine that and include only those
    mode = theano.compile.get_default_mode().including('fast_run')

-    updates2 = {}
+    updates2 = OrderedDict()

    updates2[classifier.hiddenLayer.params[0]]=T.grad(cost,classifier.hiddenLayer.params[0])
    train_model =theano.function( inputs = [index],

--- a/theano/tensor/tests/test_basic.py
+++ b/theano/tensor/tests/test_basic.py
@@ -40,7 +40,7 @@ from theano.tensor import (_shared, wvector, bvector, autocast_float_as,
        tile, patternbroadcast, Eye, Shape, Default, Dot, PermuteRowElements,
        ScalarFromTensor, TensorFromScalar, dtensor4, Rebroadcast, Alloc,
        dtensor3, SpecifyShape, Mean, IncSubtensor, AdvancedIncSubtensor1,
-        itensor3, Tile, AdvancedIncSubtensor, switch)
+        itensor3, Tile, AdvancedIncSubtensor, switch, Diagonal, Diag)
 from theano.tests import unittest_tools as utt
 from theano.printing import debugprint

@@ -6590,6 +6590,34 @@ class TestInferShape(utt.InferShapeTester):
                                [Eye()(aiscal, biscal, ciscal)],
                                [3, 5, 0], Eye)

+        # Diagonal
+        atens3 = tensor3()
+        atens3_val = rand(4, 5, 3)
+        atens3_diag = Diagonal()(atens3)
+        self._compile_and_check([atens3], [atens3_diag],
+                                [atens3_val], Diagonal)
+        atens3_diag = Diagonal(1)(atens3)
+        self._compile_and_check([atens3], [atens3_diag],
+                                [atens3_val], Diagonal)
+        atens3_diag = Diagonal(-1)(atens3)
+        self._compile_and_check([atens3], [atens3_diag],
+                                [atens3_val], Diagonal)
+        atens3_diag = Diagonal(1,0,2)(atens3)
+        self._compile_and_check([atens3], [atens3_diag],
+                                [atens3_val], Diagonal)
+        atens3_diag = Diagonal(1,1,2)(atens3)
+        self._compile_and_check([atens3], [atens3_diag],
+                                [atens3_val], Diagonal)
+        atens3_diag = Diagonal(1,2,0)(atens3)
+        self._compile_and_check([atens3], [atens3_diag],
+                                [atens3_val], Diagonal)
+
+        # Diag
+        advec = dvector()
+        advec_val = rand(4)
+        self._compile_and_check([advec], [Diag()(advec)],
+                                [advec_val], Diag)
+
        # Shape
        # 'opt.Makevector' precludes optimizer from disentangling
        # elements of shape
@@ -7070,7 +7098,7 @@ class TestTensorInstanceMethods(unittest.TestCase):
        assert_array_equal(X.argsort().eval({X: x}), x.argsort())
        assert_array_equal(X.argsort(1).eval({X: x}), x.argsort(1))

-    def test_dot(self):
+    def test_clip(self):
        X, Y = self.vars
        x, y = self.vals
        Z = X.clip(0.5 - Y, 0.5 + Y)
@@ -7099,6 +7127,7 @@ class TestTensorInstanceMethods(unittest.TestCase):
        Z = X + Y * 1j
        z = x + y * 1j
        assert_array_equal(Z.conj().eval({Z: z}), z.conj())
+        assert_array_equal(Z.conjugate().eval({Z: z}), z.conj())

    def test_round(self):
        X, _ = self.vars
@@ -7128,6 +7157,16 @@ class TestTensorInstanceMethods(unittest.TestCase):
        x, _ = self.vals
        assert_array_equal(X.ravel().eval({X: x}), x.ravel())

+    def test_diagonal(self):
+        """Symbolic ``diagonal`` must match the concrete value's result."""
+        sym, _ = self.vars
+        val, _ = self.vals
+        arg_sets = [(), (1,), (-1,),
+                    (1, 0, 1), (-1, 0, 1), (0, 1, 0), (-2, 1, 0)]
+        for args in arg_sets:
+            assert_array_equal(sym.diagonal(*args).eval({sym: val}),
+                               val.diagonal(*args))
+

 if __name__ == '__main__':


--- a/theano/tensor/tests/test_blas.py
+++ b/theano/tensor/tests/test_blas.py
@@ -185,8 +185,8 @@ class t_gemm(TestCase):
        l2_reg = T.constant(0.0001).astype(config.floatX)

        #test constant merge with gemm
-        f = theano.function([a, b], updates={s: lr1 * T.dot(a, b) +
-                                                l2_reg * lr2 * s},
+        f = theano.function([a, b], updates=[(s, lr1 * T.dot(a, b) +
+                                                l2_reg * lr2 * s)],
                            mode=mode_not_fast_compile).maker.fgraph.toposort()
        #[Gemm{inplace}(<TensorType(float64, matrix)>, 0.01,
        # <TensorType(float64, matrix)>, <TensorType(float64, matrix)>,
@@ -195,8 +195,8 @@ class t_gemm(TestCase):
        assert f[0].op == gemm_inplace

        #test factored scalar with merge
-        f = theano.function([a, b], updates={s: lr1 * (T.dot(a, b) -
-                                                        l2_reg * s)},
+        f = theano.function([a, b], updates=[(s, lr1 * (T.dot(a, b) -
+                                                        l2_reg * s))],
                            mode=mode_not_fast_compile).maker.fgraph.toposort()
        #[Gemm{inplace}(<TensorType(float64, matrix)>, 0.01,
        # <TensorType(float64, matrix)>, <TensorType(float64, matrix)>,
@@ -206,7 +206,7 @@ class t_gemm(TestCase):

        #test factored scalar with merge and neg
        f = theano.function([a, b],
-                            updates={s: s - lr1 * (s * .0002 + T.dot(a, b))},
+                            updates=[(s, s - lr1 * (s * .0002 + T.dot(a, b)))],
                            mode=mode_not_fast_compile).maker.fgraph.toposort()
        #[Gemm{inplace}(<TensorType(float64, matrix)>, -0.01,
        # <TensorType(float64, matrix)>, <TensorType(float64, matrix)>,
@@ -368,7 +368,7 @@ class t_gemm(TestCase):
                tz_i = gemm_no_inplace(tz[:, :, i], ta, tx[
                    :, :, i], ty[:, :, i], tb)
                g_i = theano.function([], tz_i,
-                        updates={tz: T.set_subtensor(tz[:, :, i], tz_i)},
+                        updates=[(tz, T.set_subtensor(tz[:, :, i], tz_i))],
                        mode=compile.Mode(optimizer=None, linker=l))
                for j in xrange(3):
                    g_i()
@@ -801,7 +801,7 @@ def test_gemm_unrolled():
            cur_V = update_V(cur_H)
            cur_H = update_H(cur_V)

-        unrolled_theano = theano.function([], updates={V: cur_V, H: cur_H},
+        unrolled_theano = theano.function([], updates=[(V, cur_V), (H, cur_H)],
                                   name='unrolled_theano')
        nb_dot = sum([1 for node in unrolled_theano.maker.fgraph.toposort()
                      if isinstance(node.op, (theano.tensor.Dot,
@@ -1032,7 +1032,7 @@ def test_dot_w_self():
    p = T.dot(A, A) * B

    grad = T.grad(T.mean(p), A)
-    f = theano.function([B], p, updates={A: A - grad})
+    f = theano.function([B], p, updates=[(A, A - grad)])

    # tests correctness in debugmode
    f(numpy.asarray([[0, 1], [2, 3]], dtype=config.floatX))
@@ -1119,7 +1119,7 @@ class TestGemv(TestCase, unittest_tools.TestOptimizationMixin):
        assert topo[0].op.inplace == False

        #test the inplace version
-        g = theano.function([], [], updates={v2: v2 + theano.dot(m, v1)},
+        g = theano.function([], [], updates=[(v2, v2 + theano.dot(m, v1))],
                            mode=mode_blas_opt)

        # Assert they produce the same output
@@ -1169,7 +1169,7 @@ class TestGemv(TestCase, unittest_tools.TestOptimizationMixin):
        assert topo[-1].op.inplace == False

        #test the inplace version
-        g = theano.function([], [], updates={v2: v2 + theano.dot(v1, m)},
+        g = theano.function([], [], updates=[(v2, v2 + theano.dot(v1, m))],
                            mode=mode_blas_opt)

        # Assert they produce the same output
@@ -1575,7 +1575,7 @@ class TestGer(TestCase, unittest_tools.TestOptimizationMixin):

    def function(self, inputs, outputs, updates=None):
        if updates is None:
-            updates = {}
+            updates = []
        return theano.function(inputs, outputs, self.mode, updates=updates)

    def b(self, bval):
@@ -1691,8 +1691,8 @@ class TestGer(TestCase, unittest_tools.TestOptimizationMixin):
    def test_inplace(self):
        A = self.shared(numpy.random.rand(4, 5).astype(self.dtype))
        f = self.function([self.x, self.y], [],
-                          updates={A: A + T.constant(0.1, dtype=self.dtype) *
-                                   T.outer(self.x, self.y)})
+                          updates=[(A, A + T.constant(0.1, dtype=self.dtype) *
+                                   T.outer(self.x, self.y))])
        self.assertFunctionContains(f, self.ger_destructive)
        f(numpy.random.rand(4).astype(self.dtype),
          numpy.random.rand(5).astype(self.dtype))
@@ -1731,15 +1731,15 @@ class TestBlasStrides(TestCase):
        bt_dev = b_t.get_value(borrow=False, return_internal_type=True)
        ct_dev = c_t.get_value(borrow=False, return_internal_type=True)

-        f_nn = theano.function([], [], updates={a: tensor.dot(b, c)},
+        f_nn = theano.function([], [], updates=[(a, tensor.dot(b, c))],
                mode=self.mode)
        #print 'class name:', self.__class__.__name__
        #theano.printing.debugprint(f_nn)
-        f_nt = theano.function([], [], updates={a: tensor.dot(b, c_t.T)},
+        f_nt = theano.function([], [], updates=[(a, tensor.dot(b, c_t.T))],
                mode=self.mode)
-        f_tn = theano.function([], [], updates={a: tensor.dot(b_t.T, c)},
+        f_tn = theano.function([], [], updates=[(a, tensor.dot(b_t.T, c))],
                mode=self.mode)
-        f_tt = theano.function([], [], updates={a: tensor.dot(b_t.T, c_t.T)},
+        f_tt = theano.function([], [], updates=[(a, tensor.dot(b_t.T, c_t.T))],
                mode=self.mode)

        # Try with all stride patterns, and all transposed pattern
@@ -1802,14 +1802,14 @@ class TestBlasStrides(TestCase):
        bt_dev = b_t.get_value(borrow=False, return_internal_type=True)
        ct_dev = c_t.get_value(borrow=False, return_internal_type=True)

-        f_nn = theano.function([], [], updates={a: l * tensor.dot(b, c)},
+        f_nn = theano.function([], [], updates=[(a, l * tensor.dot(b, c))],
                mode=self.mode)
-        f_nt = theano.function([], [], updates={a: l * tensor.dot(b, c_t.T)},
+        f_nt = theano.function([], [], updates=[(a, l * tensor.dot(b, c_t.T))],
                mode=self.mode)
-        f_tn = theano.function([], [], updates={a: l * tensor.dot(b_t.T, c)},
+        f_tn = theano.function([], [], updates=[(a, l * tensor.dot(b_t.T, c))],
                mode=self.mode)
        f_tt = theano.function([], [],
-                updates={a: l * tensor.dot(b_t.T, c_t.T)},
+                updates=[(a, l * tensor.dot(b_t.T, c_t.T))],
                mode=self.mode)

        # Try with all stride patterns, and all transposed pattern
@@ -1875,28 +1875,28 @@ class TestBlasStrides(TestCase):
        ct_dev = c_t.get_value(borrow=False, return_internal_type=True)

        f_nnn = theano.function([], [],
-                updates={a: (l * a + tensor.dot(b, c))},
+                updates=[(a, (l * a + tensor.dot(b, c)))],
                mode=self.mode)
        f_nnt = theano.function([], [],
-                updates={a: (l * a + tensor.dot(b, c_t.T))},
+                updates=[(a, (l * a + tensor.dot(b, c_t.T)))],
                mode=self.mode)
        f_ntn = theano.function([], [],
-                updates={a: (l * a + tensor.dot(b_t.T, c))},
+                updates=[(a, (l * a + tensor.dot(b_t.T, c)))],
                mode=self.mode)
        f_ntt = theano.function([], [],
-                updates={a: (l * a + tensor.dot(b_t.T, c_t.T))},
+                updates=[(a, (l * a + tensor.dot(b_t.T, c_t.T)))],
                mode=self.mode)
        f_tnn = theano.function([], [],
-                updates={a_t: (l * a_t + tensor.dot(b, c).T)},
+                updates=[(a_t, (l * a_t + tensor.dot(b, c).T))],
                mode=self.mode)
        f_tnt = theano.function([], [],
-                updates={a_t: (l * a_t + tensor.dot(b, c_t.T).T)},
+                updates=[(a_t, (l * a_t + tensor.dot(b, c_t.T).T))],
                mode=self.mode)
        f_ttn = theano.function([], [],
-                updates={a_t: (l * a_t + tensor.dot(b_t.T, c).T)},
+                updates=[(a_t, (l * a_t + tensor.dot(b_t.T, c).T))],
                mode=self.mode)
        f_ttt = theano.function([], [],
-                updates={a_t: (l * a_t + tensor.dot(b_t.T, c_t.T).T)},
+                updates=[(a_t, (l * a_t + tensor.dot(b_t.T, c_t.T).T))],
                mode=self.mode)

        # Try with all stride patterns, and all transposed pattern
@@ -1985,11 +1985,11 @@ class TestBlasStrides(TestCase):
        b_dev = b.get_value(borrow=False, return_internal_type=True)
        c_dev = c.get_value(borrow=False, return_internal_type=True)

-        f_n = theano.function([], [], updates={a: (a + l * tensor.dot(b, c))},
+        f_n = theano.function([], [], updates=[(a, (a + l * tensor.dot(b, c)))],
                mode=self.mode)

        f_t = theano.function([], [],
-                updates={a: (a + l * tensor.dot(b_t.T, c))},
+                updates=[(a, (a + l * tensor.dot(b_t.T, c)))],
                mode=self.mode)

        # Try with all stride patterns, and all transposed pattern
@@ -2041,11 +2041,11 @@ class TestBlasStrides(TestCase):
        c_dev = c.get_value(borrow=False, return_internal_type=True)

        f_n = theano.function([], [],
-                updates={a: (a + l * tensor.outer(b, c))},
+                updates=[(a, (a + l * tensor.outer(b, c)))],
                mode=self.mode)

        f_t = theano.function([], [],
-                updates={a_t: (a_t + l * tensor.outer(b, c).T)},
+                updates=[(a_t, (a_t + l * tensor.outer(b, c).T))],
                mode=self.mode)

        # Try with all stride patterns, and all transposed patterns

--- a/theano/tensor/tests/test_blas_c.py
+++ b/theano/tensor/tests/test_blas_c.py
@@ -185,7 +185,7 @@ class TestCGemv(TestCase, TestOptimizationMixin):

        #test the inplace version
        g = theano.function([], [],
-                updates={v2: v2 + theano.dot(m, v1)},
+                updates=[(v2, v2 + theano.dot(m, v1))],
                mode=self.mode)

        # Assert they produce the same output

--- a/theano/tensor/tests/test_sharedvar.py
+++ b/theano/tensor/tests/test_sharedvar.py
@@ -526,8 +526,8 @@ def makeSharedTester(shared_constructor_,
            s = self.cast_value(s)
            s_shared = self.shared_constructor(s)
            f = theano.function([],
-                                updates={s_shared:theano.dot(a_shared,b_shared)
-                                         +s_shared})
+                                updates=[(s_shared, theano.dot(a_shared,b_shared)
+                                         +s_shared)])
            topo=f.maker.fgraph.toposort()
            f()
            #[Gemm{inplace}(<TensorType(float64, matrix)>, 0.01, <TensorType(float64, matrix)>, <TensorType(float64, matrix)>, 2e-06)]
@@ -541,8 +541,8 @@ def makeSharedTester(shared_constructor_,

            #now test with the specify shape op in the output
            f = theano.function([], s_shared.shape,
-                                updates={s_shared:theano.dot(a_shared,b_shared)
-                                         +s_shared_specify})
+                                updates=[(s_shared, theano.dot(a_shared,b_shared)
+                                         +s_shared_specify)])
            topo=f.maker.fgraph.toposort()
            shp=f()
            assert numpy.all(shp == (40,40))
@@ -557,8 +557,8 @@ def makeSharedTester(shared_constructor_,
                    b_shared.get_value(borrow=True).shape)

            f = theano.function([], s_shared.shape,
-                                updates={s_shared:theano.dot(a_shared,b_shared)
-                                         +s_shared_specify})
+                                updates=[(s_shared, theano.dot(a_shared,b_shared)
+                                         +s_shared_specify)])
            topo=f.maker.fgraph.toposort()
            shp=f()
            assert numpy.all(shp == (40,40))

--- a/theano/tests/diverse_tests.py
+++ b/theano/tests/diverse_tests.py
@@ -55,7 +55,7 @@ class T_scipy(unittest.TestCase):
        train = function(
            inputs=[x,y],
            outputs=[prediction, xent],
-            updates={w:w-0.1*gw, b:b-0.1*gb})
+            updates=[(w, w-0.1*gw), (b, b-0.1*gb)])
        predict = function(inputs=[x], outputs=prediction)

        N = 4

--- a/theano/tests/test_updates.py
+++ b/theano/tests/test_updates.py
 import unittest

 import theano
-from theano.updates import Updates
+from theano.updates import OrderedUpdates
 import theano.tensor as T


 class test_ifelse(unittest.TestCase):

    def test_updates_init(self):
-        self.assertRaises(TypeError, Updates, dict(d=3))
+        self.assertRaises(TypeError, OrderedUpdates, dict(d=3))

        sv = theano.shared('asdf')
-        Updates({sv:3})
+        OrderedUpdates({sv:3})

    def test_updates_setitem(self):
        ok = True

-        up = Updates()
+        up = OrderedUpdates()
        sv = theano.shared('asdf')

        # keys have to be SharedVariables
@@ -27,8 +27,8 @@ class test_ifelse(unittest.TestCase):

    def test_updates_add(self):

-        up1 = Updates()
-        up2 = Updates()
+        up1 = OrderedUpdates()
+        up2 = OrderedUpdates()

        a = theano.shared('a')
        b = theano.shared('b')

--- a/theano/updates.py
+++ b/theano/updates.py
@@ -8,23 +8,27 @@ __contact__ = "theano-dev <theano-dev@googlegroups.com>"

 __docformat__ = "restructuredtext en"

+from theano.gof.python25 import OrderedDict
+
 from theano.compile.sharedvalue import SharedVariable
 import logging
 logger = logging.getLogger('theano.updates')
+import warnings


-class Updates(dict):
+# Must be an OrderedDict or updates will be applied in a non-deterministic order
+class OrderedUpdates(OrderedDict):
    """
    Dict-like mapping from SharedVariable keys to their new values.

    This mapping supports the use of the "+" operator for the union of updates.
    """
    def __init__(self, *key, **kwargs):
-        ret = super(Updates, self).__init__(*key, **kwargs)
+        ret = super(OrderedUpdates, self).__init__(*key, **kwargs)
        for key in self:
            if not isinstance(key, SharedVariable):
                raise TypeError(
-                    'Updates keys must inherit from SharedVariable',
+                    'OrderedUpdates keys must inherit from SharedVariable',
                    key)
        return ret

@@ -38,12 +42,14 @@ class Updates(dict):
            # value. Should it be cast to a GPU value right away?  Should
            # literals be transformed into constants immediately?

-            return super(Updates, self).__setitem__(key, value)
+            return super(OrderedUpdates, self).__setitem__(key, value)
        else:
-            raise TypeError('Updates keys must inherit from SharedVariable',
+            raise TypeError('OrderedUpdates keys must inherit from SharedVariable',
                    key)

-    def update(self, other):
+    def update(self, other=None):
+        if other is None:
+            return
-        for key, val in dict(other).iteritems():
+        for key, val in OrderedDict(other).iteritems():  # keep given order
             if key in self:
                 if self[key] == val:
@@ -52,13 +58,23 @@ class Updates(dict):
             self[key] = val  # __setitem__ does type-checking

     def __add__(self, other):
-        rval = Updates()
+        rval = OrderedUpdates()
         rval.update(self)
         rval.update(other)
         return rval

     def __radd__(other, self):
-        rval = Updates()
+        rval = OrderedUpdates()
         rval.update(other)
         rval.update(self)
         return rval
+
+def Updates(*key, **kwargs):
+    """Deprecated factory kept so existing `Updates(...)` calls still work.
+
+    Warns, then forwards every argument to `OrderedUpdates` unchanged.
+    """
+    # stacklevel=2 makes the warning point at the caller's line, not here.
+    warnings.warn("Updates is deprecated. Switch to OrderedUpdates.",
+                  stacklevel=2)
+    return OrderedUpdates(*key, **kwargs)