提交 0a7cb330 authored 作者: Frederic Bastien's avatar Frederic Bastien

Add assert that known_grads is deterministic or of size 1.

上级 8769382f
...@@ -390,8 +390,8 @@ def grad(cost, wrt, consider_constant=None, ...@@ -390,8 +390,8 @@ def grad(cost, wrt, consider_constant=None,
If True, variables generated by grad will be named If True, variables generated by grad will be named
(d<cost.name>/d<wrt.name>) provided that both cost and wrt (d<cost.name>/d<wrt.name>) provided that both cost and wrt
have names have names
known_grads : dict, optional known_grads : OrderedDict, optional
        A dictionary mapping variables to their gradients. This is An ordered dictionary mapping variables to their gradients. This is
useful in the case where you know the gradient on some useful in the case where you know the gradient on some
variables but do not know the original cost. variables but do not know the original cost.
return_disconnected : {'zero', 'None', 'Disconnected'} return_disconnected : {'zero', 'None', 'Disconnected'}
...@@ -462,6 +462,9 @@ def grad(cost, wrt, consider_constant=None, ...@@ -462,6 +462,9 @@ def grad(cost, wrt, consider_constant=None,
if known_grads is None: if known_grads is None:
known_grads = OrderedDict() known_grads = OrderedDict()
else:
m = "known_grads must be an OrderedDict. "
assert isinstance(known_grads, OrderedDict) or len(known_grads) <=1, m
# The gradient of the cost is 1 unless specified otherwise by known_grads. # The gradient of the cost is 1 unless specified otherwise by known_grads.
if cost is not None: if cost is not None:
......
...@@ -474,7 +474,7 @@ def test_known_grads(): ...@@ -474,7 +474,7 @@ def test_known_grads():
for layer in layers: for layer in layers:
print('Testing by separately computing ', layer) print('Testing by separately computing ', layer)
first = theano.tensor.grad(cost, layer, disconnected_inputs='ignore') first = theano.tensor.grad(cost, layer, disconnected_inputs='ignore')
known = dict(izip(layer, first)) known = OrderedDict(izip(layer, first))
full = theano.tensor.grad(cost=None, known_grads=known, wrt=inputs, disconnected_inputs='ignore') full = theano.tensor.grad(cost=None, known_grads=known, wrt=inputs, disconnected_inputs='ignore')
full = theano.function(inputs, full) full = theano.function(inputs, full)
full = full(*values) full = full(*values)
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论