Commit 34142d69 authored by nouiz

Merge pull request #997 from goodfeli/fix_elemwise_grad

Add validation of input for some ops
...@@ -873,6 +873,7 @@ def _populate_grad_dict(var_to_node_to_idx, ...@@ -873,6 +873,7 @@ def _populate_grad_dict(var_to_node_to_idx,
# populate grad_dict[var] and return it # populate grad_dict[var] and return it
def access_grad_cache(var): def access_grad_cache(var):
if var not in grad_dict: if var not in grad_dict:
# If var is not in grad_dict already, we must compute it
if var in var_to_node_to_idx: if var in var_to_node_to_idx:
terms = [] terms = []
node_to_idx = var_to_node_to_idx[var] node_to_idx = var_to_node_to_idx[var]
...@@ -895,6 +896,11 @@ def _populate_grad_dict(var_to_node_to_idx, ...@@ -895,6 +896,11 @@ def _populate_grad_dict(var_to_node_to_idx,
if isinstance(term.type, DisconnectedType): if isinstance(term.type, DisconnectedType):
continue continue
if hasattr(var,'ndim') and term.ndim != var.ndim:
raise ValueError(("%s.grad returned a term with"
" %d dimensions, but %d are required.") % (
str(node.op), term.ndim, var.ndim))
terms.append(term) terms.append(term)
# Add up the terms to get the total gradient on this variable # Add up the terms to get the total gradient on this variable
...@@ -911,6 +917,7 @@ def _populate_grad_dict(var_to_node_to_idx, ...@@ -911,6 +917,7 @@ def _populate_grad_dict(var_to_node_to_idx,
# this variable isn't connected to the cost in the computational # this variable isn't connected to the cost in the computational
# graph # graph
grad_dict[var] = DisconnectedType()() grad_dict[var] = DisconnectedType()()
# end if cache miss
return grad_dict[var] return grad_dict[var]
rval = [access_grad_cache(elem) for elem in wrt] rval = [access_grad_cache(elem) for elem in wrt]
......
...@@ -4441,6 +4441,10 @@ class IncSubtensor(Op): ...@@ -4441,6 +4441,10 @@ class IncSubtensor(Op):
def make_node(self, x, y, *inputs): def make_node(self, x, y, *inputs):
x, y = map(as_tensor_variable, [x, y]) x, y = map(as_tensor_variable, [x, y])
if y.ndim > x.ndim:
raise ValueError(("Trying to increment a %d-dimensional "
"subtensor with a %d-dimensional value.") % (x.ndim,
y.ndim))
inputs = tuple(map(Subtensor.my_as_scalar, inputs)) inputs = tuple(map(Subtensor.my_as_scalar, inputs))
idx_list = list(self.idx_list) idx_list = list(self.idx_list)
......
...@@ -101,6 +101,8 @@ class DimShuffle(Op): ...@@ -101,6 +101,8 @@ class DimShuffle(Op):
- new_order: a list representing the relationship between the - new_order: a list representing the relationship between the
input's dimensions and the output's dimensions. Each input's dimensions and the output's dimensions. Each
element of the list can either be an index or 'x'. element of the list can either be an index or 'x'.
Indices must be encoded as python integers, not
theano symbolic integers.
- inplace: if True, the output will be a view of the input. - inplace: if True, the output will be a view of the input.
If False, the output will be a copy of the input. If False, the output will be a copy of the input.
...@@ -119,10 +121,17 @@ class DimShuffle(Op): ...@@ -119,10 +121,17 @@ class DimShuffle(Op):
self.new_order = new_order self.new_order = new_order
self.inplace = inplace self.inplace = inplace
for i in xrange(len(new_order) - 1): for i, j in enumerate(new_order):
j = new_order[i] if j != 'x':
if j != 'x' and j in new_order[(i + 1):]: if not isinstance(j, int):
raise ValueError(( raise TypeError(
"DimShuffle indices must be python ints.")
if j >= len(input_broadcastable):
raise ValueError(("new_order[%d] is %d, but the input "
"only has %d axes.") %
(i,j,len(input_broadcastable)))
if j in new_order[(i + 1):]:
raise ValueError((
"The same input dimension may not appear twice in the " "The same input dimension may not appear twice in the "
"list of output dimensions", (new_order))) "list of output dimensions", (new_order)))
...@@ -379,7 +388,7 @@ PyArray_SetBaseObject(%(res)s, (PyObject*)%(basename)s); ...@@ -379,7 +388,7 @@ PyArray_SetBaseObject(%(res)s, (PyObject*)%(basename)s);
if v != 'x': if v != 'x':
grad_order[v] = i grad_order[v] = i
# Do not make the DimShuffle inplace as an optimization at the # Do not make the DimShuffle inplace as an optimization at the
# canonicalization optimization phase will remove the implace. # canonicalization optimization phase will remove the inplace.
# The inplace will be reintroduced automatically later in the graph. # The inplace will be reintroduced automatically later in the graph.
return [DimShuffle(gz.type.broadcastable, grad_order)( return [DimShuffle(gz.type.broadcastable, grad_order)(
Elemwise(scalar.identity)(gz))] Elemwise(scalar.identity)(gz))]
......
Markdown format supported
0%
You are adding 0 people to this discussion. Please proceed with caution.
Please finish editing this comment first!
Register or sign in to post a comment