Merge pull request #981 from nouiz/sparse_grad_import

Sparse grad import

Merge pull request #981 from nouiz/sparse_grad_import
94ccd2ae · lamblin · 49a16ab0 · 40cd0a6c · 94ccd2ae · 94ccd2ae
--- a/theano/__init__.py
+++ b/theano/__init__.py
@@ -161,3 +161,22 @@ def dot(l, r):
        raise NotImplementedError("Dot failed for the following reasons:",
                                  (e0, e1))
    return rval
+
+
+def get_constant_value(v):
+    """Return the constant scalar (0-D) value underlying variable `v`.
+
+    If `v` is the output of dimshuffles, fills, allocs, rebroadcasts or
+    casts, this function digs through them.
+
+    If theano.sparse is loaded, we also look through the CSM op to its data.
+
+    If `v` is not some view of constant data, then raise a TypeError.
+    """
+    if hasattr(theano, 'sparse') and isinstance(v.type,
+                                                theano.sparse.SparseType):
+        if v.owner is not None and isinstance(v.owner.op,
+                                                 theano.sparse.CSM):
+            data = v.owner.inputs[0]
+            return tensor.get_constant_value(data)
+    return tensor.get_constant_value(v)
--- a/theano/gradient.py
+++ b/theano/gradient.py
@@ -804,36 +804,11 @@ def _populate_grad_dict(var_to_node_to_idx,

                        no_constant_value = True
                        try:
-                            constant_value = tensor.get_constant_value(term)
+                            constant_value = theano.get_constant_value(term)
                            no_constant_value = False
                        except TypeError:
                            pass

-                        extra_msg = ''
-
-                        # The above won't work if it's a sparse type, handle sparse
-                        # types here
-                        if no_constant_value:
-                            if isinstance(term.type, theano.sparse.SparseType):
-                                if term.owner is not None and isinstance(term.owner.op,
-                                        theano.sparse.CSM):
-                                    data = term.owner.inputs[0]
-                                    try:
-                                        constant_value = tensor.get_constant_value(data)
-                                        no_constant_value = False
-                                    except TypeError:
-                                        print theano.printing.min_informative_str(data)
-                                        extra_msg += " It is a CSM, but its data isn't constant."
-                                        pass
-                                else:
-                                    extra_msg += " It is a SparseType but theano doesn't know how"
-                                    extra_msg += " to turn it into a constant."
-                                #end if CSM
-                            else:
-                                extra_msg += " It is not a SparseType."
-                            #end if SparseType
-                        #end if no_constant_value
-
                        if no_constant_value:
                            msg = "%s.grad returned %s of type %s for input"
                            msg += " %d. This input's only connections to "
@@ -844,7 +819,6 @@ def _populate_grad_dict(var_to_node_to_idx,
                            msg += "DisconnectedType and theano can't "
                            msg += "simplify it to a constant, so it's not "
                            msg += "verifiably zeros."
-                            msg += extra_msg

                            msg = msg % (str(node.op), str(term),
                                    str(type(term)), i)

--- a/theano/sparse/type.py
+++ b/theano/sparse/type.py
 import numpy
 try:
-    import scipy
+    import scipy.sparse
    imported_scipy = True
 except ImportError:
    imported_scipy = False
@@ -8,6 +8,7 @@ except ImportError:
 import theano
 from theano import gof

+
 def _is_sparse(x):
    """
    @rtype: boolean

--- a/theano/tensor/basic.py
+++ b/theano/tensor/basic.py
@@ -65,7 +65,7 @@ def check_equal_numpy(x, y):
    elif (isinstance(x, numpy.random.RandomState) and
          isinstance(y, numpy.random.RandomState)):
        return python_all(numpy.all(a == b) for a, b in
-                          zip(x.__getstate__(), y.__getstate__()))
+                          izip(x.__getstate__(), y.__getstate__()))
    else:
        return x == y

@@ -3823,7 +3823,7 @@ class Subtensor(Op):
        # infer the broadcasting pattern
        padded = (idx_list
                + [slice(None, None, None)] * (x.type.ndim - len(idx_list)))
-        broadcastable = [bc for p, bc in zip(padded, x.type.broadcastable)
+        broadcastable = [bc for p, bc in izip(padded, x.type.broadcastable)
                if isinstance(p, slice)]

        input_types = Subtensor.collapse(idx_list,
@@ -3832,7 +3832,7 @@ class Subtensor(Op):
            raise IndexError(
                    "Not enough inputs to fill in the Subtensor template.",
                    inputs, idx_list)
-        for input, expected_type in zip(inputs, input_types):
+        for input, expected_type in izip(inputs, input_types):
            if input.type != expected_type:
                raise TypeError(
                    "Wrong type for Subtensor template. Expected %s, got %s."
@@ -4458,7 +4458,7 @@ class IncSubtensor(Op):
            raise IndexError(
                    "Not enough inputs to fill in the Subtensor template.",
                    inputs, idx_list)
-        for input, expected_type in zip(inputs, input_types):
+        for input, expected_type in izip(inputs, input_types):
            if input.type != expected_type:
                raise TypeError(
                    "Wrong type for Subtensor template. Expected %s, got %s."
@@ -5830,7 +5830,7 @@ class PermuteRowElements(Op):

        # Compute the broadcastable pattern of the output
        out_broadcastable = [xb and yb for xb, yb in
-                             zip(x.type.broadcastable, y.type.broadcastable)]
+                             izip(x.type.broadcastable, y.type.broadcastable)]
        out_type = tensor(dtype=x.type.dtype, broadcastable=out_broadcastable)

        inputlist = [x, y, inverse]
@@ -5897,7 +5897,7 @@ class PermuteRowElements(Op):

        # Make sure the output is big enough
        out_s = []
-        for xdim, ydim in zip(x_s, y_s):
+        for xdim, ydim in izip(x_s, y_s):
            if xdim == ydim:
                outdim = xdim
            elif xdim == 1:

--- a/theano/tensor/elemwise.py
+++ b/theano/tensor/elemwise.py
@@ -15,6 +15,7 @@ from theano.printing import min_informative_str, pprint
 from theano.gof.python25 import all, any
 from theano.tensor.utils import hash_from_dict
 from theano.gradient import DisconnectedType
+from theano.gof.null_type import NullType

 config = theano.config

@@ -538,14 +539,14 @@ class Elemwise(Op):
        # it is multiplied by nout because Elemwise supports multiple outputs
        # (nout of them)
        out_broadcastables = [[all(bcast)
-            for bcast in zip(*[input.type.broadcastable
+            for bcast in izip(*[input.type.broadcastable
                for input in inputs])]] * shadow.nout

        #inplace_pattern maps output idx -> input idx
        inplace_pattern = self.inplace_pattern
        if inplace_pattern:
            for overwriter, overwritten in inplace_pattern.items():
-                for ob, ib in zip(out_broadcastables[overwriter],
+                for ob, ib in izip(out_broadcastables[overwriter],
                                  inputs[overwritten].type.broadcastable):
                    if ib and not ob:
                        raise ValueError((
@@ -560,7 +561,7 @@ class Elemwise(Op):
                ([i.type.dtype for i in inputs], out_dtypes, inplace_pattern)))

        outputs = [TensorType(dtype=dtype, broadcastable=broadcastable)()
-                for dtype, broadcastable in zip(out_dtypes, out_broadcastables)
+                for dtype, broadcastable in izip(out_dtypes, out_broadcastables)
                ]
        return Apply(self, inputs, outputs)

@@ -608,7 +609,7 @@ class Elemwise(Op):
            bgrads = self._bgrad(inputs, ograds)
            rop_out = None

-            for jdx, (inp, eval_point) in enumerate(zip(inputs,
+            for jdx, (inp, eval_point) in enumerate(izip(inputs,
                                                        eval_points)):
                # if None, then we can just ignore this branch ..
                # what we do is to assume that for any non-differentiable
@@ -638,9 +639,42 @@ class Elemwise(Op):

    def grad(self, inputs, ograds):

+
+        outs = self(*inputs)
+        if not isinstance(outs, (list,tuple)):
+            outs = [ outs ]
+
+
        #compute grad with respect to broadcasted input
        rval = self._bgrad(inputs, ograds)

+        # TODO: make sure that zeros are clearly identifiable
+        # to the gradient.grad method when the op has both
+        # integer and floating point outputs
+        if False in [str(out.type.dtype).find('int') == -1
+                for out in outs]:
+            # For integer output, return value may
+            # only be zero or undefined
+            # We don't bother with trying to check
+            # that the scalar ops correctly
+            # returned something that evaluates to 0,
+            # we just make the return
+            # value obviously zero so that gradient.grad
+            # can tell this op did
+            # the right thing.
+            new_rval = []
+            for elem, ipt in izip(rval, inputs):
+                if isinstance(elem.type, (NullType, DisconnectedType)):
+                    new_rval.append(elem)
+                else:
+                    elem = ipt.zeros_like()
+                    if str(elem.type.dtype).find('int') != -1:
+                        elem = elem.astype(theano.config.floatX)
+                    assert str(elem.type.dtype).find('int') == -1
+                    new_rval.append(elem)
+            return new_rval
+
+
        #sum out the broadcasted dimensions
        for i, ipt in enumerate(inputs):
            if rval[i] is None:
@@ -724,7 +758,7 @@ class Elemwise(Op):
                    *[transform(ipt) for ipt in node.inputs])
            return new_r
        ret = []
-        for scalar_igrad, ipt in zip(scalar_igrads, inputs):
+        for scalar_igrad, ipt in izip(scalar_igrads, inputs):
            if scalar_igrad is None:
                # undefined gradient
                ret.append(None)
@@ -735,7 +769,7 @@ class Elemwise(Op):

    def perform(self, node, inputs, output_storage):
        maxsize = max(len(input.shape) for input in inputs)
-        for dims in zip(*[([(1, True)] * (maxsize - len(input.shape))
+        for dims in izip(*[([(1, True)] * (maxsize - len(input.shape))
                            + zip(input.shape, sinput.type.broadcastable))
                          for input, sinput in zip(inputs, node.inputs)]):
            if max(d for d, b in dims) != 1 and (1, False) in dims:
@@ -767,7 +801,7 @@ class Elemwise(Op):

        # Determine the shape of outputs
        out_shape = []
-        for values in zip(*[input.shape for input in inputs]):
+        for values in izip(*[input.shape for input in inputs]):
            if numpy.prod(values) == 0:
                # All non-broadcasted dimensions should be zero
                assert max(values) <= 1
@@ -777,7 +811,7 @@ class Elemwise(Op):
        out_shape = tuple(out_shape)

        if not self.inplace_pattern:
-            for output, storage in zip(node.outputs, output_storage):
+            for output, storage in izip(node.outputs, output_storage):
                odat = storage[0]
                if odat is not None:
                    if odat.shape != out_shape:
@@ -789,7 +823,7 @@ class Elemwise(Op):
                storage[0] = odat
        else:
            for i, (output, storage) in enumerate(
-                    zip(node.outputs, output_storage)):
+                    izip(node.outputs, output_storage)):
                #i is an output idx
                if i in self.inplace_pattern:
                    odat = inputs[self.inplace_pattern[i]]
@@ -883,7 +917,7 @@ class Elemwise(Op):
                else:
                    # there must be some input that is not broadcastable in
                    # dimension 'dim'
-                    for ishp, i in zip(i_shapes, node.inputs):
+                    for ishp, i in izip(i_shapes, node.inputs):
                        if isinstance(i.type, theano.scalar.Scalar):
                            continue  # we skip scalar
                        if not i.type.broadcastable[dim]:
@@ -926,7 +960,7 @@ class Elemwise(Op):
        # These are the outputs that we will need to allocate
        # (output, name, name of the c type), transposed
        real = zip(*[(r, s, r.type.dtype_specs()[1])
-                     for r, s in zip(node.outputs, onames) if r not in dmap])
+                     for r, s in izip(node.outputs, onames) if r not in dmap])
        if real:
            real_outputs, real_onames, real_odtypes = real
        else:
@@ -936,7 +970,7 @@ class Elemwise(Op):
        # (output, name), transposed (c type name not needed since we don't
        # need to allocate.
        aliased = zip(*[(r, s)
-                        for (r, s) in zip(node.outputs, onames) if r in dmap])
+                        for (r, s) in izip(node.outputs, onames) if r in dmap])
        if aliased:
            aliased_outputs, aliased_onames = aliased
        else:
@@ -952,7 +986,7 @@ class Elemwise(Op):
        # dimensionality)
        nnested = len(orders[0])
        sub = dict(sub)
-        for i, (input, iname) in enumerate(zip(inputs, inames)):
+        for i, (input, iname) in enumerate(izip(inputs, inames)):
            # the c generators will substitute the input names for
            # references to loop variables lv0, lv1, ...
            sub['lv%i' % i] = iname
@@ -964,7 +998,7 @@ class Elemwise(Op):
        # We loop over the "real" outputs, i.e., those that are not
        # inplace (must be allocated) and we declare/allocate/check
        # them
-        for output, oname, odtype in zip(
+        for output, oname, odtype in izip(
                real_outputs, real_onames, real_odtypes):
            i += 1  # before this loop, i = number of inputs
            sub['lv%i' % i] = oname
@@ -980,7 +1014,7 @@ class Elemwise(Op):
        # inplace (overwrite the contents of one of the inputs) and
        # make the output pointers point to theur corresponding input
        # pointers.
-        for output, oname in zip(aliased_outputs, aliased_onames):
+        for output, oname in izip(aliased_outputs, aliased_onames):
            olv_index = inputs.index(dmap[output][0])
            iname = inames[olv_index]
            # We make the output point to the corresponding input and
@@ -1006,7 +1040,7 @@ class Elemwise(Op):
        # not be declared, as they are #defined in defines
        task_decl = "".join([
            "%(dtype)s& %(name)s_i = *%(name)s_iter;\n" % locals()
-                for name, dtype in zip(inames + list(real_onames),
+                for name, dtype in izip(inames + list(real_onames),
                                       idtypes + list(real_odtypes))])

        # We generate the C code of the inner loop using the scalar op
@@ -1305,7 +1339,7 @@ class CAReduce(Op):
        nnested = len(order1)

        sub = dict(sub)
-        for i, (input, iname) in enumerate(zip(node.inputs, inames)):
+        for i, (input, iname) in enumerate(izip(node.inputs, inames)):
            sub['lv%i' % i] = iname

        decl = cgen.make_declare([order], [idtype], sub)

--- a/theano/tensor/tests/test_elemwise.py
+++ b/theano/tensor/tests/test_elemwise.py
@@ -848,6 +848,31 @@ class TestElemwise(unittest_tools.InferShapeTester):
                            [t_left_val, t_right_val], Elemwise)


+def test_gt_grad():
+    """A user test that failed.
+
+    Something about it made Elemwise.grad return something that was
+    too complicated for get_constant_value to recognize as being 0, so
+    gradient.grad reported that it was not a valid gradient of an
+    integer.
+
+    """
+    floatX = config.floatX
+    T = theano.tensor
+
+    input_ = T.vector(dtype=floatX)
+    random_values = numpy.random.RandomState(1234).uniform(low=-1, high=1, size=(2,2))
+    W_values = numpy.asarray(random_values, dtype=floatX)
+    W = theano.shared(value=W_values, name='weights')
+    correct_score = T.dot(input_, W)
+    wrong_input = T.vector(dtype=floatX)
+    wrong_score = theano.clone(correct_score, {input_: wrong_input})
+    # Hinge loss
+
+    scores = T.ones_like(correct_score) - correct_score + wrong_score
+    cost = (scores * (scores > 0)).sum()
+    T.grad(cost, input_)
+
 """
 if __name__ == '__main__':
    #unittest.main()