Commit f9ca8f9d authored by Olivier Delalleau

Make grad more general (A. Bergeron)

Gradient code is moved from tensor/tensor_grad.py to theano/gradient.py. This makes it work with sparse variables. This commit was originally written by Arnaud Bergeron. I re-authored it to avoid a big merge in repo history.

Parent 0e018bc4
"""
Theano is an optimizing compiler in Python, built to evaluate
complicated expressions (especially matrix-valued ones) as quickly as
possible. Theano compiles expression graphs (see :doc:`graph` ) that
are built by Python code. The expressions in these graphs are called
`Apply` nodes and the variables in these graphs are called `Variable`
nodes.

You compile a graph by calling `function`, which takes a graph, and
returns a callable object. One of theano's most important features is
that `function` can transform your graph before compiling it. It can
replace simple expressions with faster or more numerically stable
implementations.
To learn more, check out:
......@@ -37,7 +40,8 @@ logging_default_handler.setFormatter(logging_default_formatter)
theano_logger.addHandler(logging_default_handler)
theano_logger.setLevel(logging.WARNING)
# One import per line (PEP 8); the combined form was leftover diff residue.
import configparser
import configdefaults
config = configparser.TheanoConfigParser()
......@@ -87,8 +91,10 @@ from updates import Updates
import tensor
import scalar
# We don't import sparse by default as we don't want to force having
# scipy installed.
#import sparse
import gradient
from gradient import Rop, Lop, grad
import gof
if config.device.startswith('gpu') or config.init_gpu_device.startswith('gpu'):
......@@ -126,8 +132,10 @@ del _all, _divide, _over, _under, _invalid
## import scalar_opt
### This is defined here because it is designed to work across symbolic
# datatypes (Sparse and Tensor)
def dot(l, r):
"""Return a symbolic matrix/dot product between l and r """
rval = NotImplemented
......@@ -144,5 +152,6 @@ def dot(l, r):
except Exception, e1:
rval = NotImplemented
if rval == NotImplemented:
raise NotImplementedError("Dot failed for the following reasons:", (e0, e1))
raise NotImplementedError("Dot failed for the following reasons:",
(e0, e1))
return rval
差异被折叠。
......@@ -137,9 +137,13 @@ def sp_ones_like(x):
data, indices, indptr, shape = csm_properties(x) #TODO: don't restrict to CSM formats
return CSM(format=x.format)(tensor.ones_like(data), indices, indptr, shape)
def sp_zeros_like(x):
    """Return a sparse variable with the same format and shape as `x`
    but with no stored (nonzero) elements.

    :param x: a sparse variable in a CSM-compatible format.
    :return: a new sparse variable of the same format/shape, all zeros.
    """
    # The scraped diff left both the old one-line body and the new
    # wrapped body in place; only one construction is needed.
    #TODO: don't restrict to CSM formats
    _, _, indptr, shape = csm_properties(x)
    # Empty data and indices arrays plus a zeroed indptr encode a matrix
    # with zero stored entries.
    return CSM(format=x.format)(numpy.array([], dtype=x.type.dtype),
                                numpy.array([]), tensor.zeros_like(indptr),
                                shape)
class _sparse_py_operators:
......@@ -177,6 +181,9 @@ class _sparse_py_operators:
# that stored zeros *do* count in the size.
size = property(lambda self: csm_data(self).size)
def zeros_like(model, dtype=None):
    """Return an all-zero sparse variable shaped like `model`.

    :param dtype: accepted for interface compatibility with the tensor
        counterpart (`TensorVariable.zeros_like`), but currently
        ignored — the result keeps `model`'s dtype.
    """
    # TODO: don't ignore dtype
    return sp_zeros_like(model)
class SparseVariable(gof.Variable, _sparse_py_operators):
dtype = property(lambda self: self.type.dtype)
......@@ -189,10 +196,6 @@ class SparseVariable(gof.Variable, _sparse_py_operators):
def __repr__(self):
return str(self)
def zeros_like(model, dtype=None):
    """Return an all-zero sparse variable shaped like `model`.

    NOTE(review): `dtype` is accepted but currently ignored (see the
    TODO below) — the result keeps `model`'s dtype.
    """
    # TODO: don't ignore dtype
    return sp_zeros_like(model)
class SparseConstantSignature(tuple):
def __eq__(self, other):
(a, b), (x,y) = self, other
......
......@@ -824,13 +824,16 @@ class test_zeros_like(unittest.TestCase):
def test(self):
    """sp_zeros_like applied to a random binary CSR matrix must yield
    an empty matrix (no stored elements) of the same shape."""
    x = theano.sparse.csr_matrix()
    f = theano.function([x], theano.sparse.sp_zeros_like(x))
    # The diff residue assigned vx twice; build it once.
    vx = scipy.sparse.csr_matrix(numpy.asarray(
        numpy.random.binomial(1, 0.5, (100, 100)),
        dtype=theano.config.floatX))
    fx = f(vx)
    assert fx.nnz == 0
    assert fx.shape == vx.shape
def test_shape_i():
sparse_dtype = 'float32'
......
......@@ -30,7 +30,6 @@ import sharedvar # adds shared-variable constructors
# `theano.shared` and `tensor._shared`.
from sharedvar import tensor_constructor as _shared
def shared(*args, **kw):
"""
Backward-compatibility wrapper around `tensor._shared`.
......@@ -50,6 +49,5 @@ def shared(*args, **kw):
import nnet # used for softmax, sigmoid, etc.
# The gradient helpers now live in theano.gradient (moved from
# tensor_grad); the stale tensor_grad import line was diff residue that
# merged with this one into invalid syntax.
from theano.gradient import Rop, Lop, grad, numeric_grad, verify_grad, \
    jacobian, hessian
......@@ -1450,16 +1450,12 @@ class _tensor_py_operators:
def get_constant_value(self):
    """Delegate to the module-level `get_constant_value` helper for
    this variable."""
    return get_constant_value(self)
def zeros_like(model):
    """Return an all-zero variable shaped like `model`, delegating to
    the module-level `zeros_like`."""
    return zeros_like(model)
class TensorVariable(_tensor_py_operators, Variable):
    """Subclass to add the tensor operators to the basic `Variable` class."""

    def zeros_like(model, dtype=None):
        """Return a zero-filled variable shaped like `model`.

        Used for grad, Lop and Rop; exercised via the module-level
        `zeros_like` function.
        """
        target_dtype = model.type.dtype if dtype is None else dtype
        return fill(model, constant(0.0, dtype=target_dtype))
TensorType.Variable = TensorVariable
......@@ -2369,7 +2365,9 @@ def ones_like(model, dtype=None):
@constructor
def zeros_like(model, dtype=None):
    """Equivalent of numpy.zeros_like.

    :param model: tensor whose shape (and, by default, dtype) the
        result copies.
    :param dtype: optional dtype for the result; defaults to `model`'s.
    """
    # The stale line `return TensorVariable.zeros_like(model, dtype=None)`
    # (diff residue) shadowed this body and always discarded the caller's
    # dtype; only the corrected logic is kept.
    if dtype is None:
        dtype = model.type.dtype
    return fill(model, constant(0.0, dtype=dtype))
def zeros(shape, dtype=config.floatX):
"""
......
差异被折叠。
Markdown is supported
0%
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Register or sign in to comment