* fixed gradient of CSM to support "kernel map"

* bug fix: force todense of sparse matrices in check_equal, as two sparse matrices are never equal in scipy 0.6
* numeric_grad no longer cares that all parameters are of the same type
* new Reshape optimization (@todo: reshape-flatten optimization)
Parent commit: 5c05a90c
import numpy import numpy
import scipy.sparse as sp
from .. import gof from .. import gof
def check_equal(x, y): def check_equal(x, y):
...@@ -8,6 +9,13 @@ def check_equal(x, y): ...@@ -8,6 +9,13 @@ def check_equal(x, y):
shape if x and y are numpy.ndarray instances). Used internally. shape if x and y are numpy.ndarray instances). Used internally.
""" """
x, y = x[0], y[0] x, y = x[0], y[0]
# TODO: bug in current scipy, two sparse matrices are never equal, remove when moving to 0.7
if sp.issparse(x):
x = x.todense()
if sp.issparse(y):
y = y.todense()
if isinstance(x, numpy.ndarray) and isinstance(y, numpy.ndarray): if isinstance(x, numpy.ndarray) and isinstance(y, numpy.ndarray):
if x.dtype != y.dtype or x.shape != y.shape or numpy.any(abs(x - y) > 1e-10): if x.dtype != y.dtype or x.shape != y.shape or numpy.any(abs(x - y) > 1e-10):
raise Exception("Output mismatch.", {'performlinker': x, 'clinker': y}) raise Exception("Output mismatch.", {'performlinker': x, 'clinker': y})
......
...@@ -205,20 +205,12 @@ class CSMProperties(gof.Op): ...@@ -205,20 +205,12 @@ class CSMProperties(gof.Op):
self.map = map self.map = map
def make_node(self, csm):
    """Build an Apply extracting (data, indices, indptr, shape) from `csm`."""
    csm = as_sparse(csm)
    # The data output keeps the sparse matrix's dtype and is a plain 1-d
    # (non-broadcastable) tensor; the three structural outputs are int vectors.
    data = tensor.Tensor(dtype=csm.type.dtype, broadcastable=(False,)).make_result()
    outputs = [data, tensor.ivector(), tensor.ivector(), tensor.ivector()]
    return gof.Apply(self, [csm], outputs)
def perform(self, node, (csm,), out): def perform(self, node, (csm,), out):
if 0:
print '******* sp:CSMProperties:perform *******'
print 'self.map = ', self.map
print 'csm.data = ', csm.data
print 'size(csm.data) = ', numpy.size(csm.data)
print 'csm.todense.shape = ', csm.todense().shape
print 'type(csm) = ', type(csm)
out[0][0] = csm.data if self.map is None else csm.data[self.map] out[0][0] = csm.data if self.map is None else csm.data[self.map]
out[1][0] = numpy.asarray(csm.indices, dtype='int32') out[1][0] = numpy.asarray(csm.indices, dtype='int32')
out[2][0] = numpy.asarray(csm.indptr, dtype='int32') out[2][0] = numpy.asarray(csm.indptr, dtype='int32')
...@@ -226,16 +218,15 @@ class CSMProperties(gof.Op): ...@@ -226,16 +218,15 @@ class CSMProperties(gof.Op):
def grad(self, inputs, g):
    """Gradient wrt the sparse input.

    Rebuilds a sparse matrix with the same structure (indices, indptr,
    shape) as the input, carrying the gradient on the data vector.  The
    structural outputs are discrete and must receive no gradient.
    """
    csm, = inputs
    # Bug fix: the original `assert [gg is None for gg in g[1:]]` asserted a
    # non-empty list, which is always true; use all() to actually check.
    assert all(gg is None for gg in g[1:])
    # Bug fix: `g_data` was previously undefined; the gradient on the data
    # output is the first element of g.
    g_data = g[0]
    _data, indices, indptr, shape = csm_properties(csm)
    # Bug fix: the csr branch called CSR('csm')(...), i.e. invoked the CSR op
    # *instance* with the string 'csm' as its data input; construct the op
    # with the matching format instead.
    fmt = 'csc' if csm.format == 'csc' else 'csr'
    return [CSM(fmt)(g_data, indices, indptr, shape)]
def csm_properties(csm):
    """Return the (data, indices, indptr, shape) results of sparse result `csm`."""
    return CSMProperties()(csm)


def csm_data(csm):
    """The data vector of sparse result `csm`."""
    return csm_properties(csm)[0]


def csm_indices(csm):
    """The indices vector of sparse result `csm`."""
    return csm_properties(csm)[1]


def csm_indptr(csm):
    """The indptr vector of sparse result `csm`."""
    return csm_properties(csm)[2]


def csm_shape(csm):
    """The shape vector of sparse result `csm`."""
    return csm_properties(csm)[3]
...@@ -251,7 +242,7 @@ class CSM(gof.Op): ...@@ -251,7 +242,7 @@ class CSM(gof.Op):
self.format = format self.format = format
# for efficiency, if remap does nothing, then do not apply it # for efficiency, if remap does nothing, then do not apply it
if map is not None and all(map==N.arange(N.size(map))): if map is not None and all(map==numpy.arange(numpy.size(map))):
map = None map = None
self.map = map self.map = map
...@@ -296,15 +287,7 @@ class CSM(gof.Op): ...@@ -296,15 +287,7 @@ class CSM(gof.Op):
def perform(self, node, (data, indices, indptr, shape), (out,)): def perform(self, node, (data, indices, indptr, shape), (out,)):
"""Build a csc_matrix""" """Build a csc_matrix"""
#assert len(data.flatten()) == len(indices.flatten()) #assert len(data.flatten()) == len(indices.flatten())
if 0:
print '********** sp:CSM:perform ***********'
print 'data =', data.__repr__()
print 'size(data) = ', numpy.size(data)
print 'kmap =', self.map.__repr__()
data = data[self.map] if self.map!=None else data data = data[self.map] if self.map!=None else data
if 0:
print 'data[kmap] =', data.__repr__()
if len(shape) != 2: if len(shape) != 2:
raise ValueError('Shape should be an array of length 2') raise ValueError('Shape should be an array of length 2')
...@@ -321,18 +304,30 @@ class CSM(gof.Op): ...@@ -321,18 +304,30 @@ class CSM(gof.Op):
shape.copy(), shape.copy(),
copy = False #1000*len(data.flatten()) copy = False #1000*len(data.flatten())
) )
if 0:
print 'out[0] = ', out[0].todense().__repr__()
def grad(self, inputs, g_outputs):
    """Return a gradient on the data vector (None for the structural inputs)."""
    data, indices, indptr, shape = inputs
    g_out, = g_outputs
    # Unpack the data vector of the output gradient and map it back onto the
    # input data vector via CSMGrad (honouring this op's kernel map).
    g_data = csm_grad(self.map)(data, csm_data(g_out), csm_indices(g_out))
    return [g_data, None, None, None]
# Convenience op instances producing CSC / CSR matrices (no kernel map).
CSC = CSM('csc')
CSR = CSM('csr')
class CSMGrad(gof.op.Op):
    """Gradient of CSM with respect to its data vector.

    Scatters the gradient on the (possibly kernel-mapped) output data back
    onto a zero vector shaped like the input data: positions selected by
    `map` receive the corresponding output gradients, every other position
    stays zero.
    """
    def __init__(self, map=None):
        # map: index array ("kernel map") selecting which entries of the data
        # vector CSM used, or None for the identity mapping.
        self.map = map

    def make_node(self, data, gout_data, gout_indices):
        # The gradient result has the same type as the input data vector.
        # NOTE(review): gout_indices is carried as an input but not read in
        # perform below — presumably kept for future structural handling.
        g_data = data.type()
        return gof.Apply(self, [data, gout_data, gout_indices], [g_data])

    def perform(self, node, inputs, outputs):
        data, gout_data, gout_indices = inputs
        g_data, = outputs
        grad = numpy.zeros_like(data)
        if self.map is None:
            # Bug fix: with map=None the original executed
            # `grad[None] = gout_data`, which indexes with a new axis and only
            # works through accidental broadcasting. Assign explicitly (this
            # also casts gout_data to data's dtype, as before).
            grad[...] = gout_data
        else:
            grad[self.map] = gout_data
        g_data[0] = grad

csm_grad = CSMGrad
@gof.local_optimizer([csm_properties]) @gof.local_optimizer([csm_properties])
def skip_pack_csc01(node): def skip_pack_csc01(node):
"""if we find csm_properties(CSM(*args)), then we can replace that with the *args """if we find csm_properties(CSM(*args)), then we can replace that with the *args
......
...@@ -2156,8 +2156,10 @@ class numeric_grad: ...@@ -2156,8 +2156,10 @@ class numeric_grad:
shapes = [p.shape for p in apt] shapes = [p.shape for p in apt]
dtypes = [str(p.dtype) for p in apt] dtypes = [str(p.dtype) for p in apt]
if not dtypes == [dtypes[0]] * len(apt): # TODO: remove this eventually (why was this here in the first place ?)
raise TypeError('All function arguments must have same dtype') # In the case of CSM, the arguments are a mixture of floats and integers...
#if not dtypes == [dtypes[0]] * len(apt):
#raise TypeError('All function arguments must have same dtype')
total_size = __builtin__.sum(prod(sh) for sh in shapes) total_size = __builtin__.sum(prod(sh) for sh in shapes)
...@@ -2214,8 +2216,8 @@ def verify_grad(testcase, op, pt, n_tests=1, rng=numpy.random, eps=1.0e-7, tol=0 ...@@ -2214,8 +2216,8 @@ def verify_grad(testcase, op, pt, n_tests=1, rng=numpy.random, eps=1.0e-7, tol=0
testcase.failUnless(analytic gradient matches finite-diff gradient) testcase.failUnless(analytic gradient matches finite-diff gradient)
:param pt: the list of numpy.ndarrays to use as inputs to the op :param pt: the list of numpy.ndarrays to use as inputs to the op
:param op: something that behaves like an Op instance with a single output (can be a :param op: something that behaves like an Op instance with a single output
function) (can be a python function combining multiple ops)
:param testcase: the thing to call `fail` on if things go awry. :param testcase: the thing to call `fail` on if things go awry.
""" """
...@@ -2264,7 +2266,7 @@ def verify_grad(testcase, op, pt, n_tests=1, rng=numpy.random, eps=1.0e-7, tol=0 ...@@ -2264,7 +2266,7 @@ def verify_grad(testcase, op, pt, n_tests=1, rng=numpy.random, eps=1.0e-7, tol=0
#print "PT D", pt #print "PT D", pt
analytic_grad = grad_fn(*pt) analytic_grad = grad_fn(*pt)
#print "PT Z", pt #print "PT Z", pt
if not isinstance(analytic_grad, (list, tuple)): if not isinstance(analytic_grad, (list, tuple)):
analytic_grad = [analytic_grad] analytic_grad = [analytic_grad]
...@@ -2277,4 +2279,3 @@ def verify_grad(testcase, op, pt, n_tests=1, rng=numpy.random, eps=1.0e-7, tol=0 ...@@ -2277,4 +2279,3 @@ def verify_grad(testcase, op, pt, n_tests=1, rng=numpy.random, eps=1.0e-7, tol=0
verify_grad.E_grad = 'gradient error exceeded tolerance' verify_grad.E_grad = 'gradient error exceeded tolerance'
"""This error is raised when a gradient is calculated, but incorrect.""" """This error is raised when a gradient is calculated, but incorrect."""
...@@ -285,6 +285,23 @@ def local_inplace_setsubtensor(node): ...@@ -285,6 +285,23 @@ def local_inplace_setsubtensor(node):
return False return False
compile.optdb.register('inplace_setsubtensor', TopoOptimizer(local_inplace_setsubtensor), 60, 'fast_run', 'inplace') #DEBUG compile.optdb.register('inplace_setsubtensor', TopoOptimizer(local_inplace_setsubtensor), 60, 'fast_run', 'inplace') #DEBUG
##################
# Reshape opts   #
##################

@gof.local_optimizer([None, None])
def local_reshape_chain(node):
    """Collapse nested reshapes.

    Reshape(Reshape(x, shape1), shape2) -> Reshape(x, shape2): only the
    outermost target shape matters, so the inner reshape can be skipped.
    """
    if not opt.check_chain(node, T.Reshape, T.Reshape):
        return False
    inner_input = node.inputs[0].owner.inputs[0]
    target_shape = node.inputs[1]
    return [node.op(inner_input, target_shape)]

register_canonicalize(local_reshape_chain)
################## ##################
# Middleman cuts # # Middleman cuts #
################## ##################
......
Markdown format
0%
You added 0 people to this discussion. Please proceed with caution.
Please finish editing this comment first!
Register or sign in to comment