Merge pull request #627 from ynd/csm_grad_fix

Fix crash and corruption bug in CSM's grad

Merge pull request #627 from ynd/csm_grad_fix
28716425 · nouiz · f62af802 · 7a52de6d · 28716425 · 28716425
--- a/theano/sparse/basic.py
+++ b/theano/sparse/basic.py
@@ -698,10 +698,12 @@ class CSM(gof.Op):
                                              indptr.copy()), shape.copy(),
                                             copy=False)
-    def grad(self, (data, indices, indptr, shape), (g_out,)):
+    def grad(self, (x_data, x_indices, x_indptr, x_shape), (g_out,)):
        """Return a gradient on the data vector"""
+        g_data, g_indices, g_indptr, g_shape = csm_properties(g_out)
        #unpack the data vector and wrap it as a 1d TensorType
-        g_data = csm_grad(self.kmap)(data, csm_data(g_out), csm_indices(g_out))
+        g_data = csm_grad(self.kmap)(x_data, x_indices, x_indptr, x_shape,
+            g_data, g_indices, g_indptr, g_shape)
        return [g_data, None, None, None]
    def infer_shape(self, node, shapes):
@@ -717,6 +719,19 @@ CSR = CSM('csr')
 class CSMGrad(gof.op.Op):
+    """
+    This Op computes the gradient of the CSM Op. CSM creates a matrix from
+    data, indices, and ind_ptr vectors; it's gradient is the gradient of
+    the data vector only. There are two complexities to calculate this gradient:
+    1. The gradient may be sparser than the input matrix defined by (data,
+    indices, ind_ptr). In this case, the data vector of the gradient will have
+    less elements than the data vector of the input because sparse formats
+    remove 0s. Since we are only returning the gradient of the data vector, the
+    relevant 0s need to be added back.
+    2. The elements in the sparse dimension are not guaranteed to be sorted.
+    Therefore, the input data vector may have a different order than the
+    gradient data vector.
+    """
    def __init__(self, kmap=None):
        self.kmap = kmap
        if self.kmap is None:
@@ -733,17 +748,37 @@ class CSMGrad(gof.op.Op):
            self.__class__.__name__,
            self.kmap)
-    def make_node(self, data, gout_data, gout_indices):
+    def make_node(self, x_data, x_indices, x_indptr, x_shape,
-        g_data = gout_data.type()
+                g_data, g_indices, g_indptr, g_shape):
-        return gof.Apply(self, [data, gout_data, gout_indices], [g_data])
+        gout_data = g_data.type()
+        return gof.Apply(self, [x_data, x_indices, x_indptr, x_shape,
+            g_data, g_indices, g_indptr, g_shape], [gout_data])
-    def perform(self, node, (data, gout_data, gout_indices), (g_data,)):
+    def perform(self, node, (x_data, x_indices, x_indptr, x_shape,
+                g_data, g_indices, g_indptr, g_shape), (g_out,)):
+        if len(x_indptr) - 1 == x_shape[0]:
+            sp_dim = x_shape[1]
+        else:
+            sp_dim = x_shape[0]
+        g_row = numpy.zeros(sp_dim, dtype=g_data.dtype)
+        gout_data = numpy.zeros_like(x_data)
+        for i in range(len(x_indptr) - 1):
+            for j_ptr in range(g_indptr[i], g_indptr[i + 1]):
+                g_row[g_indices[j_ptr]] += g_data[j_ptr]
+            for j_ptr in range(x_indptr[i], x_indptr[i + 1]):
+                gout_data[j_ptr] = g_row[x_indices[j_ptr]]
+            for j_ptr in range(g_indptr[i], g_indptr[i + 1]):
+                g_row[g_indices[j_ptr]] = 0
        if self.kmap is None:
-            g_data[0] = gout_data
+            g_out[0] = gout_data
        else:
-            grad = numpy.zeros_like(data)
+            grad = numpy.zeros_like(x_data)
            grad[self.kmap] = gout_data
-            g_data[0] = grad
+            g_out[0] = grad
    def infer_shape(self, node, shapes):
        if self.kmap is None:
@@ -754,6 +789,137 @@ class CSMGrad(gof.op.Op):
 csm_grad = CSMGrad
+class CSMGradC(gof.Op):
+    def __eq__(self, other):
+        return (type(self) == type(other))
+    def __hash__(self):
+        return hash(type(self))
+    def __str__(self):
+        return self.__class__.__name__
+    def make_node(self, a_val, a_ind, a_ptr, a_dim, b_val, b_ind, b_ptr, b_dim):
+        return gof.Apply(self, [a_val, a_ind, a_ptr, a_dim,
+             b_val, b_ind, b_ptr, b_dim], [b_val.type()])
+    def c_code(self, node, name, (a_val, a_ind, a_ptr, a_dim,
+                        b_val, b_ind, b_ptr, b_dim), (z,), sub):
+        # retrieve dtype number
+        typenum_z = node.outputs[0].type.dtype_specs()[-1]
+        if node.inputs[0].type.dtype in ('complex64', 'complex128'):
+            raise NotImplementedError('Complex types are not supported for a_val')
+        if node.inputs[3].type.dtype in ('complex64', 'complex128'):
+            raise NotImplementedError('Complex types are not supported for b_val')
+        return """
+        if (%(a_val)s->nd != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(a_val) != 1"); %(fail)s;}
+        if (%(a_ind)s->nd != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(a_ind) != 1"); %(fail)s;}
+        if (%(a_ptr)s->nd != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(a_ptr) != 1"); %(fail)s;}
+        if (%(b_val)s->nd != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(b_val) != 1"); %(fail)s;}
+        if (%(b_ind)s->nd != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(b_ind) != 1"); %(fail)s;}
+        if (%(b_ptr)s->nd != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(b_ptr) != 1"); %(fail)s;}
+        if (%(a_ind)s->descr->type_num != PyArray_INT32) {
+        PyErr_SetString(PyExc_NotImplementedError, "a_ind dtype not INT32"); %(fail)s;}
+        if (%(a_ptr)s->descr->type_num != PyArray_INT32)
+        {PyErr_SetString(PyExc_NotImplementedError, "a_ptr dtype not INT32"); %(fail)s;}
+        if (%(b_ind)s->descr->type_num != PyArray_INT32) {
+        PyErr_SetString(PyExc_NotImplementedError, "b_ind dtype not INT32"); %(fail)s;}
+        if (%(b_ptr)s->descr->type_num != PyArray_INT32)
+        {PyErr_SetString(PyExc_NotImplementedError, "b_ptr dtype not INT32"); %(fail)s;}
+        if (%(a_val)s->dimensions[0] != %(a_ind)s->dimensions[0])
+        {PyErr_SetString(PyExc_NotImplementedError, "a_val and a_ind have different lengths"); %(fail)s;}
+        if (%(b_val)s->dimensions[0] != %(b_ind)s->dimensions[0])
+        {PyErr_SetString(PyExc_NotImplementedError, "b_val and b_ind have different lengths"); %(fail)s;}
+        if (%(a_ptr)s->dimensions[0] != %(b_ptr)s->dimensions[0])
+        {PyErr_SetString(PyExc_NotImplementedError, "a_ptr and b_ptr have different lengths"); %(fail)s;}
+        if ((!%(z)s) || (%(z)s->dimensions[0] != %(a_val)s->dimensions[0]))
+        {
+            {Py_XDECREF(%(z)s);}
+            npy_intp dims[] = {0};
+            dims[0] = %(a_val)s->dimensions[0];
+            %(z)s = (PyArrayObject*) PyArray_SimpleNew(1, dims, %(typenum_z)s);
+        }
+        {
+            // sparse array has size MxK, dense KxN, output MxN
+            npy_intp M = %(a_ptr)s->dimensions[0] - 1;
+            npy_intp a_dim_0 = ((npy_int32 *)%(a_dim)s->data)[0];
+            npy_intp a_dim_1 = ((npy_int32 *)%(a_dim)s->data)[1];
+            npy_intp sp_dim = (M == a_dim_0)?a_dim_1:a_dim_0;
+            // strides tell you how many bytes to skip to go to next column/row entry
+            npy_intp Sz = %(z)s->strides[0] / %(z)s->descr->elsize;
+            npy_intp Sa_val = %(a_val)s->strides[0] / %(a_val)s->descr->elsize;
+            npy_intp Sa_ind = %(a_ind)s->strides[0] / %(a_ind)s->descr->elsize;
+            npy_intp Sa_ptr = %(a_ptr)s->strides[0] / %(a_ptr)s->descr->elsize;
+            npy_intp Sb_val = %(b_val)s->strides[0] / %(b_val)s->descr->elsize;
+            npy_intp Sb_ind = %(b_ind)s->strides[0] / %(b_ind)s->descr->elsize;
+            npy_intp Sb_ptr = %(b_ptr)s->strides[0] / %(b_ptr)s->descr->elsize;
+            // pointers to access actual data in the arrays passed as params.
+            dtype_%(z)s* __restrict__ Dz = (dtype_%(z)s*)%(z)s->data;
+            const dtype_%(a_val)s* __restrict__ Da_val = (dtype_%(a_val)s*)%(a_val)s->data;
+            const npy_int32 * __restrict__ Da_ind = (npy_int32*)%(a_ind)s->data;
+            const npy_int32 * __restrict__ Da_ptr = (npy_int32*)%(a_ptr)s->data;
+            const dtype_%(b_val)s* __restrict__ Db_val = (dtype_%(b_val)s*)%(b_val)s->data;
+            const npy_int32 * __restrict__ Db_ind = (npy_int32*)%(b_ind)s->data;
+            const npy_int32 * __restrict__ Db_ptr = (npy_int32*)%(b_ptr)s->data;
+            npy_intp nnz = %(a_ind)s->dimensions[0];
+            dtype_%(b_val)s b_row[sp_dim];
+            //clear the output array
+            for (npy_int64 i = 0; i < nnz; ++i)
+            {
+                Dz[i*Sz] = 0;
+            }
+            memset(b_row, 0, sp_dim*sizeof(dtype_%(b_val)s));
+            // loop over inner dimension
+            for (npy_int64 m = 0; m < M; ++m)
+            {
+                for (npy_int32 j_ptr = Db_ptr[m * Sb_ptr];
+                    j_ptr < Db_ptr[(m + 1) * Sb_ptr]; j_ptr++) {
+                    b_row[Db_ind[j_ptr * Sb_ind]] += Db_val[j_ptr*Sb_val];
+                }
+                for (npy_int32 j_ptr = Da_ptr[m * Sa_ptr];
+                    j_ptr < Da_ptr[(m + 1) * Sa_ptr]; j_ptr++) {
+                    Dz[j_ptr*Sz] = b_row[Da_ind[j_ptr * Sa_ind]];
+                }
+                for (npy_int32 j_ptr = Db_ptr[m * Sb_ptr];
+                    j_ptr < Db_ptr[(m + 1) * Sb_ptr]; j_ptr++) {
+                    b_row[Db_ind[j_ptr * Sb_ind]] = 0;
+                }
+            }
+        }
+        """ % dict(locals(), **sub)
+    def c_code_cache_version(self):
+        return (3,)
+csm_grad_c = CSMGradC()
+@gof.local_optimizer([csm_grad(None)])
+def local_csm_grad_c(node):
+    """ csm_grad(None) -> csm_grad_c """
+    if node.op == csm_grad(None):
+        return [csm_grad_c(*node.inputs)]
+    return False
+register_specialize(local_csm_grad_c)
 #
 # Conversion
 #

--- a/theano/sparse/tests/test_basic.py
+++ b/theano/sparse/tests/test_basic.py
@@ -22,7 +22,7 @@ from theano.sparse.basic import _is_dense_variable, _is_sparse_variable
 from theano.sparse.basic import verify_grad_sparse
 from theano.sparse import (as_sparse_variable, CSC, CSR, CSM, CSMProperties,
    csm_properties)
-from theano.sparse import SparseType, CSMGrad
+from theano.sparse import SparseType, CSMGrad, CSMGradC
 from theano.sparse import StructuredDot, StructuredDotCSC
 from theano.sparse import StructuredDotGradCSC, StructuredDotGradCSR
 from theano.sparse import AddSS, AddSD, MulSS, MulSD, Transpose, Neg, Remove0
@@ -184,7 +184,7 @@ class SparseInferShapeTester(utt.InferShapeTester):
                                    [out],
                                    [spm.data, spm.indices, spm.indptr,
                                     spm.shape],
-                                    CSMGrad
+                                    (CSMGrad, CSMGradC)
                                   )
    def test_transpose(self):
@@ -616,6 +616,107 @@ class test_csm_properties(unittest.TestCase):
                assert numpy.all(shape == spmat.shape)
+class test_csm(unittest.TestCase):
+    def setUp(self):
+        utt.seed_rng()
+    def test_csm_grad(self):
+        sp_types = {'csc': sp.csc_matrix,
+            'csr': sp.csr_matrix}
+        for format in ['csc', 'csr']:
+            for dtype in ['float32', 'float64']:
+                spmat = sp_types[format](random_lil((4, 3), dtype, 3))
+                verify_grad_sparse(lambda x: CSM(format)(x, spmat.indices,
+                    spmat.indptr, numpy.asarray(spmat.shape, 'int32')),
+                    [spmat.data], structured=True)
+    def test_csm_sparser(self):
+        """
+        Test support for gradients sparser than the input.
+        """
+        sp_types = {'csc': sp.csc_matrix,
+            'csr': sp.csr_matrix}
+        for format in ['csc', 'csr']:
+            for dtype in ['float32', 'float64']:
+                x = tensor.tensor(dtype=dtype, broadcastable=(False,))
+                y = tensor.ivector()
+                z = tensor.ivector()
+                s = tensor.ivector()
+                a = as_sparse_variable(sp_types[format](random_lil((4, 3),
+                    dtype, 1)))
+                f = theano.function([x, y, z, s], tensor.grad(dense_from_sparse(
+                    a * CSM(format)(x, y, z, s)).sum(), x))
+                spmat = sp_types[format](random_lil((4, 3), dtype, 3))
+                res = f(spmat.data, spmat.indices, spmat.indptr,
+                    numpy.asarray(spmat.shape, 'int32'))
+                assert len(spmat.data) == len(res)
+    def test_csm_unsorted(self):
+        """
+        Test support for gradients of unsorted inputs.
+        """
+        sp_types = {'csc': sp.csc_matrix,
+            'csr': sp.csr_matrix}
+        for format in ['csr', 'csc', ]:
+            for dtype in ['float32', 'float64']:
+                x = tensor.tensor(dtype=dtype, broadcastable=(False,))
+                y = tensor.ivector()
+                z = tensor.ivector()
+                s = tensor.ivector()
+                # Sparse advanced indexing produces unsorted sparse matrices
+                a = sp_types[format]([[1,2,1], [1,2,1], [1,2,1], [1,2,1]],
+                    dtype=dtype)[range(4)]
+                # Make sure it's unsorted
+                assert not a.has_sorted_indices
+                a = as_sparse_variable(a)
+                f = theano.function([x, y, z, s], tensor.grad(tensor.sum(
+                    dense_from_sparse(a * CSM(format)(x, y, z, s))), x))
+                spmat = sp_types[format](random_lil((4, 3), dtype,
+                    12))[range(4)]
+                assert not spmat.has_sorted_indices
+                res = f(spmat.data, spmat.indices, spmat.indptr,
+                    numpy.asarray(spmat.shape, 'int32'))
+                col1 = sp_types[format]((res, spmat.indices, spmat.indptr),
+                    shape=numpy.asarray(spmat.shape, 'int32'))[:, 1].data
+                assert numpy.all(col1 == 2)
+    def test_csm(self):
+        sp_types = {'csc': sp.csc_matrix,
+            'csr': sp.csr_matrix}
+        for format in ['csc', 'csr']:
+            for dtype in ['float32', 'float64']:
+                x = tensor.tensor(dtype=dtype, broadcastable=(False,))
+                y = tensor.ivector()
+                z = tensor.ivector()
+                s = tensor.ivector()
+                f = theano.function([x, y, z, s], CSM(format)(x, y, z, s))
+                spmat = sp_types[format](random_lil((4, 3), dtype, 3))
+                res = f(spmat.data, spmat.indices, spmat.indptr,
+                    numpy.asarray(spmat.shape, 'int32'))
+                assert numpy.all(res.data == spmat.data)
+                assert numpy.all(res.indices == spmat.indices)
+                assert numpy.all(res.indptr == spmat.indptr)
+                assert numpy.all(res.shape == spmat.shape)
 class test_structureddot(unittest.TestCase):
    def setUp(self):
        utt.seed_rng()