Fix crash in AddSD.grad(). Move the c code to another op AddSD_ccode

6c460c39 · Frederic · cbf1a8e8 · 6c460c39 · 6c460c39 · 6c460c39
--- a/theano/sparse/basic.py
+++ b/theano/sparse/basic.py
@@ -1742,22 +1742,16 @@ class AddSD(gof.op.Op):
    :note: The grad implemented is structured on `x`.
    """
-    def __init__(self, inplace=False, *args, **kwargs):
+    def __init__(self, *args, **kwargs):
        gof.Op.__init__(self, *args, **kwargs)
-        #Should we do inplace addition or not ?
-        self.inplace = inplace
-        if self.inplace:
-            self.destroy_map = {0: [3]}
    def __eq__(self, other):
-        return (type(self) == type(other)) and self.inplace == other.inplace
+        return (type(self) == type(other))
    def __hash__(self):
-        return hash(type(self)) ^ hash(self.inplace)
+        return hash(type(self))
    def __str__(self):
-        if self.inplace:
-            return self.__class__.__name__ + '{inplace}'
        return self.__class__.__name__
    def make_node(self, x, y):
@@ -1769,70 +1763,18 @@ class AddSD(gof.op.Op):
                " You passed %s and %s inputs dtype." % (x.type.dtype,
                                                         y.type.dtype))
-        indices, indptr, data = csm_indices(x), csm_indptr(x), csm_data(x)
-        # We either use CSC or CSR depending on the format of input
-        self.format = x.format
        # The magic number two here arises because L{scipy.sparse}
        # objects must be matrices (have dimension 2)
        assert y.type.ndim == 2
        return gof.Apply(self,
-                         [data, indices, indptr, y],
+                         [x, y],
                         [tensor.TensorType(dtype=y.type.dtype,
                                            broadcastable=y.type.broadcastable
                                           ).make_variable()])
-    def c_code(self, node, name, (_data, _indices, _indptr, y), (z, ), sub):
+    def perform(self, node, (x,  y), (out, )):
-        inplace = int(self.inplace)
-        format = {'csc': 0, 'csr': 1}[self.format]
-        code = """
-                Py_XDECREF(%(z)s);
-                if (!%(inplace)s){
-                  %(z)s = (PyArrayObject *) PyArray_NewCopy(%(y)s, NPY_CORDER);
-                }else{
-                  %(z)s = %(y)s;
-                  Py_XINCREF(%(z)s);
-                }
-                npy_intp N =  PyArray_DIMS(%(_indptr)s)[0]-1;
-                const npy_int32 * __restrict__ indptr = (npy_int32 *)PyArray_DATA(%(_indptr)s);
-                const npy_int32 * __restrict__ indices = (npy_int32*)PyArray_DATA(%(_indices)s);
-                const dtype_%(_data)s* __restrict__ data = (dtype_%(_data)s*)PyArray_DATA(%(_data)s);
-                dtype_%(y)s* ydata = (dtype_%(y)s*)PyArray_DATA(%(y)s);
-                dtype_%(z)s* zdata = (dtype_%(z)s*)PyArray_DATA(%(z)s);
-                int Yi = PyArray_STRIDES(%(y)s)[0]/PyArray_DESCR(%(y)s)->elsize;
-                int Yj = PyArray_STRIDES(%(y)s)[1]/PyArray_DESCR(%(y)s)->elsize;
-                npy_int32 pos;
-                if (%(format)s == 0){
-                for (npy_int32 col = 0; col < N; ++col){
-                  for (npy_int32 ind = indptr[col]; ind < indptr[col+1]; ++ind){
-                    npy_int32 row = indices[ind];
-                    pos = row * Yi + col * Yj;
-                    zdata[pos] = ydata[pos] + data[ind];
-                  }
-                }
-                }else{
-                for (npy_int32 row = 0; row < N; ++row){
-                  for (npy_int32 ind = indptr[row]; ind < indptr[row+1]; ++ind){
-                    npy_int32 col = indices[ind];
-                    pos = row * Yi + col * Yj;
-                    zdata[pos] = ydata[pos] + data[ind];
-                  }
-                 }
-                }
-             """ % dict(locals(), **sub)
-        return code
-    def perform(self, node, (data, indices, indptr,  y), (out, )):
        assert _is_dense(y)
-        if self.format == 'csr':
-            x = scipy.sparse.csr_matrix((data, indices, indptr), shape=y.shape)
-        elif self.format == 'csc':
-            x = scipy.sparse.csc_matrix((data, indices, indptr), shape=y.shape)
        # The asarray is needed as in some case, this return a
        # numpy.matrixlib.defmatrix.matrix object and not an ndarray.
        out[0] = theano._asarray(x + y, dtype=node.outputs[0].type.dtype)
@@ -1843,10 +1785,8 @@ class AddSD(gof.op.Op):
        return sp_ones_like(x) * gz, gz
    def infer_shape(self, node, shapes):
-        return [shapes[3]]
+        return [shapes[1]]
-    def c_code_cache_version(self):
-        return (1,)
 add_s_d = AddSD()

--- a/theano/sparse/opt.py
+++ b/theano/sparse/opt.py
@@ -49,30 +49,136 @@ theano.compile.optdb.register('local_inplace_remove0',
                              gof.TopoOptimizer(local_inplace_remove0,
    failure_callback=gof.TopoOptimizer.warn_inplace),
                              60, 'fast_run', 'inplace')
 @gof.local_optimizer([sparse.AddSD])
 def local_inplace_addsd(node):
    """
    Optimization to insert inplace versions of AddSD.
    """
-    if isinstance(node.op, sparse.AddSD) and not node.op.inplace:
+    if isinstance(node.op, AddSD_ccode) and not node.op.inplace:
-        inputs = node.inputs[:3] + [node.inputs[3].shape]
-        fmt = node.op.format
-        if fmt == 'csc':
-            x = sparse.CSC(*inputs)
-        elif fmt == 'csr':
-            x = sparse.CSR(*inputs)
-        else:
-            raise NotImplementedError('Sparse format %s is not supported' % fmt)
        new_op = node.op.__class__(inplace=True)
-        new_node = new_op(x, node.inputs[3])
+        new_node = new_op(*node.inputs)
        return [new_node]
    return False
-theano.compile.optdb.register('local_inplace_addsd',
+theano.compile.optdb.register('local_inplace_addsd_ccode',
                              gof.TopoOptimizer(local_inplace_addsd,
    failure_callback=gof.TopoOptimizer.warn_inplace),
                              60, 'fast_run', 'inplace')
+class AddSD_ccode(gof.op.Op):
+    """Add a sparse and a dense matrix.
+    :param x: A sparse matrix.
+    :param y: A dense matrix
+    :return: `x`+`y`
+    :note: The grad implemented is structured on `x`.
+    """
+    def __init__(self, inplace=False, *args, **kwargs):
+        gof.Op.__init__(self, *args, **kwargs)
+        #Should we do inplace addition or not ?
+        self.inplace = inplace
+        if self.inplace:
+            self.destroy_map = {0: [3]}
+    def __eq__(self, other):
+        return (type(self) == type(other)) and self.inplace == other.inplace
+    def __hash__(self):
+        return hash(type(self)) ^ hash(self.inplace)
+    def __str__(self):
+        if self.inplace:
+            return self.__class__.__name__ + '{inplace}'
+        return self.__class__.__name__
+    def make_node(self, x, y):
+        x, y = as_sparse_variable(x), tensor.as_tensor_variable(y)
+        if x.type.dtype != y.type.dtype:
+            raise NotImplementedError(
+                "AddSD support inputs with the same dtype only."
+                " You passed %s and %s inputs dtype." % (x.type.dtype,
+                                                         y.type.dtype))
+        indices, indptr, data = csm_indices(x), csm_indptr(x), csm_data(x)
+        # We either use CSC or CSR depending on the format of input
+        self.format = x.format
+        # The magic number two here arises because L{scipy.sparse}
+        # objects must be matrices (have dimension 2)
+        assert y.type.ndim == 2
+        return gof.Apply(self,
+                         [data, indices, indptr, y],
+                         [tensor.TensorType(dtype=y.type.dtype,
+                                            broadcastable=y.type.broadcastable
+                                           ).make_variable()])
+    def c_code(self, node, name, (_data, _indices, _indptr, y), (z, ), sub):
+        inplace = int(self.inplace)
+        format = {'csc': 0, 'csr': 1}[self.format]
+        code = """
+                Py_XDECREF(%(z)s);
+                if (!%(inplace)s){
+                  %(z)s = (PyArrayObject *) PyArray_NewCopy(%(y)s, NPY_CORDER);
+                }else{
+                  %(z)s = %(y)s;
+                  Py_XINCREF(%(z)s);
+                }
+                npy_intp N =  PyArray_DIMS(%(_indptr)s)[0]-1;
+                const npy_int32 * __restrict__ indptr = (npy_int32 *)PyArray_DATA(%(_indptr)s);
+                const npy_int32 * __restrict__ indices = (npy_int32*)PyArray_DATA(%(_indices)s);
+                const dtype_%(_data)s* __restrict__ data = (dtype_%(_data)s*)PyArray_DATA(%(_data)s);
+                dtype_%(y)s* ydata = (dtype_%(y)s*)PyArray_DATA(%(y)s);
+                dtype_%(z)s* zdata = (dtype_%(z)s*)PyArray_DATA(%(z)s);
+                int Yi = PyArray_STRIDES(%(y)s)[0]/PyArray_DESCR(%(y)s)->elsize;
+                int Yj = PyArray_STRIDES(%(y)s)[1]/PyArray_DESCR(%(y)s)->elsize;
+                npy_int32 pos;
+                if (%(format)s == 0){
+                for (npy_int32 col = 0; col < N; ++col){
+                  for (npy_int32 ind = indptr[col]; ind < indptr[col+1]; ++ind){
+                    npy_int32 row = indices[ind];
+                    pos = row * Yi + col * Yj;
+                    zdata[pos] = ydata[pos] + data[ind];
+                  }
+                }
+                }else{
+                for (npy_int32 row = 0; row < N; ++row){
+                  for (npy_int32 ind = indptr[row]; ind < indptr[row+1]; ++ind){
+                    npy_int32 col = indices[ind];
+                    pos = row * Yi + col * Yj;
+                    zdata[pos] = ydata[pos] + data[ind];
+                  }
+                 }
+                }
+             """ % dict(locals(), **sub)
+        return code
+    def perform(self, node, (data, indices, indptr,  y), (out, )):
+        assert _is_dense(y)
+        if self.format == 'csr':
+            x = scipy.sparse.csr_matrix((data, indices, indptr), shape=y.shape)
+        elif self.format == 'csc':
+            x = scipy.sparse.csc_matrix((data, indices, indptr), shape=y.shape)
+        # The asarray is needed as in some case, this return a
+        # numpy.matrixlib.defmatrix.matrix object and not an ndarray.
+        out[0] = theano._asarray(x + y, dtype=node.outputs[0].type.dtype)
+    def infer_shape(self, node, shapes):
+        return [shapes[3]]
+    def c_code_cache_version(self):
+        return (1,)
 class StructuredDotCSC(gof.Op):
    """Structured Dot CSC is like dot, except that only the
    gradient wrt non-zero elements of the sparse matrix

--- a/theano/sparse/tests/test_basic.py
+++ b/theano/sparse/tests/test_basic.py
@@ -588,11 +588,17 @@ class T_AddMul(unittest.TestCase):
                    self.assertTrue(numpy.all(val == (array1 + b)))
                    ans = numpy.array([[1., 2], [3, 4], [5, 6]])
                    self.assertTrue(numpy.all(val == ans))
+                    if isinstance(a, theano.Constant):
+                        a = a.data
+                    verify_grad_sparse(op, [a, b], structured=True)
                elif op is mul:
                    self.assertTrue(_is_sparse_variable(apb))
                    self.assertTrue(numpy.all(val.todense() == (b.multiply(array1))))
                    self.assertTrue(numpy.all(val.todense() == numpy.array(
                        [[1, 0], [9, 0], [0, 36]])))
+                    if isinstance(a, theano.Constant):
+                        a = a.data
+                    verify_grad_sparse(op, [a, b], structured=False)
    def _testDS(self, op, array1=numpy.array([[1., 0], [3, 0], [0, 6]]),
                array2=numpy.asarray([[0, 2.], [0, 4], [5, 0]])):
@@ -616,11 +622,17 @@ class T_AddMul(unittest.TestCase):
                    self.assertTrue(numpy.all(val == (a + array2)))
                    ans = numpy.array([[1., 2], [3, 4], [5, 6]])
                    self.assertTrue(numpy.all(val == ans))
+                    if isinstance(b, theano.Constant):
+                        b = b.data
+                    verify_grad_sparse(op, [a, b], structured=True)
                elif op is mul:
                    self.assertTrue(_is_sparse_variable(apb))
                    ans = numpy.array([[1, 0], [9, 0], [0, 36]])
                    self.assertTrue(numpy.all(val.todense() == (a.multiply(array2))))
                    self.assertTrue(numpy.all(val.todense() == ans))
+                    if isinstance(b, theano.Constant):
+                        b = b.data
+                    verify_grad_sparse(op, [a, b], structured=False)
    def test_upcast(self):
        array1 = numpy.array([[1, 0], [3, 0], [0, 6]], dtype='float32')