Commit 8e9ebc8f authored by abergeron

Merge pull request #1727 from nouiz/sparse

Fix Sparse grad crash and implement mixed dtype in sparse Mul/Add
......@@ -1654,13 +1654,12 @@ class AddSS(gof.op.Op):
def make_node(self, x, y):
x, y = map(as_sparse_variable, [x, y])
if x.type.dtype != y.type.dtype:
raise NotImplementedError()
out_dtype = scalar.upcast(x.type.dtype, y.type.dtype)
if x.type.format != y.type.format:
raise NotImplementedError()
return gof.Apply(self,
[x, y],
[SparseType(dtype=x.type.dtype,
[SparseType(dtype=out_dtype,
format=x.type.format
).make_variable()])
......@@ -1742,97 +1741,34 @@ class AddSD(gof.op.Op):
:note: The grad implemented is structured on `x`.
"""
def __init__(self, inplace=False, *args, **kwargs):
def __init__(self, *args, **kwargs):
gof.Op.__init__(self, *args, **kwargs)
#Should we do inplace addition or not ?
self.inplace = inplace
if self.inplace:
self.destroy_map = {0: [3]}
def __eq__(self, other):
return (type(self) == type(other)) and self.inplace == other.inplace
return (type(self) == type(other))
def __hash__(self):
return hash(type(self)) ^ hash(self.inplace)
return hash(type(self))
def __str__(self):
if self.inplace:
return self.__class__.__name__ + '{inplace}'
return self.__class__.__name__
def make_node(self, x, y):
x, y = as_sparse_variable(x), tensor.as_tensor_variable(y)
out_dtype = scalar.upcast(x.type.dtype, y.type.dtype)
if x.type.dtype != y.type.dtype:
raise NotImplementedError(
"AddSD support inputs with the same dtype only."
" You passed %s and %s inputs dtype." % (x.type.dtype,
y.type.dtype))
indices, indptr, data = csm_indices(x), csm_indptr(x), csm_data(x)
# We either use CSC or CSR depending on the format of input
self.format = x.format
# The magic number two here arises because L{scipy.sparse}
# objects must be matrices (have dimension 2)
assert y.type.ndim == 2
return gof.Apply(self,
[data, indices, indptr, y],
[tensor.TensorType(dtype=y.type.dtype,
[x, y],
[tensor.TensorType(dtype=out_dtype,
broadcastable=y.type.broadcastable
).make_variable()])
def c_code(self, node, name, (_data, _indices, _indptr, y), (z, ), sub):
inplace = int(self.inplace)
format = {'csc': 0, 'csr': 1}[self.format]
code = """
Py_XDECREF(%(z)s);
if (!%(inplace)s){
%(z)s = (PyArrayObject *) PyArray_NewCopy(%(y)s, NPY_CORDER);
}else{
%(z)s = %(y)s;
Py_XINCREF(%(z)s);
}
npy_intp N = PyArray_DIMS(%(_indptr)s)[0]-1;
const npy_int32 * __restrict__ indptr = (npy_int32 *)PyArray_DATA(%(_indptr)s);
const npy_int32 * __restrict__ indices = (npy_int32*)PyArray_DATA(%(_indices)s);
const dtype_%(_data)s* __restrict__ data = (dtype_%(_data)s*)PyArray_DATA(%(_data)s);
dtype_%(y)s* ydata = (dtype_%(y)s*)PyArray_DATA(%(y)s);
dtype_%(z)s* zdata = (dtype_%(z)s*)PyArray_DATA(%(z)s);
int Yi = PyArray_STRIDES(%(y)s)[0]/PyArray_DESCR(%(y)s)->elsize;
int Yj = PyArray_STRIDES(%(y)s)[1]/PyArray_DESCR(%(y)s)->elsize;
npy_int32 pos;
if (%(format)s == 0){
for (npy_int32 col = 0; col < N; ++col){
for (npy_int32 ind = indptr[col]; ind < indptr[col+1]; ++ind){
npy_int32 row = indices[ind];
pos = row * Yi + col * Yj;
zdata[pos] = ydata[pos] + data[ind];
}
}
}else{
for (npy_int32 row = 0; row < N; ++row){
for (npy_int32 ind = indptr[row]; ind < indptr[row+1]; ++ind){
npy_int32 col = indices[ind];
pos = row * Yi + col * Yj;
zdata[pos] = ydata[pos] + data[ind];
}
}
}
""" % dict(locals(), **sub)
return code
def perform(self, node, (data, indices, indptr, y), (out, )):
def perform(self, node, (x, y), (out, )):
assert _is_dense(y)
if self.format == 'csr':
x = scipy.sparse.csr_matrix((data, indices, indptr), shape=y.shape)
elif self.format == 'csc':
x = scipy.sparse.csc_matrix((data, indices, indptr), shape=y.shape)
# The asarray is needed as in some case, this return a
# numpy.matrixlib.defmatrix.matrix object and not an ndarray.
out[0] = theano._asarray(x + y, dtype=node.outputs[0].type.dtype)
......@@ -1843,10 +1779,8 @@ class AddSD(gof.op.Op):
return sp_ones_like(x) * gz, gz
def infer_shape(self, node, shapes):
return [shapes[3]]
return [shapes[1]]
def c_code_cache_version(self):
return (1,)
add_s_d = AddSD()
......@@ -1983,11 +1917,16 @@ class MulSS(gof.op.Op):
def make_node(self, x, y):
x, y = as_sparse_variable(x), as_sparse_variable(y)
if x.type != y.type:
out_dtype = scalar.upcast(x.type.dtype, y.type.dtype)
if x.type.format != y.type.format:
raise NotImplementedError(
"MulSS not supported for differing types. "
"Got %s and %s." % (str(x.type), str(y.type)))
return gof.Apply(self, [x, y], [x.type()])
return gof.Apply(self, [x, y],
[SparseType(dtype=out_dtype,
format=x.type.format
)()])
def perform(self, node, (x, y), (out, )):
assert _is_sparse(x) and _is_sparse(y)
......@@ -2031,23 +1970,25 @@ class MulSD(gof.op.Op):
# upcast the tensor. Is the cast of sparse done implemented?
dtype = scalar.upcast(x.type.dtype, y.type.dtype)
if y.type.dtype != dtype:
y = tensor.cast(y, dtype)
if x.type.dtype != y.type.dtype:
raise NotImplementedError(
"MulSD not implemented for different input dtypes. "
"Got %s and %s." % (x.type.dtype, y.type.dtype))
# The magic number two here arises because L{scipy.sparse}
# objects must be matrices (have dimension 2)
# Broadcasting of the sparse matrix is not supported.
assert y.type.ndim <= 2
return gof.Apply(self, [x, y], [x.type()])
# We support nd == 0 used by grad of SpSum()
assert y.type.ndim in [0, 2]
out = SparseType(dtype=dtype,
format=x.type.format)()
return gof.Apply(self, [x, y], [out])
def perform(self, node, (x, y), (out, )):
assert _is_sparse(x) and _is_dense(y)
if len(y.shape) == 0:
out[0] = x.copy()
out_dtype = node.outputs[0].dtype
if x.dtype == out_dtype:
z = x.copy()
else:
z = x.astype(out_dtype)
out[0] = z
out[0].data *= y
elif len(y.shape) == 1:
raise NotImplementedError() # RowScale / ColScale
......@@ -2057,12 +1998,16 @@ class MulSD(gof.op.Op):
# TODO: change runtime from O(M*N) to O(nonzeros)
M, N = x.shape
assert x.shape == y.shape
out_dtype = node.outputs[0].dtype
if x.format == 'csc':
x_data = x.data
indices = x.indices
indptr = x.indptr
z = x.copy()
if x.dtype == out_dtype:
z = x.copy()
else:
z = x.astype(out_dtype)
z_data = z.data
for j in xrange(0, N):
......@@ -2074,7 +2019,10 @@ class MulSD(gof.op.Op):
x_data = x.data
indices = x.indices
indptr = x.indptr
z = x.copy()
if x.dtype == out_dtype:
z = x.copy()
else:
z = x.astype(out_dtype)
z_data = z.data
for i in xrange(0, M):
......
......@@ -8,7 +8,8 @@ from theano import gof, scalar, tensor
from theano.tensor import blas
from theano.sparse import (CSC, CSR, csm_properties,
register_specialize,
csm_grad, usmm)
csm_grad, usmm, csm_indices, csm_indptr,
csm_data)
from theano.sparse import basic as sparse
_is_sparse_variable = sparse._is_sparse_variable
......@@ -49,30 +50,148 @@ theano.compile.optdb.register('local_inplace_remove0',
gof.TopoOptimizer(local_inplace_remove0,
failure_callback=gof.TopoOptimizer.warn_inplace),
60, 'fast_run', 'inplace')
class AddSD_ccode(gof.op.Op):
    """Add a sparse and a dense matrix, with a fast C implementation.

    The output dtype is the upcast of the two input dtypes, and the
    sparse input is decomposed into its (data, indices, indptr)
    components in `make_node` so the C code can address them directly.

    :param x: A sparse matrix (CSC or CSR; must match ``self.format``).
    :param y: A dense matrix.

    :return: `x` + `y` as a dense matrix.

    :note: The grad implemented is structured on `x`.
    """
    def __init__(self, format, inplace=False, *args, **kwargs):
        gof.Op.__init__(self, *args, **kwargs)
        # Should we do inplace addition or not?
        self.inplace = inplace
        # Sparse format this instance accepts: 'csc' or 'csr'.
        self.format = format
        if self.inplace:
            # Input 3 is the dense matrix y; the inplace variant
            # overwrites it to produce output 0.
            self.destroy_map = {0: [3]}

    def __eq__(self, other):
        # Equality must account for every attribute that changes the
        # generated C code / Apply behavior: inplace flag and format.
        return (type(self) == type(other) and
                self.inplace == other.inplace and
                self.format == other.format)

    def __hash__(self):
        # Must stay consistent with __eq__ above.
        return hash(type(self)) ^ hash(self.inplace) ^ hash(self.format)

    def __str__(self):
        inp = ''
        if self.inplace:
            inp = ',inplace'
        return "%s{%s%s}" % (self.__class__.__name__,
                             self.format, inp)

    def make_node(self, x, y):
        x, y = sparse.as_sparse_variable(x), tensor.as_tensor_variable(y)
        out_dtype = scalar.upcast(x.type.dtype, y.type.dtype)
        if self.inplace:
            # y is destroyed and reused as the output buffer, so the
            # upcast dtype must already equal y's dtype.
            assert out_dtype == y.dtype

        # Decompose the sparse input so the C code receives plain arrays.
        indices, indptr, data = csm_indices(x), csm_indptr(x), csm_data(x)
        # We either use CSC or CSR depending on the format of input
        assert self.format == x.type.format
        # The magic number two here arises because L{scipy.sparse}
        # objects must be matrices (have dimension 2)
        assert y.type.ndim == 2
        out = tensor.TensorType(dtype=out_dtype,
                                broadcastable=y.type.broadcastable)()
        return gof.Apply(self,
                         [data, indices, indptr, y],
                         [out])

    def c_code(self, node, name, (_data, _indices, _indptr, y), (z, ), sub):
        inplace = int(self.inplace)
        # Select the loop variant: 0 -> CSC (indptr indexes columns),
        # 1 -> CSR (indptr indexes rows).
        format = {'csc': 0, 'csr': 1}[self.format]
        # NumPy type number of the output, used to cast y when copying.
        out_typenum = node.outputs[0].type.dtype_specs()[2]
        code = """
                Py_XDECREF(%(z)s);
                if (!%(inplace)s){
                    if(PyArray_TYPE(%(y)s) != %(out_typenum)s){
                        %(z)s = (PyArrayObject *) PyArray_FromArray(%(y)s,  PyArray_DescrFromType(%(out_typenum)s), 0);
                    }else{
                        %(z)s = (PyArrayObject *) PyArray_NewCopy(%(y)s, NPY_CORDER);
                    }
                }else{
                  %(z)s = %(y)s;
                  Py_XINCREF(%(z)s);
                }

                npy_intp N =  PyArray_DIMS(%(_indptr)s)[0]-1;

                const npy_int32 * __restrict__ indptr = (npy_int32 *)PyArray_DATA(%(_indptr)s);
                const npy_int32 * __restrict__ indices = (npy_int32*)PyArray_DATA(%(_indices)s);
                const dtype_%(_data)s* __restrict__ data = (dtype_%(_data)s*)PyArray_DATA(%(_data)s);

                dtype_%(y)s* ydata = (dtype_%(y)s*)PyArray_DATA(%(y)s);
                dtype_%(z)s* zdata = (dtype_%(z)s*)PyArray_DATA(%(z)s);
                int Yi = PyArray_STRIDES(%(y)s)[0]/PyArray_DESCR(%(y)s)->elsize;
                int Yj = PyArray_STRIDES(%(y)s)[1]/PyArray_DESCR(%(y)s)->elsize;

                npy_int32 pos;
                if (%(format)s == 0){
                for (npy_int32 col = 0; col < N; ++col){
                  for (npy_int32 ind = indptr[col]; ind < indptr[col+1]; ++ind){
                    npy_int32 row = indices[ind];
                    pos = row * Yi + col * Yj;
                    zdata[pos] = ydata[pos] + data[ind];
                  }
                }
                }else{
                for (npy_int32 row = 0; row < N; ++row){
                  for (npy_int32 ind = indptr[row]; ind < indptr[row+1]; ++ind){
                    npy_int32 col = indices[ind];
                    pos = row * Yi + col * Yj;
                    zdata[pos] = ydata[pos] + data[ind];
                  }
                }
                }
             """ % dict(locals(), **sub)
        return code

    def infer_shape(self, node, shapes):
        # The output always has the shape of the dense input y
        # (input index 3 of the Apply built in make_node).
        return [shapes[3]]

    def c_code_cache_version(self):
        return (1,)
@gof.local_optimizer([sparse.AddSD])
def local_inplace_addsd(node):
def local_inplace_addsd_ccode(node):
"""
Optimization to insert inplace versions of AddSD.
"""
if isinstance(node.op, sparse.AddSD) and not node.op.inplace:
inputs = node.inputs[:3] + [node.inputs[3].shape]
fmt = node.op.format
if fmt == 'csc':
x = sparse.CSC(*inputs)
elif fmt == 'csr':
x = sparse.CSR(*inputs)
else:
raise NotImplementedError('Sparse format %s is not supported' % fmt)
new_op = node.op.__class__(inplace=True)
new_node = new_op(x, node.inputs[3])
if isinstance(node.op, sparse.AddSD) and theano.config.cxx:
out_dtype = scalar.upcast(*node.inputs)
if out_dtype != node.inputs[1].dtype:
return
new_node = AddSD_ccode(format=node.inputs[0].type.format,
inplace=True)(*node.inputs)
return [new_node]
return False
theano.compile.optdb.register('local_inplace_addsd',
gof.TopoOptimizer(local_inplace_addsd,
theano.compile.optdb.register('local_inplace_addsd_ccode',
gof.TopoOptimizer(local_inplace_addsd_ccode,
failure_callback=gof.TopoOptimizer.warn_inplace),
60, 'fast_run', 'inplace')
@gof.local_optimizer([sparse.AddSD])
def local_addsd_ccode(node):
    """
    Convert AddSD to faster AddSD_ccode.
    """
    # Only rewrite AddSD nodes, and only when a C compiler is available.
    if not isinstance(node.op, sparse.AddSD):
        return False
    if not theano.config.cxx:
        return False
    fast_op = AddSD_ccode(format=node.inputs[0].type.format)
    return [fast_op(*node.inputs)]
# Register the AddSD -> AddSD_ccode rewrite in the global optimizer DB.
theano.compile.optdb.register('local_addsd_ccode',
                              gof.TopoOptimizer(local_addsd_ccode),
                              # Must run after local_inplace_addsd_ccode,
                              # which is registered at position 60.
                              61, 'fast_run')
class StructuredDotCSC(gof.Op):
"""Structured Dot CSC is like dot, except that only the
gradient wrt non-zero elements of the sparse matrix
......@@ -1139,6 +1258,9 @@ def local_mul_s_d(node):
mul_s_d_csx = mul_s_d_csr
else:
raise NotImplemented()
if x.dtype != y.dtype:
#mul_s_d_csx don't support that case
return
c_data = mul_s_d_csx(sparse.csm_data(svar),
sparse.csm_indices(svar),
......
Markdown is supported
0%
You are about to add 0 people to the discussion. Proceed with caution.
Please finish editing this comment first!
Register or sign in to comment