Merge pull request #1727 from nouiz/sparse

Fix Sparse grad crash and implement mixed dtype in sparse Mul/Add

Merge pull request #1727 from nouiz/sparse
8e9ebc8f · abergeron · cbf1a8e8 · d951ae3a · 8e9ebc8f · 8e9ebc8f
--- a/doc/images/theano_logo_allblue_63x21.png
+++ b/doc/images/theano_logo_allblue_63x21.png
--- a/theano/sparse/basic.py
+++ b/theano/sparse/basic.py
@@ -1654,13 +1654,12 @@ class AddSS(gof.op.Op):

    def make_node(self, x, y):
        x, y = map(as_sparse_variable, [x, y])
-        if x.type.dtype != y.type.dtype:
-            raise NotImplementedError()
+        out_dtype = scalar.upcast(x.type.dtype, y.type.dtype)
        if x.type.format != y.type.format:
            raise NotImplementedError()
        return gof.Apply(self,
                         [x, y],
-                         [SparseType(dtype=x.type.dtype,
+                         [SparseType(dtype=out_dtype,
                                     format=x.type.format
                                    ).make_variable()])

@@ -1742,97 +1741,34 @@ class AddSD(gof.op.Op):

    :note: The grad implemented is structured on `x`.
    """
-    def __init__(self, inplace=False, *args, **kwargs):
+    def __init__(self, *args, **kwargs):
        gof.Op.__init__(self, *args, **kwargs)
-        #Should we do inplace addition or not ?
-        self.inplace = inplace
-        if self.inplace:
-            self.destroy_map = {0: [3]}

    def __eq__(self, other):
-        return (type(self) == type(other)) and self.inplace == other.inplace
+        return (type(self) == type(other))

    def __hash__(self):
-        return hash(type(self)) ^ hash(self.inplace)
+        return hash(type(self))

    def __str__(self):
-        if self.inplace:
-            return self.__class__.__name__ + '{inplace}'
        return self.__class__.__name__

    def make_node(self, x, y):
        x, y = as_sparse_variable(x), tensor.as_tensor_variable(y)
+        out_dtype = scalar.upcast(x.type.dtype, y.type.dtype)

-        if x.type.dtype != y.type.dtype:
-            raise NotImplementedError(
-                "AddSD support inputs with the same dtype only."
-                " You passed %s and %s inputs dtype." % (x.type.dtype,
-                                                         y.type.dtype))
-
-        indices, indptr, data = csm_indices(x), csm_indptr(x), csm_data(x)
-
-        # We either use CSC or CSR depending on the format of input
-        self.format = x.format
        # The magic number two here arises because L{scipy.sparse}
        # objects must be matrices (have dimension 2)
        assert y.type.ndim == 2
        return gof.Apply(self,
-                         [data, indices, indptr, y],
-                         [tensor.TensorType(dtype=y.type.dtype,
+                         [x, y],
+                         [tensor.TensorType(dtype=out_dtype,
                                            broadcastable=y.type.broadcastable
                                           ).make_variable()])

-    def c_code(self, node, name, (_data, _indices, _indptr, y), (z, ), sub):
-        inplace = int(self.inplace)
-        format = {'csc': 0, 'csr': 1}[self.format]
-        code = """
-                Py_XDECREF(%(z)s);
-                if (!%(inplace)s){
-                  %(z)s = (PyArrayObject *) PyArray_NewCopy(%(y)s, NPY_CORDER);
-                }else{
-                  %(z)s = %(y)s;
-                  Py_XINCREF(%(z)s);
-                }
-
-                npy_intp N =  PyArray_DIMS(%(_indptr)s)[0]-1;
-                const npy_int32 * __restrict__ indptr = (npy_int32 *)PyArray_DATA(%(_indptr)s);
-                const npy_int32 * __restrict__ indices = (npy_int32*)PyArray_DATA(%(_indices)s);
-                const dtype_%(_data)s* __restrict__ data = (dtype_%(_data)s*)PyArray_DATA(%(_data)s);
-
-                dtype_%(y)s* ydata = (dtype_%(y)s*)PyArray_DATA(%(y)s);
-                dtype_%(z)s* zdata = (dtype_%(z)s*)PyArray_DATA(%(z)s);
-                int Yi = PyArray_STRIDES(%(y)s)[0]/PyArray_DESCR(%(y)s)->elsize;
-                int Yj = PyArray_STRIDES(%(y)s)[1]/PyArray_DESCR(%(y)s)->elsize;
-
-                npy_int32 pos;
-                if (%(format)s == 0){
-                for (npy_int32 col = 0; col < N; ++col){
-                  for (npy_int32 ind = indptr[col]; ind < indptr[col+1]; ++ind){
-                    npy_int32 row = indices[ind];
-                    pos = row * Yi + col * Yj;
-                    zdata[pos] = ydata[pos] + data[ind];
-                  }
-                }
-                }else{
-                for (npy_int32 row = 0; row < N; ++row){
-                  for (npy_int32 ind = indptr[row]; ind < indptr[row+1]; ++ind){
-                    npy_int32 col = indices[ind];
-                    pos = row * Yi + col * Yj;
-                    zdata[pos] = ydata[pos] + data[ind];
-                  }
-                 }
-                }
-             """ % dict(locals(), **sub)
-        return code
-
-    def perform(self, node, (data, indices, indptr,  y), (out, )):
+    def perform(self, node, (x,  y), (out, )):
        assert _is_dense(y)

-        if self.format == 'csr':
-            x = scipy.sparse.csr_matrix((data, indices, indptr), shape=y.shape)
-        elif self.format == 'csc':
-            x = scipy.sparse.csc_matrix((data, indices, indptr), shape=y.shape)
-
        # The asarray is needed as in some case, this return a
        # numpy.matrixlib.defmatrix.matrix object and not an ndarray.
        out[0] = theano._asarray(x + y, dtype=node.outputs[0].type.dtype)
@@ -1843,10 +1779,8 @@ class AddSD(gof.op.Op):
        return sp_ones_like(x) * gz, gz

    def infer_shape(self, node, shapes):
-        return [shapes[3]]
+        return [shapes[1]]

-    def c_code_cache_version(self):
-        return (1,)
 add_s_d = AddSD()


@@ -1983,11 +1917,16 @@ class MulSS(gof.op.Op):

    def make_node(self, x, y):
        x, y = as_sparse_variable(x), as_sparse_variable(y)
-        if x.type != y.type:
+        out_dtype = scalar.upcast(x.type.dtype, y.type.dtype)
+        if x.type.format != y.type.format:
            raise NotImplementedError(
                    "MulSS not supported for differing types. "
                    "Got %s and %s." % (str(x.type), str(y.type)))
-        return gof.Apply(self, [x, y], [x.type()])
+        return gof.Apply(self, [x, y],
+                         [SparseType(dtype=out_dtype,
+                                     format=x.type.format
+                                    )()])
+

    def perform(self, node, (x, y), (out, )):
        assert _is_sparse(x) and _is_sparse(y)
@@ -2031,23 +1970,25 @@ class MulSD(gof.op.Op):

        # upcast the tensor. Is the cast of sparse done implemented?
        dtype = scalar.upcast(x.type.dtype, y.type.dtype)
-        if y.type.dtype != dtype:
-            y = tensor.cast(y, dtype)

-        if x.type.dtype != y.type.dtype:
-            raise NotImplementedError(
-                "MulSD not implemented for different input dtypes. "
-                "Got %s and %s." % (x.type.dtype, y.type.dtype))
        # The magic number two here arises because L{scipy.sparse}
        # objects must be matrices (have dimension 2)
        # Broadcasting of the sparse matrix is not supported.
-        assert y.type.ndim <= 2
-        return gof.Apply(self, [x, y], [x.type()])
+        # We also support ndim == 0, which is used by the grad of SpSum()
+        assert y.type.ndim in [0, 2]
+        out = SparseType(dtype=dtype,
+                         format=x.type.format)()
+        return gof.Apply(self, [x, y], [out])

    def perform(self, node, (x, y), (out, )):
        assert _is_sparse(x) and _is_dense(y)
        if len(y.shape) == 0:
-            out[0] = x.copy()
+            out_dtype = node.outputs[0].dtype
+            if x.dtype == out_dtype:
+                z = x.copy()
+            else:
+                z = x.astype(out_dtype)
+            out[0] = z
            out[0].data *= y
        elif len(y.shape) == 1:
            raise NotImplementedError()  # RowScale / ColScale
@@ -2057,12 +1998,16 @@ class MulSD(gof.op.Op):
            # TODO: change runtime from O(M*N) to O(nonzeros)
            M, N = x.shape
            assert x.shape == y.shape
+            out_dtype = node.outputs[0].dtype

            if x.format == 'csc':
                x_data = x.data
                indices = x.indices
                indptr = x.indptr
-                z = x.copy()
+                if x.dtype == out_dtype:
+                    z = x.copy()
+                else:
+                    z = x.astype(out_dtype)
                z_data = z.data

                for j in xrange(0, N):
@@ -2074,7 +2019,10 @@ class MulSD(gof.op.Op):
                x_data = x.data
                indices = x.indices
                indptr = x.indptr
-                z = x.copy()
+                if x.dtype == out_dtype:
+                    z = x.copy()
+                else:
+                    z = x.astype(out_dtype)
                z_data = z.data

                for i in xrange(0, M):

--- a/theano/sparse/opt.py
+++ b/theano/sparse/opt.py
@@ -8,7 +8,8 @@ from theano import gof, scalar, tensor
 from theano.tensor import blas
 from theano.sparse import (CSC, CSR, csm_properties,
                           register_specialize,
-                           csm_grad, usmm)
+                           csm_grad, usmm, csm_indices, csm_indptr,
+                           csm_data)
 from theano.sparse import basic as sparse

 _is_sparse_variable = sparse._is_sparse_variable
@@ -49,30 +50,148 @@ theano.compile.optdb.register('local_inplace_remove0',
                              gof.TopoOptimizer(local_inplace_remove0,
    failure_callback=gof.TopoOptimizer.warn_inplace),
                              60, 'fast_run', 'inplace')
+
+
+class AddSD_ccode(gof.op.Op):
+    """Add a sparse and a dense matrix.
+
+    :param x: A sparse matrix.
+    :param y: A dense matrix
+
+    :return: `x`+`y`
+
+    :note: This op implements no grad; it replaces AddSD during optimization.
+    """
+    def __init__(self, format, inplace=False, *args, **kwargs):
+        gof.Op.__init__(self, *args, **kwargs)
+        # Whether the addition is done inplace on the dense input y.
+        self.inplace = inplace
+        self.format = format
+        if self.inplace:
+            self.destroy_map = {0: [3]}
+
+    def __eq__(self, other):
+        return (type(self) == type(other) and
+                self.inplace == other.inplace and
+                self.format == other.format)
+
+    def __hash__(self):
+        return hash(type(self)) ^ hash(self.inplace) ^ hash(self.format)
+
+    def __str__(self):
+        inp = ''
+        if self.inplace:
+            inp = ',inplace'
+        return "%s{%s%s}" % (self.__class__.__name__,
+                             self.format, inp)
+
+    def make_node(self, x, y):
+        x, y = sparse.as_sparse_variable(x), tensor.as_tensor_variable(y)
+        out_dtype = scalar.upcast(x.type.dtype, y.type.dtype)
+        if self.inplace:
+            assert out_dtype == y.dtype
+
+        indices, indptr, data = csm_indices(x), csm_indptr(x), csm_data(x)
+        # The op's format (CSC or CSR) must match the format of the input
+        assert self.format == x.type.format
+        # The magic number two here arises because L{scipy.sparse}
+        # objects must be matrices (have dimension 2)
+        assert y.type.ndim == 2
+        out = tensor.TensorType(dtype=out_dtype,
+                                broadcastable=y.type.broadcastable)()
+        return gof.Apply(self,
+                         [data, indices, indptr, y],
+                         [out])
+
+    def c_code(self, node, name, (_data, _indices, _indptr, y), (z, ), sub):
+        inplace = int(self.inplace)
+        format = {'csc': 0, 'csr': 1}[self.format]
+        out_typenum = node.outputs[0].type.dtype_specs()[2]
+        code = """
+                Py_XDECREF(%(z)s);
+                if (!%(inplace)s){
+                    if(PyArray_TYPE(%(y)s) != %(out_typenum)s){
+                        %(z)s = (PyArrayObject *) PyArray_FromArray(%(y)s,  PyArray_DescrFromType(%(out_typenum)s), 0);
+                    }else{
+                        %(z)s = (PyArrayObject *) PyArray_NewCopy(%(y)s, NPY_CORDER);
+                    }
+                }else{
+                  %(z)s = %(y)s;
+                  Py_XINCREF(%(z)s);
+                }
+
+                npy_intp N =  PyArray_DIMS(%(_indptr)s)[0]-1;
+                const npy_int32 * __restrict__ indptr = (npy_int32 *)PyArray_DATA(%(_indptr)s);
+                const npy_int32 * __restrict__ indices = (npy_int32*)PyArray_DATA(%(_indices)s);
+                const dtype_%(_data)s* __restrict__ data = (dtype_%(_data)s*)PyArray_DATA(%(_data)s);
+
+                dtype_%(y)s* ydata = (dtype_%(y)s*)PyArray_DATA(%(y)s);
+                dtype_%(z)s* zdata = (dtype_%(z)s*)PyArray_DATA(%(z)s);
+                int Yi = PyArray_STRIDES(%(y)s)[0]/PyArray_DESCR(%(y)s)->elsize;
+                int Yj = PyArray_STRIDES(%(y)s)[1]/PyArray_DESCR(%(y)s)->elsize;
+
+                npy_int32 pos;
+                if (%(format)s == 0){
+                for (npy_int32 col = 0; col < N; ++col){
+                  for (npy_int32 ind = indptr[col]; ind < indptr[col+1]; ++ind){
+                    npy_int32 row = indices[ind];
+                    pos = row * Yi + col * Yj;
+                    zdata[pos] = ydata[pos] + data[ind];
+                  }
+                }
+                }else{
+                for (npy_int32 row = 0; row < N; ++row){
+                  for (npy_int32 ind = indptr[row]; ind < indptr[row+1]; ++ind){
+                    npy_int32 col = indices[ind];
+                    pos = row * Yi + col * Yj;
+                    zdata[pos] = ydata[pos] + data[ind];
+                  }
+                 }
+                }
+             """ % dict(locals(), **sub)
+        return code
+
+    def infer_shape(self, node, shapes):
+        return [shapes[3]]
+
+    def c_code_cache_version(self):
+        return (1,)
+
+
 @gof.local_optimizer([sparse.AddSD])
-def local_inplace_addsd(node):
+def local_inplace_addsd_ccode(node):
    """
    Optimization to insert inplace versions of AddSD.
    """
-    if isinstance(node.op, sparse.AddSD) and not node.op.inplace:
-        inputs = node.inputs[:3] + [node.inputs[3].shape]
-        fmt = node.op.format
-        if fmt == 'csc':
-            x = sparse.CSC(*inputs)
-        elif fmt == 'csr':
-            x = sparse.CSR(*inputs)
-        else:
-            raise NotImplementedError('Sparse format %s is not supported' % fmt)
-        new_op = node.op.__class__(inplace=True)
-        new_node = new_op(x, node.inputs[3])
+    if isinstance(node.op, sparse.AddSD) and theano.config.cxx:
+        out_dtype = scalar.upcast(*node.inputs)
+        if out_dtype != node.inputs[1].dtype:
+            return
+        new_node = AddSD_ccode(format=node.inputs[0].type.format,
+                               inplace=True)(*node.inputs)
        return [new_node]
    return False
-theano.compile.optdb.register('local_inplace_addsd',
-                              gof.TopoOptimizer(local_inplace_addsd,
+theano.compile.optdb.register('local_inplace_addsd_ccode',
+                              gof.TopoOptimizer(local_inplace_addsd_ccode,
    failure_callback=gof.TopoOptimizer.warn_inplace),
                              60, 'fast_run', 'inplace')


+@gof.local_optimizer([sparse.AddSD])
+def local_addsd_ccode(node):
+    """
+    Convert AddSD to faster AddSD_ccode.
+    """
+    if isinstance(node.op, sparse.AddSD) and theano.config.cxx:
+        new_node = AddSD_ccode(format=node.inputs[0].type.format)(*node.inputs)
+        return [new_node]
+    return False
+theano.compile.optdb.register('local_addsd_ccode',
+                              gof.TopoOptimizer(local_addsd_ccode),
+                              # Must run after local_inplace_addsd_ccode (at 60)
+                              61, 'fast_run')
+
+
 class StructuredDotCSC(gof.Op):
    """Structured Dot CSC is like dot, except that only the
    gradient wrt non-zero elements of the sparse matrix
@@ -1139,6 +1258,9 @@ def local_mul_s_d(node):
            mul_s_d_csx = mul_s_d_csr
        else:
            raise NotImplemented()
+        if x.dtype != y.dtype:
+            # mul_s_d_csx doesn't support inputs of different dtypes
+            return

        c_data = mul_s_d_csx(sparse.csm_data(svar),
                             sparse.csm_indices(svar),

--- a/theano/sparse/tests/test_basic.py
+++ b/theano/sparse/tests/test_basic.py
@@ -325,7 +325,7 @@ class SparseInferShapeTester(utt.InferShapeTester):
                [sp.csr_matrix(random_lil((10, 40),
                               config.floatX, 3)),
                 numpy.random.randn(10, 40).astype(config.floatX)],
-                AddSD)
+                (AddSD, sparse.opt.AddSD_ccode))

    def test_mul_ss(self):
        x = SparseType('csr', dtype=config.floatX)()
@@ -536,158 +536,115 @@ class T_AddMul(unittest.TestCase):
    def _testSS(self, op, array1=numpy.array([[1., 0], [3, 0], [0, 6]]),
                array2=numpy.asarray([[0, 2.], [0, 4], [5, 0]])):
        for mtype in _mtypes:
-            a = mtype(array1)
-            aR = as_sparse_variable(a)
-            self.assertFalse(aR.data is a)
-            self.assertTrue(_is_sparse(a))
-            self.assertTrue(_is_sparse_variable(aR))
-
-            b = mtype(array2)
-            bR = as_sparse_variable(b)
-            self.assertFalse(bR.data is b)
-            self.assertTrue(_is_sparse(b))
-            self.assertTrue(_is_sparse_variable(bR))
-
-            apb = op(aR, bR)
-            self.assertTrue(_is_sparse_variable(apb))
-
-            self.assertTrue(apb.type.dtype == aR.type.dtype, apb.type.dtype)
-            self.assertTrue(apb.type.dtype == bR.type.dtype, apb.type.dtype)
-            self.assertTrue(apb.type.format == aR.type.format, apb.type.format)
-            self.assertTrue(apb.type.format == bR.type.format, apb.type.format)
-
-            val = eval_outputs([apb])
-            self.assertTrue(val.shape == (3, 2))
-            if op is add:
-                self.assertTrue(numpy.all(val.todense() == (array1 + array2)))
-                verify_grad_sparse(op, [a, b], structured=False)
-            elif op is mul:
-                self.assertTrue(numpy.all(val.todense()
-                                          == (array1 * array2)))
-                verify_grad_sparse(op, [a, b], structured=False)
+            for dtype1, dtype2 in [('float64', 'int8'),
+                                   ('int8', 'float64'),
+                               ]:
+                a = mtype(array1).astype(dtype1)
+                aR = as_sparse_variable(a)
+                self.assertFalse(aR.data is a)
+                self.assertTrue(_is_sparse(a))
+                self.assertTrue(_is_sparse_variable(aR))

-    def _testSD(self, op, array1=numpy.array([[1., 0], [3, 0], [0, 6]]),
-                array2=numpy.asarray([[0, 2.], [0, 4], [5, 0]])):
-        for mtype in _mtypes:
-            for a in [numpy.array(array1), tensor.as_tensor_variable(array1)]:
-                b = mtype(array2)
+                b = mtype(array2).astype(dtype2)
                bR = as_sparse_variable(b)
-                self.assertFalse(bR.data is b)  # constants are copied
+                self.assertFalse(bR.data is b)
                self.assertTrue(_is_sparse(b))
                self.assertTrue(_is_sparse_variable(bR))

-                apb = op(a, bR)
+                apb = op(aR, bR)
+                self.assertTrue(_is_sparse_variable(apb))

-                self.assertTrue(apb.type.dtype == a.dtype, apb.type.dtype)
-                self.assertTrue(apb.type.dtype == bR.type.dtype, apb.type.dtype)
+                self.assertTrue(apb.type.format == aR.type.format, apb.type.format)
+                self.assertTrue(apb.type.format == bR.type.format, apb.type.format)

                val = eval_outputs([apb])
                self.assertTrue(val.shape == (3, 2))
                if op is add:
-                    self.assertTrue(_is_dense_variable(apb))
-                    self.assertTrue(numpy.all(val == (array1 + b)))
-                    ans = numpy.array([[1., 2], [3, 4], [5, 6]])
-                    self.assertTrue(numpy.all(val == ans))
+                    self.assertTrue(numpy.all(val.todense() == (array1 + array2)))
+                    if dtype1.startswith('float') and dtype2.startswith('float'):
+                        verify_grad_sparse(op, [a, b], structured=False)
                elif op is mul:
-                    self.assertTrue(_is_sparse_variable(apb))
-                    self.assertTrue(numpy.all(val.todense() == (b.multiply(array1))))
-                    self.assertTrue(numpy.all(val.todense() == numpy.array(
-                        [[1, 0], [9, 0], [0, 36]])))
+                    self.assertTrue(numpy.all(val.todense()
+                                              == (array1 * array2)))
+                    if dtype1.startswith('float') and dtype2.startswith('float'):
+                        verify_grad_sparse(op, [a, b], structured=False)

-    def _testDS(self, op, array1=numpy.array([[1., 0], [3, 0], [0, 6]]),
+    def _testSD(self, op, array1=numpy.array([[1., 0], [3, 0], [0, 6]]),
                array2=numpy.asarray([[0, 2.], [0, 4], [5, 0]])):
        for mtype in _mtypes:
-            for b in [numpy.asarray(array2), tensor.as_tensor_variable(array2)]:
-                a = mtype(array1)
-                aR = as_sparse_variable(a)
-                self.assertFalse(aR.data is a)
-                self.assertTrue(_is_sparse(a))
-                self.assertTrue(_is_sparse_variable(aR))
+            for a in [numpy.array(array1), tensor.as_tensor_variable(array1),
+                      theano.shared(array1)]:
+                for dtype1, dtype2 in [('float64', 'int8'),
+                                       ('int8', 'float64'),
+                                   ]:
+                    a = a.astype(dtype1)
+                    b = mtype(array2).astype(dtype2)
+                    bR = as_sparse_variable(b)
+                    self.assertFalse(bR.data is b)  # constants are copied
+                    self.assertTrue(_is_sparse(b))
+                    self.assertTrue(_is_sparse_variable(bR))
+
+                    apb = op(a, bR)
+
+                    val = eval_outputs([apb])
+                    self.assertTrue(val.shape == (3, 2))
+                    if op is add:
+                        self.assertTrue(_is_dense_variable(apb))
+                        self.assertTrue(numpy.all(val == (array1 + b)))
+                        ans = numpy.array([[1., 2], [3, 4], [5, 6]])
+                        self.assertTrue(numpy.all(val == ans))
+                        if isinstance(a, theano.Constant):
+                            a = a.data
+                        if dtype1.startswith('float') and dtype2.startswith('float'):
+                            verify_grad_sparse(op, [a, b], structured=True)
+                    elif op is mul:
+                        self.assertTrue(_is_sparse_variable(apb))
+                        self.assertTrue(numpy.all(val.todense() == (b.multiply(array1))))
+                        self.assertTrue(numpy.all(val.todense() == numpy.array(
+                            [[1, 0], [9, 0], [0, 36]])))
+                        if isinstance(a, theano.Constant):
+                            a = a.data
+                        if dtype1.startswith('float') and dtype2.startswith('float'):
+                            verify_grad_sparse(op, [a, b], structured=False)

-                apb = op(aR, b)
-
-                self.assertTrue(apb.type.dtype == aR.type.dtype, apb.type.dtype)
-                self.assertTrue(apb.type.dtype == b.dtype, apb.type.dtype)
-
-                val = eval_outputs([apb])
-                self.assertTrue(val.shape == (3, 2))
-                if op is add:
-                    self.assertTrue(_is_dense_variable(apb))
-                    self.assertTrue(numpy.all(val == (a + array2)))
-                    ans = numpy.array([[1., 2], [3, 4], [5, 6]])
-                    self.assertTrue(numpy.all(val == ans))
-                elif op is mul:
-                    self.assertTrue(_is_sparse_variable(apb))
-                    ans = numpy.array([[1, 0], [9, 0], [0, 36]])
-                    self.assertTrue(numpy.all(val.todense() == (a.multiply(array2))))
-                    self.assertTrue(numpy.all(val.todense() == ans))
-
-    def test_upcast(self):
-        array1 = numpy.array([[1, 0], [3, 0], [0, 6]], dtype='float32')
-        array2 = numpy.array([[1, 0], [3, 0], [0, 6]], dtype='int32')
-        array3 = numpy.array([[1, 0], [3, 0], [0, 6]], dtype='int8')
-
-        # AddSS and MulSS
-        for mtype in _mtypes:
-            a = mtype(array1)
-            aR = as_sparse_variable(a)
-            b = mtype(array2)
-            bR = as_sparse_variable(b)
-            c = mtype(array3)
-            cR = as_sparse_variable(c)
-
-            # Ops that do not upcast
-            self.assertRaises(NotImplementedError, add, aR, bR)
-            self.assertRaises(NotImplementedError, add, bR, aR)
-            self.assertRaises(NotImplementedError, add, bR, cR)
-            self.assertRaises(NotImplementedError, add, cR, bR)
-            self.assertRaises(NotImplementedError, add, aR, cR)
-            self.assertRaises(NotImplementedError, add, cR, aR)
-
-            self.assertRaises(NotImplementedError, mul, aR, bR)
-            self.assertRaises(NotImplementedError, mul, bR, aR)
-            self.assertRaises(NotImplementedError, mul, bR, cR)
-            self.assertRaises(NotImplementedError, mul, cR, bR)
-            self.assertRaises(NotImplementedError, mul, aR, cR)
-            self.assertRaises(NotImplementedError, mul, cR, aR)
-
-        # AddSD and MulSD
+    def _testDS(self, op, array1=numpy.array([[1., 0], [3, 0], [0, 6]]),
+                array2=numpy.asarray([[0, 2.], [0, 4], [5, 0]])):
        for mtype in _mtypes:
-            a = mtype(array1)
-            a_sv = as_sparse_variable(a)
-            a_dv = tensor.as_tensor_variable(array1)
-            b = mtype(array2)
-            b_sv = as_sparse_variable(b)
-            b_dv = tensor.as_tensor_variable(array2)
-            c = mtype(array3)
-            c_sv = as_sparse_variable(c)
-            c_dv = tensor.as_tensor_variable(array3)
-
-            # add does not upcast
-            self.assertRaises(NotImplementedError, add, a_sv, b_dv)
-            self.assertRaises(NotImplementedError, add, b_sv, a_dv)
-            self.assertRaises(NotImplementedError, add, b_sv, c_dv)
-            self.assertRaises(NotImplementedError, add, c_sv, b_dv)
-            self.assertRaises(NotImplementedError, add, a_sv, c_dv)
-            self.assertRaises(NotImplementedError, add, c_sv, a_dv)
-
-            # mul may upcast the dense input if needed
-            if (config.cast_policy in ('custom', 'numpy') or
-                (config.cast_policy == 'numpy+floatX' and
-                 config.floatX == 'float64')):
-                # The result should be a float64 (not implemented).
-                self.assertRaises(NotImplementedError, mul, a_sv, b_dv)
-            elif (config.cast_policy == 'numpy+floatX' and
-                  config.floatX == 'float32'):
-                # The result should be a float32.
-                assert mul(a_sv, b_dv).dtype == 'float32'
-            else:
-                raise NotImplementedError()
-            self.assertRaises(NotImplementedError, mul, b_sv, a_dv)
-            assert mul(b_sv, c_dv).dtype == 'int32'
-            self.assertRaises(NotImplementedError, mul, c_sv, b_dv)
-            assert mul(a_sv, c_dv).dtype == 'float32'
-            self.assertRaises(NotImplementedError, mul, c_sv, a_dv)
+            for b in [numpy.asarray(array2),
+                      tensor.as_tensor_variable(array2),
+                      theano.shared(array2)]:
+                for dtype1, dtype2 in [('float64', 'int8'),
+                                       ('int8', 'float64'),
+                                   ]:
+                    a = mtype(array1).astype(dtype1)
+                    aR = as_sparse_variable(a)
+                    self.assertFalse(aR.data is a)
+                    self.assertTrue(_is_sparse(a))
+                    self.assertTrue(_is_sparse_variable(aR))
+                    b = b.astype(dtype2)
+
+                    apb = op(aR, b)
+
+                    val = eval_outputs([apb])
+                    self.assertTrue(val.shape == (3, 2))
+                    if op is add:
+                        self.assertTrue(_is_dense_variable(apb))
+                        self.assertTrue(numpy.all(val == (a + array2)))
+                        ans = numpy.array([[1., 2], [3, 4], [5, 6]])
+                        self.assertTrue(numpy.all(val == ans))
+                        if isinstance(b, theano.Constant):
+                            b = b.data
+                        if dtype1.startswith('float') and dtype2.startswith('float'):
+                            verify_grad_sparse(op, [a, b], structured=True)
+                    elif op is mul:
+                        self.assertTrue(_is_sparse_variable(apb))
+                        ans = numpy.array([[1, 0], [9, 0], [0, 36]])
+                        self.assertTrue(numpy.all(val.todense() == (a.multiply(array2))))
+                        self.assertTrue(numpy.all(val.todense() == ans))
+                        if isinstance(b, theano.Constant):
+                            b = b.data
+                        if dtype1.startswith('float') and dtype2.startswith('float'):
+                            verify_grad_sparse(op, [a, b], structured=False)


 class T_conversion(unittest.TestCase):