Make AddSD_ccode work with mixed input dtypes and add tests for it.

c4475173 · Frederic · ae398862 · c4475173 · c4475173 · c4475173
--- a/theano/sparse/basic.py
+++ b/theano/sparse/basic.py
@@ -1755,19 +1755,14 @@ class AddSD(gof.op.Op):
    def make_node(self, x, y):
        x, y = as_sparse_variable(x), tensor.as_tensor_variable(y)
+        out_dtype = scalar.upcast(x.type.dtype, y.type.dtype)
-        if x.type.dtype != y.type.dtype:
-            raise NotImplementedError(
-                "AddSD support inputs with the same dtype only."
-                " You passed %s and %s inputs dtype." % (x.type.dtype,
-                                                         y.type.dtype))
        # The magic number two here arises because L{scipy.sparse}
        # objects must be matrices (have dimension 2)
        assert y.type.ndim == 2
        return gof.Apply(self,
                         [x, y],
-                         [tensor.TensorType(dtype=y.type.dtype,
+                         [tensor.TensorType(dtype=out_dtype,
                                            broadcastable=y.type.broadcastable
                                           ).make_variable()])
@@ -1975,23 +1970,25 @@ class MulSD(gof.op.Op):
        # upcast the tensor. Is the cast of sparse done implemented?
        dtype = scalar.upcast(x.type.dtype, y.type.dtype)
-        if y.type.dtype != dtype:
-            y = tensor.cast(y, dtype)
-        if x.type.dtype != y.type.dtype:
-            raise NotImplementedError(
-                "MulSD not implemented for different input dtypes. "
-                "Got %s and %s." % (x.type.dtype, y.type.dtype))
        # The magic number two here arises because L{scipy.sparse}
        # objects must be matrices (have dimension 2)
        # Broadcasting of the sparse matrix is not supported.
-        assert y.type.ndim <= 2
+        # We support nd == 0 used by grad of SpSum()
-        return gof.Apply(self, [x, y], [x.type()])
+        assert y.type.ndim in [0, 2]
+        out = SparseType(dtype=dtype,
+                         format=x.type.format)()
+        return gof.Apply(self, [x, y], [out])
    def perform(self, node, (x, y), (out, )):
        assert _is_sparse(x) and _is_dense(y)
        if len(y.shape) == 0:
-            out[0] = x.copy()
+            out_dtype = node.outputs[0].dtype
+            if x.dtype == out_dtype:
+                z = x.copy()
+            else:
+                z = x.astype(out_dtype)
+            out[0] = z
            out[0].data *= y
        elif len(y.shape) == 1:
            raise NotImplementedError()  # RowScale / ColScale
@@ -2001,12 +1998,16 @@ class MulSD(gof.op.Op):
            # TODO: change runtime from O(M*N) to O(nonzeros)
            M, N = x.shape
            assert x.shape == y.shape
+            out_dtype = node.outputs[0].dtype
            if x.format == 'csc':
                x_data = x.data
                indices = x.indices
                indptr = x.indptr
+                if x.dtype == out_dtype:
                    z = x.copy()
+                else:
+                    z = x.astype(out_dtype)
                z_data = z.data
                for j in xrange(0, N):
@@ -2018,7 +2019,10 @@ class MulSD(gof.op.Op):
                x_data = x.data
                indices = x.indices
                indptr = x.indptr
+                if x.dtype == out_dtype:
                    z = x.copy()
+                else:
+                    z = x.astype(out_dtype)
                z_data = z.data
                for i in xrange(0, M):

--- a/theano/sparse/opt.py
+++ b/theano/sparse/opt.py
@@ -87,12 +87,9 @@ class AddSD_ccode(gof.op.Op):
    def make_node(self, x, y):
        x, y = sparse.as_sparse_variable(x), tensor.as_tensor_variable(y)
+        out_dtype = scalar.upcast(x.type.dtype, y.type.dtype)
-        if x.type.dtype != y.type.dtype:
+        if self.inplace:
-            raise NotImplementedError(
+            assert out_dtype == y.dtype
-                "AddSD support inputs with the same dtype only."
-                " You passed %s and %s inputs dtype." % (x.type.dtype,
-                                                         y.type.dtype))
        indices, indptr, data = csm_indices(x), csm_indptr(x), csm_data(x)
        # We either use CSC or CSR depending on the format of input
@@ -100,7 +97,7 @@ class AddSD_ccode(gof.op.Op):
        # The magic number two here arises because L{scipy.sparse}
        # objects must be matrices (have dimension 2)
        assert y.type.ndim == 2
-        out = tensor.TensorType(dtype=y.type.dtype,
+        out = tensor.TensorType(dtype=out_dtype,
                                broadcastable=y.type.broadcastable)()
        return gof.Apply(self,
                         [data, indices, indptr, y],
@@ -109,10 +106,15 @@ class AddSD_ccode(gof.op.Op):
    def c_code(self, node, name, (_data, _indices, _indptr, y), (z, ), sub):
        inplace = int(self.inplace)
        format = {'csc': 0, 'csr': 1}[self.format]
+        out_typenum = node.outputs[0].type.dtype_specs()[2]
        code = """
                Py_XDECREF(%(z)s);
                if (!%(inplace)s){
+                    if(PyArray_TYPE(%(y)s) != %(out_typenum)s){
+                        %(z)s = (PyArrayObject *) PyArray_FromArray(%(y)s,  PyArray_DescrFromType(%(out_typenum)s), 0);
+                    }else{
                        %(z)s = (PyArrayObject *) PyArray_NewCopy(%(y)s, NPY_CORDER);
+                    }
                }else{
                  %(z)s = %(y)s;
                  Py_XINCREF(%(z)s);
@@ -162,6 +164,9 @@ def local_inplace_addsd_ccode(node):
    Optimization to insert inplace versions of AddSD.
    """
    if isinstance(node.op, sparse.AddSD) and theano.config.cxx:
+        out_dtype = scalar.upcast(*node.inputs)
+        if out_dtype != node.inputs[1].dtype:
+            return
        new_node = AddSD_ccode(format=node.inputs[0].type.format,
                               inplace=True)(*node.inputs)
        return [new_node]
@@ -178,7 +183,6 @@ def local_addsd_ccode(node):
    Convert AddSD to faster AddSD_ccode.
    """
    if isinstance(node.op, sparse.AddSD) and theano.config.cxx:
-        #import pdb;pdb.set_trace()
        new_node = AddSD_ccode(format=node.inputs[0].type.format)(*node.inputs)
        return [new_node]
    return False
@@ -1254,6 +1258,9 @@ def local_mul_s_d(node):
            mul_s_d_csx = mul_s_d_csr
        else:
            raise NotImplemented()
+        if x.dtype != y.dtype:
            # mul_s_d_csx doesn't support inputs with different dtypes
+            return
        c_data = mul_s_d_csx(sparse.csm_data(svar),
                             sparse.csm_indices(svar),

--- a/theano/sparse/tests/test_basic.py
+++ b/theano/sparse/tests/test_basic.py
@@ -325,7 +325,7 @@ class SparseInferShapeTester(utt.InferShapeTester):
                [sp.csr_matrix(random_lil((10, 40),
                               config.floatX, 3)),
                 numpy.random.randn(10, 40).astype(config.floatX)],
-                AddSD)
+                (AddSD, sparse.opt.AddSD_ccode))
    def test_mul_ss(self):
        x = SparseType('csr', dtype=config.floatX)()
@@ -572,8 +572,13 @@ class T_AddMul(unittest.TestCase):
    def _testSD(self, op, array1=numpy.array([[1., 0], [3, 0], [0, 6]]),
                array2=numpy.asarray([[0, 2.], [0, 4], [5, 0]])):
        for mtype in _mtypes:
-            for a in [numpy.array(array1), tensor.as_tensor_variable(array1)]:
+            for a in [numpy.array(array1), tensor.as_tensor_variable(array1),
-                b = mtype(array2)
+                      theano.shared(array1)]:
+                for dtype1, dtype2 in [('float64', 'int8'),
+                                       ('int8', 'float64'),
+                                   ]:
+                    a = a.astype(dtype1)
+                    b = mtype(array2).astype(dtype2)
                    bR = as_sparse_variable(b)
                    self.assertFalse(bR.data is b)  # constants are copied
                    self.assertTrue(_is_sparse(b))
@@ -581,9 +586,6 @@ class T_AddMul(unittest.TestCase):
                    apb = op(a, bR)
-                self.assertTrue(apb.type.dtype == a.dtype, apb.type.dtype)
-                self.assertTrue(apb.type.dtype == bR.type.dtype, apb.type.dtype)
                    val = eval_outputs([apb])
                    self.assertTrue(val.shape == (3, 2))
                    if op is add:
@@ -593,6 +595,7 @@ class T_AddMul(unittest.TestCase):
                        self.assertTrue(numpy.all(val == ans))
                        if isinstance(a, theano.Constant):
                            a = a.data
+                        if dtype1.startswith('float') and dtype2.startswith('float'):
                            verify_grad_sparse(op, [a, b], structured=True)
                    elif op is mul:
                        self.assertTrue(_is_sparse_variable(apb))
@@ -601,23 +604,27 @@ class T_AddMul(unittest.TestCase):
                            [[1, 0], [9, 0], [0, 36]])))
                        if isinstance(a, theano.Constant):
                            a = a.data
+                        if dtype1.startswith('float') and dtype2.startswith('float'):
                            verify_grad_sparse(op, [a, b], structured=False)
    def _testDS(self, op, array1=numpy.array([[1., 0], [3, 0], [0, 6]]),
                array2=numpy.asarray([[0, 2.], [0, 4], [5, 0]])):
        for mtype in _mtypes:
-            for b in [numpy.asarray(array2), tensor.as_tensor_variable(array2)]:
+            for b in [numpy.asarray(array2),
-                a = mtype(array1)
+                      tensor.as_tensor_variable(array2),
+                      theano.shared(array2)]:
+                for dtype1, dtype2 in [('float64', 'int8'),
+                                       ('int8', 'float64'),
+                                   ]:
+                    a = mtype(array1).astype(dtype1)
                    aR = as_sparse_variable(a)
                    self.assertFalse(aR.data is a)
                    self.assertTrue(_is_sparse(a))
                    self.assertTrue(_is_sparse_variable(aR))
+                    b = b.astype(dtype2)
                    apb = op(aR, b)
-                self.assertTrue(apb.type.dtype == aR.type.dtype, apb.type.dtype)
-                self.assertTrue(apb.type.dtype == b.dtype, apb.type.dtype)
                    val = eval_outputs([apb])
                    self.assertTrue(val.shape == (3, 2))
                    if op is add:
@@ -627,6 +634,7 @@ class T_AddMul(unittest.TestCase):
                        self.assertTrue(numpy.all(val == ans))
                        if isinstance(b, theano.Constant):
                            b = b.data
+                        if dtype1.startswith('float') and dtype2.startswith('float'):
                            verify_grad_sparse(op, [a, b], structured=True)
                    elif op is mul:
                        self.assertTrue(_is_sparse_variable(apb))
@@ -635,53 +643,9 @@ class T_AddMul(unittest.TestCase):
                        self.assertTrue(numpy.all(val.todense() == ans))
                        if isinstance(b, theano.Constant):
                            b = b.data
+                        if dtype1.startswith('float') and dtype2.startswith('float'):
                            verify_grad_sparse(op, [a, b], structured=False)
-    def test_upcast(self):
-        array1 = numpy.array([[1, 0], [3, 0], [0, 6]], dtype='float32')
-        array2 = numpy.array([[1, 0], [3, 0], [0, 6]], dtype='int32')
-        array3 = numpy.array([[1, 0], [3, 0], [0, 6]], dtype='int8')
-        # AddSS and MulSS upcated tested in _testSS
-        # AddSD and MulSD
-        for mtype in _mtypes:
-            a = mtype(array1)
-            a_sv = as_sparse_variable(a)
-            a_dv = tensor.as_tensor_variable(array1)
-            b = mtype(array2)
-            b_sv = as_sparse_variable(b)
-            b_dv = tensor.as_tensor_variable(array2)
-            c = mtype(array3)
-            c_sv = as_sparse_variable(c)
-            c_dv = tensor.as_tensor_variable(array3)
-            # add does not upcast
-            self.assertRaises(NotImplementedError, add, a_sv, b_dv)
-            self.assertRaises(NotImplementedError, add, b_sv, a_dv)
-            self.assertRaises(NotImplementedError, add, b_sv, c_dv)
-            self.assertRaises(NotImplementedError, add, c_sv, b_dv)
-            self.assertRaises(NotImplementedError, add, a_sv, c_dv)
-            self.assertRaises(NotImplementedError, add, c_sv, a_dv)
-            # mul may upcast the dense input if needed
-            if (config.cast_policy in ('custom', 'numpy') or
-                (config.cast_policy == 'numpy+floatX' and
-                 config.floatX == 'float64')):
-                # The result should be a float64 (not implemented).
-                self.assertRaises(NotImplementedError, mul, a_sv, b_dv)
-            elif (config.cast_policy == 'numpy+floatX' and
-                  config.floatX == 'float32'):
-                # The result should be a float32.
-                assert mul(a_sv, b_dv).dtype == 'float32'
-            else:
-                raise NotImplementedError()
-            self.assertRaises(NotImplementedError, mul, b_sv, a_dv)
-            assert mul(b_sv, c_dv).dtype == 'int32'
-            self.assertRaises(NotImplementedError, mul, c_sv, b_dv)
-            assert mul(a_sv, c_dv).dtype == 'float32'
-            self.assertRaises(NotImplementedError, mul, c_sv, a_dv)
 class T_conversion(unittest.TestCase):
    def setUp(self):