Commit 3098fe8a authored by goodfeli

Merge pull request #157 from nouiz/fix_sparse_dot

Fix sparse dot
......@@ -136,7 +136,9 @@ class BadCLinkerOutput(DebugModeError):
sio = StringIO()
print >> sio, "BadCLinkerOutput"
print >> sio, " variable:", self.r
print >> sio, " Type :", self.r.type
print >> sio, " Outputs Type :", self.r.type
print >> sio, " Inputs Type:", [i.type for i in self.r.owner.inputs]
print >> sio, " Apply :", self.r.owner
print >> sio, " val_py :", self.val_py
print >> sio, " val_c :", self.val_c
print >> sio, " op :", self.offending_op()
......
......@@ -327,6 +327,16 @@ class SparseType(gof.Type):
return scipy.sparse.issparse(a) and (a.format == self.format)
# for more dtypes, call SparseType(format, dtype)
def matrix(format, name=None, dtype=None):
if dtype is None:
dtype = config.floatX
type = SparseType(format=format, dtype=dtype)
return type(name)
def csc_matrix(name=None, dtype=None):
return matrix('csc', name, dtype)
def csr_matrix(name=None, dtype=None):
return matrix('csr', name, dtype)
# for more dtypes, call SparseType(format, dtype)
csc_matrix = SparseType(format='csc', dtype=config.floatX)
csr_matrix = SparseType(format='csr', dtype=config.floatX)
csc_dmatrix = SparseType(format='csc', dtype='float64')
......@@ -1505,7 +1515,7 @@ class Dot(gof.op.Op):
rval = x * y
if x_is_sparse and y_is_sparse:
rval = rval.todense()
rval = rval.toarray()
out[0] = rval
......@@ -1553,6 +1563,8 @@ class Usmm(gof.op.Op):
x or y are sparse matrix(the other can be sparse or dense)
z is a dense matrix
alpha is a scalar
:note: We don't implement infer_shape, as this op is only inserted by an optimization.
"""
def __eq__(self, other):
return type(self) == type(other)
......@@ -1566,19 +1578,6 @@ class Usmm(gof.op.Op):
def __str__(self):
return 'Usmm{no_inplace}'
def infer_shape(self, node, shapes):
xshp, yshp = shapes
x, y = node.inputs
if x.ndim == 2 and y.ndim == 2:
return [(xshp[0], yshp[1])]
if x.ndim == 1 and y.ndim == 2:
return [(yshp[1],)]
if x.ndim == 2 and y.ndim == 1:
return [(xshp[0],)]
if x.ndim == 1 and y.ndim == 1:
return [()]
raise NotImplementedError()
def make_node(self, alpha, x, y, z):
if not _is_sparse_variable(x) and not _is_sparse_variable(y):
# If x and y are tensor, we don't want to use this class
......@@ -1634,6 +1633,8 @@ class UsmmCscDense(gof.Op):
x are sparse matrix
y, z is a dense matrix
alpha is a scalar
:note: We don't implement infer_shape, as this op is only inserted by an optimization.
"""
def __init__(self, inplace):
self.inplace = inplace
......@@ -1652,19 +1653,6 @@ class UsmmCscDense(gof.Op):
def __hash__(self):
return hash(type(self)) ^ self.inplace
def infer_shape(self, node, shapes):
xshp, yshp = shapes
x, y = node.inputs
if x.ndim == 2 and y.ndim == 2:
return [(xshp[0], yshp[1])]
if x.ndim == 1 and y.ndim == 2:
return [(yshp[1],)]
if x.ndim == 2 and y.ndim == 1:
return [(xshp[0],)]
if x.ndim == 1 and y.ndim == 1:
return [()]
raise NotImplementedError()
def make_node(self, alpha, x_val, x_ind, x_ptr, x_nrows, y, z):
alpha = tensor.as_tensor_variable(alpha)
x_val = tensor.as_tensor_variable(x_val)
......@@ -1884,6 +1872,7 @@ register_specialize(local_usmm, name="local_usmm")
@gof.local_optimizer([usmm])
def local_usmm_csx(node):
""" usmm -> usmm_csc_dense """
if node.op == usmm:
alpha, x, y, z = node.inputs
......@@ -1896,6 +1885,8 @@ def local_usmm_csx(node):
x_nsparse = x_shape[0]
dtype_out = scalar.upcast(alpha.type.dtype, x.type.dtype,
y.type.dtype, z.type.dtype)
if dtype_out not in ('float32', 'float64'):
return False
# Sparse cast is not implemented.
if y.type.dtype != dtype_out:
return False
......
......@@ -7,21 +7,23 @@ try:
import scipy.sparse as sp
import scipy.sparse
except ImportError:
pass#the variable enable_sparse will be used to disable the test file.
pass # The variable enable_sparse will be used to disable the test file.
import theano
from theano import compile, config
from theano.sparse import enable_sparse
from theano.gof.python25 import product
from theano.gof.python25 import all, product
if enable_sparse == False:
raise SkipTest('Optional package sparse disabled')
from theano.sparse.basic import _is_dense, _is_sparse, _is_dense_variable, _is_sparse_variable
from theano.sparse.basic import _mtypes
from theano.sparse import as_sparse_variable, CSC, CSR, CSM, CSMProperties, SparseType, StructuredDotCSC
from theano.sparse.basic import _is_dense, _is_sparse, _mtypes
from theano.sparse.basic import _is_dense_variable, _is_sparse_variable
from theano.sparse import as_sparse_variable, CSC, CSR, CSM, CSMProperties
from theano.sparse import SparseType, StructuredDotCSC
from theano.sparse import add, mul, structured_dot, transpose
from theano.sparse import csc_from_dense, csr_from_dense, dense_from_sparse
from theano.sparse import Dot, Usmm, UsmmCscDense
from theano.tests import unittest_tools as utt
from theano import tensor
......@@ -40,30 +42,32 @@ def as_sparse_format(data, format):
def eval_outputs(outputs):
return compile.function([], outputs)()[0]
def random_lil(shape, dtype, nnz):
rval = sp.lil_matrix(shape, dtype=dtype)
huge = 2**30
huge = 2 ** 30
for k in range(nnz):
# set non-zeros in random locations (row x, col y)
idx = numpy.random.random_integers(huge,size=len(shape)) % shape
idx = numpy.random.random_integers(huge, size=len(shape)) % shape
value = numpy.random.rand()
# if dtype is an *int* type, the random value would otherwise always be zero!
if "int" in dtype:
value = int(value*100)
value = int(value * 100)
rval.__setitem__(
idx,
value)
return rval
class T_transpose(unittest.TestCase):
def setUp(self):
utt.seed_rng()
def test_transpose_csc(self):
sp = scipy.sparse.csc_matrix(scipy.sparse.eye(5,3))
sp = scipy.sparse.csc_matrix(scipy.sparse.eye(5, 3))
a = as_sparse_variable(sp)
self.assertFalse(a.data is sp)
self.assertTrue(a.data.shape == (5,3))
self.assertTrue(a.data.shape == (5, 3))
self.assertTrue(a.type.dtype == 'float64', a.type.dtype)
self.assertTrue(a.type.format == 'csc', a.type.format)
ta = transpose(a)
......@@ -71,10 +75,11 @@ class T_transpose(unittest.TestCase):
self.assertTrue(ta.type.format == 'csr', ta.type.format)
vta = eval_outputs([ta])
self.assertTrue(vta.shape == (3,5))
self.assertTrue(vta.shape == (3, 5))
def test_transpose_csr(self):
a = as_sparse_variable(scipy.sparse.csr_matrix(scipy.sparse.eye(5,3)))
self.assertTrue(a.data.shape == (5,3))
a = as_sparse_variable(scipy.sparse.csr_matrix(scipy.sparse.eye(5, 3)))
self.assertTrue(a.data.shape == (5, 3))
self.assertTrue(a.type.dtype == 'float64')
self.assertTrue(a.type.format == 'csr')
ta = transpose(a)
......@@ -82,13 +87,16 @@ class T_transpose(unittest.TestCase):
self.assertTrue(ta.type.format == 'csc', ta.type.format)
vta = eval_outputs([ta])
self.assertTrue(vta.shape == (3,5))
self.assertTrue(vta.shape == (3, 5))
class T_AddMul(unittest.TestCase):
def testAddSS(self):
self._testSS(add)
def testAddSD(self):
self._testSD(add)
def testAddDS(self):
self._testDS(add)
......@@ -96,17 +104,19 @@ class T_AddMul(unittest.TestCase):
self._testSS(mul,
numpy.array([[1., 0], [3, 0], [0, 6]]),
numpy.array([[1., 0], [3, 0], [0, 6]]))
def testMulSD(self):
self._testSD(mul,
numpy.array([[1., 0], [3, 0], [0, 6]]),
numpy.array([[1., 0], [3, 0], [0, 6]]))
def testMulDS(self):
self._testDS(mul,
numpy.array([[1., 0], [3, 0], [0, 6]]),
numpy.array([[1., 0], [3, 0], [0, 6]]))
def _testSS(self, op, array1 = numpy.array([[1., 0], [3, 0], [0, 6]]),
array2 = numpy.asarray([[0, 2.], [0, 4], [5, 0]])):
def _testSS(self, op, array1=numpy.array([[1., 0], [3, 0], [0, 6]]),
array2=numpy.asarray([[0, 2.], [0, 4], [5, 0]])):
for mtype in _mtypes:
a = mtype(array1)
aR = as_sparse_variable(a)
......@@ -129,26 +139,29 @@ class T_AddMul(unittest.TestCase):
self.assertTrue(apb.type.format == bR.type.format, apb.type.format)
val = eval_outputs([apb])
self.assertTrue(val.shape == (3,2))
self.assertTrue(val.shape == (3, 2))
if op is add:
self.assertTrue(numpy.all(val.todense() == (a + b).todense()))
self.assertTrue(numpy.all(val.todense() == numpy.array([[1., 2], [3, 4], [5, 6]])))
ans = numpy.array([[1., 2], [3, 4], [5, 6]])
self.assertTrue(numpy.all(val.todense() == ans))
elif op is mul:
self.assertTrue(numpy.all(val.todense() == (a.multiply(b)).todense()))
self.assertTrue(numpy.all(val.todense() == numpy.array([[1, 0], [9, 0], [0, 36]])))
self.assertTrue(numpy.all(val.todense()
== (a.multiply(b)).todense()))
ans = numpy.array([[1, 0], [9, 0], [0, 36]])
self.assertTrue(numpy.all(val.todense() == ans))
def _testSD(self, op, array1 = numpy.array([[1., 0], [3, 0], [0, 6]]),
array2 = numpy.asarray([[0, 2.], [0, 4], [5, 0]])):
def _testSD(self, op, array1=numpy.array([[1., 0], [3, 0], [0, 6]]),
array2=numpy.asarray([[0, 2.], [0, 4], [5, 0]])):
for mtype in _mtypes:
a = numpy.array(array1)
aR = tensor.as_tensor_variable(a)
self.assertFalse(aR.data is a) #constants are copied
self.assertFalse(aR.data is a) # constants are copied
self.assertTrue(_is_dense(a))
self.assertTrue(_is_dense_variable(aR))
b = mtype(array2)
bR = as_sparse_variable(b)
self.assertFalse(bR.data is b) #constants are copied
self.assertFalse(bR.data is b) # constants are copied
self.assertTrue(_is_sparse(b))
self.assertTrue(_is_sparse_variable(bR))
......@@ -162,15 +175,16 @@ class T_AddMul(unittest.TestCase):
if op is add:
self.assertTrue(_is_dense_variable(apb))
self.assertTrue(numpy.all(val == (a + b)))
self.assertTrue(numpy.all(val == numpy.array([[1., 2], [3, 4], [5, 6]])))
ans = numpy.array([[1., 2], [3, 4], [5, 6]])
self.assertTrue(numpy.all(val == ans))
elif op is mul:
self.assertTrue(_is_sparse_variable(apb))
self.assertTrue(numpy.all(val.todense() == (b.multiply(a))))
self.assertTrue(numpy.all(val.todense() == numpy.array([[1, 0],
[9, 0], [0, 36]])))
def _testDS(self, op, array1 = numpy.array([[1., 0], [3, 0], [0, 6]]),
array2 = numpy.asarray([[0, 2.], [0, 4], [5, 0]])):
def _testDS(self, op, array1=numpy.array([[1., 0], [3, 0], [0, 6]]),
array2=numpy.asarray([[0, 2.], [0, 4], [5, 0]])):
for mtype in _mtypes:
a = mtype(array1)
aR = as_sparse_variable(a)
......@@ -194,12 +208,13 @@ class T_AddMul(unittest.TestCase):
if op is add:
self.assertTrue(_is_dense_variable(apb))
self.assertTrue(numpy.all(val == (a + b)))
self.assertTrue(numpy.all(val == numpy.array([[1., 2], [3, 4], [5, 6]])))
ans = numpy.array([[1., 2], [3, 4], [5, 6]])
self.assertTrue(numpy.all(val == ans))
elif op is mul:
self.assertTrue(_is_sparse_variable(apb))
ans = numpy.array([[1, 0], [9, 0], [0, 36]])
self.assertTrue(numpy.all(val.todense() == (a.multiply(b))))
self.assertTrue(numpy.all(val.todense() == numpy.array([[1, 0],
[9, 0], [0, 36]])))
self.assertTrue(numpy.all(val.todense() == ans))
def test_upcast(self):
array1 = numpy.array([[1, 0], [3, 0], [0, 6]], dtype='float32')
......@@ -278,7 +293,7 @@ class T_conversion(unittest.TestCase):
a = tensor.as_tensor_variable(numpy.random.rand(5))
s = csc_from_dense(a)
val = eval_outputs([s])
self.assertTrue(str(val.dtype)=='float64')
self.assertTrue(str(val.dtype) == 'float64')
self.assertTrue(val.format == 'csc')
if 0:
......@@ -286,7 +301,7 @@ class T_conversion(unittest.TestCase):
a = tensor.as_tensor_variable(numpy.random.rand(5))
s = csr_from_dense(a)
val = eval_outputs([s])
self.assertTrue(str(val.dtype)=='float64')
self.assertTrue(str(val.dtype) == 'float64')
self.assertTrue(val.format == 'csr')
if 1:
......@@ -296,25 +311,27 @@ class T_conversion(unittest.TestCase):
s = t(scipy.sparse.identity(5))
d = dense_from_sparse(s)
# s should be copied into the graph as a constant
s[0,0] = 3.0 # changes s, but not the copy
s[0, 0] = 3.0 # changes s, but not the copy
val = eval_outputs([d])
return
self.assertTrue(str(val.dtype)==s.dtype)
self.assertTrue(numpy.all(val[0] == [1,0,0,0,0]))
self.assertTrue(str(val.dtype) == s.dtype)
self.assertTrue(numpy.all(val[0] == [1, 0, 0, 0, 0]))
class test_structureddot(unittest.TestCase):
def setUp(self):
utt.seed_rng()
def test_structureddot_csc_grad(self):
#shortcut: testing csc in float32, testing csr in float64
# allocate a random sparse matrix
spmat = sp.csc_matrix(random_lil((4,3), 'float32', 3))
spmat = sp.csc_matrix(random_lil((4, 3), 'float32', 3))
mat = numpy.asarray(numpy.random.randn(3,2), 'float32')
mat = numpy.asarray(numpy.random.randn(3, 2), 'float32')
def buildgraphCSC(spdata,sym_mat):
def buildgraphCSC(spdata, sym_mat):
csc = CSC(spdata, spmat.indices[:spmat.size],
spmat.indptr, spmat.shape)
assert csc.type.dtype == 'float32'
......@@ -330,11 +347,11 @@ class test_structureddot(unittest.TestCase):
#shortcut: testing csc in float32, testing csr in float64
# allocate a random sparse matrix
spmat = sp.csr_matrix(random_lil((4,3), 'float64', 3))
spmat = sp.csr_matrix(random_lil((4, 3), 'float64', 3))
mat = numpy.asarray(numpy.random.randn(3,2), 'float64')
mat = numpy.asarray(numpy.random.randn(3, 2), 'float64')
def buildgraph(spdata,sym_mat):
def buildgraph(spdata, sym_mat):
csr = CSR(spdata, spmat.indices[:spmat.size],
spmat.indptr, spmat.shape)
assert csr.type.dtype == 'float64'
......@@ -347,28 +364,30 @@ class test_structureddot(unittest.TestCase):
def test_upcast(self):
typenames = 'float32', 'int64', 'int8', 'int32', 'int16', 'float64', 'complex64', 'complex128'
typenames = ('float32', 'int64', 'int8', 'int32',
'int16', 'float64', 'complex64', 'complex128')
for dense_dtype in typenames:
for sparse_dtype in typenames:
correct_dtype = theano.scalar.upcast(sparse_dtype, dense_dtype)
a = SparseType('csc', dtype=sparse_dtype)()
b = tensor.matrix(dtype=dense_dtype)
d = structured_dot(a,b)
d = structured_dot(a, b)
assert d.type.dtype == correct_dtype
# compile and run a function
f = theano.function([a,b],d)
f = theano.function([a, b], d)
M,N,K,nnz = (4,3,5,3)
spmat = sp.csc_matrix(random_lil((M,N), sparse_dtype, nnz))
M, N, K, nnz = (4, 3, 5, 3)
spmat = sp.csc_matrix(random_lil((M, N), sparse_dtype, nnz))
# the following madness is necessary to work around
# an intc vs. int32 bug.
# The lil makes an intc on my computer when sparse_dtype
# is int32.
spmat.dtype = numpy.dtype(sparse_dtype)
mat = numpy.asarray(numpy.random.randn(N,K)*9, dtype=dense_dtype)
print 'DTYPES', sparse_dtype,dense_dtype
mat = numpy.asarray(numpy.random.randn(N, K) * 9,
dtype=dense_dtype)
print 'DTYPES', sparse_dtype, dense_dtype
print 'sym types', a.type, b.type
print 'dtype strings', spmat.dtype, mat.dtype
print 'numpy dtype num', mat.dtype.num
......@@ -379,29 +398,32 @@ class test_structureddot(unittest.TestCase):
assert theano_result.dtype == scipy_result.dtype
assert _allclose(theano_result, scipy_result)
def test_opt_unpack(self):
#
# Test that a graph involving structured_dot(assembled_csc_matrix) is optimized to be
# just a structured_dot_csc Op and no assembly of a csc_matrix.
# Test that a graph involving
# structured_dot(assembled_csc_matrix) is optimized to be just
# a structured_dot_csc Op and no assembly of a csc_matrix.
#
# The optimization from structured_dot -> structured_dot_csc is currently disabled,
# So this test is not expected to pass
# The optimization from structured_dot -> structured_dot_csc
# is currently disabled, so this test is not expected to pass
return
#
kerns = tensor.Tensor(dtype='int64', broadcastable=[False])('kerns')
spmat = sp.lil_matrix((4,6), dtype='int64')
spmat = sp.lil_matrix((4, 6), dtype='int64')
for i in range(5):
# set non-zeros in random locations (row x, col y)
x = numpy.floor(numpy.random.rand()*spmat.shape[0])
y = numpy.floor(numpy.random.rand()*spmat.shape[1])
spmat[x,y] = numpy.random.rand()*10
x = numpy.floor(numpy.random.rand() * spmat.shape[0])
y = numpy.floor(numpy.random.rand() * spmat.shape[1])
spmat[x, y] = numpy.random.rand() * 10
spmat = sp.csc_matrix(spmat)
images = tensor.Tensor(dtype='float32', broadcastable=[False, False])('images')
images = tensor.Tensor(dtype='float32',
broadcastable=[False, False])(
'images')
cscmat = CSC(kerns, spmat.indices[:spmat.size], spmat.indptr, spmat.shape)
cscmat = CSC(kerns, spmat.indices[:spmat.size],
spmat.indptr, spmat.shape)
f = theano.function([kerns, images], structured_dot(cscmat, images.T))
sdcscpresent = False
......@@ -414,34 +436,37 @@ class test_structureddot(unittest.TestCase):
assert sdcscpresent
kernvals = numpy.array(spmat.data[:spmat.size])
#print 'kdtype', kernvals.dtype, kernvals.shape, kernvals.ndim, kernvals.dtype.num
#print 'kdtype', kernvals.dtype, kernvals.shape,
#print kernvals.ndim, kernvals.dtype.num
#print 'type of kernvals = ', kernvals.dtype
bsize = 3
imvals = 1.0 * numpy.array(numpy.arange(bsize*spmat.shape[1]).\
reshape(bsize,spmat.shape[1]), dtype='float32')
outvals = f(kernvals,imvals)
imvals = 1.0 * numpy.array(numpy.arange(bsize * spmat.shape[1]).\
reshape(bsize, spmat.shape[1]), dtype='float32')
outvals = f(kernvals, imvals)
print outvals
def test_dot_sparse_sparse(self):
#test dot for 2 input sparse matrix
sparse_dtype = 'float64'
sp_mat = {'csc':sp.csc_matrix,
'csr':sp.csr_matrix}
sp_mat = {'csc': sp.csc_matrix,
'csr': sp.csr_matrix}
for sparse_format_a in ['csc','csr']:
for sparse_format_a in ['csc', 'csr']:
for sparse_format_b in ['csc', 'csr']:
a = SparseType(sparse_format_a, dtype=sparse_dtype)()
b = SparseType(sparse_format_b, dtype=sparse_dtype)()
d = theano.dot(a,b)
f = theano.function([a,b], theano.Out(d, borrow=True))
d = theano.dot(a, b)
f = theano.function([a, b], theano.Out(d, borrow=True))
topo = f.maker.env.toposort()
for M,N,K,nnz in [(4,3,2,3),
(40,30,20,3),
(40,30,20,30),
(400,3000,200,6000),
for M, N, K, nnz in [(4, 3, 2, 3),
(40, 30, 20, 3),
(40, 30, 20, 30),
(400, 3000, 200, 6000),
]:
a_val = sp_mat[sparse_format_a](random_lil((M,N), sparse_dtype, nnz))
b_val = sp_mat[sparse_format_b](random_lil((N,K), sparse_dtype, nnz))
a_val = sp_mat[sparse_format_a](
random_lil((M, N), sparse_dtype, nnz))
b_val = sp_mat[sparse_format_b](
random_lil((N, K), sparse_dtype, nnz))
f(a_val, b_val)
def test_csc_correct_output_faster_than_scipy(self):
......@@ -450,16 +475,16 @@ class test_structureddot(unittest.TestCase):
a = SparseType('csc', dtype=sparse_dtype)()
b = tensor.matrix(dtype=dense_dtype)
d = theano.dot(a,b)
f = theano.function([a,b], theano.Out(d, borrow=True))
d = theano.dot(a, b)
f = theano.function([a, b], theano.Out(d, borrow=True))
for M,N,K,nnz in [(4,3,2,3),
(40,30,20,3),
(40,30,20,30),
(400,3000,200,6000),
for M, N, K, nnz in [(4, 3, 2, 3),
(40, 30, 20, 3),
(40, 30, 20, 30),
(400, 3000, 200, 6000),
]:
spmat = sp.csc_matrix(random_lil((M,N), sparse_dtype, nnz))
mat = numpy.asarray(numpy.random.randn(N,K), dense_dtype)
spmat = sp.csc_matrix(random_lil((M, N), sparse_dtype, nnz))
mat = numpy.asarray(numpy.random.randn(N, K), dense_dtype)
theano_times = []
scipy_times = []
for i in xrange(5):
......@@ -469,8 +494,8 @@ class test_structureddot(unittest.TestCase):
scipy_result = spmat * mat
t2 = time.time()
theano_times.append(t1-t0)
scipy_times.append(t2-t1)
theano_times.append(t1 - t0)
scipy_times.append(t2 - t1)
theano_time = numpy.min(theano_times)
scipy_time = numpy.min(scipy_times)
......@@ -478,14 +503,16 @@ class test_structureddot(unittest.TestCase):
speedup = scipy_time / theano_time
print scipy_times
print theano_times
print 'M=%(M)s N=%(N)s K=%(K)s nnz=%(nnz)s theano_time=%(theano_time)s speedup=%(speedup)s' % locals()
print ('M=%(M)s N=%(N)s K=%(K)s nnz=%(nnz)s theano_time'
'=%(theano_time)s speedup=%(speedup)s') % locals()
# fail if Theano is slower than scipy by more than a certain amount
overhead_tol = 0.003 # seconds overall
overhead_rtol = 1.2 # times as long
overhead_tol = 0.003 # seconds overall
overhead_rtol = 1.2 # times as long
self.assertTrue(numpy.allclose(theano_result, scipy_result))
if not theano.config.mode in ["DebugMode", "DEBUG_MODE"]:
self.assertFalse(theano_time > overhead_rtol*scipy_time + overhead_tol)
self.assertFalse(theano_time > overhead_rtol * scipy_time +
overhead_tol)
def test_csr_correct_output_faster_than_scipy(self):
......@@ -496,33 +523,34 @@ class test_structureddot(unittest.TestCase):
a = SparseType('csr', dtype=sparse_dtype)()
b = tensor.matrix(dtype=dense_dtype)
d = theano.dot(a,b)
f = theano.function([a,b], d)
d = theano.dot(a, b)
f = theano.function([a, b], d)
for M,N,K,nnz in [(4,3,2,3),
(40,30,20,3),
(40,30,20,30),
(400,3000,200,6000),
for M, N, K, nnz in [(4, 3, 2, 3),
(40, 30, 20, 3),
(40, 30, 20, 30),
(400, 3000, 200, 6000),
]:
spmat = sp.csr_matrix(random_lil((M,N), sparse_dtype, nnz))
mat = numpy.asarray(numpy.random.randn(N,K), dense_dtype)
spmat = sp.csr_matrix(random_lil((M, N), sparse_dtype, nnz))
mat = numpy.asarray(numpy.random.randn(N, K), dense_dtype)
t0 = time.time()
theano_result = f(spmat, mat)
t1 = time.time()
scipy_result = spmat * mat
t2 = time.time()
theano_time = t1-t0
scipy_time = t2-t1
theano_time = t1 - t0
scipy_time = t2 - t1
#print theano_result
#print scipy_result
print 'theano took', theano_time,
print 'scipy took', scipy_time
overhead_tol = 0.002 # seconds
overhead_rtol = 1.1 # times as long
overhead_tol = 0.002 # seconds
overhead_rtol = 1.1 # times as long
self.assertTrue(numpy.allclose(theano_result, scipy_result))
if not theano.config.mode in ["DebugMode", "DEBUG_MODE"]:
self.assertFalse(theano_time > overhead_rtol*scipy_time + overhead_tol)
self.assertFalse(theano_time > overhead_rtol * scipy_time +
overhead_tol)
class DotTests(unittest.TestCase):
......@@ -530,11 +558,16 @@ class DotTests(unittest.TestCase):
x_size = (10, 1000)
y_size = (1000, 10000)
self.x_csr = scipy.sparse.csr_matrix(numpy.random.binomial(1, 0.5, x_size), dtype=theano.config.floatX)
self.x_csc = scipy.sparse.csc_matrix(numpy.random.binomial(1, 0.5, x_size), dtype=theano.config.floatX)
self.y = numpy.asarray(numpy.random.uniform(-1, 1, y_size), dtype=theano.config.floatX)
self.y_csr = scipy.sparse.csr_matrix(numpy.random.binomial(1, 0.5, y_size), dtype=theano.config.floatX)
self.y_csc = scipy.sparse.csc_matrix(numpy.random.binomial(1, 0.5, y_size), dtype=theano.config.floatX)
self.x_csr = scipy.sparse.csr_matrix(
numpy.random.binomial(1, 0.5, x_size), dtype=theano.config.floatX)
self.x_csc = scipy.sparse.csc_matrix(
numpy.random.binomial(1, 0.5, x_size), dtype=theano.config.floatX)
self.y = numpy.asarray(numpy.random.uniform(-1, 1, y_size),
dtype=theano.config.floatX)
self.y_csr = scipy.sparse.csr_matrix(
numpy.random.binomial(1, 0.5, y_size), dtype=theano.config.floatX)
self.y_csc = scipy.sparse.csc_matrix(
numpy.random.binomial(1, 0.5, y_size), dtype=theano.config.floatX)
def test_csr_dense(self):
x = theano.sparse.csr_matrix('x')
......@@ -543,7 +576,19 @@ class DotTests(unittest.TestCase):
f_a = theano.function([x, y], theano.sparse.dot(x, y))
f_b = lambda x, y: x * y
assert abs(f_a(self.x_csr, self.y) - f_b(self.x_csr, self.y)).max() < 1e-4
assert _allclose(f_a(self.x_csr, self.y), f_b(self.x_csr, self.y))
# Test infer_shape
f_a = theano.function([x, y], theano.sparse.dot(x, y).shape)
f_b = lambda x, y: (x * y).shape
assert numpy.all(f_a(self.x_csr, self.y) == f_b(self.x_csr, self.y))
topo = f_a.maker.env.toposort()
if theano.config.mode != 'FAST_COMPILE':
nb = 0
else:
nb = 1
assert sum([isinstance(node.op, (Dot, Usmm, UsmmCscDense))
for node in topo]) == nb
def test_csc_dense(self):
x = theano.sparse.csc_matrix('x')
......@@ -552,19 +597,32 @@ class DotTests(unittest.TestCase):
f_a = theano.function([x, y], theano.sparse.dot(x, y))
f_b = lambda x, y: x * y
assert (abs(f_a(self.x_csc, self.y) - f_b(self.x_csc, self.y)).max()
< 1e-4)
assert _allclose(f_a(self.x_csc, self.y), f_b(self.x_csc, self.y))
# Test infer_shape
f_a = theano.function([x, y], theano.sparse.dot(x, y).shape)
f_b = lambda x, y: (x * y).shape
assert numpy.all(f_a(self.x_csc, self.y) == f_b(self.x_csc, self.y))
topo = f_a.maker.env.toposort()
if theano.config.mode != 'FAST_COMPILE':
nb = 0
else:
nb = 1
assert sum([isinstance(node.op, (Dot, Usmm, UsmmCscDense))
for node in topo]) == nb
def test_sparse_sparse(self):
for d1, d2 in [('float32', 'float32'),
('float32', 'float64'),
('float64', 'float32'),
('float64', 'float64'),
('float32', 'int16'),
('float32', 'complex64'),
]:
for x_f, y_f in [('csc','csc'),
('csc','csr'),
('csr','csc'),
('csr','csr'),
for x_f, y_f in [('csc', 'csc'),
('csc', 'csr'),
('csr', 'csc'),
('csr', 'csr'),
]:
x = theano.sparse.SparseType(format=x_f, dtype=d1)('x')
y = theano.sparse.SparseType(format=x_f, dtype=d2)('x')
......@@ -572,20 +630,38 @@ class DotTests(unittest.TestCase):
f_a = theano.function([x, y], theano.sparse.dot(x, y))
f_b = lambda x, y: x * y
vx = getattr(self,'x_'+x_f).astype(d1)
vy = getattr(self,'y_'+y_f).astype(d2)
assert abs(f_a(vx, vy) - f_b(vx, vy)).max() < 1e-4
vx = getattr(self, 'x_' + x_f).astype(d1)
vy = getattr(self, 'y_' + y_f).astype(d2)
assert _allclose(f_a(vx, vy), f_b(vx, vy).toarray())
# Test infer_shape
f_a = theano.function([x, y], theano.sparse.dot(x, y).shape)
f_b = lambda x, y: (x * y).shape
assert numpy.all(f_a(vx, vy) == f_b(vx, vy))
topo = f_a.maker.env.toposort()
if theano.config.mode != 'FAST_COMPILE':
nb = 0
else:
nb = 1
assert sum([isinstance(node.op, (Dot, Usmm, UsmmCscDense))
for node in topo]) == nb
class UsmmTests(unittest.TestCase):
""" Test the Usmm and UsmmCscDense class and related optimization """
def setUp(self):
x_size = (10, 200)
y_size = (200, 2000)
x_size = (10, 100)
y_size = (100, 200)
z_size = (x_size[0], y_size[1])
self.x = numpy.asarray(numpy.random.binomial(1, 0.5, x_size), dtype=theano.config.floatX)
self.y = numpy.asarray(numpy.random.uniform(-1, 1, y_size), dtype=theano.config.floatX)
self.z = numpy.asarray(numpy.random.uniform(-1, 1, z_size), dtype=theano.config.floatX)
self.x = numpy.asarray(numpy.random.binomial(1, 0.5, x_size),
dtype=theano.config.floatX)
self.y = numpy.asarray(numpy.random.uniform(-1, 1, y_size),
dtype=theano.config.floatX)
self.z = numpy.asarray(numpy.random.uniform(-1, 1, z_size),
dtype=theano.config.floatX)
utt.seed_rng()
self.rng = numpy.random.RandomState(seed=utt.fetch_seed())
def test(self):
def mat(format, name, dtype):
......@@ -594,9 +670,14 @@ class UsmmTests(unittest.TestCase):
else:
return theano.sparse.matrix(format, name, dtype=dtype)
params = product(*([['float32', 'float64']] * 4 +
params = product(*([['float32', 'float64', 'int16', 'complex64']] * 4 +
[['dense', 'csc', 'csr']] * 2))
# All tests are too slow, so we randomly take 100 of them.
# The buildbot changes the seed, so we will eventually run them all.
# As of this writing they were all passing.
#params = self.rng.permutation(list(params))[:500]
for dtype1, dtype2, dtype3, dtype4, format1, format2 in params:
if format1 == 'dense' and format2 == 'dense':
# Usmm won't be used!
......@@ -604,9 +685,7 @@ class UsmmTests(unittest.TestCase):
x = mat(format1, 'x', dtype1)
y = mat(format2, 'y', dtype2)
a = theano.tensor.scalar('a', dtype=dtype3)
z = theano.tensor.shared(
numpy.asarray(self.z, dtype=dtype4).copy()
)
z = theano.shared(numpy.asarray(self.z, dtype=dtype4).copy())
f_b = lambda z, a, x, y: z - a * (x * y)
x_data = numpy.asarray(self.x, dtype=dtype1)
......@@ -615,9 +694,10 @@ class UsmmTests(unittest.TestCase):
y_data = numpy.asarray(self.y, dtype=dtype2)
if format2 != 'dense':
y_data = as_sparse_format(y_data, format2)
z_data = numpy.asarray(self.z, dtype=dtype3)
a_data = numpy.asarray(1.5, dtype=dtype3)
z_data = numpy.asarray(self.z, dtype=dtype4)
f_b_out = f_b(z_data, 1, x_data, y_data)
f_b_out = f_b(z_data, a_data, x_data, y_data)
# Can it work inplace?
inplace = dtype4 == theano.scalar.upcast(dtype1, dtype2, dtype3)
......@@ -630,25 +710,39 @@ class UsmmTests(unittest.TestCase):
f_a = theano.function([a, x, y], [],
updates=updates,
mode=mode)
f_a(1, x_data, y_data)
assert abs(z.get_value(borrow=True) - f_b_out).max() < 1e-4
f_a(a_data, x_data, y_data)
f_a_out = z.get_value(borrow=True)
else:
f_a = theano.function([a, x, y],
z - a * theano.sparse.dot(x, y),
mode=mode)
f_a_out = f_a(1, x_data, y_data)
assert abs(f_a_out - f_b_out).max() < 1e-4
# In DebugMode there is a strange difference with complex numbers,
# so we raise the threshold a little.
try:
orig = theano.tensor.basic.float64_rtol
theano.tensor.basic.float64_rtol = 1e-5
f_a_out = f_a(a_data, x_data, y_data)
finally:
theano.tensor.basic.float64_rtol = orig
assert _allclose(f_a_out, f_b_out, rtol=1e-5)
topo = f_a.maker.env.toposort()
up = theano.scalar.upcast(dtype1, dtype2, dtype3, dtype4)
if y.type.dtype == up and format1 == 'csc' and format2 == 'dense':
fast_compile = theano.config.mode == "FAST_COMPILE"
if (y.type.dtype == up and format1 == 'csc' and format2 == 'dense'
and not fast_compile) and up in ('float32', 'float64'):
# The op UsmmCscDense should be inserted
assert (sum([isinstance(node.op, tensor.Elemwise) and
isinstance(node.op.scalar_op,
theano.scalar.basic.Cast)
for node in topo]) == len(topo) - 5)
new_topo = []
for node in topo:
if not isinstance(node.op, tensor.Elemwise) and \
isinstance(node.op.scalar_op, theano.scalar.basic.Cast):
if not (isinstance(node.op, tensor.Elemwise) and \
isinstance(node.op.scalar_op,
theano.scalar.basic.Cast)):
new_topo.append(node)
topo = new_topo
assert len(topo) == 5, topo
......@@ -663,19 +757,70 @@ class UsmmTests(unittest.TestCase):
assert isinstance(topo[4].op, theano.sparse.UsmmCscDense)
if inplace:
assert topo[4].op.inplace
else:
assert len(topo)==3, topo
elif not fast_compile:
# The op Usmm should be inserted
assert len(topo) == 3, topo
assert isinstance(topo[0].op, theano.tensor.DimShuffle)
assert topo[1].op == theano.tensor.neg
assert isinstance(topo[2].op, theano.sparse.Usmm)
def test_infer_shape(self):
def mat(format, name, dtype):
if format == 'dense':
return theano.tensor.matrix(name, dtype=dtype)
else:
return theano.sparse.matrix(format, name, dtype=dtype)
params = [('float32', 'float64', 'int16', 'complex64', 'csc', 'dense'),
('float32', 'float64', 'int16', 'complex64', 'csr', 'dense')]
for dtype1, dtype2, dtype3, dtype4, format1, format2 in params:
if format1 == 'dense' and format2 == 'dense':
# Usmm won't be used!
continue
x = mat(format1, 'x', dtype1)
y = mat(format2, 'y', dtype2)
a = theano.tensor.scalar('a', dtype=dtype3)
z = theano.shared(numpy.asarray(self.z, dtype=dtype4).copy())
f_b = lambda z, a, x, y: z - a * (x * y)
x_data = numpy.asarray(self.x, dtype=dtype1)
if format1 != 'dense':
x_data = as_sparse_format(x_data, format1)
y_data = numpy.asarray(self.y, dtype=dtype2)
if format2 != 'dense':
y_data = as_sparse_format(y_data, format2)
a_data = numpy.asarray(1.5, dtype=dtype3)
z_data = numpy.asarray(self.z, dtype=dtype4)
f_b_out = f_b(z_data, a_data, x_data, y_data)
# Can it work inplace?
inplace = dtype4 == theano.scalar.upcast(dtype1, dtype2, dtype3)
# To make it easier to check the toposort
mode = theano.compile.mode.get_default_mode().excluding('fusion')
# test infer_shape of Dot got applied
f_shape = theano.function([a, x, y],
(z - a * theano.sparse.dot(x, y)).shape,
mode=mode)
assert all(f_shape(a_data, x_data, y_data) == f_b_out.shape)
topo = f_shape.maker.env.toposort()
if theano.config.mode != 'FAST_COMPILE':
nb = 0
else:
nb = 1
assert sum([isinstance(node.op, (Dot, Usmm, UsmmCscDense))
for node in topo]) == nb
def test_shape_i():
sparse_dtype = 'float32'
a = SparseType('csr', dtype=sparse_dtype)()
f = theano.function([a], a.shape[1])
assert f(sp.csr_matrix(random_lil((100,10), sparse_dtype, 3))) == 10
assert f(sp.csr_matrix(random_lil((100, 10), sparse_dtype, 3))) == 10
def test_shape():
# Test that getting the shape of a sparse variable
......@@ -684,47 +829,69 @@ def test_shape():
a = SparseType('csr', dtype=sparse_dtype)()
f = theano.function([a], a.shape)
assert numpy.all(f(sp.csr_matrix(random_lil((100,10), sparse_dtype, 3)))==(100,10))
if theano.config.mode!='FAST_COMPILE':
assert numpy.all(f(sp.csr_matrix(random_lil((100, 10), sparse_dtype, 3)))
== (100, 10))
if theano.config.mode != 'FAST_COMPILE':
topo = f.maker.env.toposort()
assert len(topo)==3
assert isinstance(topo[0].op,tensor.opt.Shape_i)
assert isinstance(topo[1].op,tensor.opt.Shape_i)
assert isinstance(topo[2].op,tensor.opt.MakeVector)
assert len(topo) == 3
assert isinstance(topo[0].op, tensor.opt.Shape_i)
assert isinstance(topo[1].op, tensor.opt.Shape_i)
assert isinstance(topo[2].op, tensor.opt.MakeVector)
def test_may_share_memory():
    """SparseType.may_share_memory must detect aliasing between sparse
    matrices and their underlying data/indptr/indices arrays, and report
    no aliasing for unrelated objects or freshly-built arrays."""
    a = scipy.sparse.csc_matrix(scipy.sparse.eye(5, 3))
    b = scipy.sparse.csc_matrix(scipy.sparse.eye(4, 3))

    # a.shape is a plain tuple, so converting it builds a new array
    # that can never alias the matrix.
    as_ar = lambda a: theano._asarray(a, dtype='int32')
    for a_, b_, rep in [(a, a, True),
                        (b, b, True),
                        (a, b, False),
                        (a, a.data, True),
                        (a, a.indptr, True),
                        (a, a.indices, True),
                        (a, as_ar(a.shape), False),
                        (a.data, a, True),
                        (a.indptr, a, True),
                        (a.indices, a, True),
                        (as_ar(a.shape), a, False),
                        (b, b.data, True),
                        (b, b.indptr, True),
                        (b, b.indices, True),
                        (b, as_ar(b.shape), False),
                        (b.data, b, True),
                        (b.indptr, b, True),
                        (b.indices, b, True),
                        (as_ar(b.shape), b, False),
                        (b.data, a, False),
                        (b.indptr, a, False),
                        (b.indices, a, False),
                        (as_ar(b.shape), a, False),
                        ]:
        assert SparseType.may_share_memory(a_, b_) == rep
def test_sparse_shared_memory():
    """Compile a function with mutable sparse inputs and check it
    computes the same result as a pure numpy reference, even when the
    same matrix is passed for both (aliased) inputs."""
    # Note: There are no inplace ops on sparse matrices yet. If one is
    # someday implemented, we could test it here.
    a = random_lil((3, 4), 'float32', 3).tocsr()
    m1 = random_lil((4, 4), 'float32', 3).tocsr()
    m2 = random_lil((4, 4), 'float32', 3).tocsr()

    x = SparseType('csr', dtype='float32')()
    y = SparseType('csr', dtype='float32')()

    sdot = theano.sparse.structured_dot
    z = sdot(x * 3, m1) + sdot(y * 2, m2)

    # mutable=True allows theano to reuse the inputs' storage.
    f = theano.function([theano.In(x, mutable=True),
                         theano.In(y, mutable=True)], z, mode='FAST_RUN')

    def f_(x, y, m1=m1, m2=m2):
        # Dense reference implementation of the same expression.
        return numpy.dot(x * 3, m1) + numpy.dot(y * 2, m2)

    assert SparseType.may_share_memory(a, a)  # This is trivial
    result = f(a, a)
    result_ = f_(a, a)
    assert (result_.todense() == result.todense()).all()
......@@ -736,6 +903,7 @@ def test_size():
x = getattr(theano.sparse, sparse_type)()
y = getattr(scipy.sparse, sparse_type)((5, 7)).astype(config.floatX)
get_size = theano.function([x], x.size)
def check():
assert y.size == get_size(y)
# We verify that the size is correctly updated as we store more data
......@@ -748,20 +916,20 @@ def test_size():
import theano.tensor.tests.test_sharedvar

# Instantiate the generic shared-variable test suite for sparse shared
# variables: csc storage internally, csr values accepted via casting,
# no ndarray construction, no inplace set_value.
test_shared_options = theano.tensor.tests.test_sharedvar.makeSharedTester(
    shared_constructor_=theano.sparse.shared,
    dtype_='float64',
    get_value_borrow_true_alias_=True,
    shared_borrow_true_alias_=True,
    set_value_borrow_true_alias_=True,
    set_value_inplace_=False,
    set_casted_value_inplace_=False,
    shared_constructor_accept_ndarray_=False,
    internal_type_=scipy.sparse.csc_matrix,
    test_internal_type_=scipy.sparse.issparse,
    theano_fct_=lambda a: dense_from_sparse(a * 2.),
    ref_fct_=lambda a: numpy.asarray((a * 2).todense()),
    cast_value_=scipy.sparse.csr_matrix,
    name='test_shared_options',
    )
......
......@@ -390,20 +390,24 @@ else:
# more strict. At least float32 precision.
float64_rtol = 1.0000000000000001e-06
def _allclose(a, b, rtol=None, atol=None):
    """numpy.allclose with dtype-dependent default tolerances.

    If either input is float32/complex64, the looser float32 tolerances
    are used; otherwise the float64 ones.  Explicit `rtol`/`atol`
    arguments override the defaults.
    """
    narrow = 'float32', 'complex64'
    if (str(a.dtype) in narrow) or (str(b.dtype) in narrow):
        atol_ = float32_atol
        rtol_ = float32_rtol
    else:
        atol_ = float64_atol
        rtol_ = float64_rtol
    # Caller-supplied tolerances take precedence over the defaults.
    if rtol is not None:
        rtol_ = rtol
    if atol is not None:
        atol_ = atol

    # Work around bug in Numpy, see http://projects.scipy.org/numpy/ticket/1684
    if str(b.dtype) in int_dtypes and (numpy.absolute(b) < 0).any():
        b = theano._asarray(b, dtype='float64')

    return numpy.allclose(a, b, atol=atol_, rtol=rtol_)
def get_constant_value(v):
"""return the constant scalar(0-D) value underlying variable `v`
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论