merge

ed9143d0 · James Bergstra · b0cfc18d · 2125a099 · ed9143d0 · ed9143d0
--- a/theano/compile/debugmode.py
+++ b/theano/compile/debugmode.py
@@ -219,17 +219,17 @@ class BadDestroyMap(DebugModeError):
        self.new_val = new_val
    
    def __str__(self):
-        npy_old_val = numpy.asarray(self.old_val)
-        npy_new_val = numpy.asarray(self.new_val)
+        sio = StringIO()
+        print >> sio, "  node:", self.node
+        print >> sio, "  node.inputs:", [(str(i), id(i)) for i in self.node.inputs]
+        print >> sio, "  destroy_map:", getattr(self.node.op, 'destroy_map', {})
+        print >> sio, "  changed input idx:", self.idx
+        print >> sio, "  changed input type:", self.node.inputs[self.idx].type
+        print >> sio, "  repr (old val):", repr(self.old_val)
+        print >> sio, "  repr (new val):", repr(self.new_val)
        try:
-            sio = StringIO()
-            print >> sio, "  node:", self.node
-            print >> sio, "  node.inputs:", [(str(i), id(i)) for i in self.node.inputs]
-            print >> sio, "  destroy_map:", getattr(self.node.op, 'destroy_map', {})
-            print >> sio, "  changed input idx:", self.idx
-            print >> sio, "  changed input type:", self.node.inputs[self.idx].type
-            print >> sio, "  repr (old val):", repr(self.old_val)
-            print >> sio, "  repr (new val):", repr(self.new_val)
+            npy_old_val = numpy.asarray(self.old_val)
+            npy_new_val = numpy.asarray(self.new_val)
            print >> sio, "  value dtype (new <space> old):", npy_new_val.dtype, npy_old_val.dtype
            print >> sio, "  value shape (new <space> old):", npy_new_val.shape, npy_old_val.shape
            print >> sio, "  value min (new <space> old):", npy_new_val.min(), npy_old_val.min()
@@ -237,10 +237,10 @@ class BadDestroyMap(DebugModeError):
            print >> sio, "  value min (new-old):", (npy_new_val-npy_old_val).min()
            print >> sio, "  value max (new-old):", (npy_new_val-npy_old_val).max()
            print >> sio, ""
-            print >> sio, "  Hint: this can also be caused by a deficient values_eq_approx() or __eq__() implementation [which compared input values]"
-            return sio.getvalue()
        except Exception, e:
-            return str(e)
+            print >> sio, "(Numpy-hints failed with: %s)" %str(e)
+        print >> sio, "  Hint: this can also be caused by a deficient values_eq_approx() or __eq__() implementation [which compared input values]"
+        return sio.getvalue()

 class BadViewMap(DebugModeError):
    """Exception: Some perform() or c_code() created a memory alias that wasn't in the view_map"""
@@ -868,6 +868,11 @@ class _Linker(gof.link.LocalLinker):
        return self

    def make_all(self, profiler = None, input_storage = None, output_storage = None):
+        
+        if 1:
+            # Cannot import at top level because of a circular import.
+            # TODO: find a less hacky way of setting TensorType.filter_checks_isfinite
+            from theano.tensor import TensorType #to set filter_check_isfinite
        env = self.env
        input_storage_ = input_storage
        output_storage_ = output_storage
@@ -932,7 +937,7 @@ class _Linker(gof.link.LocalLinker):
        # This is the function that runs when you evaluate the graph
        #####
        def f():
-            debug("starting f")
+            debug("starting a DebugMode call")
            for x in no_recycling:
                x[0] = None

@@ -1027,7 +1032,10 @@ class _Linker(gof.link.LocalLinker):
                            storage_map[r][0] = _lessbroken_deepcopy(r_vals[r])

                        debug(i, "DEBUGMODE running thunk_c")
-                        thunk_c()
+                        try:
+                            thunk_c()
+                        except:
+                            raise_with_op(node)

                        for r in node.outputs:
                            # check output values for type-correctness
@@ -1075,9 +1083,6 @@ class _Linker(gof.link.LocalLinker):
                    if True: 
                        gc.collect()

-                #except:
-                #    raise_with_op(node)
-
                _find_bad_optimizations(order, env.equivalence_tracker.reasons, r_vals)

                #####
@@ -1132,10 +1137,27 @@ class _Linker(gof.link.LocalLinker):
                if (r.owner is None):
                    assert storage_map[r][0] is not None

+
            ###############
-            # Done f
+            # Done debugmode function call 'f'
            ##############

+        def run_with_tensortype_filter_check(f):
+            def deco():
+                # WARNING: filter_checks_isfinite is set globally on TensorType,
+                # so this will misbehave if we are trying to use
+                # multiple modes at once.
+                old_filter_checks_isfinite = TensorType.filter_checks_isfinite
+                TensorType.filter_checks_isfinite = self.maker.mode.check_isfinite
+                try:
+                    return f()
+                finally:
+                    # put back the filter_checks_isfinite
+                    TensorType.filter_checks_isfinite = old_filter_checks_isfinite
+            return deco
+
+        f = run_with_tensortype_filter_check(f)
+
        f.allow_gc = True
        assert len(env.inputs) == len(input_storage)
        assert len(env.outputs) == len(output_storage)
@@ -1170,11 +1192,6 @@ class _Maker(FunctionMaker): #inheritance buys a few helper functions

        """

-        # WARNING: this is a global mechanism... so it will screw up if we are trying to use
-        # multiple modes at once.
-        from theano.tensor import TensorType #to set filter_check_isfinite
-        TensorType.filter_checks_isfinite = mode.check_isfinite
-
        # Handle the case where inputs and/or outputs is a single Variable (not in a list)
        unpack_single = False
        return_none = False

--- a/theano/sparse/basic.py
+++ b/theano/sparse/basic.py
@@ -8,7 +8,6 @@ To read about different sparse formats, see U{http://www-users.cs.umn.edu/~saad/

 import sys, operator
 import numpy, theano
-from scipy import sparse
 import scipy.sparse
 from theano.printing import Print

@@ -16,6 +15,7 @@ from theano import gof
 from theano import tensor
 from theano import compile
 from theano import scalar
+from theano import config

 #TODO: move this decorator to the compile submodule
 def register_specialize(lopt, *tags, **kwargs):
@@ -23,11 +23,11 @@ def register_specialize(lopt, *tags, **kwargs):


 """ Types of sparse matrices to use for testing """
-_mtypes = [sparse.csc_matrix, sparse.csr_matrix]
+_mtypes = [scipy.sparse.csc_matrix, scipy.sparse.csr_matrix]
 #_mtypes = [sparse.csc_matrix, sparse.csr_matrix, sparse.dok_matrix, sparse.lil_matrix, sparse.coo_matrix]
 #* new class ``dia_matrix`` : the sparse DIAgonal format
 #* new class ``bsr_matrix`` : the Block CSR format
-_mtype_to_str = {sparse.csc_matrix: "csc", sparse.csr_matrix: "csr"}
+_mtype_to_str = {scipy.sparse.csc_matrix: "csc", scipy.sparse.csr_matrix: "csr"}

 def _is_sparse_variable(x):
    """
@@ -51,15 +51,15 @@ def _is_sparse(x):
    @rtype: boolean
    @return: True iff x is a L{scipy.sparse.spmatrix} (and not a L{numpy.ndarray})
    """
-    if not isinstance(x, sparse.spmatrix) and not isinstance(x, numpy.ndarray):
+    if not isinstance(x, scipy.sparse.spmatrix) and not isinstance(x, numpy.ndarray):
        raise NotImplementedError("this function should only be called on sparse.scipy.sparse.spmatrix or numpy.ndarray, not,", x)
-    return isinstance(x, sparse.spmatrix)
+    return isinstance(x, scipy.sparse.spmatrix)
 def _is_dense(x):
    """
    @rtype: boolean
    @return: True unless x is a L{scipy.sparse.spmatrix} (and not a L{numpy.ndarray})
    """
-    if not isinstance(x, sparse.spmatrix) and not isinstance(x, numpy.ndarray):
+    if not isinstance(x, scipy.sparse.spmatrix) and not isinstance(x, numpy.ndarray):
        raise NotImplementedError("this function should only be called on sparse.scipy.sparse.spmatrix or numpy.ndarray, not,", x)
    return isinstance(x, numpy.ndarray)

@@ -101,22 +101,23 @@ def as_sparse_variable(x):
 as_sparse = as_sparse_variable

 def constant(x):
-    if not isinstance(x, sparse.spmatrix):
+    if not isinstance(x, scipy.sparse.spmatrix):
        raise TypeError("sparse.constant must be called on a scipy.sparse.spmatrix")
    try:
        return SparseConstant(SparseType(format = x.format,
-                                     dtype = x.dtype), x)
+                                     dtype = x.dtype), x.copy())
    except TypeError:
        raise TypeError("Could not convert %s to SparseType" % x, type(x))

-def value(x):
-    if not isinstance(x, sparse.spmatrix):
-        raise TypeError("sparse.value must be called on a scipy.sparse.spmatrix")
-    try:
-        return SparseValue(SparseType(format = x.format,
-                                  dtype = x.dtype), x)
-    except TypeError:
-        raise TypeError("Could not convert %s to SparseType" % x, type(x))
+if 0:
+    def value(x):
+        if not isinstance(x, scipy.sparse.spmatrix):
+            raise TypeError("sparse.value must be called on a scipy.sparse.spmatrix")
+        try:
+            return SparseValue(SparseType(format = x.format,
+                                      dtype = x.dtype), x)
+        except TypeError:
+            raise TypeError("Could not convert %s to SparseType" % x, type(x))

 def sp_ones_like(x):
    data, indices, indptr, shape = csm_properties(x) #TODO: don't restrict to CSM formats
@@ -132,13 +133,13 @@ class SparseType(gof.Type):
    @note As far as I can tell, L{scipy.sparse} objects must be matrices, i.e. have dimension 2.
    """
    format_cls = {
-            'csr' : sparse.csr_matrix,
-            'csc' : sparse.csc_matrix
+            'csr' : scipy.sparse.csr_matrix,
+            'csc' : scipy.sparse.csc_matrix
            }
    dtype_set = set(['int', 'int8', 'int16','int32', 'int64', 'float32', 'float64', 'complex64','complex128'])
    ndim = 2

-    def __init__(self, format, dtype = 'float64'):
+    def __init__(self, format, dtype):
        """
        Fundamental way to create a sparse node.
        @param dtype:   Type of numbers in the matrix.
@@ -187,16 +188,31 @@ class SparseType(gof.Type):
        return "Sparse[%s, %s]" % (str(self.dtype), str(self.format))

    def values_eq_approx(self, a, b, eps=1e-6):
-#        print "VEA", a, b, scipy.sparse.issparse(a), scipy.sparse.issparse(b), abs(a-b).sum(), abs(a-b).sum() < (1e-6 * a.nnz)
+        # WARNING: equality comparison of sparse matrices is neither fast nor
+        # easy; we definitely do not want to be doing this unnecessarily
+        # during a FAST_RUN computation.
        return scipy.sparse.issparse(a) \
                and scipy.sparse.issparse(b) \
                and abs(a-b).sum() < (1e-6 * a.nnz)

+    def values_eq(self, a, b):
+        # WARNING: equality comparison of sparse matrices is neither fast nor
+        # easy; we definitely do not want to be doing this unnecessarily
+        # during a FAST_RUN computation.
+        return scipy.sparse.issparse(a) \
+                and scipy.sparse.issparse(b) \
+                and abs(a-b).sum() == 0.0
+
    def is_valid_value(self, a):
        return scipy.sparse.issparse(a) and (a.format == self.format)

-csc_matrix = SparseType(format='csc')
-csr_matrix = SparseType(format='csr')
+# for more dtypes, call SparseType(format, dtype)
+csc_matrix = SparseType(format='csc', dtype=config.floatX)
+csr_matrix = SparseType(format='csr', dtype=config.floatX)
+csc_dmatrix = SparseType(format='csc', dtype='float64')
+csr_dmatrix = SparseType(format='csr', dtype='float64')
+csc_fmatrix = SparseType(format='csc', dtype='float32')
+csr_fmatrix = SparseType(format='csr', dtype='float32')

 class _sparse_py_operators:
    T = property(lambda self: transpose(self), doc = "Return aliased transpose of self (read-only)")
@@ -270,9 +286,11 @@ class CSMProperties(gof.Op):

    def perform(self, node, (csm,), out):
        if self.kmap is None:
-          out[0][0] = csm.data
+            out[0][0] = csm.data
        else:
-          out[0][0] = csm.data[self.kmap]
+            out[0][0] = csm.data[self.kmap]
+        if str(csm.data.dtype) == 'int32':
+            out[0][0] = theano._asarray(out[0][0], dtype='int32')
        #backport
        #out[0][0] = csm.data if self.kmap is None else csm.data[self.kmap]
        out[1][0] = theano._asarray(csm.indices, dtype='int32')
@@ -377,13 +395,13 @@ class CSM(gof.Op):
                     'as indices (shape'+`indices.shape`+') or elements as kmap ('+`numpy.size(self.kmap)`+')'
            raise ValueError(errmsg)
        if self.format == 'csc':
-            out[0] = sparse.csc_matrix((data, indices.copy(), indptr.copy()), 
+            out[0] = scipy.sparse.csc_matrix((data, indices.copy(), indptr.copy()), 
                    numpy.asarray(shape),
                    copy = False #1000*len(data.flatten())
                    )
        else:
            assert self.format == 'csr'
-            out[0] = sparse.csr_matrix((data, indices.copy(), indptr.copy()), 
+            out[0] = scipy.sparse.csr_matrix((data, indices.copy(), indptr.copy()), 
                    shape.copy(),
                    copy = False #1000*len(data.flatten())
                    )
@@ -546,7 +564,6 @@ class AddSS(gof.op.Op):
        if x.type.dtype != y.type.dtype:
            raise NotImplementedError()
        if x.type.format != y.type.format:
-            print x.type.format, y.type.format
            raise NotImplementedError()
        return gof.Apply(self,
                         [x, y],
@@ -795,11 +812,11 @@ class StructuredDotCSC(gof.Op):
        return r

    def perform(self, node, (a_val, a_ind, a_ptr, a_nrows, b), (out,)):
-        a = sparse.csc_matrix((a_val, a_ind, a_ptr), 
+        a = scipy.sparse.csc_matrix((a_val, a_ind, a_ptr), 
                (a_nrows, b.shape[0]),
                copy = False)
        #out[0] = a.dot(b)
-        out[0] = a * b
+        out[0] = theano._asarray(a * b, dtype=node.outputs[0].type.dtype)
        assert _is_dense(out[0]) # scipy 0.7 automatically converts to dense

    def c_code(self, node, name, (a_val, a_ind, a_ptr, a_nrows, b), (z,), sub):
@@ -952,7 +969,7 @@ class StructuredDotCSR(gof.Op):
        return r

    def perform(self, node, (a_val, a_ind, a_ptr, b), (out,)):
-        a = sparse.csr_matrix((a_val, a_ind, a_ptr), 
+        a = scipy.sparse.csr_matrix((a_val, a_ind, a_ptr), 
                (len(a_ptr)-1, b.shape[0]),
                copy = True) #use view_map before setting this to False
        #out[0] = a.dot(b)

--- a/theano/sparse/tests/test_basic.py
+++ b/theano/sparse/tests/test_basic.py
@@ -5,7 +5,7 @@ from nose.plugins.skip import SkipTest
 if enable_sparse == False:
    raise SkipTest('Optional package sparse disabled')

-import random
+import random, time
 import unittest
 import theano

@@ -21,14 +21,25 @@ from theano.tests import unittest_tools as utt
 def eval_outputs(outputs):
    return compile.function([], outputs)()[0]

+def random_lil(shape, dtype, nnz):
+    rval = sp.lil_matrix(shape, dtype=dtype)
+    huge = 2**30
+    for k in range(nnz):
+        # set non-zeros in random locations (row x, col y)
+        idx = numpy.random.random_integers(huge,size=len(shape)) % shape
+        rval.__setitem__(
+                idx,
+                numpy.random.rand())
+    return rval
+
 class T_transpose(unittest.TestCase):
    def setUp(self):
        utt.seed_rng()

    def test_transpose_csc(self):
-        sp = sparse.csc_matrix(sparse.eye(5,3))
+        sp = scipy.sparse.csc_matrix(scipy.sparse.eye(5,3))
        a = as_sparse_variable(sp)
-        self.failUnless(a.data is sp)
+        self.failIf(a.data is sp)
        self.failUnless(a.data.shape == (5,3))
        self.failUnless(a.type.dtype == 'float64', a.type.dtype)
        self.failUnless(a.type.format == 'csc', a.type.format)
@@ -39,7 +50,7 @@ class T_transpose(unittest.TestCase):
        vta = eval_outputs([ta])
        self.failUnless(vta.shape == (3,5))
    def test_transpose_csr(self):
-        a = as_sparse_variable(sparse.csr_matrix(sparse.eye(5,3)))
+        a = as_sparse_variable(scipy.sparse.csr_matrix(scipy.sparse.eye(5,3)))
        self.failUnless(a.data.shape == (5,3))
        self.failUnless(a.type.dtype == 'float64')
        self.failUnless(a.type.format == 'csr')
@@ -55,13 +66,13 @@ class T_Add(unittest.TestCase):
        for mtype in _mtypes:
            a = mtype(numpy.array([[1., 0], [3, 0], [0, 6]]))
            aR = as_sparse_variable(a)
-            self.failUnless(aR.data is a)
+            self.failIf(aR.data is a)
            self.failUnless(_is_sparse(a))
            self.failUnless(_is_sparse_variable(aR))

            b = mtype(numpy.asarray([[0, 2.], [0, 4], [5, 0]]))
            bR = as_sparse_variable(b)
-            self.failUnless(bR.data is b)
+            self.failIf(bR.data is b)
            self.failUnless(_is_sparse(b))
            self.failUnless(_is_sparse_variable(bR))

@@ -82,13 +93,13 @@ class T_Add(unittest.TestCase):
        for mtype in _mtypes:
            a = numpy.array([[1., 0], [3, 0], [0, 6]])
            aR = tensor.as_tensor_variable(a)
-            self.failUnless(aR.data is a)
+            self.failIf(aR.data is a) #constants are copied
            self.failUnless(_is_dense(a))
            self.failUnless(_is_dense_variable(aR))

            b = mtype(numpy.asarray([[0, 2.], [0, 4], [5, 0]]))
            bR = as_sparse_variable(b)
-            self.failUnless(bR.data is b)
+            self.failIf(bR.data is b) #constants are copied
            self.failUnless(_is_sparse(b))
            self.failUnless(_is_sparse_variable(bR))

@@ -107,13 +118,13 @@ class T_Add(unittest.TestCase):
        for mtype in _mtypes:
            a = mtype(numpy.array([[1., 0], [3, 0], [0, 6]]))
            aR = as_sparse_variable(a)
-            self.failUnless(aR.data is a)
+            self.failIf(aR.data is a)
            self.failUnless(_is_sparse(a))
            self.failUnless(_is_sparse_variable(aR))

            b = numpy.asarray([[0, 2.], [0, 4], [5, 0]])
            bR = tensor.as_tensor_variable(b)
-            self.failUnless(bR.data is b)
+            self.failIf(bR.data is b)
            self.failUnless(_is_dense(b))
            self.failUnless(_is_dense_variable(bR))

@@ -132,136 +143,117 @@ class T_conversion(unittest.TestCase):
    def setUp(self):
        utt.seed_rng()

-    def test0(self):
-        a = tensor.as_tensor_variable(numpy.random.rand(5))
-        s = csc_from_dense(a)
-        val = eval_outputs([s])
-        self.failUnless(str(val.dtype)=='float64')
-        self.failUnless(val.format == 'csc')
-
-    def test1(self):
-        a = tensor.as_tensor_variable(numpy.random.rand(5))
-        s = csr_from_dense(a)
-        val = eval_outputs([s])
-        self.failUnless(str(val.dtype)=='float64')
-        self.failUnless(val.format == 'csr')
-
-    def test2(self):
-        #call dense_from_sparse
-        for t in _mtypes:
-            s = t((2,5))
-            s = t(scipy.sparse.identity(5))
-            d = dense_from_sparse(s)
-            s[0,0] = 1.0
-            val = eval_outputs([d])
+    if 0:
+        def test0(self):
+            a = tensor.as_tensor_variable(numpy.random.rand(5))
+            s = csc_from_dense(a)
+            val = eval_outputs([s])
            self.failUnless(str(val.dtype)=='float64')
-            self.failUnless(numpy.all(val[0] == [1,0,0,0,0]))
+            self.failUnless(val.format == 'csc')

+    if 0:
+        def test1(self):
+            a = tensor.as_tensor_variable(numpy.random.rand(5))
+            s = csr_from_dense(a)
+            val = eval_outputs([s])
+            self.failUnless(str(val.dtype)=='float64')
+            self.failUnless(val.format == 'csr')
+
+    if 1:
+        def test2(self):
+            #call dense_from_sparse
+            for t in _mtypes:
+                s = t(scipy.sparse.identity(5))
+                d = dense_from_sparse(s)
+                # s should be copied into the graph as a constant
+                s[0,0] = 3.0 # changes s, but not the copy
+                val = eval_outputs([d])
+                return
+                self.failUnless(str(val.dtype)==s.dtype)
+                self.failUnless(numpy.all(val[0] == [1,0,0,0,0]))

 import scipy.sparse as sp
 class test_structureddot(unittest.TestCase):
    def setUp(self):
        utt.seed_rng()
+    def test_structureddot_csc_grad(self):
+
+        #shortcut: testing csc in float32, testing csr in float64
+
+        # allocate a random sparse matrix
+        spmat = sp.csc_matrix(random_lil((4,3), 'float32', 3))
+
+        mat = numpy.asarray(numpy.random.randn(3,2), 'float32')
+
+        def buildgraphCSC(spdata,sym_mat):
+            csc = CSC(spdata, spmat.indices[:spmat.size],
+                    spmat.indptr, spmat.shape)
+            assert csc.type.dtype == 'float32'
+            rval = structured_dot(csc, sym_mat)
+            assert rval.type.dtype == 'float32'
+            return rval
+
+        utt.verify_grad(buildgraphCSC, 
+                    [spmat.data, mat])
+
+    def test_structureddot_csr_grad(self):
+
+        #shortcut: testing csc in float32, testing csr in float64
+
+        # allocate a random sparse matrix
+        spmat = sp.csr_matrix(random_lil((4,3), 'float64', 3))
+
+        mat = numpy.asarray(numpy.random.randn(3,2), 'float64')
+
+        def buildgraph(spdata,sym_mat):
+            csr = CSR(spdata, spmat.indices[:spmat.size],
+                    spmat.indptr, spmat.shape)
+            assert csr.type.dtype == 'float64'
+            rval = structured_dot(csr, sym_mat)
+            assert rval.type.dtype == 'float64'
+            return rval
+
+        utt.verify_grad(buildgraph, 
+                    [spmat.data, mat])
+
+    def test_upcast(self):

-    def test_structuredot(self):
-        bsize = 2
        typenames = 'float32', 'int64', 'int8', 'int32', 'int16', 'float64', 'complex64', 'complex128'
-       
        for dense_dtype in typenames:
            for sparse_dtype in typenames:
-                #print >> sys.stderr, dense_dtype, sparse_dtype
-                # iterate for a few different random graph patterns
-                for i in range(10):
-                    spmat = sp.csc_matrix((4,6), dtype=sparse_dtype)
-                    for k in range(5):
-                        # set non-zeros in random locations (row x, col y)
-                        x = numpy.floor(numpy.random.rand()*spmat.shape[0])
-                        y = numpy.floor(numpy.random.rand()*spmat.shape[1])
-                        spmat[x,y] = numpy.random.rand()*10
-                    spmat = sp.csc_matrix(spmat)
-               
-                    kerns = tensor.Tensor(broadcastable=[False],
-                            dtype=sparse_dtype)('kerns')
-                    images = tensor.Tensor(broadcastable=[False, False],
-                            dtype=dense_dtype)('images')
-
-                    output_dtype = theano.scalar.upcast(sparse_dtype, dense_dtype)
-                    ##
-                    # Test compressed-sparse column matrices ###
-                    ##
-
-                    # build symbolic theano graph
-                    def buildgraphCSC(kerns,images):
-                        csc = CSC(kerns, spmat.indices[:spmat.size],
-                                spmat.indptr, spmat.shape)
-                        assert csc.type.dtype == sparse_dtype
-                        rval = structured_dot(csc, images.T)
-                        assert rval.type.dtype == output_dtype
-                        return rval
-
-                    out = buildgraphCSC(kerns,images)
-                    f = theano.function([kerns,images], out)
-
-                    # compute theano outputs
-                    kernvals = spmat.data[:spmat.size]
-                    imvals = 1.0 + 1.0 * numpy.array(
-                            numpy.arange(bsize*spmat.shape[1]).\
-                            reshape(bsize,spmat.shape[1]), dtype=dense_dtype)
-                    #print('dense_dtype=%s' % dense_dtype)
-                    #print('sparse_dtype=%s' % sparse_dtype)
-                    #print('i=%s' % i)
-                    print 'kerntype', str(kernvals.dtype), kernvals.dtype.num
-                    outvals = f(kernvals,imvals)
-                    print 'YAY'
-                    print spmat.todense()
-                    print imvals.T
-                    print "OUT1", outvals
-                    # compare to scipy
-                    c = spmat * (imvals.T)
-                    assert _is_dense(c)
-                    assert str(outvals.dtype) == output_dtype
-                    assert numpy.all(numpy.abs(outvals - 
-                        numpy.array(c, dtype=output_dtype)) < 1e-4)
-
-                    if (sparse_dtype.startswith('float') and
-                            dense_dtype.startswith('float')):
-                        utt.verify_grad(buildgraphCSC, 
-                                [kernvals, imvals])
-
-                    print 'BBB'
-
-                    ##
-                    # Test compressed-sparse row matrices ###
-                    ##
-                    spmat = spmat.tocsr()
-                    
-                    # build theano graph
-                    def buildgraphCSR(kerns,images):
-                        csr = CSR(kerns, spmat.indices[:spmat.size], spmat.indptr, spmat.shape)
-                        return structured_dot(csr, images.T)
-                    out = buildgraphCSR(kerns,images)
-                    f = theano.function([kerns,images], out)
-                    # compute theano output
-                    kernvals[:] = spmat.data[:spmat.size]
-                    #kernvals = numpy.empty(spmat.size, dtype=dense_dtype)
-                    imvals = 1.0 * numpy.arange(bsize*spmat.shape[1]).reshape(bsize,spmat.shape[1])
-                    print 'kerntype2', str(kernvals.dtype), kernvals.dtype.num
-                    outvals = f(kernvals,imvals)
-                    print 'YAYAGI'
-                    # compare to scipy
-                    c = spmat * (imvals.T)
-                    assert _is_dense(c)
-                    assert str(outvals.dtype) == output_dtype
-                    assert numpy.all(numpy.abs(outvals - 
-                                     numpy.array(c, dtype=output_dtype)) < 1e-4)
-
-                    # we could test more, but hopefully this suffices?
-                    if sparse_dtype.startswith('float') and dense_dtype.startswith('float'):
-                        utt.verify_grad( buildgraphCSR, [kernvals,imvals])
+                correct_dtype = theano.scalar.upcast(sparse_dtype, dense_dtype)
+                a = SparseType('csc', dtype=sparse_dtype)()
+                b = tensor.matrix(dtype=dense_dtype)
+                d = structured_dot(a,b)
+                assert d.type.dtype == correct_dtype
+
+                # compile and run a function
+
+                f = theano.function([a,b],d)
+
+                M,N,K,nnz = (4,3,5,3)
+                spmat = sp.csc_matrix(random_lil((M,N), sparse_dtype, nnz))
+                # The following madness is necessary to work around
+                # an intc vs. int32 bug:
+                # the lil matrix makes an intc on my computer when
+                # sparse_dtype is int32.
+                spmat.dtype = numpy.dtype(sparse_dtype)
+                mat = numpy.asarray(numpy.random.randn(N,K)*9, dtype=dense_dtype)
+                print 'DTYPES', sparse_dtype,dense_dtype
+                print 'sym types', a.type, b.type
+                print 'dtype strings', spmat.dtype, mat.dtype
+                print 'numpy dtype num', mat.dtype.num
+                print 'scipy dtype num', spmat.data.dtype.num
+                theano_result = f(spmat, mat)
+                scipy_result = spmat * mat
+                assert theano_result.shape == scipy_result.shape
+                assert theano_result.dtype == scipy_result.dtype
+                assert numpy.allclose(theano_result, scipy_result)
+

    def test_opt_unpack(self):
        kerns = tensor.Tensor(dtype='int64', broadcastable=[False])('kerns')
-        spmat = sp.csc_matrix((4,6), dtype='int64')
+        spmat = sp.lil_matrix((4,6), dtype='int64')
        for i in range(5):
            # set non-zeros in random locations (row x, col y)
            x = numpy.floor(numpy.random.rand()*spmat.shape[0])
@@ -292,5 +284,94 @@ class test_structureddot(unittest.TestCase):
        outvals = f(kernvals,imvals)
        print outvals

+    def test_csc_correct_output_faster_than_scipy(self):
+        sparse_dtype = 'float64'
+        dense_dtype = 'float64'
+
+        a = SparseType('csc', dtype=sparse_dtype)()
+        b = tensor.matrix(dtype=dense_dtype)
+        d = theano.dot(a,b)
+        f = theano.function([a,b], d, mode='FAST_RUN')
+
+        # Technically we could be using DEBUG_MODE to verify internal problems.
+        # In fact, if this test fails for correctness, then it would be good to use DEBUG_MODE
+        # to figure out where things go wrong.
+        # However, comparing FAST_RUN with scipy is a quick way of ensuring all's well that
+        # ends well, and also lets us ensure that our speed optimizations are working.
+
+        print f.maker.mode
+
+        #print f.maker.env.toposort()
+
+        for M,N,K,nnz in [(4,3,2,3), 
+                (40,30,20,3),
+                (40,30,20,30),
+                (400,3000,200,6000),
+                ]:
+            spmat = sp.csc_matrix(random_lil((M,N), sparse_dtype, nnz))
+            mat = numpy.asarray(numpy.random.randn(N,K), dense_dtype)
+            t0 = time.time()
+            theano_result = f(spmat, mat)
+            t1 = time.time()
+            scipy_result = spmat * mat
+            t2 = time.time()
+
+            theano_time = t1-t0
+            scipy_time = t2-t1
+            #print theano_result
+            #print scipy_result
+            print 'theano took', theano_time,
+            print 'scipy took', scipy_time
+
+            # fail if Theano is slower than scipy by more than a certain amount
+            overhead_tol = 0.003 # seconds overall
+            overhead_rtol = 1.2 # times as long
+            self.failUnless(numpy.allclose(theano_result, scipy_result))
+            self.failIf(theano_time > overhead_rtol*scipy_time + overhead_tol)
+
+    def test_csr_correct_output_faster_than_scipy(self):
+
+        # In contrast with the *_grad tests above, here csr is in float32 and csc in float64.
+
+        sparse_dtype = 'float32'
+        dense_dtype = 'float32'
+
+        a = SparseType('csr', dtype=sparse_dtype)()
+        b = tensor.matrix(dtype=dense_dtype)
+        d = theano.dot(a,b)
+        f = theano.function([a,b], d, mode='FAST_RUN')
+
+        # Technically we could be using DEBUG_MODE to verify internal problems.
+        # In fact, if this test fails for correctness, then it would be good to use DEBUG_MODE
+        # to figure out where things go wrong.
+        # However, comparing FAST_RUN with scipy is a quick way of ensuring all's well that
+        # ends well, and also lets us ensure that our speed optimizations are working.
+
+        print f.maker.env.toposort()
+
+        for M,N,K,nnz in [(4,3,2,3), 
+                (40,30,20,3),
+                (40,30,20,30),
+                (400,3000,200,6000),
+                ]:
+            spmat = sp.csr_matrix(random_lil((M,N), sparse_dtype, nnz))
+            mat = numpy.asarray(numpy.random.randn(N,K), dense_dtype)
+            t0 = time.time()
+            theano_result = f(spmat, mat)
+            t1 = time.time()
+            scipy_result = spmat * mat
+            t2 = time.time()
+
+            theano_time = t1-t0
+            scipy_time = t2-t1
+            #print theano_result
+            #print scipy_result
+            print 'theano took', theano_time,
+            print 'scipy took', scipy_time
+            overhead_tol = 0.002 # seconds
+            overhead_rtol = 1.1 # times as long
+            self.failUnless(numpy.allclose(theano_result, scipy_result))
+            self.failIf(theano_time > overhead_rtol*scipy_time + overhead_tol)
+
 if __name__ == '__main__':
    unittest.main()
--- a/theano/tensor/basic.py
+++ b/theano/tensor/basic.py
@@ -237,7 +237,7 @@ def constant_or_value(x, rtype, name=None, ndim=None, dtype=None):
                x_shape = None
            return rtype(
                    TensorType(dtype = x_.dtype, broadcastable = bcastable, shape=x_shape),
-                    x_, name=name)
+                    x_.copy(), name=name)
        else:
            # leave the shape out of the type
            return rtype(TensorType(dtype = x_.dtype, broadcastable = bcastable), x_, name=name)