removed sparse CSC1-related optimization. it doesnt speed things up

1c2e05db · bergstra@ip05.m · e7d32ce6 · 1c2e05db
--- a/theano/sparse/basic.py
+++ b/theano/sparse/basic.py
@@ -906,158 +906,6 @@ class StructuredDotCSC(gof.Op):
        return rval
 sd_csc = StructuredDotCSC()

-class StructuredDotCSC1(gof.Op):
-    def __init__(self, avaltype):
-        self.avaltype = avaltype
-
-    def __eq__(self, other):
-        return type(self) == type(other) and self.avaltype == other.avaltype
-
-    def __hash__(self):
-        return hash(type(self)) ^ hash(self.avaltype)
-
-    def make_node(self, a_ind, a_ptr, a_nrows, b):
-        dtype_out = scalar.upcast(self.avaltype.dtype, b.type.dtype)
-        r = gof.Apply(self, [a_ind, a_ptr, a_nrows, b], 
-                [tensor.tensor(dtype_out, (False, False))])
-        return r
-
-    def perform(self, node, (a_ind, a_ptr, a_nrows, b), (out,)):
-        ones = numpy.asarray(numpy.ones_like(a_ind), dtype=self.avaltype.dtype)
-        a = sparse.csc_matrix((ones, a_ind, a_ptr), 
-                (a_nrows, b.shape[0]),
-                copy = False)
-        #out[0] = a.dot(b)
-        out[0] = a * b
-        assert _is_dense(out[0]) # scipy 0.7 automatically converts to dense
-
-    def c_code(self, node, name, (a_ind, a_ptr, a_nrows, b), (z,), sub):
-        """
-        C-implementation of the dot product of the sparse matrix A and matrix B.
-        @param a_ind: column indices of the non-null values (.indices of a scipy.csc_matrix)
-        @param a_ptr: a_ptr indicates col indices for col. i are in the range a_ptr[i]:a_ptr[i+1]
-        @param n_rows: number of rows of sparse matrix
-        @param b: dense matrix to perform dot product with, as in dot(a,b)
-        @param z: return value
-        @param sub: TODO, not too sure, something to do with weave probably
-        """
-
-        if node.inputs[3].type.dtype in ('complex64', 'complex128'):
-            raise NotImplementedError('Complex types are not supported for b')
-
-        typenum_z = node.outputs[0].type.dtype_specs()[-1] # retrieve dtype number
-        typenum_b = node.inputs[3].type.dtype_specs()[-1] # retrieve dtype number
-
-        rval = """
-
-        if (%(a_ind)s->nd != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(a_ind) != 1"); %(fail)s;}
-        if (%(a_ptr)s->nd != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(a_ptr) != 1"); %(fail)s;}
-        if (%(a_nrows)s->nd != 0) {PyErr_SetString(PyExc_NotImplementedError, "rank(nrows) != 0"); %(fail)s;}
-        if (%(b)s->nd != 2) {PyErr_SetString(PyExc_NotImplementedError, "rank(b) != 2"); %(fail)s;}
-
-        if (%(b)s->descr->type_num != %(typenum_b)s) {
-        PyErr_SetString(PyExc_NotImplementedError, "Invalid type for b"); %(fail)s;}
-
-        if (%(a_ind)s->descr->type_num != PyArray_INT32) {
-        PyErr_SetString(PyExc_NotImplementedError, "a_ind dtype not INT32"); %(fail)s;}
-
-        if (%(a_ptr)s->descr->type_num != PyArray_INT32)
-        {PyErr_SetString(PyExc_NotImplementedError, "a_ptr dtype not INT32"); %(fail)s;}
-
-        if (%(a_nrows)s->descr->type_num != PyArray_INT32)
-        {PyErr_SetString(PyExc_NotImplementedError, "a_nrows dtype not INT32"); %(fail)s;}
-
-        if (%(a_ptr)s->dimensions[0] != %(b)s->dimensions[0]+1)
-        {PyErr_SetString(PyExc_NotImplementedError, "a's number of columns doesn't match b's rows"); %(fail)s;}
-
-        if ((!%(z)s)
-            || (%(z)s->dimensions[0] != ((npy_int32 *)%(a_nrows)s->data)[0])
-            || (%(z)s->dimensions[1] != %(b)s->dimensions[1])
-            )
-        {
-            if (%(z)s) Py_DECREF(%(z)s);
-            npy_intp dims[] = {0,0};
-            dims[0] = ((npy_int32 *)%(a_nrows)s->data)[0];
-            dims[1] = %(b)s->dimensions[1];
-            %(z)s = (PyArrayObject*) PyArray_SimpleNew(2, dims, %(typenum_z)s);
-        }
-
-        {
-            // sparse array has size MxK, dense KxN, output MxN
-            npy_intp M = %(z)s->dimensions[0];
-            npy_intp N = %(z)s->dimensions[1];
-            npy_intp K = %(b)s->dimensions[0];
-
-            // strides tell you how many bytes to skip to go to next column/row entry
-            npy_intp Szm = %(z)s->strides[0] / %(z)s->descr->elsize;
-            npy_intp Szn = %(z)s->strides[1] / %(z)s->descr->elsize;
-            //npy_intp Sbm = %(b)s->strides[0] / %(b)s->descr->elsize;
-            npy_intp Sbn = %(b)s->strides[1] / %(b)s->descr->elsize;
-            npy_intp Sind = %(a_ind)s->strides[0] / %(a_ind)s->descr->elsize;
-            npy_intp Sptr = %(a_ptr)s->strides[0] / %(a_ptr)s->descr->elsize;
-
-            // pointers to access actual data in the arrays passed as params.
-            dtype_%(z)s*     __restrict__ Dz   = (dtype_%(z)s*)%(z)s->data;
-            const npy_int32 * __restrict__ Dind = (npy_int32*)%(a_ind)s->data;
-            const npy_int32 * __restrict__ Dptr = (npy_int32*)%(a_ptr)s->data;
-
-            //npy_intp nnz = %(a_ind)s->dimensions[0];
-
-            //clear the output array
-            for (npy_intp m = 0; m < M; ++m)
-            {
-                for (npy_intp n = 0; n < N; ++n)
-                {
-                    Dz[m*Szm + n*Szn] = 0;
-                }
-            }
-
-            //iterate over the sparse array, making the most of an entry wherever we find it.
-            //
-            // Normal matrix matrix multiply: A MxK, B KxN =>  Z = AB
-            // for m
-            //   for n
-            //     for k
-            //        z[m,n] += a[m,k] * b[k,n]
-            // Here instead: Z = 
-            // for k
-            //   for m (sparse)
-            //     for n
-            //        z[m,n] += a[m,k] * b[k,n]
-
-            // loop over inner dimension
-            for (npy_int32 k = 0; k < K; ++k)
-            {
-                // get pointer to k-th row of dense matrix
-                const dtype_%(b)s* __restrict__ bk = (dtype_%(b)s*)(%(b)s->data + %(b)s->strides[0] * k);
-                
-                // loop over sparse column indices through index pointer array 
-                // (amounts to looping over rows M of sparse matrix)
-                for (npy_int32 m_idx = Dptr[k * Sptr]; m_idx < Dptr[(k+1) * Sptr]; ++m_idx)
-                {
-                    npy_int32 m = Dind[m_idx * Sind]; // row index of non-null value for column K
-    
-                    // pointer to m-th row of the output matrix Z
-                    dtype_%(z)s* __restrict__ zm = (dtype_%(z)s*)(%(z)s->data + %(z)s->strides[0] * m);
-
-                    //RESOLVE: a.shape[0] equals z.shape[0], why is this not an equality constraint?
-                    if (m >= %(z)s->dimensions[0]) 
-                    {PyErr_SetString(PyExc_NotImplementedError, "illegal row index in a"); %(fail)s;}
-
-                    // loop over final dimension (cols of dense matrix) and perform dot product
-                    for(npy_int32 n = 0; n < N; ++n)
-                    {
-                        zm[n*Szn] += bk[n*Sbn];
-                    }
-                }
-            }
-        }
-        """% dict(locals(), **sub)
-
-        # print rval
-
-        return rval
-
 class StructuredDotCSR(gof.Op):
    def make_node(self, a_val, a_ind, a_ptr, b):
        self.dtype_out = scalar.upcast(a_val.type.dtype, b.type.dtype)
@@ -1204,16 +1052,6 @@ def local_structured_dot(node):
    return False
 register_specialize(local_structured_dot)

-@gof.local_optimizer([_structured_dot])
-def local_structureddotcsc_w_ones(node):
-    if node.op == sd_csc:
-        aval, aind, aptr, ashp, b = node.inputs
-        if isinstance(aval, gof.Constant):
-            if numpy.all(aval.data == 1):
-                return [StructuredDotCSC1(aval.type)(aind, aptr, ashp, b)]
-    return False
-register_specialize(local_structureddotcsc_w_ones)
-
 def structured_dot_grad(sparse_A, dense_B, ga):
    if sparse_A.type.format in ('csc','csr'):