added new ops in sanbox

2227af02 · Yann N. Dauphin · adbc78dd · 2227af02
--- a/theano/sparse/sandbox/sp2.py
+++ b/theano/sparse/sandbox/sp2.py
+from theano.sparse.basic import * # To facilitate later merge into sparse module
+from theano.sparse.basic import _is_sparse, _is_sparse_variable, \
+    _is_dense_variable, _is_sparse, _is_dense, _kmap_eq, _kmap_hash
+class Cast(gof.op.Op):
+    def __init__(self, out_type):
+        self.out_type = out_type
+    def __eq__(self, other):
+        return (type(self) == type(other)) and self.out_type == other.out_type
+    def __hash__(self):
+        return hash(type(self)) ^ hash(self.out_type)
+    def make_node(self, x):
+        x = as_sparse_variable(x)
+        return gof.Apply(self, [x],
+            [SparseType(dtype=self.out_type, format=x.format).make_variable()])
+    def perform(self, node, (x, ), (out, )):
+        assert _is_sparse(x)
+        out[0] = x
+        out[0].data = numpy.asarray(out[0].data, dtype=self.out_type)
+fcast = Cast('float32')
+dcast = Cast('float64')
+class Poisson(gof.op.Op):
+    def __eq__(self, other):
+        return (type(self) == type(other))
+    def __hash__(self):
+        return hash(type(self))
+    def make_node(self, x):
+        x = as_sparse_variable(x)
+        return gof.Apply(self, [x], [x.type()])
+    def perform(self, node, (x, ), (out, )):
+        assert _is_sparse(x)
+        out[0] = x.copy()
+        out[0].data = numpy.asarray(numpy.random.poisson(out[0].data), dtype=x.dtype)
+        out[0].eliminate_zeros()
+poisson = Poisson()
+class Multinomial(gof.op.Op):
+    def __eq__(self, other):
+        return (type(self) == type(other))
+    def __hash__(self):
+        return hash(type(self))
+    def make_node(self, n, p):
+        n = tensor.as_tensor_variable(n)
+        p = as_sparse_variable(p)
+        return gof.Apply(self, [n, p], [p.type()])
+    def perform(self, node, (n, p), (out, )):
+        assert _is_sparse(p)
+        if p.format != 'csr':
+            raise NotImplemented()
+        out[0] = p.copy()
+        for i in xrange(p.shape[0]):
+            k, l = p.indptr[i], p.indptr[i+1]
+            out[0].data[k:l] = numpy.random.multinomial(n[i], p.data[k:l])
+multinomial = Multinomial()
+class EliminateZeros(gof.op.Op):
+    def __eq__(self, other):
+        return (type(self) == type(other))
+    def __hash__(self):
+        return hash(type(self))
+    def make_node(self, x):
+        x = as_sparse_variable(x)
+        return gof.Apply(self, [x], [x.type()])
+    def perform(self, node, (x, ), (out, )):
+        assert _is_sparse(x)
+        out[0] = x.copy()
+        out[0].eliminate_zeros()
+eliminate_zeros = EliminateZeros()
+class Sum(gof.op.Op):
+    def __eq__(self, other):
+        return (type(self) == type(other))
+    def __hash__(self):
+        return hash(type(self))
+    def make_node(self, x, a):
+        x = as_sparse_variable(x)
+        a = tensor.as_tensor_variable(a)
+        return gof.Apply(self, [x, a], [tensor.TensorType(dtype = x.type.dtype,
+                        broadcastable = (False,)).make_variable()])
+    def perform(self, node, (x, a), (out, )):
+        assert _is_sparse(x)
+        out[0] = numpy.asarray(x.sum(a), dtype=x.dtype).flatten()
+sum = Sum()
+class Binomial(gof.op.Op):
+    def __init__(self, format, dtype):
+        self.format = format
+        self.dtype = dtype
+    def __eq__(self, other):
+        return (type(self) == type(other)) and self.format == other.format and \
+            self.dtype == other.dtype
+    def __hash__(self):
+        return hash(type(self)) ^ hash(self.format) ^ hash(self.dtype)
+    def make_node(self, n, p, shape):
+        n = tensor.as_tensor_variable(n)
+        p = tensor.as_tensor_variable(p)
+        shape = tensor.as_tensor_variable(shape)
+        return gof.Apply(self, [n, p, shape], [SparseType(dtype = self.dtype,
+                                 format = self.format).make_variable()])
+    def perform(self, node, (n, p, shape, ), (out, )):
+        N = n * p * shape[0] * shape[1]
+        data = numpy.ones(N, dtype=self.dtype)
+        row = numpy.random.randint(0, shape[0], N)
+        col = numpy.random.randint(0, shape[1], N)
+        res = scipy.sparse.coo_matrix((data, (row, col)), shape=shape)
+        out[0] = getattr(res, 'to' + self.format)()
+        out[0].data = numpy.ones_like(out[0].data)
+csr_fbinomial = Binomial('csr', 'float32')
+csc_fbinomial = Binomial('csc', 'float32')
+csr_dbinomial = Binomial('csr', 'float64')
+csc_dbinomial = Binomial('csc', 'float64')
+def structured_sigmoid(x):
+    """
+    Element-wise sigmoid function only to the non-zero elements.
+    """
+    x = as_sparse_variable(x)
+    x_data, x_ind, x_ptr, x_shape = csm_properties(x)
+    x_data = tensor.nnet.sigmoid(x_data)
+    return CSR(x_data, x_ind, x_ptr, x_shape)
+def structured_exp(x):
+    """
+    Element-wise exponential function to the non-zero elements.
+    """
+    x = as_sparse_variable(x)
+    x_data, x_ind, x_ptr, x_shape = csm_properties(x)
+    x_data = tensor.exp(x_data)
+    return CSR(x_data, x_ind, x_ptr, x_shape)
+def structured_pow(x, y):
+    """
+    Element-wise power function only to non-zero elements.
+    """
+    x = as_sparse_variable(x)
+    y = tensor.as_tensor_variable(y)
+    x_data, x_ind, x_ptr, x_shape = csm_properties(x)
+    x_data = tensor.pow(x_data, y)
+    return CSR(x_data, x_ind, x_ptr, x_shape)
+def structured_minimum(x, y):
+    """
+    Element-wise minimum function only to non-zero elements.
+    """
+    x = as_sparse_variable(x)
+    y = tensor.as_tensor_variable(y)
+    x_data, x_ind, x_ptr, x_shape = csm_properties(x)
+    x_data = tensor.minimum(x_data, y)
+    return CSR(x_data, x_ind, x_ptr, x_shape)
+class StructuredAddSV(gof.op.Op):
+    '''Structured addition of a sparse matrix and a dense vector.
+    The elements of the vector are are only added to the corresponding
+    non-zero elements. Therefore, this operation outputs another sparse
+    matrix.'''
+    def __eq__(self, other):
+        return (type(self) == type(other))
+    def __hash__(self):
+        return hash(type(self))
+    def make_node(self, x, y):
+        x = as_sparse_variable(x)
+        y = tensor.as_tensor_variable(y)
+        assert y.type.ndim == 1
+        if x.type.dtype != y.type.dtype:
+            raise NotImplementedError()
+        return gof.Apply(self,
+                         [x, y],
+                         [SparseType(dtype = x.type.dtype,
+                                 format = x.type.format).make_variable()])
+    def perform(self, node, (x, y), (out, )):
+        assert _is_sparse(x) and not _is_sparse(y)
+        assert x.shape[1] == y.shape[0]
+        out[0] = x.__class__(x + (x.toarray() != 0) * y)
+    def grad(self, (x, y), (gz,)):
+        assert _is_sparse_variable(x) and _is_sparse_variable(y)
+        assert _is_sparse_variable(gz)
+        return gz, gz
+structured_add_s_v = StructuredAddSV()
+class StrucutedAddSVCSR(gof.Op):
+    def __eq__(self, other):
+        return (type(self) == type(other))
+    def __hash__(self):
+        return hash(type(self))
+    def make_node(self, a_data, a_indices, a_indptr, b):
+        assert b.type.ndim == 1
+        return gof.Apply(self, [a_data, a_indices, a_indptr, b],
+                               [tensor.tensor(b.dtype, (False,))])
+    def c_code(self, node, name, (_data, _indices, _indptr, _b,), (_zout, ), sub):
+        if node.inputs[0].type.dtype in ('complex64', 'complex128'):
+            raise NotImplementedError('Complex types are not supported for a')
+        if node.inputs[3].type.dtype in ('complex64', 'complex128'):
+            raise NotImplementedError('Complex types are not supported for b')
+        return """
+        if (%(_b)s->nd != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(b) != 1"); %(fail)s;}
+        if (%(_data)s->nd != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(data) != 1"); %(fail)s;}
+        if (%(_indices)s->nd != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(indices) != 1"); %(fail)s;}
+        if (%(_indptr)s->nd != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(indptr) != 1"); %(fail)s;}
+        if( %(_indices)s->descr->type_num != PyArray_INT32) {
+        PyErr_SetString(PyExc_NotImplementedError, "C"); %(fail)s;}
+        if( %(_indptr)s->descr->type_num != PyArray_INT32)
+        {PyErr_SetString(PyExc_NotImplementedError, "D"); %(fail)s;}
+        if (!%(_zout)s)
+        {
+            %(_zout)s = (PyArrayObject*) PyArray_SimpleNew(1, %(_indices)s->dimensions, %(_b)s->descr->type_num);
+        }
+        if (%(_zout)s->dimensions[0] != %(_indices)s->dimensions[0])
+        {
+            PyErr_SetString(PyExc_NotImplementedError, "somehow _zout got the wrong size.. and I don't know how to resize it.");
+            %(fail)s;
+        }
+        {   //makes it compile even though labels jump over variable definitions.
+            const npy_intp nnz = %(_indices)s->dimensions[0];
+            const npy_intp N =  %(_indptr)s->dimensions[0]-1; //TODO: error checking with this
+            const dtype_%(_data)s * const __restrict__ data = (dtype_%(_data)s*)%(_data)s->data;
+            const npy_int32 * const __restrict__ indptr = (npy_int32 *)%(_indptr)s->data;
+            const npy_int32 * const __restrict__ indices = (npy_int32 *)%(_indices)s->data;
+            const dtype_%(_b)s* __restrict__ Db = (dtype_%(_b)s*)%(_b)s->data;
+            dtype_%(_zout)s * const __restrict__ zout = (dtype_%(_zout)s*)%(_zout)s->data;
+            const npy_intp Sb = %(_b)s->strides[0] / %(_b)s->descr->elsize;
+            // loop over columns
+            for (npy_int32 j = 0; j < N; ++j)
+            {
+                // for each non-null value in the sparse column
+                for (npy_int32 i_idx = indptr[j]; i_idx < indptr[j+1]; ++i_idx)
+                {
+                    // extract row index of non-null value
+                    npy_int32 i = indices[i_idx];
+                    // write resulting gradient to sparse output
+                    zout[i_idx] = data[i_idx] + Db[i * Sb];
+                }
+            }
+        }
+        """% dict(locals(), **sub)
+structured_add_s_v_csr = StrucutedAddSVCSR()
+@gof.local_optimizer([structured_add_s_v])
+def local_structured_add_s_v(node):
+    if node.op == structured_add_s_v:
+        x, y = node.inputs
+        x_is_sparse_variable = _is_sparse_variable(x)
+        y_is_sparse_variable = _is_sparse_variable(y)
+        if x_is_sparse_variable:
+          svar = x
+          dvar = y
+        else:
+          svar = y
+          dvar = x
+        if dvar.type.ndim != 1:
+            return False
+        elif svar.type.format == 'csr':
+          CSx = CSR
+          structured_add_s_v_csx = structured_add_s_v_csr
+        else:
+            raise NotImplemented()
+        s_val, s_ind, s_ptr, s_shape = csm_properties(svar)
+        c_data = structured_add_s_v_csx(s_val, s_ind, s_ptr, dvar)
+        return [CSx(c_data, s_ind, s_ptr, s_shape)]
+    return False
+register_specialize(local_structured_add_s_v)
+class SamplingDot(gof.op.Op):
+    """
+    Operand for calculating the dot product DOT(X, Y) = Z when you only want to calculate
+    a subset of Z. It is equivalent to P o (X . Y) where o is the element-wise product, X and Y operands of
+    the dot product and P is a matrix that contains 1 when the corresponding element of Z should be calculated
+    and 0 when it shouldn't. Note that SamplingDot has a different interface than DOT because SamplingDot
+    requires X to be a MxK matrix while Y is a NxK matrix instead of the usual KxN matrix.
+    It will work if the pattern is not binary value, but if the pattern doesn't have a high sparsity proportion
+    it will be slower then a more optimized dot followed by a normal elemwise multiplication.
+    """
+    def __eq__(self, other):
+        return type(self) == type(other)
+    def __hash__(self):
+        return hash(type(self))
+    def __str__(self):
+        return 'SamplingDot'
+    def make_node(self, x, y, p):
+        x = tensor.as_tensor_variable(x)
+        y = tensor.as_tensor_variable(y)
+        if not _is_sparse_variable(p):
+            raise TypeError(p)
+        dtype_out = scalar.upcast(x.type.dtype, y.type.dtype, p.type.dtype)
+        return gof.Apply(self, [x, y, p], [p.type()])
+    def perform(self, node, (x, y, p), (out,)):
+        if _is_sparse_variable(x):
+            raise TypeError(x)
+        if _is_sparse_variable(y):
+            raise TypeError(y)
+        if not _is_sparse(p):
+            raise TypeError(p)
+        rval = p.__class__(p.multiply(numpy.dot(x, y.T)))
+        out[0] = rval
+    def grad(self, (x, y, p), (gz,)):
+        rval = [
+            dot(gz, y),
+            dot(gz.T, x),
+            None
+        ]
+        return rval
+sampling_dot = SamplingDot()
+class SamplingDotCsr(gof.Op):
+    """
+    Optimized SamplingDot when the pattern P is a CSR matrix.
+    If we have the input of mixed dtype, we insert cast elemwise in the graph
+    to be able to call blas function as they don't allow mixed dtype.
+    """
+    def __eq__(self, other):
+        return type(self) == type(other)
+    def __hash__(self):
+        return hash(type(self))
+    def __str__(self):
+        return 'SamplingDot{Csr}'
+    def make_node(self, x, y, p_data, p_ind, p_ptr, p_ncols):
+        x = tensor.as_tensor_variable(x)
+        y = tensor.as_tensor_variable(y)
+        p_data = tensor.as_tensor_variable(p_data)
+        p_ind = tensor.as_tensor_variable(p_ind)
+        p_ptr = tensor.as_tensor_variable(p_ptr)
+        p_ncols = tensor.as_tensor_variable(p_ncols)
+        assert p_ncols.dtype == 'int32'
+        dtype_out = scalar.upcast(x.type.dtype, y.type.dtype, p_data.type.dtype)
+        dot_out = scalar.upcast(x.type.dtype, y.type.dtype)
+        # We call blas ?dot function that take only param of the same type
+        x = tensor.cast(x, dot_out)
+        y = tensor.cast(y, dot_out)
+        return gof.Apply(self, [x, y, p_data, p_ind, p_ptr, p_ncols], [
+            tensor.tensor(dtype=dtype_out, broadcastable=(False,)),
+            tensor.tensor(dtype=p_ind.type.dtype, broadcastable=(False,)),
+            tensor.tensor(dtype=p_ptr.type.dtype, broadcastable=(False,))
+        ])
+    def c_support_code(self):
+        return blas.blas_header_text()
+    def c_libraries(self):
+        import pdb; pdb.set_trace()
+        return blas.ldflags()
+    def c_compile_args(self):
+        return blas.ldflags(libs=False, flags=True)
+    def c_lib_dirs(self):
+        return blas.ldflags(libs=False, libs_dir=True)
+    def c_header_dirs(self):
+        return blas.ldflags(libs=False, include_dir=True)
+    def c_code(self, node, name, (x, y, p_data, p_ind, p_ptr, p_ncols), (z_data, z_ind, z_ptr), sub):
+        if node.inputs[0].type.dtype in ('complex64', 'complex128'):
+            raise NotImplementedError('Complex types are not supported for x')
+        if node.inputs[1].type.dtype in ('complex64', 'complex128'):
+            raise NotImplementedError('Complex types are not supported for y')
+        if node.inputs[2].type.dtype in ('complex64', 'complex128'):
+            raise NotImplementedError('Complex types are not supported for pattern')
+        dot_out = scalar.upcast(node.inputs[0].type.dtype, node.inputs[0].type.dtype)
+        if dot_out == "float32":
+            conv_type = "float"
+            cdot = "sdot_sub_"
+        else:
+            conv_type = "double"
+            cdot = "ddot_sub_"
+        typenum_x = node.inputs[0].type.dtype_specs()[-1] # retrieve dtype number
+        typenum_y = node.inputs[1].type.dtype_specs()[-1] # retrieve dtype number
+        typenum_p = node.inputs[2].type.dtype_specs()[-1] # retrieve dtype number
+        typenum_zd = tensor.TensorType(node.outputs[0].dtype, []).dtype_specs()[-1] # retrieve dtype number
+        typenum_zi = tensor.TensorType(node.outputs[1].dtype, []).dtype_specs()[-1] # retrieve dtype number
+        typenum_zp = tensor.TensorType(node.outputs[2].dtype, []).dtype_specs()[-1] # retrieve dtype number
+        rval = """
+        if (%(x)s->nd != 2) {PyErr_SetString(PyExc_NotImplementedError, "rank(x) != 2"); %(fail)s;}
+        if (%(y)s->nd != 2) {PyErr_SetString(PyExc_NotImplementedError, "rank(y) != 2"); %(fail)s;}
+        if (%(x)s->descr->type_num != %(typenum_x)s) {
+        PyErr_SetString(PyExc_NotImplementedError, "Invalid type for x"); %(fail)s;}
+        if (%(y)s->descr->type_num != %(typenum_y)s) {
+        PyErr_SetString(PyExc_NotImplementedError, "Invalid type for y"); %(fail)s;}
+        if (%(p_data)s->descr->type_num != %(typenum_p)s) {
+        PyErr_SetString(PyExc_NotImplementedError, "Invalid type for pattern"); %(fail)s;}
+        if (%(x)s->dimensions[1] != %(y)s->dimensions[1])
+        {PyErr_SetString(PyExc_NotImplementedError, "x's number of columns doesn't match y's rows! Note: sampling_dot is different from dot because y is assumed to be transposed."); %(fail)s;}
+        if (%(y)s->dimensions[0] != ((npy_int32 *)%(p_ncols)s->data)[0] || %(x)s->dimensions[0] != (%(p_ptr)s->dimensions[0] - 1))
+        {PyErr_SetString(PyExc_NotImplementedError, "The dimension of the pattern and the output must match"); %(fail)s;}
+        // Allocate output
+        if (!%(z_data)s
+            || (%(z_data)s->dimensions[0] != %(p_data)s->dimensions[0])
+            || (%(z_data)s->descr->type_num != %(typenum_zd)s)) {
+            {Py_XDECREF(%(z_data)s);}
+            npy_intp dims[] = {0};
+            dims[0] = %(p_data)s->dimensions[0];
+            %(z_data)s = (PyArrayObject*) PyArray_SimpleNew(1, dims, %(typenum_zd)s);
+        }
+        if (!%(z_ind)s
+            || (%(z_ind)s->dimensions[0] != %(p_ind)s->dimensions[0])
+            || (%(z_ind)s->descr->type_num != %(typenum_zi)s)) {
+            {Py_XDECREF(%(z_ind)s);}
+            npy_intp dims[] = {0};
+            dims[0] = %(p_ind)s->dimensions[0];
+            %(z_ind)s = (PyArrayObject*) PyArray_SimpleNew(1, dims, %(typenum_zi)s);
+        }
+        if (!%(z_ptr)s
+            || (%(z_ptr)s->dimensions[0] != %(p_ptr)s->dimensions[0])
+            || (%(z_ptr)s->descr->type_num != %(typenum_zp)s)) {
+            {Py_XDECREF(%(z_ptr)s);}
+            npy_intp dims[] = {0};
+            dims[0] = %(p_ptr)s->dimensions[0];
+            %(z_ptr)s = (PyArrayObject*) PyArray_SimpleNew(1, dims, %(typenum_zp)s);
+        }
+        {
+            // Product of MxK and NxK, output MxN
+            npy_intp M = %(x)s->dimensions[0];
+            npy_intp N = %(y)s->dimensions[0];
+            npy_intp K = %(y)s->dimensions[1];
+            // pointers to access actual data in the arrays passed as params.
+            const dtype_%(x)s* __restrict__ Dx = (dtype_%(x)s*)%(x)s->data;
+            const dtype_%(y)s* __restrict__ Dy = (dtype_%(y)s*)%(y)s->data;
+            const dtype_%(p_data)s* __restrict__ Dpd = (dtype_%(p_data)s*)%(p_data)s->data;
+            const dtype_%(p_ind)s* __restrict__ Dpi = (dtype_%(p_ind)s*)%(p_ind)s->data;
+            const dtype_%(p_ptr)s* __restrict__ Dpp = (dtype_%(p_ptr)s*)%(p_ptr)s->data;
+            dtype_%(z_data)s* __restrict__ Dzd = (dtype_%(z_data)s*)%(z_data)s->data;
+            dtype_%(z_ind)s* __restrict__ Dzi = (dtype_%(z_ind)s*)%(z_ind)s->data;
+            dtype_%(z_ptr)s* __restrict__ Dzp = (dtype_%(z_ptr)s*)%(z_ptr)s->data;
+            const npy_intp Sdx = %(x)s->strides[1]/%(x)s->descr->elsize;
+            const npy_intp Sdy = %(y)s->strides[1]/%(y)s->descr->elsize;
+            const npy_intp Sdpd = %(p_data)s->strides[0] / %(p_data)s->descr->elsize;
+            const npy_intp Sdpi = %(p_ind)s->strides[0] / %(p_ind)s->descr->elsize;
+            const npy_intp Sdpp = %(p_ptr)s->strides[0] / %(p_ptr)s->descr->elsize;
+            const npy_intp Sdzd = %(z_data)s->strides[0] / %(z_data)s->descr->elsize;
+            const npy_intp Sdzi = %(z_ind)s->strides[0] / %(z_ind)s->descr->elsize;
+            const npy_intp Sdzp = %(z_ptr)s->strides[0] / %(z_ptr)s->descr->elsize;
+            memcpy(Dzi, Dpi, %(p_ind)s->dimensions[0]*sizeof(dtype_%(p_ind)s));
+            memcpy(Dzp, Dpp, %(p_ptr)s->dimensions[0]*sizeof(dtype_%(p_ptr)s));
+            for (npy_int32 m = 0; m < M; ++m) {
+                for (npy_int32 n_idx = Dpp[m * Sdpp]; n_idx < Dpp[(m+1)*Sdpp]; ++n_idx) {
+                    const npy_int32 n = Dpi[n_idx * Sdpi]; // row index of non-null value for column K
+                    const dtype_%(x)s* x_row = (dtype_%(x)s*)(%(x)s->data + %(x)s->strides[0] * m);
+                    const dtype_%(y)s* y_col = (dtype_%(y)s*)(%(y)s->data + %(y)s->strides[0] * n);
+                    %(cdot)s((int*)&K, (const %(conv_type)s*)x_row, (int*)&Sdx, (const %(conv_type)s*)y_col, (int*)&Sdy, &Dzd[n_idx * Sdzd]);
+                    Dzd[n_idx * Sdzd] *= Dpd[n_idx * Sdpd];
+                }
+            }            
+        }
+        """% dict(locals(), **sub)
+        return rval
+sampling_dot_csr = SamplingDotCsr()
+# register a specialization to replace SamplingDot -> SamplingDotCsr
+@gof.local_optimizer([sampling_dot])
+def local_sampling_dot_csr(node):
+    if node.op == sampling_dot:
+        x, y, p = node.inputs
+        if p.type.format == 'csr':
+            p_data, p_ind, p_ptr, p_shape = csm_properties(p)
+            z_data, z_ind, z_ptr = sampling_dot_csr(x, y, p_data,
+                p_ind, p_ptr, p_shape[1])
+            return [CSR(z_data, z_ind, z_ptr, p_shape)]
+    return False
+register_specialize(local_sampling_dot_csr, name='local_sampling_dot_csr')