提交 3f031546 authored 作者: James Bergstra's avatar James Bergstra

changes to sparse, more types supported

上级 48240dcd
......@@ -15,6 +15,7 @@ from theano.printing import Print
from .. import gof
from .. import tensor
from .. import compile
from .. import scalar
#TODO: move this decorator to the compile submodule
def register_specialize(lopt, *tags, **kwargs):
......@@ -138,7 +139,7 @@ class SparseType(gof.Type):
'csr' : sparse.csr_matrix,
'csc' : sparse.csc_matrix
}
dtype_set = set(['int', 'int32', 'int64', 'float32', 'float64'])
dtype_set = set(['int', 'int8', 'int16','int32', 'int64', 'float32', 'float64', 'complex64','complex128'])
ndim = 2
def __init__(self, format, dtype = 'float64'):
......@@ -220,10 +221,26 @@ class SparseVariable(gof.Variable, _sparse_py_operators):
dtype = property(lambda self: self.type.dtype)
format = property(lambda self: self.type.format)
class SparseConstantSignature(tuple):
def __eq__(self, other):
(a, b), (x,y) = self, other
return a == x \
and (b.dtype == y.dtype)\
and (type(b) == type(y))\
and (b.shape == y.shape)\
and (abs(b-y).sum() < 1e-6 * b.nnz)
def __hash__(self):
(a,b) = self
return hash(type(self)) ^ hash(a) ^ hash(type(b))
class SparseConstant(gof.Constant, _sparse_py_operators):
dtype = property(lambda self: self.type.dtype)
format = property(lambda self: self.type.format)
def signature(self):
assert self.data is not None
return SparseConstantSignature((self.type, self.data))
class SparseValue(gof.Value, _sparse_py_operators):
dtype = property(lambda self: self.type.dtype)
format = property(lambda self: self.type.format)
......@@ -269,8 +286,7 @@ class CSMProperties(gof.Op):
return [CSM('csc')(g_data, indices, indptr, shape)]
else:
return [CSR('csm')(g_data, indices, indptr, shape)]
def csm_properties(csm): return CSMProperties()(csm)
csm_properties = CSMProperties() #don't make this a function or it breaks some optimizations below
def csm_data(csm): return csm_properties(csm)[0]
def csm_indices(csm): return csm_properties(csm)[1]
def csm_indptr(csm): return csm_properties(csm)[2]
......@@ -322,18 +338,24 @@ class CSM(gof.Op):
"""
data = tensor.as_tensor_variable(data)
if not isinstance(indices, tensor.TensorVariable):
indices = numpy.asarray(indices, dtype='int32')
if not isinstance(indptr, tensor.TensorVariable):
indptr = numpy.asarray(indptr, dtype='int32')
if not isinstance(shape, tensor.TensorVariable):
shape = numpy.asarray(shape, dtype='int32')
indices = tensor.as_tensor_variable(indices)
indptr = tensor.as_tensor_variable(indptr)
shape = tensor.as_tensor_variable(shape)
if data.type.ndim != 1:
raise TypeError('data argument must be a vector', data.type)
if indices.type not in tensor.int_vector_types:
raise TypeError('indices must be vector of integers')
if indptr.type not in tensor.int_vector_types:
raise TypeError('indices must be vector of integers')
if shape.type not in tensor.int_vector_types:
raise TypeError('n_rows must be integer type')
if indices.type != tensor.ivector:
raise TypeError('indices must be vector of integers', indices)
if indptr.type != tensor.ivector:
raise TypeError('indices must be vector of integers', indptr)
if shape.type != tensor.ivector:
raise TypeError('n_rows must be integer type', shape)
return gof.Apply(self,
[data, indices, indptr, shape],
......@@ -342,8 +364,6 @@ class CSM(gof.Op):
def perform(self, node, (data, indices, indptr, shape), (out,)):
"""Build a csc_matrix"""
#assert len(data.flatten()) == len(indices.flatten())
# for efficiency, if remap does nothing, then do not apply it
if self.kmap is not None:
data = data[self.kmap]
......@@ -389,7 +409,6 @@ class CSMGrad(gof.op.Op):
def __hash__(self):
return 82345 ^ hash(type(self)) ^ _kmap_hash(self.kmap)
def make_node(self, data, gout_data, gout_indices):
g_data = data.type()
return gof.Apply(self, [data, gout_data, gout_indices], [g_data])
......@@ -668,11 +687,10 @@ class StructuredDot(gof.Op):
The output is presumed to be a dense matrix, and is represented by a TensorType instance.
"""
def make_node(self, a, b):
assert a.type.dtype == b.type.dtype
if type(a) is not SparseVariable and type(a) is not SparseConstant:
raise TypeError('First argument must be of type SparseVariable or SparseConstant');
return gof.Apply(self, [a,b], [tensor.tensor(a.type.dtype, (False, False))])
dtype_out = scalar.upcast(a.type.dtype, b.type.dtype)
return gof.Apply(self, [a,b], [tensor.tensor(dtype_out, (False, False))])
def perform(self, node, (a,b), (out,)):
if a.shape[1] != b.shape[0]:
......@@ -696,7 +714,7 @@ class StructuredDot(gof.Op):
else:
raise Exception("a.shape=%s, b.shape=%s, variable.shape=%s ??? I have no idea why")
## Commenting this out because variable should be a numpy.ndarray since the assert above
## Commenting this out because variable should be a numpy.ndarray since the "assert _is_dense(variable)" above
## (JB 20090109)
# out[0] = numpy.asarray(variable) #TODO: fix this really bad implementation
#
......@@ -714,6 +732,7 @@ def structured_dot(x, y):
"""
@todo: Maybe the triple-transposition formulation (when x is dense)
is slow. See if there is a direct way to do this.
(JB 20090528: Transposing tensors and sparse matrices is constant-time, inplace, and fast.)
"""
if hasattr(x, 'getnnz'): x = as_sparse_variable(x)
if hasattr(y, 'getnnz'): y = as_sparse_variable(y)
......@@ -732,9 +751,9 @@ def structured_dot(x, y):
class StructuredDotCSC(gof.Op):
def make_node(self, a_val, a_ind, a_ptr, a_nrows, b):
assert a_val.type.dtype == b.type.dtype
dtype_out = scalar.upcast(a_val.type.dtype, b.type.dtype)
r = gof.Apply(self, [a_val, a_ind, a_ptr, a_nrows, b],
[tensor.tensor(a_val.type.dtype, (False, False))])
[tensor.tensor(dtype_out, (False, False))])
return r
def perform(self, node, (a_val, a_ind, a_ptr, a_nrows, b), (out,)):
......@@ -756,15 +775,29 @@ class StructuredDotCSC(gof.Op):
@param z: return value
@param sub: TODO, not too sure, something to do with weave probably
"""
return """
if node.inputs[0].type.dtype in ('complex64', 'complex128'):
raise NotImplementedError('Complex types are not supported for a_val')
if node.inputs[4].type.dtype in ('complex64', 'complex128'):
raise NotImplementedError('Complex types are not supported for b')
typenum_z = node.outputs[0].type.dtype_specs()[-1] # retrieve dtype number
typenum_a_val = node.inputs[0].type.dtype_specs()[-1] # retrieve dtype number
typenum_b = node.inputs[4].type.dtype_specs()[-1] # retrieve dtype number
rval = """
if (%(a_val)s->nd != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(a_val) != 1"); %(fail)s;}
if (%(a_ind)s->nd != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(a_ind) != 1"); %(fail)s;}
if (%(a_ptr)s->nd != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(a_ptr) != 1"); %(fail)s;}
if (%(a_nrows)s->nd != 0) {PyErr_SetString(PyExc_NotImplementedError, "rank(nrows) != 0"); %(fail)s;}
if (%(b)s->nd != 2) {PyErr_SetString(PyExc_NotImplementedError, "rank(b) != 2"); %(fail)s;}
if (%(a_val)s->descr->type_num != PyArray_DOUBLE)
{PyErr_SetString(PyExc_NotImplementedError, "a_val dtype not NPY_DOUBLE"); %(fail)s;}
if (%(a_val)s->descr->type_num != %(typenum_a_val)s) {
PyErr_SetString(PyExc_NotImplementedError, "Invalid type for a_val"); %(fail)s;}
if (%(b)s->descr->type_num != %(typenum_b)s) {
PyErr_SetString(PyExc_NotImplementedError, "Invalid type for b"); %(fail)s;}
if (%(a_ind)s->descr->type_num != PyArray_INT32) {
PyErr_SetString(PyExc_NotImplementedError, "a_ind dtype not INT32"); %(fail)s;}
......@@ -775,9 +808,6 @@ class StructuredDotCSC(gof.Op):
if (%(a_nrows)s->descr->type_num != PyArray_INT32)
{PyErr_SetString(PyExc_NotImplementedError, "a_nrows dtype not INT32"); %(fail)s;}
if (%(b)s->descr->type_num != PyArray_DOUBLE)
{PyErr_SetString(PyExc_NotImplementedError, "b's dtype not NPY_DOUBLE"); %(fail)s;}
if (%(a_val)s->dimensions[0] != %(a_ind)s->dimensions[0])
{PyErr_SetString(PyExc_NotImplementedError, "a_val and a_ind have different lengths"); %(fail)s;}
......@@ -793,7 +823,7 @@ class StructuredDotCSC(gof.Op):
npy_intp dims[] = {0,0};
dims[0] = ((npy_int32 *)%(a_nrows)s->data)[0];
dims[1] = %(b)s->dimensions[1];
%(z)s = (PyArrayObject*) PyArray_SimpleNew(2, dims, %(b)s->descr->type_num);
%(z)s = (PyArrayObject*) PyArray_SimpleNew(2, dims, %(typenum_z)s);
}
{
......@@ -812,9 +842,8 @@ class StructuredDotCSC(gof.Op):
npy_intp Sptr = %(a_ptr)s->strides[0] / %(a_ptr)s->descr->elsize;
// pointers to access actual data in the arrays passed as params.
npy_double * __restrict__ Dz = (npy_double*)%(z)s->data;
//const npy_double * __restrict__ Db = (npy_double*)%(b)s->data;
const npy_double * __restrict__ Dval = (npy_double*)%(a_val)s->data;
dtype_%(z)s* __restrict__ Dz = (dtype_%(z)s*)%(z)s->data;
const dtype_%(a_val)s* __restrict__ Dval = (dtype_%(a_val)s*)%(a_val)s->data;
const npy_int32 * __restrict__ Dind = (npy_int32*)%(a_ind)s->data;
const npy_int32 * __restrict__ Dptr = (npy_int32*)%(a_ptr)s->data;
......@@ -846,17 +875,17 @@ class StructuredDotCSC(gof.Op):
for (npy_int32 k = 0; k < K; ++k)
{
// get pointer to k-th row of dense matrix
const npy_double * __restrict__ bk = (double *)(%(b)s->data + %(b)s->strides[0] * k);
const dtype_%(b)s* __restrict__ bk = (dtype_%(b)s*)(%(b)s->data + %(b)s->strides[0] * k);
// loop over sparse column indices through index pointer array
// (amounts to looping over rows M of sparse matrix)
for (npy_int32 m_idx = Dptr[k * Sptr]; m_idx < Dptr[(k+1) * Sptr]; ++m_idx)
{
npy_int32 m = Dind[m_idx * Sind]; // row index of non-null value for column K
const double Amk = Dval[m_idx * Sval]; // actual value at that location
const dtype_%(a_val)s Amk = Dval[m_idx * Sval]; // actual value at that location
// pointer to m-th row of the output matrix Z
npy_double * __restrict__ zm = (npy_double *)(%(z)s->data + %(z)s->strides[0] * m);
dtype_%(z)s* __restrict__ zm = (dtype_%(z)s*)(%(z)s->data + %(z)s->strides[0] * m);
//RESOLVE: a.shape[0] equals z.shape[0], why is this not an equality constraint?
if (m >= %(z)s->dimensions[0])
......@@ -871,14 +900,169 @@ class StructuredDotCSC(gof.Op):
}
}
"""% dict(locals(), **sub)
# print rval
return rval
sd_csc = StructuredDotCSC()
class StructuredDotCSC1(gof.Op):
def __init__(self, avaltype):
self.avaltype = avaltype
def __eq__(self, other):
return type(self) == type(other) and self.avaltype == other.avaltype
def __hash__(self):
return hash(type(self)) ^ hash(self.avaltype)
def make_node(self, a_ind, a_ptr, a_nrows, b):
dtype_out = scalar.upcast(self.avaltype.dtype, b.type.dtype)
r = gof.Apply(self, [a_ind, a_ptr, a_nrows, b],
[tensor.tensor(dtype_out, (False, False))])
return r
def perform(self, node, (a_ind, a_ptr, a_nrows, b), (out,)):
ones = numpy.asarray(numpy.ones_like(a_ind), dtype=self.avaltype.dtype)
a = sparse.csc_matrix((ones, a_ind, a_ptr),
(a_nrows, b.shape[0]),
copy = False)
#out[0] = a.dot(b)
out[0] = a * b
assert _is_dense(out[0]) # scipy 0.7 automatically converts to dense
def c_code(self, node, name, (a_ind, a_ptr, a_nrows, b), (z,), sub):
"""
C-implementation of the dot product of the sparse matrix A and matrix B.
@param a_ind: column indices of the non-null values (.indices of a scipy.csc_matrix)
@param a_ptr: a_ptr indicates col indices for col. i are in the range a_ptr[i]:a_ptr[i+1]
@param n_rows: number of rows of sparse matrix
@param b: dense matrix to perform dot product with, as in dot(a,b)
@param z: return value
@param sub: TODO, not too sure, something to do with weave probably
"""
if node.inputs[3].type.dtype in ('complex64', 'complex128'):
raise NotImplementedError('Complex types are not supported for b')
typenum_z = node.outputs[0].type.dtype_specs()[-1] # retrieve dtype number
typenum_b = node.inputs[3].type.dtype_specs()[-1] # retrieve dtype number
rval = """
if (%(a_ind)s->nd != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(a_ind) != 1"); %(fail)s;}
if (%(a_ptr)s->nd != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(a_ptr) != 1"); %(fail)s;}
if (%(a_nrows)s->nd != 0) {PyErr_SetString(PyExc_NotImplementedError, "rank(nrows) != 0"); %(fail)s;}
if (%(b)s->nd != 2) {PyErr_SetString(PyExc_NotImplementedError, "rank(b) != 2"); %(fail)s;}
if (%(b)s->descr->type_num != %(typenum_b)s) {
PyErr_SetString(PyExc_NotImplementedError, "Invalid type for b"); %(fail)s;}
if (%(a_ind)s->descr->type_num != PyArray_INT32) {
PyErr_SetString(PyExc_NotImplementedError, "a_ind dtype not INT32"); %(fail)s;}
if (%(a_ptr)s->descr->type_num != PyArray_INT32)
{PyErr_SetString(PyExc_NotImplementedError, "a_ptr dtype not INT32"); %(fail)s;}
if (%(a_nrows)s->descr->type_num != PyArray_INT32)
{PyErr_SetString(PyExc_NotImplementedError, "a_nrows dtype not INT32"); %(fail)s;}
if (%(a_ptr)s->dimensions[0] != %(b)s->dimensions[0]+1)
{PyErr_SetString(PyExc_NotImplementedError, "a's number of columns doesn't match b's rows"); %(fail)s;}
if ((!%(z)s)
|| (%(z)s->dimensions[0] != ((npy_int32 *)%(a_nrows)s->data)[0])
|| (%(z)s->dimensions[1] != %(b)s->dimensions[1])
)
{
if (%(z)s) Py_DECREF(%(z)s);
npy_intp dims[] = {0,0};
dims[0] = ((npy_int32 *)%(a_nrows)s->data)[0];
dims[1] = %(b)s->dimensions[1];
%(z)s = (PyArrayObject*) PyArray_SimpleNew(2, dims, %(typenum_z)s);
}
{
// sparse array has size MxK, dense KxN, output MxN
npy_intp M = %(z)s->dimensions[0];
npy_intp N = %(z)s->dimensions[1];
npy_intp K = %(b)s->dimensions[0];
// strides tell you how many bytes to skip to go to next column/row entry
npy_intp Szm = %(z)s->strides[0] / %(z)s->descr->elsize;
npy_intp Szn = %(z)s->strides[1] / %(z)s->descr->elsize;
//npy_intp Sbm = %(b)s->strides[0] / %(b)s->descr->elsize;
npy_intp Sbn = %(b)s->strides[1] / %(b)s->descr->elsize;
npy_intp Sind = %(a_ind)s->strides[0] / %(a_ind)s->descr->elsize;
npy_intp Sptr = %(a_ptr)s->strides[0] / %(a_ptr)s->descr->elsize;
// pointers to access actual data in the arrays passed as params.
dtype_%(z)s* __restrict__ Dz = (dtype_%(z)s*)%(z)s->data;
const npy_int32 * __restrict__ Dind = (npy_int32*)%(a_ind)s->data;
const npy_int32 * __restrict__ Dptr = (npy_int32*)%(a_ptr)s->data;
//npy_intp nnz = %(a_ind)s->dimensions[0];
//clear the output array
for (npy_intp m = 0; m < M; ++m)
{
for (npy_intp n = 0; n < N; ++n)
{
Dz[m*Szm + n*Szn] = 0;
}
}
//iterate over the sparse array, making the most of an entry wherever we find it.
//
// Normal matrix matrix multiply: A MxK, B KxN => Z = AB
// for m
// for n
// for k
// z[m,n] += a[m,k] * b[k,n]
// Here instead: Z =
// for k
// for m (sparse)
// for n
// z[m,n] += a[m,k] * b[k,n]
// loop over inner dimension
for (npy_int32 k = 0; k < K; ++k)
{
// get pointer to k-th row of dense matrix
const dtype_%(b)s* __restrict__ bk = (dtype_%(b)s*)(%(b)s->data + %(b)s->strides[0] * k);
// loop over sparse column indices through index pointer array
// (amounts to looping over rows M of sparse matrix)
for (npy_int32 m_idx = Dptr[k * Sptr]; m_idx < Dptr[(k+1) * Sptr]; ++m_idx)
{
npy_int32 m = Dind[m_idx * Sind]; // row index of non-null value for column K
// pointer to m-th row of the output matrix Z
dtype_%(z)s* __restrict__ zm = (dtype_%(z)s*)(%(z)s->data + %(z)s->strides[0] * m);
//RESOLVE: a.shape[0] equals z.shape[0], why is this not an equality constraint?
if (m >= %(z)s->dimensions[0])
{PyErr_SetString(PyExc_NotImplementedError, "illegal row index in a"); %(fail)s;}
// loop over final dimension (cols of dense matrix) and perform dot product
for(npy_int32 n = 0; n < N; ++n)
{
zm[n*Szn] += bk[n*Sbn];
}
}
}
}
"""% dict(locals(), **sub)
# print rval
return rval
class StructuredDotCSR(gof.Op):
def make_node(self, a_val, a_ind, a_ptr, b):
assert a_val.type.dtype == b.type.dtype
self.dtype_out = scalar.upcast(a_val.type.dtype, b.type.dtype)
r = gof.Apply(self, [a_val, a_ind, a_ptr, b],
[tensor.tensor(a_val.type.dtype, (False, False))])
[tensor.tensor(self.dtype_out, (False, False))])
return r
def perform(self, node, (a_val, a_ind, a_ptr, b), (out,)):
......@@ -900,37 +1084,37 @@ class StructuredDotCSR(gof.Op):
@param z: return value
@param sub: TODO, not too sure, something to do with weave probably
"""
typenum_z = tensor.TensorType(self.dtype_out, []).dtype_specs()[-1] # retrieve dtype number
if node.inputs[0].type.dtype in ('complex64', 'complex128'):
raise NotImplementedError('Complex types are not supported for a_val')
if node.inputs[3].type.dtype in ('complex64', 'complex128'):
raise NotImplementedError('Complex types are not supported for b')
return """
if (%(a_val)s->nd != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(a_val) != 1"); %(fail)s;}
if (%(a_ind)s->nd != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(a_ind) != 1"); %(fail)s;}
if (%(a_ptr)s->nd != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(a_ptr) != 1"); %(fail)s;}
if (%(b)s->nd != 2) {PyErr_SetString(PyExc_NotImplementedError, "rank(b) != 2"); %(fail)s;}
if (%(a_val)s->descr->type_num != PyArray_DOUBLE)
{PyErr_SetString(PyExc_NotImplementedError, "a_val dtype not NPY_DOUBLE"); %(fail)s;}
if (%(a_ind)s->descr->type_num != PyArray_INT32) {
PyErr_SetString(PyExc_NotImplementedError, "a_ind dtype not INT32"); %(fail)s;}
if (%(a_ptr)s->descr->type_num != PyArray_INT32)
{PyErr_SetString(PyExc_NotImplementedError, "a_ptr dtype not INT32"); %(fail)s;}
if (%(b)s->descr->type_num != PyArray_DOUBLE)
{PyErr_SetString(PyExc_NotImplementedError, "b's dtype not NPY_DOUBLE"); %(fail)s;}
if (%(a_val)s->dimensions[0] != %(a_ind)s->dimensions[0])
{PyErr_SetString(PyExc_NotImplementedError, "a_val and a_ind have different lengths"); %(fail)s;}
if ((!%(z)s)
|| (%(z)s->dimensions[0] != %(a_ptr)s->dimensions[0]-1) //a's rows
|| (%(z)s->dimensions[1] != %(b)s->dimensions[1]) //b's columns
|| (%(z)s->dimensions[0] != %(a_ptr)s->dimensions[0]-1) //a's rows
|| (%(z)s->dimensions[1] != %(b)s->dimensions[1]) //b's columns
)
{
if (%(z)s) Py_DECREF(%(z)s);
npy_intp dims[] = {0,0};
dims[0] = %(a_ptr)s->dimensions[0]-1;
dims[1] = %(b)s->dimensions[1];
%(z)s = (PyArrayObject*) PyArray_SimpleNew(2, dims, %(b)s->descr->type_num);
%(z)s = (PyArrayObject*) PyArray_SimpleNew(2, dims, %(typenum_z)s);
}
{
......@@ -949,9 +1133,7 @@ class StructuredDotCSR(gof.Op):
npy_intp Sptr = %(a_ptr)s->strides[0] / %(a_ptr)s->descr->elsize;
// pointers to access actual data in the arrays passed as params.
npy_double * __restrict__ Dz = (npy_double*)%(z)s->data;
//const npy_double * __restrict__ Db = (npy_double*)%(b)s->data;
const npy_double * __restrict__ Dval = (npy_double*)%(a_val)s->data;
dtype_%(z)s* __restrict__ Dz = (dtype_%(z)s*)%(z)s->data;
const npy_int32 * __restrict__ Dind = (npy_int32*)%(a_ind)s->data;
const npy_int32 * __restrict__ Dptr = (npy_int32*)%(a_ptr)s->data;
......@@ -979,10 +1161,10 @@ class StructuredDotCSR(gof.Op):
// z[m,n] += a[m,k] * b[k,n]
// loop over inner dimension
for (npy_int32 m = 0; m < M; ++m)
for (npy_int64 m = 0; m < M; ++m)
{
// pointer to m-th row of the output matrix Z
npy_double * __restrict__ zm = (npy_double *)(%(z)s->data + %(z)s->strides[0] * m);
dtype_%(z)s* __restrict__ zm = (dtype_%(z)s*)(%(z)s->data + %(z)s->strides[0] * m);
// loop over sparse rows indices through index pointer array
// (amounts to looping over cols k of sparse matrix)
......@@ -992,7 +1174,7 @@ class StructuredDotCSR(gof.Op):
const double Amk = Dval[k_idx * Sval]; // actual value at that location
// get pointer to k-th row of dense matrix
const npy_double * __restrict__ bk = (double *)(%(b)s->data + %(b)s->strides[0] * k);
const dtype_%(b)s* __restrict__ bk = (dtype_%(b)s*)(%(b)s->data + %(b)s->strides[0] * k);
// loop over final dimension (cols of dense matrix) and perform dot product
for(npy_int32 n = 0; n < N; ++n)
......@@ -1021,6 +1203,15 @@ def local_structured_dot(node):
return False
register_specialize(local_structured_dot)
@gof.local_optimizer([_structured_dot])
def local_structureddotcsc_w_ones(node):
if node.op == sd_csc:
aval, aind, aptr, ashp, b = node.inputs
if isinstance(aval, gof.Constant):
if numpy.all(aval.data == 1):
return [StructuredDotCSC1(aval.type)(aind, aptr, ashp, b)]
return False
register_specialize(local_structureddotcsc_w_ones)
def structured_dot_grad(sparse_A, dense_B, ga):
if sparse_A.type.format in ('csc','csr'):
......@@ -1039,7 +1230,8 @@ def structured_dot_grad(sparse_A, dense_B, ga):
class StructuredDotGradCSC(gof.Op):
def make_node(self, a_indices, a_indptr, b, g_ab):
return gof.Apply(self, [a_indices, a_indptr, b, g_ab], [tensor.tensor(b.dtype, (False,))])
return gof.Apply(self, [a_indices, a_indptr, b, g_ab],
[tensor.tensor(g_ab.dtype, (False,))])
def perform(self, node, (a_indices, a_indptr, b, g_ab), (out,)):
g_a_data = numpy.zeros(a_indices.shape, dtype=g_ab.dtype)
for j in xrange(len(a_indptr)-1):
......@@ -1050,6 +1242,12 @@ class StructuredDotGradCSC(gof.Op):
g_a_data[i_idx] = numpy.dot(g_ab[i], b[j])
out[0] = g_a_data
def c_code(self, node, name, (_indices, _indptr, _d, _g), (_zout, ), sub):
if node.inputs[2].type.dtype in ('complex64', 'complex128'):
raise NotImplementedError('Complex types are not supported for b')
if node.inputs[3].type.dtype in ('complex64', 'complex128'):
raise NotImplementedError('Complex types are not supported for g_ab')
return """
if (%(_d)s->nd != 2) {PyErr_SetString(PyExc_NotImplementedError, "rank(d) != 2"); %(fail)s;}
if (%(_g)s->nd != 2) {PyErr_SetString(PyExc_NotImplementedError, "rank(g) != 2"); %(fail)s;}
......@@ -1062,12 +1260,6 @@ class StructuredDotGradCSC(gof.Op):
if( %(_indptr)s->descr->type_num != PyArray_INT32)
{PyErr_SetString(PyExc_NotImplementedError, "D"); %(fail)s;}
if( %(_d)s->descr->type_num != PyArray_DOUBLE)
{PyErr_SetString(PyExc_NotImplementedError, "d's dtype not NPY_DOUBLE"); %(fail)s;}
if( %(_g)s->descr->type_num != PyArray_DOUBLE)
{PyErr_SetString(PyExc_NotImplementedError, "g's dtype not NPY_DOUBLE"); %(fail)s;}
if( %(_d)s->dimensions[1] != %(_g)s->dimensions[1])
{PyErr_SetString(PyExc_NotImplementedError, "d and g have different numbers of columns"); %(fail)s;}
......@@ -1101,7 +1293,7 @@ class StructuredDotGradCSC(gof.Op):
for (npy_int32 j = 0; j < N; ++j)
{
// extract j-th row of dense matrix
const npy_double * __restrict__ d_row = (double *)(%(_d)s->data + %(_d)s->strides[0] * j);
const dtype_%(_d)s* __restrict__ d_row = (dtype_%(_d)s*)(%(_d)s->data + %(_d)s->strides[0] * j);
if(j >= %(_d)s->dimensions[0]) {PyErr_SetString(PyExc_NotImplementedError, "G"); %(fail)s;}
// for each non-null value in the sparse column
......@@ -1111,7 +1303,7 @@ class StructuredDotGradCSC(gof.Op):
npy_int32 i = indices[i_idx * Sindices];
// extract corresponding row in gradient
const npy_double * __restrict__ g_row = (npy_double *)(%(_g)s->data + %(_g)s->strides[0] * i);
const dtype_%(_g)s* __restrict__ g_row = (dtype_%(_g)s*)(%(_g)s->data + %(_g)s->strides[0] * i);
double ip = 0.0;
// make sure that row index is not bigger than actual number of rows
......@@ -1127,7 +1319,7 @@ class StructuredDotGradCSC(gof.Op):
}
// write resulting gradient to sparse output
((double * __restrict__)(%(_zout)s->data + i_idx * %(_zout)s->strides[0]))[0] = ip;
((dtype_%(_zout)s* __restrict__)(%(_zout)s->data + i_idx * %(_zout)s->strides[0]))[0] = ip;
}
}
}
......@@ -1137,8 +1329,10 @@ sdg_csc = StructuredDotGradCSC()
class StructuredDotGradCSR(gof.Op):
def make_node(self, a_indices, a_indptr, b, g_ab):
return gof.Apply(self, [a_indices, a_indptr, b, g_ab], [tensor.tensor(b.dtype, (False,))])
def perform(self, node, (a_indices, a_indptr, b, g_ab), (out,)):
g_a_data = numpy.zeros(a_indices.shape, dtype=g_ab.dtype)
for i in xrange(len(a_indptr)-1): # loop over rows
......@@ -1149,7 +1343,14 @@ class StructuredDotGradCSR(gof.Op):
# grad is dot product of i-th row of gradient with j-th row of b
g_a_data[j_idx] = numpy.dot(g_ab[i], b[j])
out[0] = g_a_data
def c_code(self, node, name, (_indices, _indptr, _d, _g), (_zout, ), sub):
if node.inputs[2].type.dtype in ('complex64', 'complex128'):
raise NotImplementedError('Complex types are not supported for b')
if node.inputs[3].type.dtype in ('complex64', 'complex128'):
raise NotImplementedError('Complex types are not supported for g_ab')
return """
if (%(_d)s->nd != 2) {PyErr_SetString(PyExc_NotImplementedError, "rank(d) != 2"); %(fail)s;}
if (%(_g)s->nd != 2) {PyErr_SetString(PyExc_NotImplementedError, "rank(g) != 2"); %(fail)s;}
......@@ -1162,12 +1363,6 @@ class StructuredDotGradCSR(gof.Op):
if( %(_indptr)s->descr->type_num != PyArray_INT32)
{PyErr_SetString(PyExc_NotImplementedError, "D"); %(fail)s;}
if( %(_d)s->descr->type_num != PyArray_DOUBLE)
{PyErr_SetString(PyExc_NotImplementedError, "d's dtype not NPY_DOUBLE"); %(fail)s;}
if( %(_g)s->descr->type_num != PyArray_DOUBLE)
{PyErr_SetString(PyExc_NotImplementedError, "g's dtype not NPY_DOUBLE"); %(fail)s;}
if( %(_d)s->dimensions[1] != %(_g)s->dimensions[1])
{PyErr_SetString(PyExc_NotImplementedError, "d and g have different numbers of columns"); %(fail)s;}
......@@ -1208,11 +1403,11 @@ class StructuredDotGradCSR(gof.Op):
npy_int32 j = indices[j_idx * Sindices];
// extract j-th row of dense matrix
const npy_double * __restrict__ d_row = (double *)(%(_d)s->data + %(_d)s->strides[0] * j);
const dtype_%(_d)s* __restrict__ d_row = (dtype_%(_d)s*)(%(_d)s->data + %(_d)s->strides[0] * j);
if(j >= %(_d)s->dimensions[0]) {PyErr_SetString(PyExc_NotImplementedError, "G"); %(fail)s;}
// extract corresponding row in gradient
const npy_double * __restrict__ g_row = (npy_double *)(%(_g)s->data + %(_g)s->strides[0] * i);
const dtype_%(_g)s* __restrict__ g_row = (dtype_%(_g)s*)(%(_g)s->data + %(_g)s->strides[0] * i);
double ip = 0.0;
// make sure that row index is not bigger than actual number of rows
......@@ -1228,7 +1423,7 @@ class StructuredDotGradCSR(gof.Op):
}
// write resulting gradient to sparse output
((double * __restrict__)(%(_zout)s->data + j_idx * %(_zout)s->strides[0]))[0] = ip;
((dtype_%(_zout)s* __restrict__)(%(_zout)s->data + j_idx * %(_zout)s->strides[0]))[0] = ip;
}
}
}
......
......@@ -161,62 +161,154 @@ class test_structureddot(unittest.TestCase):
def test_structuredot(self):
bsize = 2
typenames = 'int8', 'int32', 'int16', 'int64', 'float32', 'float64', 'complex64', 'complex128'
# iterate 10 times just to make sure (cannot get this wrong !)
for i in range(10):
spmat = sp.lil_matrix((4,6))
for i in range(5):
x = numpy.floor(numpy.random.rand()*spmat.shape[0])
y = numpy.floor(numpy.random.rand()*spmat.shape[1])
spmat[x,y] = numpy.random.rand()*10
spmat = sp.csc_matrix(spmat)
kerns = tensor.dvector('kerns')
images = tensor.dmatrix('images')
##
# Test compressed-sparse column matrices ###
##
# build symbolic theano graph
def buildgraphCSC(kerns,images):
csc = CSC(kerns, spmat.indices[:spmat.size], spmat.indptr, spmat.shape)
return structured_dot(csc, images.T)
out = buildgraphCSC(kerns,images)
f = theano.function([kerns,images], out)
# compute theano outputs
kernvals = spmat.data[:spmat.size]
imvals = 1.0 * numpy.arange(bsize*spmat.shape[1]).reshape(bsize,spmat.shape[1])
outvals = f(kernvals,imvals)
# compare to scipy
c = spmat * (imvals.T)
assert _is_dense(c)
assert numpy.all(outvals == c)
utt.verify_grad(buildgraphCSC, [kernvals,imvals])
##
# Test compressed-sparse row matrices ###
##
spmat = spmat.tocsr()
# build theano graph
def buildgraphCSR(kerns,images):
csr = CSR(kerns, spmat.indices[:spmat.size], spmat.indptr, spmat.shape)
return structured_dot(csr, images.T)
out = buildgraphCSR(kerns,images)
f = theano.function([kerns,images], out)
# compute theano output
kernvals = spmat.data[:spmat.size]
imvals = 1.0 * numpy.arange(bsize*spmat.shape[1]).reshape(bsize,spmat.shape[1])
outvals = f(kernvals,imvals)
# compare to scipy
c = spmat * (imvals.T)
assert _is_dense(c)
assert numpy.all(outvals == c)
for sparse_dtype in typenames:
for dense_dtype in typenames:
output_dtype = theano.scalar.upcast(sparse_dtype, dense_dtype)
#print 'output_dtype = ', output_dtype
#print '** sparse_dtype = ', sparse_dtype
#print '** dense_dtype = ', dense_dtype
# iterate for a few different random graph patterns
for i in range(10):
spmat = sp.csc_matrix((4,6), dtype=sparse_dtype)
for i in range(5):
# set non-zeros in random locations (row x, col y)
x = numpy.floor(numpy.random.rand()*spmat.shape[0])
y = numpy.floor(numpy.random.rand()*spmat.shape[1])
spmat[x,y] = numpy.random.rand()*10
spmat = sp.csc_matrix(spmat)
kerns = tensor.Tensor(sparse_dtype, broadcastable=[False])('kerns')
images = tensor.Tensor(dense_dtype, broadcastable=[False, False])('images')
#print 'kerns.dtype = ', kerns.dtype
#print 'images.dtype = ', images.dtype
##
# Test compressed-sparse column matrices ###
##
# build symbolic theano graph
def buildgraphCSC(kerns,images):
csc = CSC(kerns, spmat.indices[:spmat.size], spmat.indptr, spmat.shape)
assert csc.type.dtype == sparse_dtype
return structured_dot(csc, images.T)
out = buildgraphCSC(kerns,images)
f = theano.function([kerns,images], out)
# compute theano outputs
#print 'spmat.data', spmat.data.dtype.num
kernvals = numpy.array(spmat.data[:spmat.size])
#print 'kdtype', kernvals.dtype, kernvals.shape, kernvals.ndim, kernvals.dtype.num
#print 'type of kernvals = ', kernvals.dtype
imvals = 1.0 * numpy.array(numpy.arange(bsize*spmat.shape[1]).\
reshape(bsize,spmat.shape[1]), dtype=dense_dtype)
outvals = f(kernvals,imvals)
# compare to scipy
c = spmat * (imvals.T)
assert _is_dense(c)
assert str(outvals.dtype) == output_dtype
assert numpy.all(numpy.abs(outvals -
numpy.array(c, dtype=output_dtype)) < 1e-4)
#if sparse_dtype.startswith('float') and dense_dtype.startswith('float'):
#utt.verify_grad(buildgraphCSC, [kernvals,imvals])
def notest(self):
##
# Test compressed-sparse row matrices ###
##
spmat = spmat.tocsr()
# build theano graph
def buildgraphCSR(kerns,images):
csr = CSR(kerns, spmat.indices[:spmat.size], spmat.indptr, spmat.shape)
assert csr.type.dtype == sparse_dtype
return structured_dot(csr, images.T)
out = buildgraphCSR(kerns,images)
f = theano.function([kerns,images], out)
# compute theano output
kernvals = spmat.data[:spmat.size]
imvals = 1.0 * numpy.arange(bsize*spmat.shape[1]).reshape(bsize,spmat.shape[1])
outvals = f(kernvals,imvals)
# compare to scipy
c = spmat * (imvals.T)
assert _is_dense(c)
assert str(outvals.dtype) == output_dtype
if not numpy.all(numpy.abs(outvals - numpy.array(c, dtype=output_dtype)) < 1e-5):
print numpy.abs(outvals - numpy.array(c, dtype=output_dtype))
assert numpy.all(numpy.abs(outvals -
numpy.array(c, dtype=output_dtype)) < 1e-4)
# we could test more, but hopefully this suffices?
if sparse_dtype.startswith('float') and dense_dtype.startswith('float'):
utt.verify_grad( buildgraphCSR, [kernvals,imvals])
def test_opt_unpack(self):
kerns = tensor.Tensor(dtype='int64', broadcastable=[False])('kerns')
spmat = sp.csc_matrix((4,6), dtype='int64')
for i in range(5):
# set non-zeros in random locations (row x, col y)
x = numpy.floor(numpy.random.rand()*spmat.shape[0])
y = numpy.floor(numpy.random.rand()*spmat.shape[1])
spmat[x,y] = numpy.random.rand()*10
spmat = sp.csc_matrix(spmat)
images = tensor.Tensor(dtype='float32', broadcastable=[False, False])('images')
cscmat = CSC(kerns, spmat.indices[:spmat.size], spmat.indptr, spmat.shape)
f = theano.function([kerns, images], structured_dot(cscmat, images.T))
sdcscpresent = False
for node in f.maker.env.toposort():
print node.op
assert not isinstance(node.op, CSM)
assert not isinstance(node.op, CSMProperties)
if isinstance(f.maker.env.toposort()[1].op, StructuredDotCSC):
sdcscpresent = True
assert sdcscpresent
kernvals = numpy.array(spmat.data[:spmat.size])
#print 'kdtype', kernvals.dtype, kernvals.shape, kernvals.ndim, kernvals.dtype.num
#print 'type of kernvals = ', kernvals.dtype
bsize = 3
imvals = 1.0 * numpy.array(numpy.arange(bsize*spmat.shape[1]).\
reshape(bsize,spmat.shape[1]), dtype='float32')
outvals = f(kernvals,imvals)
print outvals
def test_opt_ones(self):
spmat = sp.csc_matrix((4,6), dtype='int64')
for i in range(5):
# set 1s in random locations (row x, col y)
x = numpy.floor(numpy.random.rand()*spmat.shape[0])
y = numpy.floor(numpy.random.rand()*spmat.shape[1])
spmat[x,y] = 1
spmat = sp.csc_matrix(spmat)
images = tensor.Tensor(dtype='float32', broadcastable=[False, False])('images')
f = theano.function([images], structured_dot(spmat, images.T))
sdones_present = False
for i, node in enumerate(f.maker.env.toposort()):
print ' ', i, node.op
if isinstance(node.op, StructuredDotCSC1):
sdones_present = True
assert sdones_present
#print 'kdtype', kernvals.dtype, kernvals.shape, kernvals.ndim, kernvals.dtype.num
#print 'type of kernvals = ', kernvals.dtype
bsize = 3
imvals = 1.0 * numpy.array(numpy.arange(bsize*spmat.shape[1]).\
reshape(bsize,spmat.shape[1]), dtype='float32')
outvals = f(imvals)
print outvals
if __name__ == '__main__':
unittest.main()
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论