Mostly gradient for CSR matrix (both for the py and c linkers), along with an updated test file, plus misc. tweaks.
Parent: b594140c
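For context: structured_dot(a, b) multiplies a sparse a by a dense b while treating a's sparsity pattern as fixed, so the gradient with respect to a's stored values is the dense gradient g_ab · b^T sampled at that pattern. A minimal NumPy sketch of this identity (editor's illustration; the names here are made up, not code from this commit):

import numpy
import scipy.sparse as sp

a = sp.csr_matrix(numpy.array([[0., 2., 0.], [1., 0., 3.]]))
b = numpy.arange(12.).reshape(3, 4)
g_ab = numpy.ones((2, 4))                 # upstream gradient of a.dot(b)

dense_grad = numpy.dot(g_ab, b.T)         # gradient wrt a dense version of a
row = numpy.repeat(numpy.arange(a.shape[0]), numpy.diff(a.indptr))
g_a_data = dense_grad[row, a.indices]     # one value per stored entry of a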
@@ -294,7 +294,10 @@ class CSM(gof.Op):
    def perform(self, node, (data, indices, indptr, shape), (out,)):
        """Build a csc_matrix"""
        #assert len(data.flatten()) == len(indices.flatten())
-       data = data[self.map] if self.map!=None else data
+       # for efficiency: if the remap does nothing, do not apply it
+       if self.map is not None and not numpy.all(self.map == numpy.arange(numpy.size(self.map))):
+           data = data[self.map]
        if len(shape) != 2:
            raise ValueError('Shape should be an array of length 2')
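The intent of the new branch is to skip the fancy-indexing copy when the remap is an identity permutation, since applying it would copy the data without reordering anything. A quick illustration of the check (editor's sketch, not part of the commit):

import numpy

m = numpy.array([0, 1, 2])
numpy.all(m == numpy.arange(numpy.size(m)))   # True: data[m] would be a no-op copy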
@@ -706,6 +709,7 @@ class StructuredDot(gof.Op):
        #gb = a.T x g_out
        return structured_dot_grad(a, b, g_out), structured_dot(a.T, g_out)

_structured_dot = StructuredDot()

def structured_dot(x, y):
    """
    @todo: Maybe the triple-transposition formulation (when x is dense)
@@ -880,7 +884,7 @@ class StructuredDotGrad(gof.Op):
            raise TypeError()

_structured_dot_grad = StructuredDotGrad()

-class StructureDotGradCSC(gof.Op):
+class StructuredDotGradCSC(gof.Op):

    def make_node(self, a_indices, a_indptr, b, g_ab):
        return gof.Apply(self, [a_indices, a_indptr, b, g_ab], [tensor.tensor(b.dtype, (False,))])

    def perform(self, node, (a_indices, a_indptr, b, g_ab), (out,)):
@@ -940,31 +944,145 @@ class StructureDotGradCSC(gof.Op):
            const npy_int32 * __restrict__ indptr = (npy_int32 *)%(_indptr)s->data;
            const npy_int32 * __restrict__ indices = (npy_int32 *)%(_indices)s->data;

            // loop over columns
            for (npy_int32 j = 0; j < N; ++j)
            {
                // extract the j-th row of the dense matrix
                const npy_double * __restrict__ d_row = (npy_double *)(%(_d)s->data + %(_d)s->strides[0] * j);
                if (j >= %(_d)s->dimensions[0]) {PyErr_SetString(PyExc_NotImplementedError, "G"); %(fail)s;}

                // for each non-null value in the sparse column
                for (npy_int32 i_idx = indptr[j * Sindptr]; i_idx < indptr[(j+1) * Sindptr]; ++i_idx)
                {
                    // extract the row index of the non-null value
                    npy_int32 i = indices[i_idx * Sindices];

                    // extract the corresponding row of the gradient
                    const npy_double * __restrict__ g_row = (npy_double *)(%(_g)s->data + %(_g)s->strides[0] * i);
                    double ip = 0.0;

                    // guard: the row index must not exceed the number of rows in g
                    // (the pointer arithmetic above does not dereference; the loop below does)
                    if (i >= %(_g)s->dimensions[0])
                    {PyErr_SetString(PyExc_NotImplementedError, "H"); %(fail)s;}

                    // dot product of the dense row with the gradient row
                    for (int k = 0; k < K; ++k)
                    {
                        ip += d_row[k * Sd1] * g_row[k * Sg1];
                    }

                    // write the resulting gradient to the sparse output
                    ((npy_double * __restrict__)(%(_zout)s->data + i_idx * %(_zout)s->strides[0]))[0] = ip;
                }
            }
        }
        """ % dict(locals(), **sub)
-_sdgcsc = StructureDotGradCSC()
+_sdgcsc = StructuredDotGradCSC()
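In Python terms, the CSC kernel above computes, for every value stored at row i of column j, the dot product of row i of the gradient with row j of the dense matrix. A rough pure-Python equivalent (editor's sketch under the same shape assumptions; not code from the commit):

import numpy

def csc_grad_data(a_indices, a_indptr, b, g_ab):
    g_a_data = numpy.zeros(a_indices.shape, dtype=g_ab.dtype)
    for j in xrange(len(a_indptr) - 1):              # loop over columns
        for i_idx in xrange(a_indptr[j], a_indptr[j + 1]):
            i = a_indices[i_idx]                     # row of the stored value
            g_a_data[i_idx] = numpy.dot(g_ab[i], b[j])
    return g_a_data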
class StructuredDotGradCSR(gof.Op):
    def make_node(self, a_indices, a_indptr, b, g_ab):
        return gof.Apply(self, [a_indices, a_indptr, b, g_ab], [tensor.tensor(b.dtype, (False,))])

    def perform(self, node, (a_indices, a_indptr, b, g_ab), (out,)):
        g_a_data = numpy.zeros(a_indices.shape, dtype=g_ab.dtype)
        for i in xrange(len(a_indptr) - 1):        # loop over rows
            ind0 = a_indptr[i]
            ind1 = a_indptr[i + 1]
            for j_idx in xrange(ind0, ind1):       # loop over the values stored in that row (columns)
                j = a_indices[j_idx]
                # grad is the dot product of the i-th row of the gradient with the j-th row of b
                g_a_data[j_idx] = numpy.dot(g_ab[i], b[j])
        out[0] = g_a_data
    def c_code(self, node, name, (_indices, _indptr, _d, _g), (_zout,), sub):
        return """
        if (%(_d)s->nd != 2) {PyErr_SetString(PyExc_NotImplementedError, "rank(d) != 2"); %(fail)s;}
        if (%(_g)s->nd != 2) {PyErr_SetString(PyExc_NotImplementedError, "rank(g) != 2"); %(fail)s;}
        if (%(_indices)s->nd != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(indices) != 1"); %(fail)s;}
        if (%(_indptr)s->nd != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(indptr) != 1"); %(fail)s;}

        if (%(_indices)s->descr->type_num != PyArray_INT32)
        {PyErr_SetString(PyExc_NotImplementedError, "C"); %(fail)s;}
        if (%(_indptr)s->descr->type_num != PyArray_INT32)
        {PyErr_SetString(PyExc_NotImplementedError, "D"); %(fail)s;}
        if (%(_d)s->descr->type_num != PyArray_DOUBLE)
        {PyErr_SetString(PyExc_NotImplementedError, "d's dtype not NPY_DOUBLE"); %(fail)s;}
        if (%(_g)s->descr->type_num != PyArray_DOUBLE)
        {PyErr_SetString(PyExc_NotImplementedError, "g's dtype not NPY_DOUBLE"); %(fail)s;}
        if (%(_d)s->dimensions[1] != %(_g)s->dimensions[1])
        {PyErr_SetString(PyExc_NotImplementedError, "d and g have different numbers of columns"); %(fail)s;}

        if (!%(_zout)s)
        {
            %(_zout)s = (PyArrayObject*) PyArray_SimpleNew(1, %(_indices)s->dimensions, %(_g)s->descr->type_num);
        }
        if (%(_zout)s->dimensions[0] != %(_indices)s->dimensions[0])
        {
            PyErr_SetString(PyExc_NotImplementedError, "somehow _zout got the wrong size... and I don't know how to resize it.");
            %(fail)s;
        }

        { // this scope lets the code compile even though the error labels jump over variable definitions
            npy_intp nnz = %(_indices)s->dimensions[0];
            // extract number of rows
            npy_intp N = %(_indptr)s->dimensions[0] - 1; //TODO: error checking with this

            npy_intp Sindices = %(_indices)s->strides[0] / %(_indices)s->descr->elsize;
            npy_intp Sindptr = %(_indptr)s->strides[0] / %(_indptr)s->descr->elsize;

            const npy_intp Sd1 = %(_d)s->strides[1] / %(_d)s->descr->elsize;
            const npy_intp Sg1 = %(_g)s->strides[1] / %(_g)s->descr->elsize;
            const npy_intp K = %(_d)s->dimensions[1];

            const npy_int32 * __restrict__ indptr = (npy_int32 *)%(_indptr)s->data;
            const npy_int32 * __restrict__ indices = (npy_int32 *)%(_indices)s->data;

            // loop over rows
            for (npy_int32 i = 0; i < N; ++i)
            {
                // for each non-null value in the sparse row
                for (npy_int32 j_idx = indptr[i * Sindptr]; j_idx < indptr[(i+1) * Sindptr]; ++j_idx)
                {
                    // extract the column index of the non-null value
                    npy_int32 j = indices[j_idx * Sindices];

                    // extract the j-th row of the dense matrix
                    const npy_double * __restrict__ d_row = (npy_double *)(%(_d)s->data + %(_d)s->strides[0] * j);
                    if (j >= %(_d)s->dimensions[0]) {PyErr_SetString(PyExc_NotImplementedError, "G"); %(fail)s;}

                    // extract the corresponding row of the gradient
                    const npy_double * __restrict__ g_row = (npy_double *)(%(_g)s->data + %(_g)s->strides[0] * i);
                    double ip = 0.0;

                    // guard: the row index must not exceed the number of rows in g
                    // (the pointer arithmetic above does not dereference; the loop below does)
                    if (i >= %(_g)s->dimensions[0])
                    {PyErr_SetString(PyExc_NotImplementedError, "H"); %(fail)s;}

                    // dot product of the dense row with the gradient row
                    for (int k = 0; k < K; ++k)
                    {
                        ip += d_row[k * Sd1] * g_row[k * Sg1];
                    }

                    // write the resulting gradient to the sparse output
                    ((npy_double * __restrict__)(%(_zout)s->data + j_idx * %(_zout)s->strides[0]))[0] = ip;
                }
            }
        }
        """ % dict(locals(), **sub)
_sdgcsr = StructuredDotGradCSR()
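StructuredDotGradCSR.perform above does one numpy.dot per stored entry; the same values can be computed in a single vectorized expression, which is convenient for cross-checking the op (editor's sketch, illustrative only):

import numpy

def csr_grad_data(a_indices, a_indptr, b, g_ab):
    # row index of each stored value, expanded from indptr
    row = numpy.repeat(numpy.arange(len(a_indptr) - 1), numpy.diff(a_indptr))
    # one dot product per stored (row, column) pair
    return (g_ab[row] * b[a_indices]).sum(axis=1)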
def structured_dot_grad(sparse_A, dense_B, ga):
    #TODO: 1. move this switch to be a specialization of StructuredDotGrad
@@ -972,10 +1090,14 @@ def structured_dot_grad(sparse_A, dense_B, ga):
    if 0:
        return _structured_dot_grad(sparse_A, dense_B, ga)
    else:
-       if sparse_A.type.format == 'csc':
-           g_A_data = _sdgcsc(csm_indices(sparse_A),\
+       if sparse_A.type.format in ('csc', 'csr'):
+           sdgcsx = _sdgcsc if sparse_A.type.format == 'csc' else _sdgcsr
+           CSx = CSC if sparse_A.type.format == 'csc' else CSR
+           g_A_data = sdgcsx(csm_indices(sparse_A),\
                csm_indptr(sparse_A), dense_B, ga)
-           return CSC(g_A_data, csm_indices(sparse_A),\
+           return CSx(g_A_data, csm_indices(sparse_A),\
                csm_indptr(sparse_A),\
                csm_shape(sparse_A))
        else:
......
from theano.sparse import *
import random
import unittest
import theano
from theano import compile
from theano import gradient
from theano import gof
from theano.sparse.basic import _is_dense, _is_sparse, _is_dense_result, _is_sparse_result
from theano.sparse.basic import _mtypes, _mtype_to_str

def eval_outputs(outputs):
    return compile.function([], outputs)()[0]
@@ -228,7 +230,7 @@ class test_true_dot(unittest.TestCase):
        x.data = x.data.T
        y.data = y.data.T
        # zop = true_dot(y, x)
-       zop = true_dot(y, x)
+       zop = transpose(true_dot(y, x))
        self.failUnless(_is_sparse_result(zop))
        z = eval_outputs([zop])
@@ -304,5 +306,59 @@ class test_true_dot(unittest.TestCase):
        self.failUnless(origloss > loss)

import scipy.sparse as sp

class test_structureddot(unittest.TestCase):
    def test_structureddot(self):
        #bsize = 5
        #spmat = sp.csc_matrix((8,15))
        #spmat[1,2] = 3
        #spmat[4,7] = 6
        #spmat[2,7] = 72
        #spmat[1,9] = 2
        #spmat[7,12] = 1
        #spmat[4,2] = 7
        bsize = 2
        spmat = sp.csc_matrix((5,5))
        spmat[1,2] = 1
        spmat[0,1] = 2
        spmat[0,2] = 3

        kerns = tensor.dvector()
        images = tensor.dmatrix()

        def buildgraphCSC(kerns, images):
            csc = CSC(kerns, spmat.indices[:spmat.size], spmat.indptr, spmat.shape)
            return structured_dot(csc, images.T)

        out = buildgraphCSC(kerns, images)
        for mode in 'FAST_COMPILE', 'FAST_RUN':
            f = theano.function([kerns, images], out, mode=mode)
            kernvals = spmat.data[:spmat.size]
            imvals = 1.0 * numpy.arange(bsize * spmat.shape[1]).reshape(bsize, spmat.shape[1])
            outvals = f(kernvals, imvals)
            assert numpy.all(outvals == spmat.dot(imvals.T).todense())
            tensor.verify_grad(None, buildgraphCSC, [kernvals, imvals], mode=mode)

        spmat = spmat.tocsr()

        def buildgraphCSR(kerns, images):
            csr = CSR(kerns, spmat.indices[:spmat.size], spmat.indptr, spmat.shape)
            return structured_dot(csr, images.T)

        out = buildgraphCSR(kerns, images)
        for mode in 'FAST_COMPILE', 'FAST_RUN':
            f = theano.function([kerns, images], out, mode=mode)
            kernvals = spmat.data[:spmat.size]
            imvals = 1.0 * numpy.arange(bsize * spmat.shape[1]).reshape(bsize, spmat.shape[1])
            outvals = f(kernvals, imvals)
            assert numpy.all(outvals == spmat.dot(imvals.T).todense())
            tensor.verify_grad(None, buildgraphCSR, [kernvals, imvals], mode=mode)

if __name__ == '__main__':
    unittest.main()
@@ -543,11 +543,11 @@ class GemmLocalOptimizer(LocalOptimizer):
# TODO: This could be an EquilibriumOptimizer, but I don't know how to combine an OpKeyOptimizer and
# an EquilibriumOptimizer.
compile.optdb.register('inplace_gemm_0', OpKeyOptimizer(GemmLocalOptimizer(),
-       failure_callback=GemmLocalOptimizer.failure_callback), 70.00, 'fast_run', 'inplace')
+       failure_callback=GemmLocalOptimizer.failure_callback), 70.00, 'fast_run', 'inplace', 'gemm')
compile.optdb.register('inplace_gemm_1', OpKeyOptimizer(GemmLocalOptimizer(),
-       failure_callback=GemmLocalOptimizer.failure_callback), 70.01, 'fast_run', 'inplace')
+       failure_callback=GemmLocalOptimizer.failure_callback), 70.01, 'fast_run', 'inplace', 'gemm')
compile.optdb.register('inplace_gemm_2', OpKeyOptimizer(GemmLocalOptimizer(),
-       failure_callback=GemmLocalOptimizer.failure_callback), 70.02, 'fast_run', 'inplace')
+       failure_callback=GemmLocalOptimizer.failure_callback), 70.02, 'fast_run', 'inplace', 'gemm')
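The new 'gemm' tag makes the three registrations addressable as a group, so a user can presumably compile with them disabled; a hedged sketch (assuming Theano's Mode.excluding API, which filters registered optimizations by tag):

import theano
import theano.tensor as T

x, y = T.dmatrix('x'), T.dmatrix('y')
mode = theano.compile.mode.get_default_mode().excluding('gemm')
f = theano.function([x, y], T.dot(x, y), mode=mode)   # skips the inplace_gemm_* rewrites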
class Dot22(GemmRelated):
    """Compute a matrix-matrix product.
......
@@ -588,6 +588,7 @@ def mul_calculate(num, denum, aslist = False):
    return v

local_mul_canonizer = Canonizer(T.mul, T.div, T.inv, mul_calculate, False)
+register_canonicalize(local_mul_canonizer, name = 'local_mul_canonizer')

@gof.local_optimizer([T.neg])
def local_neg_to_mul(node):
@@ -693,7 +694,6 @@ def local_mul_specialize(node):
        return False

register_specialize(local_mul_specialize)
-register_canonicalize(local_mul_canonizer, name = 'local_mul_canonizer')

# neg_to_mul = out2in(gof.LocalOptGroup(local_neg_to_mul))
......