Mostly: gradient for CSR matrices (for both the Python and C linkers), along with an updated
test file, plus misc. tweaks.
Parent commit: b594140c
...@@ -294,7 +294,10 @@ class CSM(gof.Op): ...@@ -294,7 +294,10 @@ class CSM(gof.Op):
def perform(self, node, (data, indices, indptr, shape), (out,)): def perform(self, node, (data, indices, indptr, shape), (out,)):
"""Build a csc_matrix""" """Build a csc_matrix"""
#assert len(data.flatten()) == len(indices.flatten()) #assert len(data.flatten()) == len(indices.flatten())
data = data[self.map] if self.map!=None else data
# for efficiency, if remap does nothing, then do not apply it
if map is not None and all(map==numpy.arange(numpy.size(map))):
data = data[self.map]
if len(shape) != 2: if len(shape) != 2:
raise ValueError('Shape should be an array of length 2') raise ValueError('Shape should be an array of length 2')
...@@ -706,6 +709,7 @@ class StructuredDot(gof.Op): ...@@ -706,6 +709,7 @@ class StructuredDot(gof.Op):
#gb = a.T x g_out #gb = a.T x g_out
return structured_dot_grad(a, b, g_out), structured_dot(a.T,g_out) return structured_dot_grad(a, b, g_out), structured_dot(a.T,g_out)
_structured_dot = StructuredDot() _structured_dot = StructuredDot()
def structured_dot(x, y): def structured_dot(x, y):
""" """
@todo: Maybe the triple-transposition formulation (when x is dense) @todo: Maybe the triple-transposition formulation (when x is dense)
...@@ -880,7 +884,7 @@ class StructuredDotGrad(gof.Op): ...@@ -880,7 +884,7 @@ class StructuredDotGrad(gof.Op):
raise TypeError() raise TypeError()
_structured_dot_grad = StructuredDotGrad() _structured_dot_grad = StructuredDotGrad()
class StructureDotGradCSC(gof.Op): class StructuredDotGradCSC(gof.Op):
def make_node(self, a_indices, a_indptr, b, g_ab): def make_node(self, a_indices, a_indptr, b, g_ab):
return gof.Apply(self, [a_indices, a_indptr, b, g_ab], [tensor.tensor(b.dtype, (False,))]) return gof.Apply(self, [a_indices, a_indptr, b, g_ab], [tensor.tensor(b.dtype, (False,))])
def perform(self, node, (a_indices, a_indptr, b, g_ab), (out,)): def perform(self, node, (a_indices, a_indptr, b, g_ab), (out,)):
...@@ -940,31 +944,145 @@ class StructureDotGradCSC(gof.Op): ...@@ -940,31 +944,145 @@ class StructureDotGradCSC(gof.Op):
const npy_int32 * __restrict__ indptr = (npy_int32 *)%(_indptr)s->data; const npy_int32 * __restrict__ indptr = (npy_int32 *)%(_indptr)s->data;
const npy_int32 * __restrict__ indices = (npy_int32 *)%(_indices)s->data; const npy_int32 * __restrict__ indices = (npy_int32 *)%(_indices)s->data;
// loop over columns
for (npy_int32 j = 0; j < N; ++j) for (npy_int32 j = 0; j < N; ++j)
{ {
// extract j-th row of dense matrix
const npy_double * __restrict__ d_row = (double *)(%(_d)s->data + %(_d)s->strides[0] * j); const npy_double * __restrict__ d_row = (double *)(%(_d)s->data + %(_d)s->strides[0] * j);
if(j >= %(_d)s->dimensions[0]) {PyErr_SetString(PyExc_NotImplementedError, "G"); %(fail)s;} if(j >= %(_d)s->dimensions[0]) {PyErr_SetString(PyExc_NotImplementedError, "G"); %(fail)s;}
// for each non-null value in the sparse column
for (npy_int32 i_idx = indptr[j * Sindptr]; i_idx < indptr[(j+1) * Sindptr]; ++i_idx) for (npy_int32 i_idx = indptr[j * Sindptr]; i_idx < indptr[(j+1) * Sindptr]; ++i_idx)
{ {
// extract row index of non-null value
npy_int32 i = indices[i_idx * Sindices]; npy_int32 i = indices[i_idx * Sindices];
// extract corresponding row in gradient
const npy_double * __restrict__ g_row = (npy_double *)(%(_g)s->data + %(_g)s->strides[0] * i); const npy_double * __restrict__ g_row = (npy_double *)(%(_g)s->data + %(_g)s->strides[0] * i);
double ip = 0.0; double ip = 0.0;
// make sure that row index is not bigger than actual number of rows
// Note: wouldn't the above operation fail if that were the case ?
// when would this ever be true anyway ?
if (i >= %(_g)s->dimensions[0]) if (i >= %(_g)s->dimensions[0])
{PyErr_SetString(PyExc_NotImplementedError, "H"); %(fail)s;} {PyErr_SetString(PyExc_NotImplementedError, "H"); %(fail)s;}
// perform dot product of dense and sparse rows
for(int k = 0; k < K; ++k) for(int k = 0; k < K; ++k)
{ {
ip += d_row[k * Sd1] * g_row[k*Sg1]; ip += d_row[k * Sd1] * g_row[k*Sg1];
} }
// write resulting gradient to sparse output
((double * __restrict__)(%(_zout)s->data + i_idx * %(_zout)s->strides[0]))[0] = ip; ((double * __restrict__)(%(_zout)s->data + i_idx * %(_zout)s->strides[0]))[0] = ip;
} }
} }
} }
"""% dict(locals(), **sub) """% dict(locals(), **sub)
_sdgcsc = StructureDotGradCSC() _sdgcsc = StructuredDotGradCSC()
class StructuredDotGradCSR(gof.Op):
    """Gradient of structured_dot with respect to the data vector of a
    CSR-format sparse matrix.

    Inputs are the CSR index arrays (`a_indices`, `a_indptr`), the dense
    right operand `b`, and the incoming gradient `g_ab` on the product.
    The output is a 1-d vector with one entry per stored (non-zero)
    element of the sparse matrix: for a stored element at row i, column j,
    the gradient is dot(g_ab[i], b[j]).  This is the CSR counterpart of
    StructuredDotGradCSC above, and is dispatched to from
    structured_dot_grad for the 'csr' format.
    """

    def make_node(self, a_indices, a_indptr, b, g_ab):
        # Output is a rank-1 tensor (one value per stored element),
        # with the same dtype as b.
        return gof.Apply(self, [a_indices, a_indptr, b, g_ab], [tensor.tensor(b.dtype, (False,))])

    def perform(self, node, (a_indices, a_indptr, b, g_ab), (out,)):
        """Python fallback implementation (see c_code for the fast path)."""
        # One gradient slot per stored element of the sparse matrix.
        # NOTE(review): dtype follows g_ab, while make_node declares the
        # output with b's dtype -- these agree only when b and g_ab share
        # a dtype; confirm against callers.
        g_a_data = numpy.zeros(a_indices.shape, dtype=g_ab.dtype)
        for i in xrange(len(a_indptr)-1): # loop over rows
            # [ind0, ind1) is the slice of a_indices/data holding row i.
            ind0 = a_indptr[i]
            ind1 = a_indptr[i+1]
            for j_idx in xrange(ind0, ind1): # loop over values in that row (columns)
                j = a_indices[j_idx]
                # grad is dot product of i-th row of gradient with j-th row of b
                g_a_data[j_idx] = numpy.dot(g_ab[i], b[j])
        out[0] = g_a_data

    def c_code(self, node, name, (_indices, _indptr, _d, _g), (_zout, ), sub):
        # C implementation of the same computation as perform(), working
        # directly on the numpy buffers.  Only int32 index arrays and
        # float64 d/g matrices are supported (checked below with
        # NotImplementedError).  Strides are divided by elsize so the
        # loops can index element-wise on non-contiguous inputs.
        return """
        if (%(_d)s->nd != 2) {PyErr_SetString(PyExc_NotImplementedError, "rank(d) != 2"); %(fail)s;}
        if (%(_g)s->nd != 2) {PyErr_SetString(PyExc_NotImplementedError, "rank(g) != 2"); %(fail)s;}
        if (%(_indices)s->nd != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(indices) != 1"); %(fail)s;}
        if (%(_indptr)s->nd != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(indptr) != 1"); %(fail)s;}

        if( %(_indices)s->descr->type_num != PyArray_INT32) {
        PyErr_SetString(PyExc_NotImplementedError, "C"); %(fail)s;}

        if( %(_indptr)s->descr->type_num != PyArray_INT32)
        {PyErr_SetString(PyExc_NotImplementedError, "D"); %(fail)s;}

        if( %(_d)s->descr->type_num != PyArray_DOUBLE)
        {PyErr_SetString(PyExc_NotImplementedError, "d's dtype not NPY_DOUBLE"); %(fail)s;}

        if( %(_g)s->descr->type_num != PyArray_DOUBLE)
        {PyErr_SetString(PyExc_NotImplementedError, "g's dtype not NPY_DOUBLE"); %(fail)s;}

        if( %(_d)s->dimensions[1] != %(_g)s->dimensions[1])
        {PyErr_SetString(PyExc_NotImplementedError, "d and g have different numbers of columns"); %(fail)s;}

        if (!%(_zout)s)
        {
            %(_zout)s = (PyArrayObject*) PyArray_SimpleNew(1, %(_indices)s->dimensions, %(_g)s->descr->type_num);
        }

        if (%(_zout)s->dimensions[0] != %(_indices)s->dimensions[0])
        {
            PyErr_SetString(PyExc_NotImplementedError, "somehow _zout got the wrong size.. and I don't know how to resize it.");
            %(fail)s;
        }

        { //makes it compile even though labels jump over variable definitions.
            npy_intp nnz = %(_indices)s->dimensions[0];
            // extract number of rows
            npy_intp N = %(_indptr)s->dimensions[0]-1; //TODO: error checking with this

            npy_intp Sindices = %(_indices)s->strides[0]/%(_indices)s->descr->elsize;
            npy_intp Sindptr = %(_indptr)s->strides[0]/%(_indptr)s->descr->elsize;

            const npy_intp Sd1 = %(_d)s->strides[1]/%(_d)s->descr->elsize;
            const npy_intp Sg1 = %(_g)s->strides[1]/%(_g)s->descr->elsize;

            const npy_intp K = %(_d)s->dimensions[1];

            const npy_int32 * __restrict__ indptr = (npy_int32 *)%(_indptr)s->data;
            const npy_int32 * __restrict__ indices = (npy_int32 *)%(_indices)s->data;

            // loop over rows
            for (npy_int32 i = 0; i < N; ++i)
            {
                // for each non-null value in the sparse row
                for (npy_int32 j_idx = indptr[i * Sindptr]; j_idx < indptr[(i+1) * Sindptr]; ++j_idx)
                {
                    // extract column index of non-null value
                    npy_int32 j = indices[j_idx * Sindices];

                    // extract j-th row of dense matrix
                    const npy_double * __restrict__ d_row = (double *)(%(_d)s->data + %(_d)s->strides[0] * j);
                    if(j >= %(_d)s->dimensions[0]) {PyErr_SetString(PyExc_NotImplementedError, "G"); %(fail)s;}

                    // extract corresponding row in gradient
                    const npy_double * __restrict__ g_row = (npy_double *)(%(_g)s->data + %(_g)s->strides[0] * i);
                    double ip = 0.0;

                    // make sure that row index is not bigger than actual number of rows
                    // Note: wouldn't the above operation fail if that were the case ?
                    // when would this ever be true anyway ?
                    if (i >= %(_g)s->dimensions[0])
                    {PyErr_SetString(PyExc_NotImplementedError, "H"); %(fail)s;}

                    // perform dot product of dense and sparse rows
                    for(int k = 0; k < K; ++k)
                    {
                        ip += d_row[k * Sd1] * g_row[k*Sg1];
                    }

                    // write resulting gradient to sparse output
                    ((double * __restrict__)(%(_zout)s->data + j_idx * %(_zout)s->strides[0]))[0] = ip;
                }
            }
        }
        """% dict(locals(), **sub)
_sdgcsr = StructuredDotGradCSR()
def structured_dot_grad(sparse_A, dense_B, ga): def structured_dot_grad(sparse_A, dense_B, ga):
#TODO: 1. move this switch to be a specialization of structuredDotGrad #TODO: 1. move this switch to be a specialization of structuredDotGrad
...@@ -972,10 +1090,14 @@ def structured_dot_grad(sparse_A, dense_B, ga): ...@@ -972,10 +1090,14 @@ def structured_dot_grad(sparse_A, dense_B, ga):
if 0: if 0:
return _structured_dot_grad(sparse_A, dense_B, ga) return _structured_dot_grad(sparse_A, dense_B, ga)
else: else:
if sparse_A.type.format == 'csc': if sparse_A.type.format in ('csc','csr'):
g_A_data = _sdgcsc(csm_indices(sparse_A),\
csm_indptr(sparse_A), dense_B, ga) sdgcsx = _sdgcsc if sparse_A.type.format == 'csc' else _sdgcsr
return CSC(g_A_data, csm_indices(sparse_A),\ CSx = CSC if sparse_A.type.format == 'csc' else CSR
g_A_data = sdgcsx(csm_indices(sparse_A),\
csm_indptr(sparse_A), dense_B, ga)
return CSx(g_A_data, csm_indices(sparse_A),\
csm_indptr(sparse_A),\ csm_indptr(sparse_A),\
csm_shape(sparse_A)) csm_shape(sparse_A))
else: else:
......
from theano.sparse import * from theano.sparse import *
import random
import unittest import unittest
import theano
from theano import compile from theano import compile
from theano import gradient from theano import gradient
from theano import gof
from theano.sparse.basic import _is_dense, _is_sparse, _is_dense_result, _is_sparse_result from theano.sparse.basic import _is_dense, _is_sparse, _is_dense_result, _is_sparse_result
from theano.sparse.basic import _mtypes, _mtype_to_str from theano.sparse.basic import _mtypes, _mtype_to_str
import random
from theano import gof
def eval_outputs(outputs): def eval_outputs(outputs):
return compile.function([], outputs)()[0] return compile.function([], outputs)()[0]
...@@ -228,7 +230,7 @@ class test_true_dot(unittest.TestCase): ...@@ -228,7 +230,7 @@ class test_true_dot(unittest.TestCase):
x.data = x.data.T x.data = x.data.T
y.data = y.data.T y.data = y.data.T
# zop = true_dot(y, x) zop = true_dot(y, x)
zop = transpose(true_dot(y, x)) zop = transpose(true_dot(y, x))
self.failUnless(_is_sparse_result(zop)) self.failUnless(_is_sparse_result(zop))
z = eval_outputs([zop]) z = eval_outputs([zop])
...@@ -304,5 +306,59 @@ class test_true_dot(unittest.TestCase): ...@@ -304,5 +306,59 @@ class test_true_dot(unittest.TestCase):
self.failUnless(origloss > loss) self.failUnless(origloss > loss)
import scipy.sparse as sp
class test_structureddot(unittest.TestCase):
    """Checks structured_dot forward values and gradients for both CSC
    and CSR sparse formats, under both FAST_COMPILE and FAST_RUN modes
    (i.e. the Python and C implementations of the ops).
    """

    def test_structuredot(self):
        # Larger fixture kept for reference; the 5x5 one below is used.
        #bsize = 5
        #spmat = sp.csc_matrix((8,15))
        #spmat[1,2] = 3
        #spmat[4,7] = 6
        #spmat[2,7] = 72
        #spmat[1,9] = 2
        #spmat[7,12] = 1
        #spmat[4,2] = 7
        bsize = 2                       # batch size (rows of the dense input)
        spmat = sp.csc_matrix((5,5))    # small sparse fixture with 3 non-zeros
        spmat[1,2] = 1
        spmat[0,1] = 2
        spmat[0,2] = 3

        # Symbolic inputs: the sparse matrix's data vector, and a dense
        # (bsize x ncols) matrix of "images".
        kerns = tensor.dvector()
        images = tensor.dmatrix()

        def buildgraphCSC(kerns,images):
            # Rebuild the CSC matrix symbolically from its data vector,
            # borrowing structure (indices/indptr/shape) from spmat.
            # NOTE(review): closes over spmat, which is reassigned to its
            # CSR form later in this method -- this closure is only used
            # before that reassignment.
            csc = CSC(kerns, spmat.indices[:spmat.size], spmat.indptr, spmat.shape)
            return structured_dot(csc, images.T)

        out = buildgraphCSC(kerns,images)
        for mode in 'FAST_COMPILE','FAST_RUN':
            f = theano.function([kerns,images], out, mode=mode)
            kernvals = spmat.data[:spmat.size]
            imvals = 1.0 * numpy.arange(bsize*spmat.shape[1]).reshape(bsize,spmat.shape[1])
            outvals = f(kernvals,imvals)
            # Forward result must match scipy's own sparse-dense product.
            assert numpy.all(outvals == spmat.dot(imvals.T).todense())
            # Numeric-vs-symbolic gradient check.
            tensor.verify_grad(None, buildgraphCSC, [kernvals,imvals], mode=mode)

        # Same checks for the CSR format of the same matrix.
        spmat = spmat.tocsr()
        def buildgraphCSR(kerns,images):
            csr = CSR(kerns, spmat.indices[:spmat.size], spmat.indptr, spmat.shape)
            return structured_dot(csr, images.T)

        out = buildgraphCSR(kerns,images)
        for mode in 'FAST_COMPILE','FAST_RUN':
            f = theano.function([kerns,images], out, mode=mode)
            kernvals = spmat.data[:spmat.size]
            imvals = 1.0 * numpy.arange(bsize*spmat.shape[1]).reshape(bsize,spmat.shape[1])
            outvals = f(kernvals,imvals)
            assert numpy.all(outvals == spmat.dot(imvals.T).todense())
            tensor.verify_grad(None, buildgraphCSR, [kernvals,imvals], mode=mode)
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()
...@@ -543,11 +543,11 @@ class GemmLocalOptimizer(LocalOptimizer): ...@@ -543,11 +543,11 @@ class GemmLocalOptimizer(LocalOptimizer):
# TODO: This could be an equilibriumOptmizer, but I don't know how to combine an OpKeyOptimizer and # TODO: This could be an equilibriumOptmizer, but I don't know how to combine an OpKeyOptimizer and
# an EquilibriumOptimizer. # an EquilibriumOptimizer.
compile.optdb.register('inplace_gemm_0', OpKeyOptimizer(GemmLocalOptimizer(), compile.optdb.register('inplace_gemm_0', OpKeyOptimizer(GemmLocalOptimizer(),
failure_callback=GemmLocalOptimizer.failure_callback), 70.00, 'fast_run', 'inplace') failure_callback=GemmLocalOptimizer.failure_callback), 70.00, 'fast_run', 'inplace', 'gemm')
compile.optdb.register('inplace_gemm_1', OpKeyOptimizer(GemmLocalOptimizer(), compile.optdb.register('inplace_gemm_1', OpKeyOptimizer(GemmLocalOptimizer(),
failure_callback=GemmLocalOptimizer.failure_callback), 70.01, 'fast_run', 'inplace') failure_callback=GemmLocalOptimizer.failure_callback), 70.01, 'fast_run', 'inplace', 'gemm')
compile.optdb.register('inplace_gemm_2', OpKeyOptimizer(GemmLocalOptimizer(), compile.optdb.register('inplace_gemm_2', OpKeyOptimizer(GemmLocalOptimizer(),
failure_callback=GemmLocalOptimizer.failure_callback), 70.02, 'fast_run', 'inplace') failure_callback=GemmLocalOptimizer.failure_callback), 70.02, 'fast_run', 'inplace', 'gemm')
class Dot22(GemmRelated): class Dot22(GemmRelated):
"""Compute a matrix-matrix product. """Compute a matrix-matrix product.
......
...@@ -588,6 +588,7 @@ def mul_calculate(num, denum, aslist = False): ...@@ -588,6 +588,7 @@ def mul_calculate(num, denum, aslist = False):
return v return v
local_mul_canonizer = Canonizer(T.mul, T.div, T.inv, mul_calculate, False) local_mul_canonizer = Canonizer(T.mul, T.div, T.inv, mul_calculate, False)
register_canonicalize(local_mul_canonizer, name = 'local_mul_canonizer')
@gof.local_optimizer([T.neg]) @gof.local_optimizer([T.neg])
def local_neg_to_mul(node): def local_neg_to_mul(node):
...@@ -693,7 +694,6 @@ def local_mul_specialize(node): ...@@ -693,7 +694,6 @@ def local_mul_specialize(node):
return False return False
register_specialize(local_mul_specialize) register_specialize(local_mul_specialize)
register_canonicalize(local_mul_canonizer, name = 'local_mul_canonizer')
# neg_to_mul = out2in(gof.LocalOptGroup(local_neg_to_mul)) # neg_to_mul = out2in(gof.LocalOptGroup(local_neg_to_mul))
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论