提交 9f85a888 · 作者:James Bergstra

merge

...@@ -208,8 +208,8 @@ class CSMProperties(gof.Op): ...@@ -208,8 +208,8 @@ class CSMProperties(gof.Op):
"""Extract all of .data .indices and .indptr""" """Extract all of .data .indices and .indptr"""
view_map = {0:[0],1:[0],2:[0],3:[0]} view_map = {0:[0],1:[0],2:[0],3:[0]}
def __init__(self, map=None): def __init__(self, kmap=None):
self.map = map self.kmap = kmap
def make_node(self, csm): def make_node(self, csm):
csm = as_sparse(csm) csm = as_sparse(csm)
...@@ -218,7 +218,7 @@ class CSMProperties(gof.Op): ...@@ -218,7 +218,7 @@ class CSMProperties(gof.Op):
[data, tensor.ivector(), tensor.ivector(), tensor.ivector()]) [data, tensor.ivector(), tensor.ivector(), tensor.ivector()])
def perform(self, node, (csm,), out): def perform(self, node, (csm,), out):
out[0][0] = csm.data if self.map is None else csm.data[self.map] out[0][0] = csm.data if self.kmap is None else csm.data[self.kmap]
out[1][0] = numpy.asarray(csm.indices, dtype='int32') out[1][0] = numpy.asarray(csm.indices, dtype='int32')
out[2][0] = numpy.asarray(csm.indptr, dtype='int32') out[2][0] = numpy.asarray(csm.indptr, dtype='int32')
out[3][0] = numpy.asarray(csm.shape, dtype='int32') out[3][0] = numpy.asarray(csm.shape, dtype='int32')
...@@ -243,23 +243,23 @@ class CSM(gof.Op): ...@@ -243,23 +243,23 @@ class CSM(gof.Op):
view_map = {0:[0]} #should view the other inputs too, but viewing multiple inputs is not view_map = {0:[0]} #should view the other inputs too, but viewing multiple inputs is not
#currently supported by the destroyhandler #currently supported by the destroyhandler
def __init__(self, format, map=None): def __init__(self, format, kmap=None):
if format not in ('csr', 'csc'): if format not in ('csr', 'csc'):
raise ValueError("format must be one of: 'csr', 'csc'", format) raise ValueError("format must be one of: 'csr', 'csc'", format)
self.format = format self.format = format
# for efficiency, if remap does nothing, then do not apply it # for efficiency, if remap does nothing, then do not apply it
if map is not None and all(map==numpy.arange(numpy.size(map))): if kmap is not None and all(kmap==numpy.arange(numpy.size(kmap))):
map = None kmap = None
self.map = map self.kmap = kmap
def __eq__(self, other): def __eq__(self, other):
return type(other) is CSM \ return type(other) is CSM \
and other.format == self.format and numpy.all(other.map==self.map) and other.format == self.format and numpy.all(other.kmap==self.kmap)
def __hash__(self): def __hash__(self):
return hash(type(self)) ^ hash(self.format) ^ hash(numpy.str(self.map)) return hash(type(self)) ^ hash(self.format) ^ hash(numpy.str(self.kmap))
def make_node(self, data, indices, indptr, shape): def make_node(self, data, indices, indptr, shape):
"""Build a SparseResult from the internal parametrization """Build a SparseResult from the internal parametrization
...@@ -294,12 +294,17 @@ class CSM(gof.Op): ...@@ -294,12 +294,17 @@ class CSM(gof.Op):
def perform(self, node, (data, indices, indptr, shape), (out,)): def perform(self, node, (data, indices, indptr, shape), (out,)):
"""Build a csc_matrix""" """Build a csc_matrix"""
#assert len(data.flatten()) == len(indices.flatten()) #assert len(data.flatten()) == len(indices.flatten())
data = data[self.map] if self.map!=None else data
# for efficiency, if remap does nothing, then do not apply it
if self.kmap is not None:
data = data[self.kmap]
if len(shape) != 2: if len(shape) != 2:
raise ValueError('Shape should be an array of length 2') raise ValueError('Shape should be an array of length 2')
if data.shape != indices.shape: if data.shape != indices.shape and numpy.size(data) != numpy.size(self.kmap):
raise ValueError('data indices shape mismatch', (data.shape, indices.shape)) errmsg = 'Data (shape '+`data.shape`+' must have the same number of elements '+\
'as indices (shape'+`indices.shape`+') or elements as kmap ('+`numpy.size(self.kmap)`+')'
raise ValueError(errmsg)
if self.format == 'csc': if self.format == 'csc':
out[0] = sparse.csc_matrix((data, indices.copy(), indptr.copy()), out[0] = sparse.csc_matrix((data, indices.copy(), indptr.copy()),
numpy.asarray(shape), numpy.asarray(shape),
...@@ -315,26 +320,26 @@ class CSM(gof.Op): ...@@ -315,26 +320,26 @@ class CSM(gof.Op):
def grad(self, (data, indices, indptr, shape), (g_out,)): def grad(self, (data, indices, indptr, shape), (g_out,)):
"""Return a gradient on the data vector""" """Return a gradient on the data vector"""
#unpack the data vector and wrap it as a 1d Tensor #unpack the data vector and wrap it as a 1d Tensor
g_data = csm_grad(self.map)(data, csm_data(g_out),csm_indices(g_out)) g_data = csm_grad(self.kmap)(data, csm_data(g_out),csm_indices(g_out))
return [g_data, None, None, None] return [g_data, None, None, None]
CSC = CSM('csc') CSC = CSM('csc')
CSR = CSM('csr') CSR = CSM('csr')
class CSMGrad(gof.op.Op): class CSMGrad(gof.op.Op):
def __init__(self, map=None): def __init__(self, kmap=None):
self.map = map self.kmap = kmap
def make_node(self, data, gout_data, gout_indices): def make_node(self, data, gout_data, gout_indices):
g_data = data.type() g_data = data.type()
return gof.Apply(self, [data, gout_data, gout_indices], [g_data]) return gof.Apply(self, [data, gout_data, gout_indices], [g_data])
def perform(self, node, (data, gout_data, gout_indices), (g_data,)): def perform(self, node, (data, gout_data, gout_indices), (g_data,)):
if self.map is None: if self.kmap is None:
g_data[0] = gout_data g_data[0] = gout_data
else: else:
grad = numpy.zeros_like(data) grad = numpy.zeros_like(data)
grad[self.map] = gout_data grad[self.kmap] = gout_data
g_data[0] = grad g_data[0] = grad
csm_grad = CSMGrad csm_grad = CSMGrad
...@@ -706,6 +711,7 @@ class StructuredDot(gof.Op): ...@@ -706,6 +711,7 @@ class StructuredDot(gof.Op):
#gb = a.T x g_out #gb = a.T x g_out
return structured_dot_grad(a, b, g_out), structured_dot(a.T,g_out) return structured_dot_grad(a, b, g_out), structured_dot(a.T,g_out)
_structured_dot = StructuredDot() _structured_dot = StructuredDot()
def structured_dot(x, y): def structured_dot(x, y):
""" """
@todo: Maybe the triple-transposition formulation (when x is dense) @todo: Maybe the triple-transposition formulation (when x is dense)
...@@ -880,7 +886,7 @@ class StructuredDotGrad(gof.Op): ...@@ -880,7 +886,7 @@ class StructuredDotGrad(gof.Op):
raise TypeError() raise TypeError()
_structured_dot_grad = StructuredDotGrad() _structured_dot_grad = StructuredDotGrad()
class StructureDotGradCSC(gof.Op): class StructuredDotGradCSC(gof.Op):
def make_node(self, a_indices, a_indptr, b, g_ab): def make_node(self, a_indices, a_indptr, b, g_ab):
return gof.Apply(self, [a_indices, a_indptr, b, g_ab], [tensor.tensor(b.dtype, (False,))]) return gof.Apply(self, [a_indices, a_indptr, b, g_ab], [tensor.tensor(b.dtype, (False,))])
def perform(self, node, (a_indices, a_indptr, b, g_ab), (out,)): def perform(self, node, (a_indices, a_indptr, b, g_ab), (out,)):
...@@ -940,31 +946,145 @@ class StructureDotGradCSC(gof.Op): ...@@ -940,31 +946,145 @@ class StructureDotGradCSC(gof.Op):
const npy_int32 * __restrict__ indptr = (npy_int32 *)%(_indptr)s->data; const npy_int32 * __restrict__ indptr = (npy_int32 *)%(_indptr)s->data;
const npy_int32 * __restrict__ indices = (npy_int32 *)%(_indices)s->data; const npy_int32 * __restrict__ indices = (npy_int32 *)%(_indices)s->data;
// loop over columns
for (npy_int32 j = 0; j < N; ++j) for (npy_int32 j = 0; j < N; ++j)
{ {
// extract j-th row of dense matrix
const npy_double * __restrict__ d_row = (double *)(%(_d)s->data + %(_d)s->strides[0] * j); const npy_double * __restrict__ d_row = (double *)(%(_d)s->data + %(_d)s->strides[0] * j);
if(j >= %(_d)s->dimensions[0]) {PyErr_SetString(PyExc_NotImplementedError, "G"); %(fail)s;} if(j >= %(_d)s->dimensions[0]) {PyErr_SetString(PyExc_NotImplementedError, "G"); %(fail)s;}
// for each non-null value in the sparse column
for (npy_int32 i_idx = indptr[j * Sindptr]; i_idx < indptr[(j+1) * Sindptr]; ++i_idx) for (npy_int32 i_idx = indptr[j * Sindptr]; i_idx < indptr[(j+1) * Sindptr]; ++i_idx)
{ {
// extract row index of non-null value
npy_int32 i = indices[i_idx * Sindices]; npy_int32 i = indices[i_idx * Sindices];
// extract corresponding row in gradient
const npy_double * __restrict__ g_row = (npy_double *)(%(_g)s->data + %(_g)s->strides[0] * i); const npy_double * __restrict__ g_row = (npy_double *)(%(_g)s->data + %(_g)s->strides[0] * i);
double ip = 0.0; double ip = 0.0;
// make sure that row index is not bigger than actual number of rows
// Note: wouldn't the above operation fail if that were the case ?
// when would this ever be true anyway ?
if (i >= %(_g)s->dimensions[0]) if (i >= %(_g)s->dimensions[0])
{PyErr_SetString(PyExc_NotImplementedError, "H"); %(fail)s;} {PyErr_SetString(PyExc_NotImplementedError, "H"); %(fail)s;}
// perform dot product of dense and sparse rows
for(int k = 0; k < K; ++k) for(int k = 0; k < K; ++k)
{ {
ip += d_row[k * Sd1] * g_row[k*Sg1]; ip += d_row[k * Sd1] * g_row[k*Sg1];
} }
// write resulting gradient to sparse output
((double * __restrict__)(%(_zout)s->data + i_idx * %(_zout)s->strides[0]))[0] = ip; ((double * __restrict__)(%(_zout)s->data + i_idx * %(_zout)s->strides[0]))[0] = ip;
} }
} }
} }
"""% dict(locals(), **sub) """% dict(locals(), **sub)
_sdgcsc = StructureDotGradCSC() _sdgcsc = StructuredDotGradCSC()
class StructuredDotGradCSR(gof.Op):
    """Gradient of a structured dot product w.r.t. the stored values of a CSR matrix.

    Inputs are the ``indices`` and ``indptr`` vectors of the sparse operand,
    the dense operand ``b`` and the dense gradient ``g_ab`` on the product.
    The single output is a vector with one entry per stored element of the
    sparse operand: for the element stored at position ``j_idx``, located at
    row ``i`` and column ``j``, the output holds ``dot(g_ab[i], b[j])``.
    """

    def make_node(self, a_indices, a_indptr, b, g_ab):
        """Build the Apply node; the output is a vector with the dtype of ``b``."""
        # NOTE(review): (False,) is presumably the broadcastable pattern of a
        # 1-d tensor -- confirm against tensor.tensor.
        return gof.Apply(self, [a_indices, a_indptr, b, g_ab],
                         [tensor.tensor(b.dtype, (False,))])

    def perform(self, node, inputs, outputs):
        """Python implementation.

        :param node: the Apply node being evaluated (unused).
        :param inputs: ``(a_indices, a_indptr, b, g_ab)`` as arrays.
        :param outputs: ``(out,)`` where ``out[0]`` receives the result.
        """
        # Explicit unpacking instead of tuple parameters in the signature:
        # callers pass these positionally, so behaviour is unchanged, and the
        # method stays valid on interpreters without tuple parameters.
        a_indices, a_indptr, b, g_ab = inputs
        out, = outputs

        g_a_data = numpy.zeros(a_indices.shape, dtype=g_ab.dtype)
        for i in xrange(len(a_indptr) - 1):  # loop over rows
            ind0 = a_indptr[i]
            ind1 = a_indptr[i + 1]
            # loop over the values stored in that row (i.e. over columns)
            for j_idx in xrange(ind0, ind1):
                j = a_indices[j_idx]
                # grad is the dot product of the i-th row of the gradient
                # with the j-th row of b
                g_a_data[j_idx] = numpy.dot(g_ab[i], b[j])
        out[0] = g_a_data

    def c_code(self, node, name, inputs, outputs, sub):
        """Return the C implementation as a source string.

        :param node: the Apply node (unused).
        :param name: unique name for this code fragment (unused).
        :param inputs: C variable names ``(_indices, _indptr, _d, _g)``.
        :param outputs: C variable names ``(_zout,)``.
        :param sub: substitution mapping; must provide ``'fail'``.
        """
        _indices, _indptr, _d, _g = inputs
        _zout, = outputs

        code = """
        if (%(_d)s->nd != 2) {PyErr_SetString(PyExc_NotImplementedError, "rank(d) != 2"); %(fail)s;}
        if (%(_g)s->nd != 2) {PyErr_SetString(PyExc_NotImplementedError, "rank(g) != 2"); %(fail)s;}
        if (%(_indices)s->nd != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(indices) != 1"); %(fail)s;}
        if (%(_indptr)s->nd != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(indptr) != 1"); %(fail)s;}

        // "C": indices must be int32
        if( %(_indices)s->descr->type_num != PyArray_INT32) {
        PyErr_SetString(PyExc_NotImplementedError, "C"); %(fail)s;}

        // "D": indptr must be int32
        if( %(_indptr)s->descr->type_num != PyArray_INT32)
        {PyErr_SetString(PyExc_NotImplementedError, "D"); %(fail)s;}

        if( %(_d)s->descr->type_num != PyArray_DOUBLE)
        {PyErr_SetString(PyExc_NotImplementedError, "d's dtype not NPY_DOUBLE"); %(fail)s;}

        if( %(_g)s->descr->type_num != PyArray_DOUBLE)
        {PyErr_SetString(PyExc_NotImplementedError, "g's dtype not NPY_DOUBLE"); %(fail)s;}

        if( %(_d)s->dimensions[1] != %(_g)s->dimensions[1])
        {PyErr_SetString(PyExc_NotImplementedError, "d and g have different numbers of columns"); %(fail)s;}

        // (Re)allocate the output whenever the array left in the output
        // storage is missing or does not match the current number of stored
        // elements, instead of failing on a stale buffer.
        if (!%(_zout)s
            || (%(_zout)s->nd != 1)
            || (%(_zout)s->dimensions[0] != %(_indices)s->dimensions[0])
            || (%(_zout)s->descr->type_num != %(_g)s->descr->type_num))
        {
            Py_XDECREF(%(_zout)s);
            %(_zout)s = (PyArrayObject*) PyArray_SimpleNew(1, %(_indices)s->dimensions, %(_g)s->descr->type_num);
            if (!%(_zout)s)
            {
                // PyArray_SimpleNew has already set the Python exception
                %(fail)s;
            }
        }

        {   //makes it compile even though labels jump over variable definitions.
            const npy_intp nnz = %(_indices)s->dimensions[0];
            // extract number of rows
            const npy_intp N = %(_indptr)s->dimensions[0]-1;
            const npy_intp Sindices = %(_indices)s->strides[0]/%(_indices)s->descr->elsize;
            const npy_intp Sindptr = %(_indptr)s->strides[0]/%(_indptr)s->descr->elsize;

            const npy_intp Sd1 = %(_d)s->strides[1]/%(_d)s->descr->elsize;
            const npy_intp Sg1 = %(_g)s->strides[1]/%(_g)s->descr->elsize;

            const npy_intp K = %(_d)s->dimensions[1];

            const npy_int32 * __restrict__ indptr = (npy_int32 *)%(_indptr)s->data;
            const npy_int32 * __restrict__ indices = (npy_int32 *)%(_indices)s->data;

            // loop over rows
            for (npy_intp i = 0; i < N; ++i)
            {
                const npy_int32 row_begin = indptr[i * Sindptr];
                const npy_int32 row_end = indptr[(i+1) * Sindptr];

                // nothing stored in this row
                if (row_begin >= row_end) continue;

                // every position visited below indexes both indices and the
                // output, so it has to lie inside [0, nnz)
                if (row_begin < 0 || row_end > nnz)
                {PyErr_SetString(PyExc_ValueError, "indptr refers to positions outside of indices"); %(fail)s;}

                // "H": this row must exist in the gradient.  Computing the
                // row pointer would not fail by itself; the check guards the
                // reads done through it.
                if (i >= %(_g)s->dimensions[0])
                {PyErr_SetString(PyExc_NotImplementedError, "H"); %(fail)s;}

                // extract corresponding row in gradient
                const npy_double * __restrict__ g_row = (npy_double *)(%(_g)s->data + %(_g)s->strides[0] * i);

                // for each non-null value in the sparse row
                for (npy_int32 j_idx = row_begin; j_idx < row_end; ++j_idx)
                {
                    // extract column index of non-null value
                    const npy_int32 j = indices[j_idx * Sindices];

                    if (j < 0)
                    {PyErr_SetString(PyExc_ValueError, "negative column index in indices"); %(fail)s;}

                    // "G": the column index must be a valid row of d
                    if (j >= %(_d)s->dimensions[0]) {PyErr_SetString(PyExc_NotImplementedError, "G"); %(fail)s;}

                    // extract j-th row of dense matrix
                    const npy_double * __restrict__ d_row = (double *)(%(_d)s->data + %(_d)s->strides[0] * j);

                    // perform dot product of dense and sparse rows
                    double ip = 0.0;
                    for (npy_intp k = 0; k < K; ++k)
                    {
                        ip += d_row[k * Sd1] * g_row[k*Sg1];
                    }

                    // write resulting gradient to sparse output
                    ((double * __restrict__)(%(_zout)s->data + j_idx * %(_zout)s->strides[0]))[0] = ip;
                }
            }
        }
        """
        # Entries of ``sub`` take precedence over the variable names, as they
        # did when the template was filled from locals().
        substitutions = dict(_indices=_indices, _indptr=_indptr,
                             _d=_d, _g=_g, _zout=_zout)
        substitutions.update(sub)
        return code % substitutions
_sdgcsr = StructuredDotGradCSR()
def structured_dot_grad(sparse_A, dense_B, ga): def structured_dot_grad(sparse_A, dense_B, ga):
#TODO: 1. move this switch to be a specialization of structuredDotGrad #TODO: 1. move this switch to be a specialization of structuredDotGrad
...@@ -972,10 +1092,14 @@ def structured_dot_grad(sparse_A, dense_B, ga): ...@@ -972,10 +1092,14 @@ def structured_dot_grad(sparse_A, dense_B, ga):
if 0: if 0:
return _structured_dot_grad(sparse_A, dense_B, ga) return _structured_dot_grad(sparse_A, dense_B, ga)
else: else:
if sparse_A.type.format == 'csc': if sparse_A.type.format in ('csc','csr'):
g_A_data = _sdgcsc(csm_indices(sparse_A),\
sdgcsx = _sdgcsc if sparse_A.type.format == 'csc' else _sdgcsr
CSx = CSC if sparse_A.type.format == 'csc' else CSR
g_A_data = sdgcsx(csm_indices(sparse_A),\
csm_indptr(sparse_A), dense_B, ga) csm_indptr(sparse_A), dense_B, ga)
return CSC(g_A_data, csm_indices(sparse_A),\ return CSx(g_A_data, csm_indices(sparse_A),\
csm_indptr(sparse_A),\ csm_indptr(sparse_A),\
csm_shape(sparse_A)) csm_shape(sparse_A))
else: else:
......
from theano.sparse import * from theano.sparse import *
import random
import unittest import unittest
import theano
from theano import compile from theano import compile
from theano import gradient from theano import gradient
from theano import gof
from theano.sparse.basic import _is_dense, _is_sparse, _is_dense_result, _is_sparse_result from theano.sparse.basic import _is_dense, _is_sparse, _is_dense_result, _is_sparse_result
from theano.sparse.basic import _mtypes, _mtype_to_str from theano.sparse.basic import _mtypes, _mtype_to_str
import random
from theano import gof
def eval_outputs(outputs): def eval_outputs(outputs):
return compile.function([], outputs)()[0] return compile.function([], outputs)()[0]
...@@ -228,7 +230,7 @@ class test_true_dot(unittest.TestCase): ...@@ -228,7 +230,7 @@ class test_true_dot(unittest.TestCase):
x.data = x.data.T x.data = x.data.T
y.data = y.data.T y.data = y.data.T
# zop = true_dot(y, x) zop = true_dot(y, x)
zop = transpose(true_dot(y, x)) zop = transpose(true_dot(y, x))
self.failUnless(_is_sparse_result(zop)) self.failUnless(_is_sparse_result(zop))
z = eval_outputs([zop]) z = eval_outputs([zop])
...@@ -304,5 +306,59 @@ class test_true_dot(unittest.TestCase): ...@@ -304,5 +306,59 @@ class test_true_dot(unittest.TestCase):
self.failUnless(origloss > loss) self.failUnless(origloss > loss)
import scipy.sparse as sp
class test_structureddot(unittest.TestCase):
    """Check structured_dot on CSC and CSR operands: forward value and gradient."""

    def _check_format(self, make_sparse, spmat, kerns, images, bsize):
        """Run the value and gradient checks for one sparse format.

        :param make_sparse: op building the symbolic sparse matrix from
            (data, indices, indptr, shape) -- CSC or CSR.
        :param spmat: scipy sparse matrix supplying the structure and values.
        :param kerns: symbolic vector standing for the stored values.
        :param images: symbolic dense matrix; its transpose is the right operand.
        :param bsize: number of rows of the dense test input.
        """
        def build_graph(kerns, images):
            sparse_operand = make_sparse(kerns,
                                         spmat.indices[:spmat.size],
                                         spmat.indptr,
                                         spmat.shape)
            return structured_dot(sparse_operand, images.T)

        out = build_graph(kerns, images)
        n_cols = spmat.shape[1]

        for mode in ('FAST_COMPILE', 'FAST_RUN'):
            f = theano.function([kerns, images], out, mode=mode)

            kernvals = spmat.data[:spmat.size]
            imvals = 1.0 * numpy.arange(bsize * n_cols).reshape(bsize, n_cols)
            outvals = f(kernvals, imvals)

            # the compiled graph must agree with scipy's own product
            expected = spmat.dot(imvals.T).todense()
            assert numpy.all(outvals == expected)

            # NOTE(review): presumably a numerical check of the symbolic
            # gradient -- confirm against tensor.verify_grad.
            tensor.verify_grad(None, build_graph, [kernvals, imvals], mode=mode)

    def test_structuredot(self):
        # A larger fixture (bsize = 5, an 8x15 matrix with entries at
        # [1,2]=3, [4,7]=6, [2,7]=72, [1,9]=2, [7,12]=1, [4,2]=7) is
        # disabled in favour of the small one below.
        bsize = 2
        spmat = sp.csc_matrix((5, 5))
        spmat[1, 2] = 1
        spmat[0, 1] = 2
        spmat[0, 2] = 3

        kerns = tensor.dvector()
        images = tensor.dmatrix()

        # column-compressed first, then the same matrix row-compressed; the
        # conversion only happens once the CSC checks have finished
        self._check_format(CSC, spmat, kerns, images, bsize)
        self._check_format(CSR, spmat.tocsr(), kerns, images, bsize)
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()
...@@ -543,11 +543,11 @@ class GemmLocalOptimizer(LocalOptimizer): ...@@ -543,11 +543,11 @@ class GemmLocalOptimizer(LocalOptimizer):
# TODO: This could be an EquilibriumOptimizer, but I don't know how to combine an OpKeyOptimizer and # TODO: This could be an EquilibriumOptimizer, but I don't know how to combine an OpKeyOptimizer and
# an EquilibriumOptimizer. # an EquilibriumOptimizer.
compile.optdb.register('inplace_gemm_0', OpKeyOptimizer(GemmLocalOptimizer(), compile.optdb.register('inplace_gemm_0', OpKeyOptimizer(GemmLocalOptimizer(),
failure_callback=GemmLocalOptimizer.failure_callback), 70.00, 'fast_run', 'inplace') failure_callback=GemmLocalOptimizer.failure_callback), 70.00, 'fast_run', 'inplace', 'gemm')
compile.optdb.register('inplace_gemm_1', OpKeyOptimizer(GemmLocalOptimizer(), compile.optdb.register('inplace_gemm_1', OpKeyOptimizer(GemmLocalOptimizer(),
failure_callback=GemmLocalOptimizer.failure_callback), 70.01, 'fast_run', 'inplace') failure_callback=GemmLocalOptimizer.failure_callback), 70.01, 'fast_run', 'inplace', 'gemm')
compile.optdb.register('inplace_gemm_2', OpKeyOptimizer(GemmLocalOptimizer(), compile.optdb.register('inplace_gemm_2', OpKeyOptimizer(GemmLocalOptimizer(),
failure_callback=GemmLocalOptimizer.failure_callback), 70.02, 'fast_run', 'inplace') failure_callback=GemmLocalOptimizer.failure_callback), 70.02, 'fast_run', 'inplace', 'gemm')
class Dot22(GemmRelated): class Dot22(GemmRelated):
"""Compute a matrix-matrix product. """Compute a matrix-matrix product.
......
...@@ -588,6 +588,7 @@ def mul_calculate(num, denum, aslist = False): ...@@ -588,6 +588,7 @@ def mul_calculate(num, denum, aslist = False):
return v return v
local_mul_canonizer = Canonizer(T.mul, T.div, T.inv, mul_calculate, False) local_mul_canonizer = Canonizer(T.mul, T.div, T.inv, mul_calculate, False)
register_canonicalize(local_mul_canonizer, name = 'local_mul_canonizer')
@gof.local_optimizer([T.neg]) @gof.local_optimizer([T.neg])
def local_neg_to_mul(node): def local_neg_to_mul(node):
...@@ -693,7 +694,6 @@ def local_mul_specialize(node): ...@@ -693,7 +694,6 @@ def local_mul_specialize(node):
return False return False
register_specialize(local_mul_specialize) register_specialize(local_mul_specialize)
register_canonicalize(local_mul_canonizer, name = 'local_mul_canonizer')
# neg_to_mul = out2in(gof.LocalOptGroup(local_neg_to_mul)) # neg_to_mul = out2in(gof.LocalOptGroup(local_neg_to_mul))
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论