提交 28716425 authored 作者: nouiz's avatar nouiz

Merge pull request #627 from ynd/csm_grad_fix

Fix crash and corruption bug in CSM's grad
...@@ -698,10 +698,12 @@ class CSM(gof.Op): ...@@ -698,10 +698,12 @@ class CSM(gof.Op):
indptr.copy()), shape.copy(), indptr.copy()), shape.copy(),
copy=False) copy=False)
def grad(self, (data, indices, indptr, shape), (g_out,)): def grad(self, (x_data, x_indices, x_indptr, x_shape), (g_out,)):
"""Return a gradient on the data vector""" """Return a gradient on the data vector"""
g_data, g_indices, g_indptr, g_shape = csm_properties(g_out)
#unpack the data vector and wrap it as a 1d TensorType #unpack the data vector and wrap it as a 1d TensorType
g_data = csm_grad(self.kmap)(data, csm_data(g_out), csm_indices(g_out)) g_data = csm_grad(self.kmap)(x_data, x_indices, x_indptr, x_shape,
g_data, g_indices, g_indptr, g_shape)
return [g_data, None, None, None] return [g_data, None, None, None]
def infer_shape(self, node, shapes): def infer_shape(self, node, shapes):
...@@ -717,6 +719,19 @@ CSR = CSM('csr') ...@@ -717,6 +719,19 @@ CSR = CSM('csr')
class CSMGrad(gof.op.Op): class CSMGrad(gof.op.Op):
"""
This Op computes the gradient of the CSM Op. CSM creates a matrix from
data, indices, and ind_ptr vectors; it's gradient is the gradient of
the data vector only. There are two complexities to calculate this gradient:
1. The gradient may be sparser than the input matrix defined by (data,
indices, ind_ptr). In this case, the data vector of the gradient will have
less elements than the data vector of the input because sparse formats
remove 0s. Since we are only returning the gradient of the data vector, the
relevant 0s need to be added back.
2. The elements in the sparse dimension are not guaranteed to be sorted.
Therefore, the input data vector may have a different order than the
gradient data vector.
"""
def __init__(self, kmap=None): def __init__(self, kmap=None):
self.kmap = kmap self.kmap = kmap
if self.kmap is None: if self.kmap is None:
...@@ -733,17 +748,37 @@ class CSMGrad(gof.op.Op): ...@@ -733,17 +748,37 @@ class CSMGrad(gof.op.Op):
self.__class__.__name__, self.__class__.__name__,
self.kmap) self.kmap)
def make_node(self, data, gout_data, gout_indices): def make_node(self, x_data, x_indices, x_indptr, x_shape,
g_data = gout_data.type() g_data, g_indices, g_indptr, g_shape):
return gof.Apply(self, [data, gout_data, gout_indices], [g_data]) gout_data = g_data.type()
return gof.Apply(self, [x_data, x_indices, x_indptr, x_shape,
g_data, g_indices, g_indptr, g_shape], [gout_data])
def perform(self, node, (data, gout_data, gout_indices), (g_data,)): def perform(self, node, (x_data, x_indices, x_indptr, x_shape,
g_data, g_indices, g_indptr, g_shape), (g_out,)):
if len(x_indptr) - 1 == x_shape[0]:
sp_dim = x_shape[1]
else:
sp_dim = x_shape[0]
g_row = numpy.zeros(sp_dim, dtype=g_data.dtype)
gout_data = numpy.zeros_like(x_data)
for i in range(len(x_indptr) - 1):
for j_ptr in range(g_indptr[i], g_indptr[i + 1]):
g_row[g_indices[j_ptr]] += g_data[j_ptr]
for j_ptr in range(x_indptr[i], x_indptr[i + 1]):
gout_data[j_ptr] = g_row[x_indices[j_ptr]]
for j_ptr in range(g_indptr[i], g_indptr[i + 1]):
g_row[g_indices[j_ptr]] = 0
if self.kmap is None: if self.kmap is None:
g_data[0] = gout_data g_out[0] = gout_data
else: else:
grad = numpy.zeros_like(data) grad = numpy.zeros_like(x_data)
grad[self.kmap] = gout_data grad[self.kmap] = gout_data
g_data[0] = grad g_out[0] = grad
def infer_shape(self, node, shapes): def infer_shape(self, node, shapes):
if self.kmap is None: if self.kmap is None:
...@@ -754,6 +789,137 @@ class CSMGrad(gof.op.Op): ...@@ -754,6 +789,137 @@ class CSMGrad(gof.op.Op):
csm_grad = CSMGrad csm_grad = CSMGrad
class CSMGradC(gof.Op):
def __eq__(self, other):
return (type(self) == type(other))
def __hash__(self):
return hash(type(self))
def __str__(self):
return self.__class__.__name__
def make_node(self, a_val, a_ind, a_ptr, a_dim, b_val, b_ind, b_ptr, b_dim):
return gof.Apply(self, [a_val, a_ind, a_ptr, a_dim,
b_val, b_ind, b_ptr, b_dim], [b_val.type()])
def c_code(self, node, name, (a_val, a_ind, a_ptr, a_dim,
b_val, b_ind, b_ptr, b_dim), (z,), sub):
# retrieve dtype number
typenum_z = node.outputs[0].type.dtype_specs()[-1]
if node.inputs[0].type.dtype in ('complex64', 'complex128'):
raise NotImplementedError('Complex types are not supported for a_val')
if node.inputs[3].type.dtype in ('complex64', 'complex128'):
raise NotImplementedError('Complex types are not supported for b_val')
return """
if (%(a_val)s->nd != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(a_val) != 1"); %(fail)s;}
if (%(a_ind)s->nd != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(a_ind) != 1"); %(fail)s;}
if (%(a_ptr)s->nd != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(a_ptr) != 1"); %(fail)s;}
if (%(b_val)s->nd != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(b_val) != 1"); %(fail)s;}
if (%(b_ind)s->nd != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(b_ind) != 1"); %(fail)s;}
if (%(b_ptr)s->nd != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(b_ptr) != 1"); %(fail)s;}
if (%(a_ind)s->descr->type_num != PyArray_INT32) {
PyErr_SetString(PyExc_NotImplementedError, "a_ind dtype not INT32"); %(fail)s;}
if (%(a_ptr)s->descr->type_num != PyArray_INT32)
{PyErr_SetString(PyExc_NotImplementedError, "a_ptr dtype not INT32"); %(fail)s;}
if (%(b_ind)s->descr->type_num != PyArray_INT32) {
PyErr_SetString(PyExc_NotImplementedError, "b_ind dtype not INT32"); %(fail)s;}
if (%(b_ptr)s->descr->type_num != PyArray_INT32)
{PyErr_SetString(PyExc_NotImplementedError, "b_ptr dtype not INT32"); %(fail)s;}
if (%(a_val)s->dimensions[0] != %(a_ind)s->dimensions[0])
{PyErr_SetString(PyExc_NotImplementedError, "a_val and a_ind have different lengths"); %(fail)s;}
if (%(b_val)s->dimensions[0] != %(b_ind)s->dimensions[0])
{PyErr_SetString(PyExc_NotImplementedError, "b_val and b_ind have different lengths"); %(fail)s;}
if (%(a_ptr)s->dimensions[0] != %(b_ptr)s->dimensions[0])
{PyErr_SetString(PyExc_NotImplementedError, "a_ptr and b_ptr have different lengths"); %(fail)s;}
if ((!%(z)s) || (%(z)s->dimensions[0] != %(a_val)s->dimensions[0]))
{
{Py_XDECREF(%(z)s);}
npy_intp dims[] = {0};
dims[0] = %(a_val)s->dimensions[0];
%(z)s = (PyArrayObject*) PyArray_SimpleNew(1, dims, %(typenum_z)s);
}
{
// sparse array has size MxK, dense KxN, output MxN
npy_intp M = %(a_ptr)s->dimensions[0] - 1;
npy_intp a_dim_0 = ((npy_int32 *)%(a_dim)s->data)[0];
npy_intp a_dim_1 = ((npy_int32 *)%(a_dim)s->data)[1];
npy_intp sp_dim = (M == a_dim_0)?a_dim_1:a_dim_0;
// strides tell you how many bytes to skip to go to next column/row entry
npy_intp Sz = %(z)s->strides[0] / %(z)s->descr->elsize;
npy_intp Sa_val = %(a_val)s->strides[0] / %(a_val)s->descr->elsize;
npy_intp Sa_ind = %(a_ind)s->strides[0] / %(a_ind)s->descr->elsize;
npy_intp Sa_ptr = %(a_ptr)s->strides[0] / %(a_ptr)s->descr->elsize;
npy_intp Sb_val = %(b_val)s->strides[0] / %(b_val)s->descr->elsize;
npy_intp Sb_ind = %(b_ind)s->strides[0] / %(b_ind)s->descr->elsize;
npy_intp Sb_ptr = %(b_ptr)s->strides[0] / %(b_ptr)s->descr->elsize;
// pointers to access actual data in the arrays passed as params.
dtype_%(z)s* __restrict__ Dz = (dtype_%(z)s*)%(z)s->data;
const dtype_%(a_val)s* __restrict__ Da_val = (dtype_%(a_val)s*)%(a_val)s->data;
const npy_int32 * __restrict__ Da_ind = (npy_int32*)%(a_ind)s->data;
const npy_int32 * __restrict__ Da_ptr = (npy_int32*)%(a_ptr)s->data;
const dtype_%(b_val)s* __restrict__ Db_val = (dtype_%(b_val)s*)%(b_val)s->data;
const npy_int32 * __restrict__ Db_ind = (npy_int32*)%(b_ind)s->data;
const npy_int32 * __restrict__ Db_ptr = (npy_int32*)%(b_ptr)s->data;
npy_intp nnz = %(a_ind)s->dimensions[0];
dtype_%(b_val)s b_row[sp_dim];
//clear the output array
for (npy_int64 i = 0; i < nnz; ++i)
{
Dz[i*Sz] = 0;
}
memset(b_row, 0, sp_dim*sizeof(dtype_%(b_val)s));
// loop over inner dimension
for (npy_int64 m = 0; m < M; ++m)
{
for (npy_int32 j_ptr = Db_ptr[m * Sb_ptr];
j_ptr < Db_ptr[(m + 1) * Sb_ptr]; j_ptr++) {
b_row[Db_ind[j_ptr * Sb_ind]] += Db_val[j_ptr*Sb_val];
}
for (npy_int32 j_ptr = Da_ptr[m * Sa_ptr];
j_ptr < Da_ptr[(m + 1) * Sa_ptr]; j_ptr++) {
Dz[j_ptr*Sz] = b_row[Da_ind[j_ptr * Sa_ind]];
}
for (npy_int32 j_ptr = Db_ptr[m * Sb_ptr];
j_ptr < Db_ptr[(m + 1) * Sb_ptr]; j_ptr++) {
b_row[Db_ind[j_ptr * Sb_ind]] = 0;
}
}
}
""" % dict(locals(), **sub)
def c_code_cache_version(self):
return (3,)
csm_grad_c = CSMGradC()
@gof.local_optimizer([csm_grad(None)])
def local_csm_grad_c(node):
""" csm_grad(None) -> csm_grad_c """
if node.op == csm_grad(None):
return [csm_grad_c(*node.inputs)]
return False
register_specialize(local_csm_grad_c)
# #
# Conversion # Conversion
# #
......
...@@ -22,7 +22,7 @@ from theano.sparse.basic import _is_dense_variable, _is_sparse_variable ...@@ -22,7 +22,7 @@ from theano.sparse.basic import _is_dense_variable, _is_sparse_variable
from theano.sparse.basic import verify_grad_sparse from theano.sparse.basic import verify_grad_sparse
from theano.sparse import (as_sparse_variable, CSC, CSR, CSM, CSMProperties, from theano.sparse import (as_sparse_variable, CSC, CSR, CSM, CSMProperties,
csm_properties) csm_properties)
from theano.sparse import SparseType, CSMGrad from theano.sparse import SparseType, CSMGrad, CSMGradC
from theano.sparse import StructuredDot, StructuredDotCSC from theano.sparse import StructuredDot, StructuredDotCSC
from theano.sparse import StructuredDotGradCSC, StructuredDotGradCSR from theano.sparse import StructuredDotGradCSC, StructuredDotGradCSR
from theano.sparse import AddSS, AddSD, MulSS, MulSD, Transpose, Neg, Remove0 from theano.sparse import AddSS, AddSD, MulSS, MulSD, Transpose, Neg, Remove0
...@@ -184,7 +184,7 @@ class SparseInferShapeTester(utt.InferShapeTester): ...@@ -184,7 +184,7 @@ class SparseInferShapeTester(utt.InferShapeTester):
[out], [out],
[spm.data, spm.indices, spm.indptr, [spm.data, spm.indices, spm.indptr,
spm.shape], spm.shape],
CSMGrad (CSMGrad, CSMGradC)
) )
def test_transpose(self): def test_transpose(self):
...@@ -616,6 +616,107 @@ class test_csm_properties(unittest.TestCase): ...@@ -616,6 +616,107 @@ class test_csm_properties(unittest.TestCase):
assert numpy.all(shape == spmat.shape) assert numpy.all(shape == spmat.shape)
class test_csm(unittest.TestCase):
def setUp(self):
utt.seed_rng()
def test_csm_grad(self):
sp_types = {'csc': sp.csc_matrix,
'csr': sp.csr_matrix}
for format in ['csc', 'csr']:
for dtype in ['float32', 'float64']:
spmat = sp_types[format](random_lil((4, 3), dtype, 3))
verify_grad_sparse(lambda x: CSM(format)(x, spmat.indices,
spmat.indptr, numpy.asarray(spmat.shape, 'int32')),
[spmat.data], structured=True)
def test_csm_sparser(self):
"""
Test support for gradients sparser than the input.
"""
sp_types = {'csc': sp.csc_matrix,
'csr': sp.csr_matrix}
for format in ['csc', 'csr']:
for dtype in ['float32', 'float64']:
x = tensor.tensor(dtype=dtype, broadcastable=(False,))
y = tensor.ivector()
z = tensor.ivector()
s = tensor.ivector()
a = as_sparse_variable(sp_types[format](random_lil((4, 3),
dtype, 1)))
f = theano.function([x, y, z, s], tensor.grad(dense_from_sparse(
a * CSM(format)(x, y, z, s)).sum(), x))
spmat = sp_types[format](random_lil((4, 3), dtype, 3))
res = f(spmat.data, spmat.indices, spmat.indptr,
numpy.asarray(spmat.shape, 'int32'))
assert len(spmat.data) == len(res)
def test_csm_unsorted(self):
"""
Test support for gradients of unsorted inputs.
"""
sp_types = {'csc': sp.csc_matrix,
'csr': sp.csr_matrix}
for format in ['csr', 'csc', ]:
for dtype in ['float32', 'float64']:
x = tensor.tensor(dtype=dtype, broadcastable=(False,))
y = tensor.ivector()
z = tensor.ivector()
s = tensor.ivector()
# Sparse advanced indexing produces unsorted sparse matrices
a = sp_types[format]([[1,2,1], [1,2,1], [1,2,1], [1,2,1]],
dtype=dtype)[range(4)]
# Make sure it's unsorted
assert not a.has_sorted_indices
a = as_sparse_variable(a)
f = theano.function([x, y, z, s], tensor.grad(tensor.sum(
dense_from_sparse(a * CSM(format)(x, y, z, s))), x))
spmat = sp_types[format](random_lil((4, 3), dtype,
12))[range(4)]
assert not spmat.has_sorted_indices
res = f(spmat.data, spmat.indices, spmat.indptr,
numpy.asarray(spmat.shape, 'int32'))
col1 = sp_types[format]((res, spmat.indices, spmat.indptr),
shape=numpy.asarray(spmat.shape, 'int32'))[:, 1].data
assert numpy.all(col1 == 2)
def test_csm(self):
sp_types = {'csc': sp.csc_matrix,
'csr': sp.csr_matrix}
for format in ['csc', 'csr']:
for dtype in ['float32', 'float64']:
x = tensor.tensor(dtype=dtype, broadcastable=(False,))
y = tensor.ivector()
z = tensor.ivector()
s = tensor.ivector()
f = theano.function([x, y, z, s], CSM(format)(x, y, z, s))
spmat = sp_types[format](random_lil((4, 3), dtype, 3))
res = f(spmat.data, spmat.indices, spmat.indptr,
numpy.asarray(spmat.shape, 'int32'))
assert numpy.all(res.data == spmat.data)
assert numpy.all(res.indices == spmat.indices)
assert numpy.all(res.indptr == spmat.indptr)
assert numpy.all(res.shape == spmat.shape)
class test_structureddot(unittest.TestCase): class test_structureddot(unittest.TestCase):
def setUp(self): def setUp(self):
utt.seed_rng() utt.seed_rng()
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论