Commit 8e9ebc8f authored by abergeron

Merge pull request #1727 from nouiz/sparse

Fix Sparse grad crash and implement mixed dtype in sparse Mul/Add
...@@ -1654,13 +1654,12 @@ class AddSS(gof.op.Op): ...@@ -1654,13 +1654,12 @@ class AddSS(gof.op.Op):
    def make_node(self, x, y):
        """Build an Apply node adding two sparse variables.

        Mixed dtypes are supported: the output dtype is the scalar
        upcast of the two input dtypes.  Both inputs must share the
        same sparse format (csc/csr); mixed formats are not
        implemented.
        """
        x, y = map(as_sparse_variable, [x, y])
        # Output dtype is the upcast of both inputs' dtypes.
        out_dtype = scalar.upcast(x.type.dtype, y.type.dtype)
        if x.type.format != y.type.format:
            raise NotImplementedError()
        return gof.Apply(self,
                         [x, y],
                         [SparseType(dtype=out_dtype,
                                     format=x.type.format
                                     ).make_variable()])
...@@ -1742,97 +1741,34 @@ class AddSD(gof.op.Op): ...@@ -1742,97 +1741,34 @@ class AddSD(gof.op.Op):
:note: The grad implemented is structured on `x`. :note: The grad implemented is structured on `x`.
""" """
    def __init__(self, *args, **kwargs):
        # Stateless op: the inplace/C variant lives in AddSD_ccode
        # (see the opt module); this Python version has no parameters.
        gof.Op.__init__(self, *args, **kwargs)
    def __eq__(self, other):
        # Ops are equal iff they are instances of the same class
        # (the op carries no parameters).
        return (type(self) == type(other))
    def __hash__(self):
        # Consistent with __eq__: hash on the class only.
        return hash(type(self))
    def __str__(self):
        # Printable name used in graph dumps.
        return self.__class__.__name__
    def make_node(self, x, y):
        """Build an Apply node adding sparse `x` and dense `y`.

        Mixed dtypes are supported: the output is a dense tensor
        whose dtype is the upcast of the two inputs' dtypes and whose
        broadcastable pattern is that of `y`.
        """
        x, y = as_sparse_variable(x), tensor.as_tensor_variable(y)
        out_dtype = scalar.upcast(x.type.dtype, y.type.dtype)

        # The magic number two here arises because L{scipy.sparse}
        # objects must be matrices (have dimension 2)
        assert y.type.ndim == 2
        return gof.Apply(self,
                         [x, y],
                         [tensor.TensorType(dtype=out_dtype,
                                            broadcastable=y.type.broadcastable
                                            ).make_variable()])
    def perform(self, node, (x, y), (out, )):
        """Compute x + y via scipy; `x` is sparse, `y` dense."""
        assert _is_dense(y)

        # The asarray is needed as in some case, this return a
        # numpy.matrixlib.defmatrix.matrix object and not an ndarray.
        out[0] = theano._asarray(x + y, dtype=node.outputs[0].type.dtype)
...@@ -1843,10 +1779,8 @@ class AddSD(gof.op.Op): ...@@ -1843,10 +1779,8 @@ class AddSD(gof.op.Op):
return sp_ones_like(x) * gz, gz return sp_ones_like(x) * gz, gz
    def infer_shape(self, node, shapes):
        # Output has the shape of the dense input y
        # (node inputs are [x, y], so index 1).
        return [shapes[1]]
# Module-level instance of the op.
add_s_d = AddSD()
...@@ -1983,11 +1917,16 @@ class MulSS(gof.op.Op): ...@@ -1983,11 +1917,16 @@ class MulSS(gof.op.Op):
    def make_node(self, x, y):
        """Build an Apply for elementwise sparse*sparse multiplication.

        Mixed dtypes are supported (the output dtype is the upcast of
        the inputs' dtypes); both inputs must share the same sparse
        format (csc/csr).
        """
        x, y = as_sparse_variable(x), as_sparse_variable(y)
        out_dtype = scalar.upcast(x.type.dtype, y.type.dtype)
        if x.type.format != y.type.format:
            raise NotImplementedError(
                "MulSS not supported for differing types. "
                "Got %s and %s." % (str(x.type), str(y.type)))
        return gof.Apply(self, [x, y],
                         [SparseType(dtype=out_dtype,
                                     format=x.type.format
                                     )()])
def perform(self, node, (x, y), (out, )): def perform(self, node, (x, y), (out, )):
assert _is_sparse(x) and _is_sparse(y) assert _is_sparse(x) and _is_sparse(y)
...@@ -2031,23 +1970,25 @@ class MulSD(gof.op.Op): ...@@ -2031,23 +1970,25 @@ class MulSD(gof.op.Op):
# upcast the tensor. Is the cast of sparse done implemented? # upcast the tensor. Is the cast of sparse done implemented?
dtype = scalar.upcast(x.type.dtype, y.type.dtype) dtype = scalar.upcast(x.type.dtype, y.type.dtype)
if y.type.dtype != dtype:
y = tensor.cast(y, dtype)
if x.type.dtype != y.type.dtype:
raise NotImplementedError(
"MulSD not implemented for different input dtypes. "
"Got %s and %s." % (x.type.dtype, y.type.dtype))
# The magic number two here arises because L{scipy.sparse} # The magic number two here arises because L{scipy.sparse}
# objects must be matrices (have dimension 2) # objects must be matrices (have dimension 2)
# Broadcasting of the sparse matrix is not supported. # Broadcasting of the sparse matrix is not supported.
assert y.type.ndim <= 2 # We support nd == 0 used by grad of SpSum()
return gof.Apply(self, [x, y], [x.type()]) assert y.type.ndim in [0, 2]
out = SparseType(dtype=dtype,
format=x.type.format)()
return gof.Apply(self, [x, y], [out])
def perform(self, node, (x, y), (out, )): def perform(self, node, (x, y), (out, )):
assert _is_sparse(x) and _is_dense(y) assert _is_sparse(x) and _is_dense(y)
if len(y.shape) == 0: if len(y.shape) == 0:
out[0] = x.copy() out_dtype = node.outputs[0].dtype
if x.dtype == out_dtype:
z = x.copy()
else:
z = x.astype(out_dtype)
out[0] = z
out[0].data *= y out[0].data *= y
elif len(y.shape) == 1: elif len(y.shape) == 1:
raise NotImplementedError() # RowScale / ColScale raise NotImplementedError() # RowScale / ColScale
...@@ -2057,12 +1998,16 @@ class MulSD(gof.op.Op): ...@@ -2057,12 +1998,16 @@ class MulSD(gof.op.Op):
# TODO: change runtime from O(M*N) to O(nonzeros) # TODO: change runtime from O(M*N) to O(nonzeros)
M, N = x.shape M, N = x.shape
assert x.shape == y.shape assert x.shape == y.shape
out_dtype = node.outputs[0].dtype
if x.format == 'csc': if x.format == 'csc':
x_data = x.data x_data = x.data
indices = x.indices indices = x.indices
indptr = x.indptr indptr = x.indptr
if x.dtype == out_dtype:
z = x.copy() z = x.copy()
else:
z = x.astype(out_dtype)
z_data = z.data z_data = z.data
for j in xrange(0, N): for j in xrange(0, N):
...@@ -2074,7 +2019,10 @@ class MulSD(gof.op.Op): ...@@ -2074,7 +2019,10 @@ class MulSD(gof.op.Op):
x_data = x.data x_data = x.data
indices = x.indices indices = x.indices
indptr = x.indptr indptr = x.indptr
if x.dtype == out_dtype:
z = x.copy() z = x.copy()
else:
z = x.astype(out_dtype)
z_data = z.data z_data = z.data
for i in xrange(0, M): for i in xrange(0, M):
......
...@@ -8,7 +8,8 @@ from theano import gof, scalar, tensor ...@@ -8,7 +8,8 @@ from theano import gof, scalar, tensor
from theano.tensor import blas from theano.tensor import blas
from theano.sparse import (CSC, CSR, csm_properties, from theano.sparse import (CSC, CSR, csm_properties,
register_specialize, register_specialize,
csm_grad, usmm) csm_grad, usmm, csm_indices, csm_indptr,
csm_data)
from theano.sparse import basic as sparse from theano.sparse import basic as sparse
_is_sparse_variable = sparse._is_sparse_variable _is_sparse_variable = sparse._is_sparse_variable
...@@ -49,30 +50,148 @@ theano.compile.optdb.register('local_inplace_remove0', ...@@ -49,30 +50,148 @@ theano.compile.optdb.register('local_inplace_remove0',
gof.TopoOptimizer(local_inplace_remove0, gof.TopoOptimizer(local_inplace_remove0,
failure_callback=gof.TopoOptimizer.warn_inplace), failure_callback=gof.TopoOptimizer.warn_inplace),
60, 'fast_run', 'inplace') 60, 'fast_run', 'inplace')
class AddSD_ccode(gof.op.Op):
    """Add a sparse and a dense matrix (C implementation).

    :param x: A sparse matrix.
    :param y: A dense matrix

    :return: `x`+`y` as a dense matrix, with dtype the upcast of the
        two inputs' dtypes.

    :note: The grad implemented is structured on `x`.
    :note: Inserted by optimization in place of sparse.AddSD; not
        meant to be used directly.
    """
    def __init__(self, format, inplace=False, *args, **kwargs):
        gof.Op.__init__(self, *args, **kwargs)
        # Should we do inplace addition or not?
        self.inplace = inplace
        # Sparse storage format of the input, 'csc' or 'csr'.
        self.format = format
        if self.inplace:
            # The Apply's inputs are [data, indices, indptr, y];
            # index 3 is the dense matrix y, which the inplace
            # version overwrites with the result.
            self.destroy_map = {0: [3]}

    def __eq__(self, other):
        # Equal iff same class and same (inplace, format) parameters.
        return (type(self) == type(other) and
                self.inplace == other.inplace and
                self.format == other.format)

    def __hash__(self):
        # Consistent with __eq__: mix in both parameters.
        return hash(type(self)) ^ hash(self.inplace) ^ hash(self.format)

    def __str__(self):
        # E.g. "AddSD_ccode{csc}" or "AddSD_ccode{csr,inplace}".
        inp = ''
        if self.inplace:
            inp = ',inplace'
        return "%s{%s%s}" % (self.__class__.__name__,
                             self.format, inp)

    def make_node(self, x, y):
        """Build an Apply taking the sparse matrix decomposed into its
        (data, indices, indptr) components plus the dense `y`."""
        x, y = sparse.as_sparse_variable(x), tensor.as_tensor_variable(y)
        out_dtype = scalar.upcast(x.type.dtype, y.type.dtype)
        if self.inplace:
            # The result is written into y, so y must already have
            # the upcast dtype.
            assert out_dtype == y.dtype

        indices, indptr, data = csm_indices(x), csm_indptr(x), csm_data(x)
        # We either use CSC or CSR depending on the format of input
        assert self.format == x.type.format
        # The magic number two here arises because L{scipy.sparse}
        # objects must be matrices (have dimension 2)
        assert y.type.ndim == 2
        out = tensor.TensorType(dtype=out_dtype,
                                broadcastable=y.type.broadcastable)()
        return gof.Apply(self,
                         [data, indices, indptr, y],
                         [out])

    def c_code(self, node, name, (_data, _indices, _indptr, y), (z, ), sub):
        """Generate C code: copy (or cast-copy) y into z, then add each
        stored sparse element into the matching position of z."""
        inplace = int(self.inplace)
        # 0 -> csc (indptr indexes columns), 1 -> csr (indptr rows).
        format = {'csc': 0, 'csr': 1}[self.format]
        # NumPy type number of the output dtype, for the cast path.
        out_typenum = node.outputs[0].type.dtype_specs()[2]
        # NOTE(review): pos is computed from y's strides (Yi, Yj) and
        # used to index BOTH ydata and zdata; in the non-inplace
        # paths z is a fresh copy of y whose layout may differ from a
        # non-C-contiguous y -- confirm callers guarantee matching
        # layout.
        code = """
        Py_XDECREF(%(z)s);
        if (!%(inplace)s){
            if(PyArray_TYPE(%(y)s) != %(out_typenum)s){
                %(z)s = (PyArrayObject *) PyArray_FromArray(%(y)s, PyArray_DescrFromType(%(out_typenum)s), 0);
            }else{
                %(z)s = (PyArrayObject *) PyArray_NewCopy(%(y)s, NPY_CORDER);
            }
        }else{
            %(z)s = %(y)s;
            Py_XINCREF(%(z)s);
        }

        npy_intp N = PyArray_DIMS(%(_indptr)s)[0]-1;
        const npy_int32 * __restrict__ indptr = (npy_int32 *)PyArray_DATA(%(_indptr)s);
        const npy_int32 * __restrict__ indices = (npy_int32*)PyArray_DATA(%(_indices)s);
        const dtype_%(_data)s* __restrict__ data = (dtype_%(_data)s*)PyArray_DATA(%(_data)s);

        dtype_%(y)s* ydata = (dtype_%(y)s*)PyArray_DATA(%(y)s);
        dtype_%(z)s* zdata = (dtype_%(z)s*)PyArray_DATA(%(z)s);
        int Yi = PyArray_STRIDES(%(y)s)[0]/PyArray_DESCR(%(y)s)->elsize;
        int Yj = PyArray_STRIDES(%(y)s)[1]/PyArray_DESCR(%(y)s)->elsize;

        npy_int32 pos;
        if (%(format)s == 0){
            for (npy_int32 col = 0; col < N; ++col){
                for (npy_int32 ind = indptr[col]; ind < indptr[col+1]; ++ind){
                    npy_int32 row = indices[ind];
                    pos = row * Yi + col * Yj;
                    zdata[pos] = ydata[pos] + data[ind];
                }
            }
        }else{
            for (npy_int32 row = 0; row < N; ++row){
                for (npy_int32 ind = indptr[row]; ind < indptr[row+1]; ++ind){
                    npy_int32 col = indices[ind];
                    pos = row * Yi + col * Yj;
                    zdata[pos] = ydata[pos] + data[ind];
                }
            }
        }
        """ % dict(locals(), **sub)
        return code

    def infer_shape(self, node, shapes):
        # Output has the shape of the dense input y (input index 3).
        return [shapes[3]]

    def c_code_cache_version(self):
        return (1,)
@gof.local_optimizer([sparse.AddSD])
def local_inplace_addsd_ccode(node):
    """
    Optimization to insert inplace versions of AddSD.

    Applies only when a C compiler is available and the dense input
    `y` already has the output dtype: AddSD_ccode(inplace=True)
    writes the result into `y`, so no upcast of `y` may be needed.
    """
    if isinstance(node.op, sparse.AddSD) and theano.config.cxx:
        # BUG FIX: scalar.upcast() expects dtype strings, not the
        # Variables themselves; passing node.inputs directly raised
        # at optimization time.
        out_dtype = scalar.upcast(*[inp.type.dtype for inp in node.inputs])
        if out_dtype != node.inputs[1].dtype:
            # Inplace would require casting y; leave the node alone
            # (the non-inplace AddSD_ccode optimization handles it).
            return
        new_node = AddSD_ccode(format=node.inputs[0].type.format,
                               inplace=True)(*node.inputs)
        return [new_node]
    return False
theano.compile.optdb.register('local_inplace_addsd_ccode',
                              gof.TopoOptimizer(local_inplace_addsd_ccode,
                                                failure_callback=gof.TopoOptimizer.warn_inplace),
                              60, 'fast_run', 'inplace')
@gof.local_optimizer([sparse.AddSD])
def local_addsd_ccode(node):
    """
    Convert AddSD to faster AddSD_ccode.
    """
    # Guard clause: only rewrite AddSD nodes, and only when a C
    # compiler is available to build the faster op.
    if not (isinstance(node.op, sparse.AddSD) and theano.config.cxx):
        return False
    fmt = node.inputs[0].type.format
    replacement = AddSD_ccode(format=fmt)(*node.inputs)
    return [replacement]
theano.compile.optdb.register('local_addsd_ccode',
                              gof.TopoOptimizer(local_addsd_ccode),
                              #Must be after local_inplace_addsd_ccode at 60
                              61, 'fast_run')
class StructuredDotCSC(gof.Op): class StructuredDotCSC(gof.Op):
"""Structured Dot CSC is like dot, except that only the """Structured Dot CSC is like dot, except that only the
gradient wrt non-zero elements of the sparse matrix gradient wrt non-zero elements of the sparse matrix
...@@ -1139,6 +1258,9 @@ def local_mul_s_d(node): ...@@ -1139,6 +1258,9 @@ def local_mul_s_d(node):
mul_s_d_csx = mul_s_d_csr mul_s_d_csx = mul_s_d_csr
else: else:
raise NotImplemented() raise NotImplemented()
if x.dtype != y.dtype:
#mul_s_d_csx don't support that case
return
c_data = mul_s_d_csx(sparse.csm_data(svar), c_data = mul_s_d_csx(sparse.csm_data(svar),
sparse.csm_indices(svar), sparse.csm_indices(svar),
......
Markdown format
0%
You are adding 0 people to this discussion. Please proceed with caution.
Please finish editing this comment first!
Register or sign in to comment