Commit 0421c6b0, authored by nouiz

Merge pull request #1091 from aboSamoor/grad_advinc_subtensor

Speed up the gradient of AdvancedSubtensor1 WIP
...@@ -20,6 +20,7 @@ from theano.sparse.utils import hash_from_sparse ...@@ -20,6 +20,7 @@ from theano.sparse.utils import hash_from_sparse
import theano.tests.unittest_tools as utt import theano.tests.unittest_tools as utt
from theano.gradient import grad_not_implemented from theano.gradient import grad_not_implemented
from theano.sparse.type import SparseType, _is_sparse from theano.sparse.type import SparseType, _is_sparse
from numpy.lib.stride_tricks import as_strided
sparse_formats = ['csc', 'csr'] sparse_formats = ['csc', 'csr']
...@@ -1710,31 +1711,94 @@ class AddSD(gof.op.Op): ...@@ -1710,31 +1711,94 @@ class AddSD(gof.op.Op):
:note: The grad implemented is structured on `x`. :note: The grad implemented is structured on `x`.
""" """
def __init__(self, inplace=False, *args, **kwargs):
    """Elementwise sparse + dense addition op.

    :param inplace: when True, the result is written directly into the
        dense operand (input index 3 of the apply node) instead of a
        fresh copy.
    """
    gof.Op.__init__(self, *args, **kwargs)
    self.inplace = inplace
    if inplace:
        # Output 0 destroys input 3 (the dense array y).
        self.destroy_map = {0: [3]}
def __eq__(self, other):
    """Two AddSD ops are interchangeable iff their type and inplace
    flag match."""
    # NOTE(review): self.format (set later, in make_node) is not part of
    # the comparison -- confirm that merging two AddSD instances with
    # different formats is safe.
    return (type(self) == type(other)) and self.inplace == other.inplace
def __hash__(self):
    """Hash on type and the inplace flag, consistent with __eq__."""
    return hash(type(self)) ^ hash(self.inplace)
def __str__(self):
    """Class name, with an '{inplace}' suffix when operating in place."""
    if self.inplace:
        return self.__class__.__name__ + '{inplace}'
    return self.__class__.__name__
def make_node(self, x, y):
    """Build the apply node for sparse(x) + dense(y).

    The sparse input is decomposed into its (data, indices, indptr)
    components so perform/c_code can address them directly; the dense
    input is kept as-is at input index 3.

    :raises NotImplementedError: if the two dtypes differ.
    """
    x, y = as_sparse_variable(x), tensor.as_tensor_variable(y)
    if x.type.dtype != y.type.dtype:
        raise NotImplementedError()
    indices, indptr, data = csm_indices(x), csm_indptr(x), csm_data(x)
    # We either use CSC or CSR depending on the format of input.
    # NOTE(review): storing `format` on the op instance makes the op
    # stateful, and __eq__/__hash__ ignore it -- confirm this is safe
    # when one AddSD instance ends up on several apply nodes.
    self.format = x.format
    # The magic number two here arises because L{scipy.sparse}
    # objects must be matrices (have dimension 2).
    assert y.type.ndim == 2
    return gof.Apply(self,
                     [data, indices, indptr, y],
                     [tensor.TensorType(dtype=y.type.dtype,
                                        broadcastable=y.type.broadcastable
                                        ).make_variable()])
def c_code(self, node, name, inputs, outputs, sub):
    """Generate C code adding the sparse matrix -- given as its
    (data, indices, indptr) components -- into the dense array `y`.

    The result goes into a fresh copy of `y`, or into `y` itself when
    the op was built with inplace=True.
    """
    # Unpack here rather than in the signature so the method also
    # parses under Python 3 (tuple parameters are Python-2 only); the
    # caller still passes the same input/output name sequences.
    _data, _indices, _indptr, y = inputs
    z, = outputs
    inplace = int(self.inplace)
    format = {'csc': 0, 'csr': 1}[self.format]
    code = """
            Py_XDECREF(%(z)s);
            if (!%(inplace)s){
                %(z)s = (PyArrayObject *) PyArray_NewCopy(%(y)s, NPY_CORDER);
            }else{
                %(z)s = %(y)s;
                Py_XINCREF(%(z)s);
            }

            npy_intp N = PyArray_DIMS(%(_indptr)s)[0]-1;
            const npy_int32 * __restrict__ indptr = (npy_int32 *)%(_indptr)s->data;
            const npy_int32 * __restrict__ indices = (npy_int32*)%(_indices)s->data;
            const dtype_%(_data)s* __restrict__ data = (dtype_%(_data)s*)%(_data)s->data;

            dtype_%(y)s* ydata = (dtype_%(y)s*)PyArray_DATA(%(y)s);
            dtype_%(z)s* zdata = (dtype_%(z)s*)PyArray_DATA(%(z)s);
            int Yi = PyArray_STRIDES(%(y)s)[0]/PyArray_DESCR(%(y)s)->elsize;
            int Yj = PyArray_STRIDES(%(y)s)[1]/PyArray_DESCR(%(y)s)->elsize;

            npy_int32 pos;
            if (%(format)s == 0){
                /* CSC: indptr walks columns, indices hold row numbers. */
                for (npy_int32 col = 0; col < N; ++col){
                    for (npy_int32 ind = indptr[col]; ind < indptr[col+1]; ++ind){
                        npy_int32 row = indices[ind];
                        pos = row * Yi + col * Yj;
                        zdata[pos] = ydata[pos] + data[ind];
                    }
                }
            }else{
                /* CSR: indptr walks rows, indices hold column numbers. */
                for (npy_int32 row = 0; row < N; ++row){
                    for (npy_int32 ind = indptr[row]; ind < indptr[row+1]; ++ind){
                        npy_int32 col = indices[ind];
                        pos = row * Yi + col * Yj;
                        zdata[pos] = ydata[pos] + data[ind];
                    }
                }
            }
         """ % dict(locals(), **sub)
    return code
def perform(self, node, inputs, outputs):
    """Python fallback: rebuild the sparse matrix from its components
    and add it to the dense operand `y`."""
    # Unpack here rather than in the signature so the method also
    # parses under Python 3 (tuple parameters are Python-2 only).
    data, indices, indptr, y = inputs
    out, = outputs
    assert _is_dense(y)
    if self.format == 'csr':
        x = scipy.sparse.csr_matrix((data, indices, indptr), shape=y.shape)
    elif self.format == 'csc':
        x = scipy.sparse.csc_matrix((data, indices, indptr), shape=y.shape)
    # The asarray is needed as in some case, this return a
    # numpy.matrixlib.defmatrix.matrix object and not an ndarray.
    out[0] = theano._asarray(x + y, dtype=node.outputs[0].type.dtype)
...@@ -1745,7 +1809,7 @@ class AddSD(gof.op.Op): ...@@ -1745,7 +1809,7 @@ class AddSD(gof.op.Op):
return sp_ones_like(x) * gz, gz return sp_ones_like(x) * gz, gz
def infer_shape(self, node, shapes):
    """Output has the shape of the dense operand, which is input 3
    now that make_node passes (data, indices, indptr, y)."""
    return [shapes[3]]
# Module-level singleton for the (non-inplace) sparse + dense addition.
add_s_d = AddSD()
...@@ -3227,3 +3291,68 @@ class Usmm(gof.op.Op): ...@@ -3227,3 +3291,68 @@ class Usmm(gof.op.Op):
out[0] = rval out[0] = rval
usmm = Usmm() usmm = Usmm()
class ConstructSparseFromList(gof.Op):
    """Constructs a sparse matrix out of a list of 2-D matrix rows.

    Builds a CSC matrix with the shape/dtype of `x` whose rows listed in
    `ilist` hold the rows of `y`.  Used as the sparse gradient of
    AdvancedSubtensor1 (see AdvancedSubtensor1.grad).
    """
    def __hash__(self):
        # Stateless op: all instances hash (and compare) equal.
        return hash((type(self)))
    def __eq__(self, other):
        return (type(self) == type(other))
    def __str__(self):
        return self.__class__.__name__
    def make_node(self, x, y, ilist):
        """Validate inputs and build the apply node.

        :param x: tensor whose shape and dtype the sparse output copies.
        :param y: 2-D tensor of rows to scatter into the output.
        :param ilist: integer vector of destination row indices.
        :raises TypeError: on non-integer/non-vector indices, scalar x,
            or y having more dimensions than x.
        """
        x_ = theano.tensor.as_tensor_variable(x)
        y_ = theano.tensor.as_tensor_variable(y)
        ilist_ = theano.tensor.as_tensor_variable(ilist)
        if ilist_.type.dtype[:3] not in ('int', 'uin'):
            raise TypeError('index must be integers')
        if ilist_.type.ndim != 1:
            raise TypeError('index must be vector')
        if x_.type.ndim == 0:
            raise TypeError('cannot index into a scalar')
        if y_.type.ndim > x_.type.ndim:
            raise TypeError('cannot construct sparse matrix as dimensions differ')
        return gof.Apply(self, [x_, y_, ilist_], [theano.sparse.csc_matrix(dtype=x.dtype)])
    def perform(self, node, inp, out_):
        # Build the CSC arrays directly: column j of the output holds
        # values[:, j] at the rows given by idx.
        x, values, idx = inp
        out, = out_
        rows, cols = values.shape
        assert rows == len(idx)
        # Every column stores exactly `rows` entries.
        indptr = numpy.arange(cols + 1) * rows
        # Repeat idx once per column without copying (stride-0 view),
        # then flatten into the CSC row-index array.
        indices = as_strided(idx,
                             strides=(0, idx.strides[0]),
                             shape = (cols, idx.shape[0])).flatten()
        data = values.T.flatten()
        out[0] = scipy.sparse.csc_matrix((data, indices, indptr), shape=x.shape,
                                         dtype=x.dtype)
    def infer_shape(self, node, ishapes):
        # The sparse output has the same (symbolic) shape as x.
        x, y, ilist = ishapes
        return [x]
    def R_op(self, inputs, eval_points):
        # R-operator: rebuild the op on the evaluation points of x and
        # y; no result if either evaluation point is missing.
        if None in eval_points[:2]:
            return [None]
        return self.make_node(eval_points[0], eval_points[1],
                              *inputs[2:]).outputs
    def connection_pattern(self, node):
        # Output is connected to x and y; the index vector carries no
        # gradient.
        rval = [[True], [True], [False]]
        return rval
    def grad(self, inputs, grads):
        # NOTE(review): assumes DisconnectedType is in scope at module
        # level -- it is not imported in this hunk; confirm.
        g_output, = grads
        x, y = inputs[:2]
        idx_list = inputs[2:]
        # Gradient w.r.t. x is passed straight through; presumably x
        # only contributes shape/dtype here -- TODO confirm against
        # AdvancedSubtensor1's sparse_grad usage.
        gx = g_output
        gy = theano.tensor.advanced_subtensor1(g_output, *idx_list)
        return [gx, gy] + [DisconnectedType()()] * len(idx_list)
...@@ -36,15 +36,40 @@ def local_inplace_remove0(node): ...@@ -36,15 +36,40 @@ def local_inplace_remove0(node):
""" """
Optimization to insert inplace versions of Remove0. Optimization to insert inplace versions of Remove0.
""" """
# If inplace is not enabled, enable it and replace that op with a
# new op which has inplace enabled
if isinstance(node.op, sparse.Remove0) and not node.op.inplace: if isinstance(node.op, sparse.Remove0) and not node.op.inplace:
new_op = node.op.__class__(inplace=True) new_op = node.op.__class__(inplace=True)
new_node = new_op(*node.inputs) new_node = new_op(*node.inputs)
return [new_node] return [new_node]
return False return False
theano.compile.optdb.register('local_inplace_remove0', theano.compile.optdb.register('local_inplace_remove0',
gof.TopoOptimizer(local_inplace_remove0, gof.TopoOptimizer(local_inplace_remove0,
failure_callback=gof.TopoOptimizer.warn_inplace), failure_callback=gof.TopoOptimizer.warn_inplace),
60, 'fast_run', 'inplace') 60, 'fast_run', 'inplace')
@gof.local_optimizer([None])
def local_inplace_addsd(node):
    """
    Optimization to insert inplace versions of AddSD.
    """
    # Only rewrite out-of-place AddSD nodes; anything else is left alone.
    if not (isinstance(node.op, sparse.AddSD) and not node.op.inplace):
        return False
    # Reassemble the sparse operand from its components plus the dense
    # operand's symbolic shape, matching the node's original format.
    inputs = node.inputs[:3] + [node.inputs[3].shape]
    fmt = node.op.format
    make_sparse = {'csc': sparse.CSC, 'csr': sparse.CSR}.get(fmt)
    if make_sparse is None:
        raise NotImplementedError('Sparse format %s is not supported' % fmt)
    x = make_sparse(*inputs)
    inplace_op = node.op.__class__(inplace=True)
    return [inplace_op(x, node.inputs[3])]


theano.compile.optdb.register('local_inplace_addsd',
                              gof.TopoOptimizer(
                                  local_inplace_addsd,
                                  failure_callback=gof.TopoOptimizer.warn_inplace),
                              60, 'fast_run', 'inplace')
class StructuredDotCSC(gof.Op): class StructuredDotCSC(gof.Op):
......
...@@ -49,6 +49,9 @@ continuous_dtypes = map(str, scal.continuous_types) ...@@ -49,6 +49,9 @@ continuous_dtypes = map(str, scal.continuous_types)
discrete_dtypes = map(str, scal.discrete_types) discrete_dtypes = map(str, scal.discrete_types)
all_dtypes = map(str, scal.all_types) all_dtypes = map(str, scal.all_types)
# Do a lazy import of the sparse module
# (filled in by AdvancedSubtensor1.grad on first use, to avoid
# importing theano.sparse at module load time).
sparse_module_ref = None
class ShapeError(Exception): class ShapeError(Exception):
"""Raised when the shape cannot be computed.""" """Raised when the shape cannot be computed."""
...@@ -619,7 +622,7 @@ class TensorType(Type): ...@@ -619,7 +622,7 @@ class TensorType(Type):
Inf entries. (Used in `DebugMode`) Inf entries. (Used in `DebugMode`)
""" """
def __init__(self, dtype, broadcastable, name=None): def __init__(self, dtype, broadcastable, name=None, sparse_grad=False):
"""Initialize self.dtype and self.broadcastable. """Initialize self.dtype and self.broadcastable.
:Parameters: :Parameters:
...@@ -644,6 +647,7 @@ class TensorType(Type): ...@@ -644,6 +647,7 @@ class TensorType(Type):
self.dtype_specs() # error checking is done there self.dtype_specs() # error checking is done there
self.name = name self.name = name
self.numpy_dtype = numpy.dtype(self.dtype) self.numpy_dtype = numpy.dtype(self.dtype)
self.sparse_grad = sparse_grad
def filter(self, data, strict=False, allow_downcast=None): def filter(self, data, strict=False, allow_downcast=None):
"""Convert `data` to something which can be associated to a """Convert `data` to something which can be associated to a
...@@ -6524,10 +6528,16 @@ class AdvancedSubtensor1(Op): ...@@ -6524,10 +6528,16 @@ class AdvancedSubtensor1(Op):
return rval return rval
def grad(self, inputs, grads):
    """Gradient of AdvancedSubtensor1.

    Normally increments a dense zeros tensor at the indexed rows; when
    the input's TensorType requests a sparse gradient, builds a sparse
    matrix directly from the gradient rows instead, which avoids
    materialising the large dense zeros tensor.
    """
    global sparse_module_ref
    gz, = grads
    assert len(inputs) == 2
    if inputs[0].type.sparse_grad:
        # Lazy import to avoid a circular dependency between
        # theano.tensor and theano.sparse.
        if sparse_module_ref is None:
            import theano.sparse as sparse_module_ref
        rval1 = [sparse_module_ref.ConstructSparseFromList()(inputs[0], gz,
                                                             inputs[1])]
    else:
        rval1 = [advanced_inc_subtensor1(zeros_like(inputs[0]), gz,
                                         inputs[1])]
    return rval1 + [DisconnectedType()()] * (len(inputs) - 1)
def R_op(self, inputs, eval_points): def R_op(self, inputs, eval_points):
...@@ -6629,11 +6639,7 @@ class AdvancedIncSubtensor1(Op): ...@@ -6629,11 +6639,7 @@ class AdvancedIncSubtensor1(Op):
def connection_pattern(self, node):
    """The output depends on x (input 0) and y (input 1) but not on
    the index vector (input 2)."""
    rval = [[True], [True], [False]]
    return rval
def grad(self, inputs, grads): def grad(self, inputs, grads):
......
Markdown format
0%
You are adding 0 people to this discussion. Please proceed with caution.
Please finish editing this comment first!
Register or sign in to post a comment.