提交 a961de79 authored 作者: Rami Al-Rfou's avatar Rami Al-Rfou

Enable the local inplace optimization; it currently only works for the Python version.

上级 ea3d6101
...@@ -1715,16 +1715,19 @@ class AddSD(gof.op.Op): ...@@ -1715,16 +1715,19 @@ class AddSD(gof.op.Op):
gof.Op.__init__(self, *args, **kwargs) gof.Op.__init__(self, *args, **kwargs)
self.inplace = inplace self.inplace = inplace
if self.inplace: if self.inplace:
self.destroy_map = {0: [0]} self.destroy_map = {0: [3]}
def __eq__(self, other): def __eq__(self, other):
return (type(self) == type(other)) return (type(self) == type(other)) and self.inplace == other.inplace
def __hash__(self): def __hash__(self):
return hash(type(self)) return hash(type(self)) ^ hash(self.inplace)
def __str__(self): def __str__(self):
if self.inplace:
return self.__class__.__name__ + '{inplace}'
return self.__class__.__name__ return self.__class__.__name__
def make_node(self, x, y): def make_node(self, x, y):
x, y = as_sparse_variable(x), tensor.as_tensor_variable(y) x, y = as_sparse_variable(x), tensor.as_tensor_variable(y)
...@@ -1733,14 +1736,15 @@ class AddSD(gof.op.Op): ...@@ -1733,14 +1736,15 @@ class AddSD(gof.op.Op):
# The magic number two here arises because L{scipy.sparse} # The magic number two here arises because L{scipy.sparse}
# objects must be matrices (have dimension 2) # objects must be matrices (have dimension 2)
indices, indptr, data = csm_indices(x), csm_indptr(x), csm_data(x) indices, indptr, data = csm_indices(x), csm_indptr(x), csm_data(x)
self.format = x.format
assert y.type.ndim == 2 assert y.type.ndim == 2
return gof.Apply(self, return gof.Apply(self,
[indices, indptr, data, y], [data, indices, indptr, y],
[tensor.TensorType(dtype=y.type.dtype, [tensor.TensorType(dtype=y.type.dtype,
broadcastable=y.type.broadcastable broadcastable=y.type.broadcastable
).make_variable()]) ).make_variable()])
def c_code(self, node, name, (_indices, _indptr, _data, y), (z, ), sub): def cc_code(self, node, name, (_data, _indices, _indptr, y), (z, ), sub):
code = """ code = """
npy_intp N = PyArray_DIMS(%(_indptr)s)[0]-1; npy_intp N = PyArray_DIMS(%(_indptr)s)[0]-1;
...@@ -1766,35 +1770,27 @@ class AddSD(gof.op.Op): ...@@ -1766,35 +1770,27 @@ class AddSD(gof.op.Op):
""" % dict(locals(), **sub) """ % dict(locals(), **sub)
return code return code
def perform(self, node, (indices, indptr, data, y), (out, )): def perform(self, node, (data, indices, indptr, y), (out, )):
format = 'csc'
self.inplace = True
assert _is_dense(y) assert _is_dense(y)
if format == 'csc':
x = scipy.sparse.csc_matrix( (data,indices,indptr), shape=y.shape)
elif format == 'csr':
x = scipy.sparse.csr_matrix( (data,indices,indptr), shape=y.shape)
else:
x = scipy.sparse.coo_matrix( (data,indices,indptr), shape=y.shape)
if self.inplace: if self.inplace:
if x.format == 'csc': if self.format == 'csc':
for c in xrange(x.shape[1]): for c in xrange(y.shape[1]):
low = x.indptr[c] low = indptr[c]
high = x.indptr[c+1] high = indptr[c+1]
for ind in xrange(low, high): for ind in xrange(low, high):
y[(x.indices[ind], c)] += x.data[ind] y[(indices[ind], c)] += data[ind]
elif x.format == 'csr': elif self.format == 'csr':
for r in xrange(x.shape[0]): for r in xrange(y.shape[0]):
low = x.indptr[r] low = indptr[r]
high = x.indptr[r+1] high = indptr[r+1]
for ind in xrange(low, high): for ind in xrange(low, high):
y[(r, x.indices[ind])] += x.data[ind] y[(r, indices[ind])] += data[ind]
else:
coo_x = x.tocoo(copy=False)
for row, col, data in izip(coo_x.row, coo_x.col, coo_x.data):
y[(row,col)] += data
out[0] = y out[0] = y
else: else:
if self.format == 'csr':
x = scipy.sparse.csr_matrix( (data,indices,indptr), shape=y.shape)
elif self.format == 'csc':
x = scipy.sparse.csc_matrix( (data,indices,indptr), shape=y.shape)
# The asarray is needed as in some case, this return a # The asarray is needed as in some case, this return a
# numpy.matrixlib.defmatrix.matrix object and not an ndarray. # numpy.matrixlib.defmatrix.matrix object and not an ndarray.
out[0] = theano._asarray(x + y, dtype=node.outputs[0].type.dtype) out[0] = theano._asarray(x + y, dtype=node.outputs[0].type.dtype)
......
...@@ -45,6 +45,28 @@ theano.compile.optdb.register('local_inplace_remove0', ...@@ -45,6 +45,28 @@ theano.compile.optdb.register('local_inplace_remove0',
gof.TopoOptimizer(local_inplace_remove0, gof.TopoOptimizer(local_inplace_remove0,
failure_callback=gof.TopoOptimizer.warn_inplace), failure_callback=gof.TopoOptimizer.warn_inplace),
60, 'fast_run', 'inplace') 60, 'fast_run', 'inplace')
@gof.local_optimizer([None])
def local_inplace_addsd(node):
    """
    Optimization to insert inplace versions of AddSD.

    The sparse operand is rebuilt from the node's first three inputs plus
    the shape of the fourth input, using the format stored on the op, and
    an ``inplace=True`` instance of the op's class is applied to it together
    with the fourth input.

    Returns a one-element list holding the result of the inplace op, or
    False when the node is not a non-inplace AddSD.

    Raises NotImplementedError when the op's format is neither 'csc'
    nor 'csr'.
    """
    op = node.op
    # Guard clause: only non-inplace AddSD nodes are rewritten.
    if not isinstance(op, sparse.AddSD) or op.inplace:
        return False

    dense = node.inputs[3]
    csm_args = node.inputs[:3] + [dense.shape]

    # Pick the sparse constructor matching the format recorded on the op.
    fmt = op.format
    if fmt == 'csc':
        make_sparse = sparse.CSC
    elif fmt == 'csr':
        make_sparse = sparse.CSR
    else:
        raise NotImplementedError('Sparse format %s is not supported' % fmt)
    sparse_x = make_sparse(*csm_args)

    inplace_op = op.__class__(inplace=True)
    return [inplace_op(sparse_x, dense)]
# Register local_inplace_addsd with theano.compile.optdb under the name
# 'local_inplace_addsd', wrapped in a TopoOptimizer whose failure callback
# is TopoOptimizer.warn_inplace.
# NOTE(review): 60 is presumably the ordering position and 'fast_run' /
# 'inplace' the selection tags (same values as the 'local_inplace_remove0'
# registration) -- confirm against the optdb.register signature.
theano.compile.optdb.register('local_inplace_addsd',
gof.TopoOptimizer(local_inplace_addsd,
failure_callback=gof.TopoOptimizer.warn_inplace),
60, 'fast_run', 'inplace')
class StructuredDotCSC(gof.Op): class StructuredDotCSC(gof.Op):
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论