提交 58f2b986 authored 作者: Frederic Bastien's avatar Frederic Bastien
...@@ -9,6 +9,7 @@ DIRECTORY LAYOUT ...@@ -9,6 +9,7 @@ DIRECTORY LAYOUT
* tensor depends upon scalar * tensor depends upon scalar
* sparse depends upon tensor * sparse depends upon tensor
* sandbox can depends on everything else * sandbox can depends on everything else
* Theano/examples are copy of the example on the wiki.
* benchmark and examples are in the distribution, not the package * benchmark and examples are in the distribution, not the package
* Tests are distributed and are part of the package, i.e. fall in the appropriate submodules * Tests are distributed and are part of the package, i.e. fall in the appropriate submodules
* Documentation scripts go into doc/scripts/ * Documentation scripts go into doc/scripts/
...@@ -16,7 +17,6 @@ DIRECTORY LAYOUT ...@@ -16,7 +17,6 @@ DIRECTORY LAYOUT
to the directory) to the directory)
* Open questions for packaging: * Open questions for packaging:
* Do we want all scripts in bin/, or can they go in doc/scripts ? * Do we want all scripts in bin/, or can they go in doc/scripts ?
BUGS BUGS
* 0.1 bugs in trac * 0.1 bugs in trac
......
...@@ -123,3 +123,23 @@ def __src_version__(): ...@@ -123,3 +123,23 @@ def __src_version__():
return __src_version__.rval return __src_version__.rval
### This is defined here because it is designed to work across symbolic datatypes
# (Sparse and Tensor)
def dot(l, r):
"""Return a symbolic matrix/dot product between l and r """
rval = NotImplemented
if rval == NotImplemented and hasattr(l, '__dot__'):
try:
rval = l.__dot__(r)
except Exception, e0:
rval = NotImplemented
if rval == NotImplemented and hasattr(r, '__rdot__'):
try:
rval = r.__rdot__(l)
except Exception, e1:
rval = NotImplemented
if rval == NotImplemented:
raise NotImplementedError("Dot failed for the following reaons:", (e0, e1))
return rval
import numpy import numpy
import scipy.sparse as sp
from .. import gof from .. import gof
def check_equal(x, y): def check_equal(x, y):
...@@ -8,6 +9,13 @@ def check_equal(x, y): ...@@ -8,6 +9,13 @@ def check_equal(x, y):
shape if x and y are numpy.ndarray instances). Used internally. shape if x and y are numpy.ndarray instances). Used internally.
""" """
x, y = x[0], y[0] x, y = x[0], y[0]
# TODO: bug in current scipy, two sparse matrices are never equal, remove when moving to 0.7
if sp.issparse(x):
x = x.todense()
if sp.issparse(y):
y = y.todense()
if isinstance(x, numpy.ndarray) and isinstance(y, numpy.ndarray): if isinstance(x, numpy.ndarray) and isinstance(y, numpy.ndarray):
if x.dtype != y.dtype or x.shape != y.shape or numpy.any(abs(x - y) > 1e-10): if x.dtype != y.dtype or x.shape != y.shape or numpy.any(abs(x - y) > 1e-10):
raise Exception("Output mismatch.", {'performlinker': x, 'clinker': y}) raise Exception("Output mismatch.", {'performlinker': x, 'clinker': y})
......
...@@ -6,11 +6,17 @@ To read about different sparse formats, see U{http://www-users.cs.umn.edu/~saad/ ...@@ -6,11 +6,17 @@ To read about different sparse formats, see U{http://www-users.cs.umn.edu/~saad/
@todo: Automatic methods for determining best sparse format? @todo: Automatic methods for determining best sparse format?
""" """
import sys
import numpy import numpy
from scipy import sparse from scipy import sparse
from .. import gof from .. import gof
from .. import tensor from .. import tensor
from .. import compile
#TODO: move this decorator to the compile submodule
def register_specialize(lopt, *tags, **kwargs):
compile.optdb['specialize'].register((kwargs and kwargs.pop('name')) or lopt.__name__, lopt, 'fast_run', *tags)
""" Types of sparse matrices to use for testing """ """ Types of sparse matrices to use for testing """
...@@ -98,6 +104,9 @@ def value(x): ...@@ -98,6 +104,9 @@ def value(x):
except TypeError: except TypeError:
raise TypeError("Could not convert %s to Sparse" % x, type(x)) raise TypeError("Could not convert %s to Sparse" % x, type(x))
def sp_ones_like(x):
data, indices, indptr, shape = csm_properties(x) #TODO: don't restrict to CSM formats
return CSM(format=x.format)(tensor.ones_like(data), indices, indptr, shape)
class Sparse(gof.Type): class Sparse(gof.Type):
...@@ -163,21 +172,179 @@ class Sparse(gof.Type): ...@@ -163,21 +172,179 @@ class Sparse(gof.Type):
def __repr__(self): def __repr__(self):
return "Sparse[%s, %s]" % (str(self.dtype), str(self.format)) return "Sparse[%s, %s]" % (str(self.dtype), str(self.format))
csc_matrix = Sparse(format='csc')
csr_matrix = Sparse(format='csr')
class _sparse_py_operators: class _sparse_py_operators:
T = property(lambda self: transpose(self), doc = "Return aliased transpose of self (read-only)") T = property(lambda self: transpose(self), doc = "Return aliased transpose of self (read-only)")
def __add__(left, right): return add(left, right) def __add__(left, right): return add(left, right)
def __radd__(right, left): return add(left, right) def __radd__(right, left): return add(left, right)
def __mul__(left, right): return mul(left, right)
def __rmul__(left, right): return mul(left, right)
#extra pseudo-operator symbols
def __dot__(left, right): return structured_dot(left, right)
def __rdot__(right, left): return structured_dot(left, right)
class SparseResult(gof.Result, _sparse_py_operators): class SparseResult(gof.Result, _sparse_py_operators):
pass dtype = property(lambda self: self.type.dtype)
format = property(lambda self: self.type.format)
class SparseConstant(gof.Constant, _sparse_py_operators): class SparseConstant(gof.Constant, _sparse_py_operators):
pass dtype = property(lambda self: self.type.dtype)
format = property(lambda self: self.type.format)
class SparseValue(gof.Value, _sparse_py_operators): class SparseValue(gof.Value, _sparse_py_operators):
pass dtype = property(lambda self: self.type.dtype)
format = property(lambda self: self.type.format)
# CONSTRUCTION
class CSMProperties(gof.Op):
"""Extract all of .data .indices and .indptr"""
view_map = {0:[0],1:[0],2:[0],3:[0]}
def __init__(self, map=None):
self.map = map
def make_node(self, csm):
csm = as_sparse(csm)
data = tensor.Tensor(dtype=csm.type.dtype, broadcastable = (False,)).make_result()
return gof.Apply(self, [csm],
[data, tensor.ivector(), tensor.ivector(), tensor.ivector()])
def perform(self, node, (csm,), out):
out[0][0] = csm.data if self.map is None else csm.data[self.map]
out[1][0] = numpy.asarray(csm.indices, dtype='int32')
out[2][0] = numpy.asarray(csm.indptr, dtype='int32')
out[3][0] = numpy.asarray(csm.shape, dtype='int32')
# TODO FIX THIS
def grad(self, (csm,), g):
assert [gg is None for gg in g[1:]]
data, indices, indptr, shape = csm_properties(csm)
if csm.format == 'csc':
return [CSM('csc')(g_data, indices, indptr, shape)]
else:
return [CSR('csm')(g_data, indices, indptr, shape)]
def csm_properties(csm): return CSMProperties()(csm)
def csm_data(csm): return csm_properties(csm)[0]
def csm_indices(csm): return csm_properties(csm)[1]
def csm_indptr(csm): return csm_properties(csm)[2]
def csm_shape(csm): return csm_properties(csm)[3]
class CSM(gof.Op):
"""Construct a CSC or CSR matrix from the internal representation """
view_map = {0:[0]} #should view the other inputs too, but viewing multiple inputs is not
#currently supported by the destroyhandler
def __init__(self, format, map=None):
if format not in ('csr', 'csc'):
raise ValueError("format must be one of: 'csr', 'csc'", format)
self.format = format
# for efficiency, if remap does nothing, then do not apply it
if map is not None and all(map==numpy.arange(numpy.size(map))):
map = None
self.map = map
def __eq__(self, other):
return type(other) is CSM \
and other.format == self.format and other.map==self.map
def __hash__(self):
return hash(CSM) ^ hash(self.format) ^ hash(numpy.str(self.map))
def make_node(self, data, indices, indptr, shape):
"""Build a SparseResult from the internal parametrization
:param data:
:param indices:
:param indptr:
:type data: 1-d tensor
:type indices: 1-d tensor of ints
:type indptr: 1-d tensor of ints
"""
data = tensor.as_tensor(data)
indices = tensor.as_tensor(indices)
indptr = tensor.as_tensor(indptr)
shape = tensor.as_tensor(shape)
if data.type.ndim != 1:
raise TypeError('data argument must be a vector', data.type)
if indices.type not in tensor.int_vector_types:
raise TypeError('indices must be vector of integers')
if indptr.type not in tensor.int_vector_types:
raise TypeError('indices must be vector of integers')
if shape.type not in tensor.int_vector_types:
raise TypeError('n_rows must be integer type')
return gof.Apply(self,
[data, indices, indptr, shape],
[Sparse(dtype = data.type.dtype,
format = self.format).make_result()])
def perform(self, node, (data, indices, indptr, shape), (out,)):
"""Build a csc_matrix"""
#assert len(data.flatten()) == len(indices.flatten())
data = data[self.map] if self.map!=None else data
if len(shape) != 2:
raise ValueError('Shape should be an array of length 2')
if data.shape != indices.shape:
raise ValueError('data indices shape mismatch', (data.shape, indices.shape))
if self.format == 'csc':
out[0] = sparse.csc_matrix((data, indices.copy(), indptr.copy()),
numpy.asarray(shape),
copy = False #1000*len(data.flatten())
)
else:
assert self.format == 'csr'
out[0] = sparse.csr_matrix((data, indices.copy(), indptr.copy()),
shape.copy(),
copy = False #1000*len(data.flatten())
)
def grad(self, (data, indices, indptr, shape), (g_out,)):
"""Return a gradient on the data vector"""
#unpack the data vector and wrap it as a 1d Tensor
g_data = csm_grad(self.map)(data, csm_data(g_out),csm_indices(g_out))
return [g_data, None, None, None]
CSC = CSM('csc')
CSR = CSM('csr')
class CSMGrad(gof.op.Op):
def __init__(self, map=None):
self.map = map
def make_node(self, data, gout_data, gout_indices):
g_data = data.type()
return gof.Apply(self, [data, gout_data, gout_indices], [g_data])
def perform(self, node, (data, gout_data, gout_indices), (g_data,)):
if self.map is None:
g_data[0] = gout_data
else:
grad = numpy.zeros_like(data)
grad[self.map] = gout_data
g_data[0] = grad
csm_grad = CSMGrad
@gof.local_optimizer([csm_properties])
def skip_pack_csc01(node):
"""if we find csm_properties(CSM(*args)), then we can replace that with the *args
directly"""
if node.op == csm_properties:
csm, = node.inputs
if csm.owner and (csm.owner.op == CSC or csm.owner.op == CSR):
return csm.owner.inputs
return False
register_specialize(skip_pack_csc01)
...@@ -186,6 +353,7 @@ class SparseValue(gof.Value, _sparse_py_operators): ...@@ -186,6 +353,7 @@ class SparseValue(gof.Value, _sparse_py_operators):
# #
# convert a sparse matrix to an ndarray # convert a sparse matrix to an ndarray
class DenseFromSparse(gof.op.Op): class DenseFromSparse(gof.op.Op):
sparse_grad = True
def make_node(self, x): def make_node(self, x):
x = as_sparse(x) x = as_sparse(x)
return gof.Apply(self, return gof.Apply(self,
...@@ -193,9 +361,12 @@ class DenseFromSparse(gof.op.Op): ...@@ -193,9 +361,12 @@ class DenseFromSparse(gof.op.Op):
[tensor.Tensor(dtype = x.type.dtype, [tensor.Tensor(dtype = x.type.dtype,
broadcastable = (False, False)).make_result()]) broadcastable = (False, False)).make_result()])
def perform(self, node, (x, ), (out, )): def perform(self, node, (x, ), (out, )):
out[0] = numpy.asarray(x.todense()) out[0] = x.toarray()
def grad(self, (x, ), (gz, )): def grad(self, (x, ), (gz, )):
return SparseFromDense(x.type.format)(gz), if self.sparse_grad:
return [sp_ones_like(x) * gz]
else:
return [SparseFromDense(x.type.format)(gz)]
dense_from_sparse = DenseFromSparse() dense_from_sparse = DenseFromSparse()
class SparseFromDense(gof.op.Op): class SparseFromDense(gof.op.Op):
...@@ -246,6 +417,7 @@ class AddSS(gof.op.Op): ...@@ -246,6 +417,7 @@ class AddSS(gof.op.Op):
if x.type.dtype != y.type.dtype: if x.type.dtype != y.type.dtype:
raise NotImplementedError() raise NotImplementedError()
if x.type.format != y.type.format: if x.type.format != y.type.format:
print x.type.format, y.type.format
raise NotImplementedError() raise NotImplementedError()
return gof.Apply(self, return gof.Apply(self,
[x, y], [x, y],
...@@ -296,8 +468,103 @@ def add(x,y): ...@@ -296,8 +468,103 @@ def add(x,y):
elif y_is_sparse_result and not x_is_sparse_result: return add_s_d(y,x) elif y_is_sparse_result and not x_is_sparse_result: return add_s_d(y,x)
else: raise NotImplementedError() else: raise NotImplementedError()
class MulSS(gof.op.Op):
''' Elementwise multiply a sparse and a ndarray '''
def make_node(self, x, y):
x, y = as_sparse(x), as_sparse(y)
if x.type != y.type:
raise NotImplementedError()
return gof.Apply(self, [x, y], [x.type()])
def perform(self, node, (x, y), (out, )):
assert _is_sparse(x) and _is_sparse(y)
assert len(x.shape) == 2
assert y.shape == x.shape
if (numpy.all(y.indptr == x.indptr) and numpy.all(y.indices == x.indices)):
out[0] = y.copy()
out[0].data *= x.data
else:
raise NotImplementedError() #RowScale / ColScale
def grad(self, (x, y), (gz,)):
return y * gz, x * gz
mul_s_s = MulSS()
class MulSD(gof.op.Op):
''' Elementwise multiply a sparse and a ndarray '''
def make_node(self, x, y):
x, y = as_sparse(x), tensor.as_tensor(y)
if x.type.dtype != y.type.dtype:
raise NotImplementedError()
# The magic number two here arises because L{scipy.sparse}
# objects must be matrices (have dimension 2)
# Broadcasting of the sparse matrix is not supported.
assert y.type.ndim <= 2
return gof.Apply(self, [x, y], [x.type()])
def perform(self, node, (x, y), (out, )):
assert _is_sparse(x) and _is_dense(y)
if len(y.shape) == 0:
out[0] = x.copy()
out[0].data *= y
elif len(y.shape) == 1:
raise NotImplementedError() #RowScale / ColScale
elif len(y.shape) == 2:
#if we have enough memory to fit y, maybe we can fit x.asarray() too?
#TODO: change runtime from O(M*N) to O(nonzeros)
M, N = x.shape
assert x.shape == y.shape
if x.format == 'csc':
x_data = x.data
indices = x.indices
indptr = x.indptr
z = x.copy()
z_data = z.data
for j in xrange(0, N):
for i_idx in xrange(indptr[j], indptr[j+1]):
i = indices[i_idx]
z_data[i_idx] *= y[i,j]
out[0] = z
elif x.format == 'csr':
x_data = x.data
indices = x.indices
indptr = x.indptr
z = x.copy()
z_data = z.data
for i in xrange(0, M):
for j_idx in xrange(indptr[i], indptr[i+1]):
j = indices[j_idx]
z_data[j_idx] *= y[i,j]
out[0] = z
else:
print >> sys.stderr, "WARNING: crappy implementation of MulSD", x.format
out[0] = type(x)(x.toarray() * y)
def grad(self, (x, y), (gz,)):
assert _is_sparse_result(x) and _is_dense_result(y)
assert _is_sparse_result(gz)
return y * gz, x * gz
mul_s_d = MulSD()
def mul(x,y):
"""
Multiply (elementwise) two matrices, at least one of which is sparse.
"""
if hasattr(x, 'getnnz'): x = as_sparse(x)
if hasattr(y, 'getnnz'): y = as_sparse(y)
class Dot(gof.op.Op): x_is_sparse_result = _is_sparse_result(x)
y_is_sparse_result = _is_sparse_result(y)
assert x_is_sparse_result or y_is_sparse_result
if x_is_sparse_result and y_is_sparse_result: return mul_s_s(x,y)
elif x_is_sparse_result and not y_is_sparse_result: return mul_s_d(x,y)
elif y_is_sparse_result and not x_is_sparse_result: return mul_s_d(y,x)
else: raise NotImplementedError()
###############
#
# TrueDot
#
class TrueDot(gof.op.Op):
""" """
Attributes: Attributes:
grad_preserves_dense - a boolean flags [default: True]. grad_preserves_dense - a boolean flags [default: True].
...@@ -350,7 +617,7 @@ class Dot(gof.op.Op): ...@@ -350,7 +617,7 @@ class Dot(gof.op.Op):
def __hash__(self): def __hash__(self):
return hash(self.grad_preserves_dense) return hash(self.grad_preserves_dense)
def dot(x, y, grad_preserves_dense=True): def true_dot(x, y, grad_preserves_dense=True):
""" """
@todo: Maybe the triple-transposition formulation (when x is dense) @todo: Maybe the triple-transposition formulation (when x is dense)
is slow. See if there is a direct way to do this. is slow. See if there is a direct way to do this.
...@@ -367,3 +634,319 @@ def dot(x, y, grad_preserves_dense=True): ...@@ -367,3 +634,319 @@ def dot(x, y, grad_preserves_dense=True):
else: else:
assert y_is_sparse_result assert y_is_sparse_result
return transpose(Dot(grad_preserves_dense)(y.T, x.T)) return transpose(Dot(grad_preserves_dense)(y.T, x.T))
###############
#
# StructuredDot
#
class StructuredDot(gof.Op):
"""Structured Dot is like dot, except that only the gradient wrt non-zero elements of the
sparse matrix A are calculated and propagated.
The output is presumed to be a dense matrix, and is represented by a Tensor instance.
"""
def make_node(self, a, b):
assert a.type.dtype == b.type.dtype
if type(a) is not SparseResult:
raise TypeError('First argument must be of type SparseResult');
return gof.Apply(self, [a,b], [tensor.tensor(a.type.dtype, (False, False))])
def perform(self, node, (a,b), (out,)):
if a.shape[1] != b.shape[0]:
raise ValueError('shape mismatch in StructuredDot.perform', (a.shape, b.shape))
if b.shape[0] == 1:
raise NotImplemented('ERROR: scipy.csc_matrix dot has bug with singleton dimensions')
result = a.dot(b)
# sparse dot generates sparse matrix, unless output has single dimension
if sparse.issparse(result):
result = result.toarray()
assert isinstance(result, numpy.ndarray)
# dot of an NxM sparse matrix, with a Mx1 dense matrix, returns vector not matrix
if result.ndim == 1:
result = numpy.expand_dims(result,1)
elif result.ndim != 2:
raise Exception('Output of structured dot should be a matrix (ndim=2)')
assert result.ndim == 2
## Commenting this out because result should be a numpy.ndarray since the assert above
## (JB 20090109)
#out[0] = numpy.asarray(result) #TODO: fix this really bad implementation
#
out[0] = result
def grad(self, (a,b), (g_out,)):
#a is sparse, b is dense, g_out is dense
#ga = g_out x b.T
#gb = a.T x g_out
return structured_dot_grad(a, b, g_out), structured_dot(a.T,g_out)
_structured_dot = StructuredDot()
def structured_dot(x, y):
"""
@todo: Maybe the triple-transposition formulation (when x is dense)
is slow. See if there is a direct way to do this.
"""
if hasattr(x, 'getnnz'): x = as_sparse(x)
if hasattr(y, 'getnnz'): y = as_sparse(y)
x_is_sparse_result = _is_sparse_result(x)
y_is_sparse_result = _is_sparse_result(y)
if not x_is_sparse_result and not y_is_sparse_result:
raise TypeError('structured_dot requires at least one sparse argument')
if x_is_sparse_result:
return _structured_dot(x, y)
else:
assert y_is_sparse_result
return _structured_dot(y.T, x.T).T
class StructuredDotCSC(gof.Op):
def make_node(self, a_val, a_ind, a_ptr, a_nrows, b):
assert a_val.type.dtype == b.type.dtype
r = gof.Apply(self, [a_val, a_ind, a_ptr, a_nrows, b],
[tensor.tensor(a_val.type.dtype, (False, False))])
return r
def perform(self, node, (a_val, a_ind, a_ptr, a_nrows, b), (out,)):
a = sparse.csc_matrix((a_val, a_ind, a_ptr),
(a_nrows, b.shape[0]),
copy = False)
out[0] = numpy.asarray(a.dot(b).todense())
def c_code(self, node, name, (a_val, a_ind, a_ptr, a_nrows, b), (z,), sub):
return """
if (%(a_val)s->nd != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(a_val) != 1"); %(fail)s;}
if (%(a_ind)s->nd != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(a_ind) != 1"); %(fail)s;}
if (%(a_ptr)s->nd != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(a_ptr) != 1"); %(fail)s;}
if (%(a_nrows)s->nd != 0) {PyErr_SetString(PyExc_NotImplementedError, "rank(nrows) != 0"); %(fail)s;}
if (%(b)s->nd != 2) {PyErr_SetString(PyExc_NotImplementedError, "rank(b) != 2"); %(fail)s;}
if (%(a_val)s->descr->type_num != PyArray_DOUBLE)
{PyErr_SetString(PyExc_NotImplementedError, "a_val dtype not NPY_DOUBLE"); %(fail)s;}
if (%(a_ind)s->descr->type_num != PyArray_INT32) {
PyErr_SetString(PyExc_NotImplementedError, "a_ind dtype not INT32"); %(fail)s;}
if (%(a_ptr)s->descr->type_num != PyArray_INT32)
{PyErr_SetString(PyExc_NotImplementedError, "a_ptr dtype not INT32"); %(fail)s;}
if (%(a_nrows)s->descr->type_num != PyArray_INT32)
{PyErr_SetString(PyExc_NotImplementedError, "a_nrows dtype not INT32"); %(fail)s;}
if (%(b)s->descr->type_num != PyArray_DOUBLE)
{PyErr_SetString(PyExc_NotImplementedError, "b's dtype not NPY_DOUBLE"); %(fail)s;}
if (%(a_val)s->dimensions[0] != %(a_ind)s->dimensions[0])
{PyErr_SetString(PyExc_NotImplementedError, "a_val and a_ind have different lengths"); %(fail)s;}
if (%(a_ptr)s->dimensions[0] != %(b)s->dimensions[0]+1)
{PyErr_SetString(PyExc_NotImplementedError, "a's number of columns doesn't match b's rows"); %(fail)s;}
if ((!%(z)s)
|| (%(z)s->dimensions[0] != ((npy_int32 *)%(a_nrows)s->data)[0])
|| (%(z)s->dimensions[1] != %(b)s->dimensions[1])
)
{
if (%(z)s) Py_DECREF(%(z)s);
npy_intp dims[] = {0,0};
dims[0] = ((npy_int32 *)%(a_nrows)s->data)[0];
dims[1] = %(b)s->dimensions[1];
%(z)s = (PyArrayObject*) PyArray_SimpleNew(2, dims, %(b)s->descr->type_num);
}
{
//the output array has size M x N
npy_intp M = %(z)s->dimensions[0];
npy_intp N = %(z)s->dimensions[1];
npy_intp K = %(b)s->dimensions[0];
npy_intp Szm = %(z)s->strides[0] / %(z)s->descr->elsize;
npy_intp Szn = %(z)s->strides[1] / %(z)s->descr->elsize;
//npy_intp Sbm = %(b)s->strides[0] / %(b)s->descr->elsize;
npy_intp Sbn = %(b)s->strides[1] / %(b)s->descr->elsize;
npy_intp Sval = %(a_val)s->strides[0] / %(a_val)s->descr->elsize;
npy_intp Sind = %(a_ind)s->strides[0] / %(a_ind)s->descr->elsize;
npy_intp Sptr = %(a_ptr)s->strides[0] / %(a_ptr)s->descr->elsize;
npy_double * __restrict__ Dz = (npy_double*)%(z)s->data;
//const npy_double * __restrict__ Db = (npy_double*)%(b)s->data;
const npy_double * __restrict__ Dval = (npy_double*)%(a_val)s->data;
const npy_int32 * __restrict__ Dind = (npy_int32*)%(a_ind)s->data;
const npy_int32 * __restrict__ Dptr = (npy_int32*)%(a_ptr)s->data;
//npy_intp nnz = %(a_ind)s->dimensions[0];
//clear the output array
for (npy_intp m = 0; m < M; ++m)
{
for (npy_intp n = 0; n < N; ++n)
{
Dz[m*Szm + n*Szn] = 0.0;
}
}
//iterate over the sparse array, making the most of an entry wherever we find it.
//
// Normal matrix matrix multiply:
// for m
// for n
// for k
// z[m,n] += a[m,k] * b[k,n]
// Here instead:
// for k
// for m (sparse)
// for n
// z[m,n] += a[m,k] * b[k,n]
for (npy_int32 k = 0; k < K; ++k)
{
const npy_double * __restrict__ bk = (double *)(%(b)s->data + %(b)s->strides[0] * k);
for (npy_int32 m_idx = Dptr[k * Sptr]; m_idx < Dptr[(k+1) * Sptr]; ++m_idx)
{
npy_int32 m = Dind[m_idx * Sind];
const double Amk = Dval[m_idx * Sval];
npy_double * __restrict__ zm = (npy_double *)(%(z)s->data + %(z)s->strides[0] * m);
if (m >= %(z)s->dimensions[0])
{PyErr_SetString(PyExc_NotImplementedError, "illegal row index in a"); %(fail)s;}
for(npy_int32 n = 0; n < N; ++n)
{
zm[n*Szn] += Amk * bk[n*Sbn];
}
}
}
}
"""% dict(locals(), **sub)
sd_csc = StructuredDotCSC()
#TODO: register a specialization to replace StructuredDot -> StructuredDotCSC
class StructuredDotGrad(gof.Op):
def make_node(self, a, b, g_ab):
return gof.Apply(self, [a, b, g_ab], [a.type()])
def perform(self, node, (a, b, g_ab), (out,)):
g_a_data = a.data.copy()
if a.format == 'csc':
for j in xrange(len(a.indptr)-1):
ind0 = a.indptr[j]
ind1 = a.indptr[j+1]
for i_idx in xrange(ind0, ind1):
i = a.indices[i_idx]
#v = a.data[i_idx]
#print (i, j, v)
g_a_data[i_idx] = numpy.dot(g_ab[i], b[j])
out[0] = sparse.csc_matrix((g_a_data, a.indices.copy(), a.indptr.copy()),
a.shape, copy=False)
elif a.format == 'csr':
raise NotImplementedError()
else:
raise TypeError()
_structured_dot_grad = StructuredDotGrad()
class StructureDotGradCSC(gof.Op):
def make_node(self, a_indices, a_indptr, b, g_ab):
return gof.Apply(self, [a_indices, a_indptr, b, g_ab], [tensor.tensor(b.dtype, (False,))])
def perform(self, node, (a_indices, a_indptr, b, g_ab), (out,)):
g_a_data = numpy.zeros(a_indices.shape, dtype=g_ab.dtype)
for j in xrange(len(a_indptr)-1):
ind0 = a_indptr[j]
ind1 = a_indptr[j+1]
for i_idx in xrange(ind0, ind1):
i = a_indices[i_idx]
g_a_data[i_idx] = numpy.dot(g_ab[i], b[j])
out[0] = g_a_data
def c_code(self, node, name, (_indices, _indptr, _d, _g), (_zout, ), sub):
return """
if (%(_d)s->nd != 2) {PyErr_SetString(PyExc_NotImplementedError, "rank(d) != 2"); %(fail)s;}
if (%(_g)s->nd != 2) {PyErr_SetString(PyExc_NotImplementedError, "rank(g) != 2"); %(fail)s;}
if (%(_indices)s->nd != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(indices) != 1"); %(fail)s;}
if (%(_indptr)s->nd != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(indptr) != 1"); %(fail)s;}
if( %(_indices)s->descr->type_num != PyArray_INT32) {
PyErr_SetString(PyExc_NotImplementedError, "C"); %(fail)s;}
if( %(_indptr)s->descr->type_num != PyArray_INT32)
{PyErr_SetString(PyExc_NotImplementedError, "D"); %(fail)s;}
if( %(_d)s->descr->type_num != PyArray_DOUBLE)
{PyErr_SetString(PyExc_NotImplementedError, "d's dtype not NPY_DOUBLE"); %(fail)s;}
if( %(_g)s->descr->type_num != PyArray_DOUBLE)
{PyErr_SetString(PyExc_NotImplementedError, "g's dtype not NPY_DOUBLE"); %(fail)s;}
if( %(_d)s->dimensions[1] != %(_g)s->dimensions[1])
{PyErr_SetString(PyExc_NotImplementedError, "d and g have different numbers of columns"); %(fail)s;}
if (!%(_zout)s)
{
%(_zout)s = (PyArrayObject*) PyArray_SimpleNew(1, %(_indices)s->dimensions, %(_g)s->descr->type_num);
}
if (%(_zout)s->dimensions[0] != %(_indices)s->dimensions[0])
{
PyErr_SetString(PyExc_NotImplementedError, "somehow _zout got the wrong size.. and I don't know how to resize it.");
%(fail)s;
}
{ //makes it compile even though labels jump over variable definitions.
npy_intp nnz = %(_indices)s->dimensions[0];
npy_intp N = %(_indptr)s->dimensions[0]-1; //TODO: error checking with this
npy_intp Sindices = %(_indices)s->strides[0]/%(_indices)s->descr->elsize;
npy_intp Sindptr = %(_indptr)s->strides[0]/%(_indptr)s->descr->elsize;
const npy_intp Sd1 = %(_d)s->strides[1]/%(_d)s->descr->elsize;
const npy_intp Sg1 = %(_g)s->strides[1]/%(_g)s->descr->elsize;
const npy_intp K = %(_d)s->dimensions[1];
const npy_int32 * __restrict__ indptr = (npy_int32 *)%(_indptr)s->data;
const npy_int32 * __restrict__ indices = (npy_int32 *)%(_indices)s->data;
for (npy_int32 j = 0; j < N; ++j)
{
const npy_double * __restrict__ d_row = (double *)(%(_d)s->data + %(_d)s->strides[0] * j);
if(j >= %(_d)s->dimensions[0]) {PyErr_SetString(PyExc_NotImplementedError, "G"); %(fail)s;}
for (npy_int32 i_idx = indptr[j * Sindptr]; i_idx < indptr[(j+1) * Sindptr]; ++i_idx)
{
npy_int32 i = indices[i_idx * Sindices];
const npy_double * __restrict__ g_row = (npy_double *)(%(_g)s->data + %(_g)s->strides[0] * i);
double ip = 0.0;
if (i >= %(_g)s->dimensions[0])
{PyErr_SetString(PyExc_NotImplementedError, "H"); %(fail)s;}
for(int k = 0; k < K; ++k)
{
ip += d_row[k * Sd1] * g_row[k*Sg1];
}
((double * __restrict__)(%(_zout)s->data + i_idx * %(_zout)s->strides[0]))[0] = ip;
}
}
}
"""% dict(locals(), **sub)
_sdgcsc = StructureDotGradCSC()
def structured_dot_grad(sparse_A, dense_B, ga):
#TODO: 1. move this switch to be a specialization of structuredDotGrad
# 2. implement StructuredDotGrad.grad()
if 0:
return _structured_dot_grad(sparse_A, dense_B, ga)
else:
if sparse_A.type.format == 'csc':
g_A_data = _sdgcsc(csm_indices(sparse_A),\
csm_indptr(sparse_A), dense_B, ga)
return CSC(g_A_data, csm_indices(sparse_A),\
csm_indptr(sparse_A),\
csm_shape(sparse_A))
else:
raise NotImplementedError()
...@@ -274,9 +274,10 @@ class test_dot(unittest.TestCase): ...@@ -274,9 +274,10 @@ class test_dot(unittest.TestCase):
for epoch in xrange(50): for epoch in xrange(50):
y, loss, gw = trainfn(x, w) y, loss, gw = trainfn(x, w)
w = w - (lr * gw) w = w - (lr * gw)
print loss
self.failUnless(origloss > loss) self.failUnless(origloss > loss)
self.failUnless('1.0543172285' == str(loss)) self.failUnless('1.05191241115' == str(loss))
def test_graph_bprop_rand(self): def test_graph_bprop_rand(self):
for i in range(10): for i in range(10):
......
...@@ -490,13 +490,38 @@ class _tensor_py_operators: ...@@ -490,13 +490,38 @@ class _tensor_py_operators:
# def __ixor__(self, other): return _xor_inplace(self, other) # def __ixor__(self, other): return _xor_inplace(self, other)
#ARITHMETIC - NORMAL #ARITHMETIC - NORMAL
def __add__(self,other): return add(self,other) def __add__(self,other):
def __sub__(self,other): return sub(self,other) try:
def __mul__(self,other): return mul(self,other) return add(self,other)
def __div__(self,other): return div(self,other) except Exception, e:
def __pow__(self,other): return pow(self,other) return NotImplemented
def __mod__(self,other): return mod(self,other) def __sub__(self,other):
try:
return sub(self,other)
except Exception, e:
return NotImplemented
def __mul__(self,other):
try:
return mul(self,other)
except Exception, e:
return NotImplemented
def __div__(self,other):
try:
return div(self,other)
except Exception, e:
return NotImplemented
def __pow__(self,other):
try:
return pow(self,other)
except Exception, e:
return NotImplemented
def __mod__(self,other):
try:
return mod(self,other)
except Exception, e:
return NotImplemented
# ##### DON"T USE THESE BECAUSE INPLACE OPS SHOULD BE INSERTED BY OPTIMIZATION ONLY
# #ARITHMETIC - INPLACE # #ARITHMETIC - INPLACE
# def __iadd__(self,other): return _add_inplace(self,other) # def __iadd__(self,other): return _add_inplace(self,other)
# def __isub__(self,other): return _sub_inplace(self,other) # def __isub__(self,other): return _sub_inplace(self,other)
...@@ -552,6 +577,11 @@ class _tensor_py_operators: ...@@ -552,6 +577,11 @@ class _tensor_py_operators:
""" The dtype of this tensor. """ """ The dtype of this tensor. """
#extra pseudo-operator symbols
def __dot__(left, right): return dot(left, right)
def __rdot__(right, left): return dot(left, right)
class TensorResult(Result, _tensor_py_operators): class TensorResult(Result, _tensor_py_operators):
"""Subclass to add the tensor operators to the basic `Result` class.""" """Subclass to add the tensor operators to the basic `Result` class."""
...@@ -2013,7 +2043,7 @@ class TensorDot(Op): ...@@ -2013,7 +2043,7 @@ class TensorDot(Op):
axesdim, x.type.ndim, y.type.ndim) axesdim, x.type.ndim, y.type.ndim)
outdim = x.type.ndim + y.type.ndim - 2*axesdim outdim = x.type.ndim + y.type.ndim - 2*axesdim
output = tensor(dtype=x.dtype, broadcastable=[None]*outdim); output = tensor(dtype=x.dtype, broadcastable=[False]*outdim);
return Apply(self, inputs=[x,y], outputs=[output,]) return Apply(self, inputs=[x,y], outputs=[output,])
def perform(self, node, (x, y), (z,)): def perform(self, node, (x, y), (z,)):
...@@ -2064,13 +2094,14 @@ outer = Outer() ...@@ -2064,13 +2094,14 @@ outer = Outer()
# Gradient # Gradient
######################### #########################
def grad(cost, wrt, g_cost=None): def grad(cost, wrt, g_cost=None, consider_constant=[]):
""" """
@type cost: L{Result} @type cost: L{Result}
@type wrt: L{Result} or list of L{Result}s. @type wrt: L{Result} or list of L{Result}s.
@type g_cost: L{Result} broadcastable to size of I{cost}, or None @type g_cost: L{Result} broadcastable to size of I{cost}, or None
@param g_cost: an expression for the gradient through cost. The default is @param g_cost: an expression for the gradient through cost. The default is
{{{ones_like(cost)}}} {{{ones_like(cost)}}}
@param consider_constant: a list of expressions not to backpropagate through
@rtype: L{Result} or list of L{Result}s (depending upon I{wrt}) @rtype: L{Result} or list of L{Result}s (depending upon I{wrt})
@return: symbolic expression of gradient of I{cost} with respect to I{wrt}. @return: symbolic expression of gradient of I{cost} with respect to I{wrt}.
...@@ -2086,7 +2117,7 @@ def grad(cost, wrt, g_cost=None): ...@@ -2086,7 +2117,7 @@ def grad(cost, wrt, g_cost=None):
if g_cost is None: if g_cost is None:
g_cost = ones_like(cost) g_cost = ones_like(cost)
inputs = gof.graph.inputs([cost]) inputs = gof.graph.inputs([cost])
gmap = gradient.grad_sources_inputs([(cost, g_cost)], inputs) gmap = gradient.grad_sources_inputs([(cost, g_cost)], inputs + consider_constant)
def zero(p): def zero(p):
return TensorConstant( return TensorConstant(
...@@ -2131,8 +2162,10 @@ class numeric_grad: ...@@ -2131,8 +2162,10 @@ class numeric_grad:
shapes = [p.shape for p in apt] shapes = [p.shape for p in apt]
dtypes = [str(p.dtype) for p in apt] dtypes = [str(p.dtype) for p in apt]
if not dtypes == [dtypes[0]] * len(apt): # TODO: remove this eventually (why was this here in the first place ?)
raise TypeError('All function arguments must have same dtype') # In the case of CSM, the arguments are a mixture of floats and integers...
#if not dtypes == [dtypes[0]] * len(apt):
#raise TypeError('All function arguments must have same dtype')
total_size = __builtin__.sum(prod(sh) for sh in shapes) total_size = __builtin__.sum(prod(sh) for sh in shapes)
...@@ -2189,7 +2222,8 @@ def verify_grad(testcase, op, pt, n_tests=1, rng=numpy.random, eps=1.0e-7, tol=0 ...@@ -2189,7 +2222,8 @@ def verify_grad(testcase, op, pt, n_tests=1, rng=numpy.random, eps=1.0e-7, tol=0
testcase.failUnless(analytic gradient matches finite-diff gradient) testcase.failUnless(analytic gradient matches finite-diff gradient)
:param pt: the list of numpy.ndarrays to use as inputs to the op :param pt: the list of numpy.ndarrays to use as inputs to the op
:param op: something that behaves like an Op instance. :param op: something that behaves like an Op instance with a single output
(can be a python function combining multiple ops)
:param testcase: the thing to call `fail` on if things go awry. :param testcase: the thing to call `fail` on if things go awry.
""" """
...@@ -2251,4 +2285,3 @@ def verify_grad(testcase, op, pt, n_tests=1, rng=numpy.random, eps=1.0e-7, tol=0 ...@@ -2251,4 +2285,3 @@ def verify_grad(testcase, op, pt, n_tests=1, rng=numpy.random, eps=1.0e-7, tol=0
verify_grad.E_grad = 'gradient error exceeded tolerance' verify_grad.E_grad = 'gradient error exceeded tolerance'
"""This error is raised when a gradient is calculated, but incorrect.""" """This error is raised when a gradient is calculated, but incorrect."""
...@@ -285,6 +285,23 @@ def local_inplace_setsubtensor(node): ...@@ -285,6 +285,23 @@ def local_inplace_setsubtensor(node):
return False return False
compile.optdb.register('inplace_setsubtensor', TopoOptimizer(local_inplace_setsubtensor), 60, 'fast_run', 'inplace') #DEBUG compile.optdb.register('inplace_setsubtensor', TopoOptimizer(local_inplace_setsubtensor), 60, 'fast_run', 'inplace') #DEBUG
##################
# Reshape opts #
##################
@gof.local_optimizer([None, None])
def local_reshape_chain(node):
"""
Reshape(Reshape(shape1),shape2) -> Reshape(shape2)
"""
if not opt.check_chain(node, T.Reshape, T.Reshape):
return False
return [node.op(node.inputs[0].owner.inputs[0], node.inputs[1])]
register_canonicalize(local_reshape_chain)
################## ##################
# Middleman cuts # # Middleman cuts #
################## ##################
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论