提交 34836892 authored 作者: Yann N. Dauphin's avatar Yann N. Dauphin

Merge branch 'ynd_sparse' of https://github.com/dwf/Theano into sparse

Conflicts: theano/sparse/basic.py
...@@ -91,5 +91,17 @@ if sys.version_info[:2] < (2,6): ...@@ -91,5 +91,17 @@ if sys.version_info[:2] < (2,6):
for j in range(i+1, r): for j in range(i+1, r):
indices[j] = indices[j-1] + 1 indices[j] = indices[j-1] + 1
yield tuple(pool[i] for i in indices) yield tuple(pool[i] for i in indices)
def product(*args, **kwds):
# product('ABCD', 'xy') --> Ax Ay Bx By Cx Cy Dx Dy
# product(range(2), repeat=3) --> 000 001 010 011 100 101 110 111
pools = map(tuple, args) * kwds.get('repeat', 1)
result = [[]]
for pool in pools:
result = [x+[y] for x in result for y in pool]
for prod in result:
yield tuple(prod)
else: else:
from itertools import combinations from itertools import combinations, product
...@@ -1451,8 +1451,11 @@ class StructuredDotGradCSR(gof.Op): ...@@ -1451,8 +1451,11 @@ class StructuredDotGradCSR(gof.Op):
} }
"""% dict(locals(), **sub) """% dict(locals(), **sub)
sdg_csr = StructuredDotGradCSR() sdg_csr = StructuredDotGradCSR()
class Dot(gof.op.Op): class Dot(gof.op.Op):
""" """
Operation for efficiently calculating the dot product when Operation for efficiently calculating the dot product when
...@@ -1461,13 +1464,13 @@ class Dot(gof.op.Op): ...@@ -1461,13 +1464,13 @@ class Dot(gof.op.Op):
""" """
def __eq__(self, other): def __eq__(self, other):
return type(self) == type(other) return type(self) == type(other)
def __hash__(self): def __hash__(self):
return hash(type(self)) return hash(type(self))
def __ne__(self, other): def __ne__(self, other):
return not (self == other) return not (self == other)
def infer_shape(self, node, shapes): def infer_shape(self, node, shapes):
xshp, yshp = shapes xshp, yshp = shapes
x, y = node.inputs x, y = node.inputs
...@@ -1480,56 +1483,66 @@ class Dot(gof.op.Op): ...@@ -1480,56 +1483,66 @@ class Dot(gof.op.Op):
if x.ndim == 1 and y.ndim == 1: if x.ndim == 1 and y.ndim == 1:
return [()] return [()]
raise NotImplementedError() raise NotImplementedError()
def make_node(self, x, y): def make_node(self, x, y):
dtype_out = scalar.upcast(x.type.dtype, y.type.dtype) dtype_out = scalar.upcast(x.type.dtype, y.type.dtype)
if not _is_sparse_variable(x) and not _is_sparse_variable(y): if not _is_sparse_variable(x) and not _is_sparse_variable(y):
raise TypeError(x) raise TypeError(x)
return gof.Apply(self, [x, y], [tensor.tensor(dtype=dtype_out, return gof.Apply(self, [x, y], [tensor.tensor(dtype=dtype_out,
broadcastable=(False, False))]) broadcastable=(False, False))])
def perform(self, node, (x, y), (out, )): def perform(self, node, inputs, out):
x, y = inputs
out = out[0]
x_is_sparse = _is_sparse(x) x_is_sparse = _is_sparse(x)
y_is_sparse = _is_sparse(y) y_is_sparse = _is_sparse(y)
if not x_is_sparse and not y_is_sparse: if not x_is_sparse and not y_is_sparse:
raise TypeError(x) raise TypeError(x)
rval = x * y rval = x * y
if x_is_sparse and y_is_sparse: if x_is_sparse and y_is_sparse:
rval = rval.todense() rval = rval.todense()
out[0] = rval out[0] = rval
def grad(self, (x, y), (gz,)): def grad(self, (x, y), (gz,)):
assert _is_sparse_variable(x) or _is_sparse_variable(y) assert _is_sparse_variable(x) or _is_sparse_variable(y)
rval = []
rval = [
tensor.dot(gz, y.T) if _is_dense_variable(y) else dot(gz, y.T), if _is_dense_variable(y):
tensor.dot(x.T, gz) if _is_dense_variable(x) else dot(x.T, gz) rval.append(tensor.dot(gz, y.T))
] else:
rval.append(dot(gz, y.T))
if _is_dense_variable(x):
rval.append(tensor.dot(x.T, gz))
else:
rval.append(dot(x.T, gz))
return rval return rval
_dot = Dot() _dot = Dot()
def dot(x, y): def dot(x, y):
""" """
Operation for efficiently calculating the dot product when Operation for efficiently calculating the dot product when
one or all operands is sparse. Supported format are CSC and CSR. one or all operands is sparse. Supported format are CSC and CSR.
The output of the operation is dense. The output of the operation is dense.
""" """
if hasattr(x, 'getnnz'): x = as_sparse_variable(x) if hasattr(x, 'getnnz'):
if hasattr(y, 'getnnz'): y = as_sparse_variable(y) x = as_sparse_variable(x)
if hasattr(y, 'getnnz'):
y = as_sparse_variable(y)
x_is_sparse_variable = _is_sparse_variable(x) x_is_sparse_variable = _is_sparse_variable(x)
y_is_sparse_variable = _is_sparse_variable(y) y_is_sparse_variable = _is_sparse_variable(y)
if not x_is_sparse_variable and not y_is_sparse_variable: if not x_is_sparse_variable and not y_is_sparse_variable:
raise TypeError() raise TypeError()
return _dot(x, y) return _dot(x, y)
...@@ -1543,16 +1556,16 @@ class Usmm(gof.op.Op): ...@@ -1543,16 +1556,16 @@ class Usmm(gof.op.Op):
""" """
def __eq__(self, other): def __eq__(self, other):
return type(self) == type(other) return type(self) == type(other)
def __hash__(self): def __hash__(self):
return hash(type(self)) return hash(type(self))
def __ne__(self, other): def __ne__(self, other):
return not (self == other) return not (self == other)
def __str__(self): def __str__(self):
return 'Usmm{no_inplace}' return 'Usmm{no_inplace}'
def infer_shape(self, node, shapes): def infer_shape(self, node, shapes):
xshp, yshp = shapes xshp, yshp = shapes
x, y = node.inputs x, y = node.inputs
...@@ -1565,19 +1578,20 @@ class Usmm(gof.op.Op): ...@@ -1565,19 +1578,20 @@ class Usmm(gof.op.Op):
if x.ndim == 1 and y.ndim == 1: if x.ndim == 1 and y.ndim == 1:
return [()] return [()]
raise NotImplementedError() raise NotImplementedError()
def make_node(self, alpha, x, y, z): def make_node(self, alpha, x, y, z):
if not _is_sparse_variable(x) and not _is_sparse_variable(y): if not _is_sparse_variable(x) and not _is_sparse_variable(y):
# If x and y are tensor, we don't want to use this class # If x and y are tensor, we don't want to use this class
# We should use Dot22 and Gemm in that case. # We should use Dot22 and Gemm in that case.
raise TypeError(x) raise TypeError(x)
dtype_out = scalar.upcast(alpha.type.dtype, x.type.dtype, y.type.dtype, z.type.dtype) dtype_out = scalar.upcast(alpha.type.dtype, x.type.dtype,
y.type.dtype, z.type.dtype)
alpha = tensor.as_tensor_variable(alpha) alpha = tensor.as_tensor_variable(alpha)
z = tensor.as_tensor_variable(z) z = tensor.as_tensor_variable(z)
assert z.ndim == 2 assert z.ndim == 2
assert alpha.type.broadcastable == (True,)* alpha.ndim assert alpha.type.broadcastable == (True,) * alpha.ndim
if not _is_sparse_variable(x): if not _is_sparse_variable(x):
x = tensor.as_tensor_variable(x) x = tensor.as_tensor_variable(x)
assert x.ndim == 2 assert x.ndim == 2
...@@ -1585,35 +1599,38 @@ class Usmm(gof.op.Op): ...@@ -1585,35 +1599,38 @@ class Usmm(gof.op.Op):
y = tensor.as_tensor_variable(y) y = tensor.as_tensor_variable(y)
assert y.ndim == 2 assert y.ndim == 2
return gof.Apply(self, [alpha, x, y, z], [tensor.tensor(dtype=dtype_out, broadcastable=(False, False))]) return gof.Apply(self, [alpha, x, y, z],
[tensor.tensor(dtype=dtype_out,
broadcastable=(False, False))])
def perform(self, node, (alpha, x, y, z), (out, )): def perform(self, node, (alpha, x, y, z), (out, )):
x_is_sparse = _is_sparse(x) x_is_sparse = _is_sparse(x)
y_is_sparse = _is_sparse(y) y_is_sparse = _is_sparse(y)
if not x_is_sparse and not y_is_sparse: if not x_is_sparse and not y_is_sparse:
raise TypeError(x) raise TypeError(x)
rval = x * y rval = x * y
if isinstance(rval, scipy.sparse.spmatrix): if isinstance(rval, scipy.sparse.spmatrix):
rval = rval.toarray() rval = rval.toarray()
if rval.dtype == alpha.dtype: if rval.dtype == alpha.dtype:
rval *= alpha # Faster because operation is inplace rval *= alpha # Faster because operation is inplace
else: else:
rval = rval * alpha rval = rval * alpha
if rval.dtype == z.dtype: if rval.dtype == z.dtype:
rval += z # Faster because operation is inplace rval += z # Faster because operation is inplace
else: else:
rval = rval + z rval = rval + z
out[0] = rval out[0] = rval
usmm = Usmm() usmm = Usmm()
class UsmmCscDense(gof.Op): class UsmmCscDense(gof.Op):
""" """
Performs the expression is alpha * x y + z Performs the expression is alpha * x y + z
This is an optimized operation for the case when x is in CSC format. This is an optimized operation for the case when x is in CSC format.
x are sparse matrix x are sparse matrix
y, z is a dense matrix y, z is a dense matrix
alpha is a scalar alpha is a scalar
...@@ -1621,16 +1638,20 @@ class UsmmCscDense(gof.Op): ...@@ -1621,16 +1638,20 @@ class UsmmCscDense(gof.Op):
def __init__(self, inplace): def __init__(self, inplace):
self.inplace = inplace self.inplace = inplace
if inplace: if inplace:
self.destroy_map={ 0 : [6] } self.destroy_map = {0: [6]}
def __str__(self): def __str__(self):
if self.inplace: if self.inplace:
return 'UsmmCscDense{inplace}' return 'UsmmCscDense{inplace}'
else: else:
return 'UsmmCscDense{no_inplace}' return 'UsmmCscDense{no_inplace}'
def __eq__(self, other): def __eq__(self, other):
return (type(self) == type(other)) and self.inplace == other.inplace return (type(self) == type(other)) and self.inplace == other.inplace
def __hash__(self): def __hash__(self):
return hash(type(self)) ^ self.inplace return hash(type(self)) ^ self.inplace
def infer_shape(self, node, shapes): def infer_shape(self, node, shapes):
xshp, yshp = shapes xshp, yshp = shapes
x, y = node.inputs x, y = node.inputs
...@@ -1643,6 +1664,7 @@ class UsmmCscDense(gof.Op): ...@@ -1643,6 +1664,7 @@ class UsmmCscDense(gof.Op):
if x.ndim == 1 and y.ndim == 1: if x.ndim == 1 and y.ndim == 1:
return [()] return [()]
raise NotImplementedError() raise NotImplementedError()
def make_node(self, alpha, x_val, x_ind, x_ptr, x_nrows, y, z): def make_node(self, alpha, x_val, x_ind, x_ptr, x_nrows, y, z):
alpha = tensor.as_tensor_variable(alpha) alpha = tensor.as_tensor_variable(alpha)
x_val = tensor.as_tensor_variable(x_val) x_val = tensor.as_tensor_variable(x_val)
...@@ -1678,16 +1700,13 @@ class UsmmCscDense(gof.Op): ...@@ -1678,16 +1700,13 @@ class UsmmCscDense(gof.Op):
if dtype_out != z.type.dtype: if dtype_out != z.type.dtype:
z = tensor.cast(z, dtype_out) z = tensor.cast(z, dtype_out)
r = gof.Apply(self, [alpha, x_val, x_ind, x_ptr, x_nrows, y, z], r = gof.Apply(self, [alpha, x_val, x_ind, x_ptr, x_nrows, y, z],
[tensor.tensor(dtype_out, (False, y.type.broadcastable[1]))]) [tensor.tensor(dtype_out, (False, y.type.broadcastable[1]))])
return r return r
#def perform(self, node, (alpha, x_val, x_ind, x_ptr, x_nrows, y, z), (out,)):
# raise NotImplemented()
def c_support_code(self): def c_support_code(self):
return blas.blas_header_text() return blas.blas_header_text()
def c_libraries(self): def c_libraries(self):
return blas.ldflags() return blas.ldflags()
...@@ -1696,33 +1715,36 @@ class UsmmCscDense(gof.Op): ...@@ -1696,33 +1715,36 @@ class UsmmCscDense(gof.Op):
def c_lib_dirs(self): def c_lib_dirs(self):
return blas.ldflags(libs=False, libs_dir=True) return blas.ldflags(libs=False, libs_dir=True)
def c_header_dirs(self): def c_header_dirs(self):
return blas.ldflags(libs=False, include_dir=True) return blas.ldflags(libs=False, include_dir=True)
def c_code(self, node, name, (alpha, x_val, x_ind, x_ptr, x_nrows, y, z), (zn,), sub): def c_code(self, node, name, inputs, outputs, sub):
alpha, x_val, x_ind, x_ptr, x_nrows, y, z = inputs
zn = outputs[0]
if node.inputs[1].type.dtype in ('complex64', 'complex128'): if node.inputs[1].type.dtype in ('complex64', 'complex128'):
raise NotImplementedError('Complex types are not supported for x_val') raise NotImplementedError('Complex types are not supported for '
'x_val')
if node.inputs[5].type.dtype in ('complex64', 'complex128'): if node.inputs[5].type.dtype in ('complex64', 'complex128'):
raise NotImplementedError('Complex types are not supported for y') raise NotImplementedError('Complex types are not supported for y')
if node.inputs[6].type.dtype != node.outputs[0].type.dtype: if node.inputs[6].type.dtype != node.outputs[0].type.dtype:
raise NotImplementedError('z and output must have same type') raise NotImplementedError('z and output must have same type')
if node.inputs[1].type.dtype == "float32": if node.inputs[1].type.dtype == "float32":
conv_type = "float" conv_type = "float"
axpy = "saxpy_" axpy = "saxpy_"
else: else:
conv_type = "double" conv_type = "double"
axpy = "daxpy_" axpy = "daxpy_"
# retrieve dtype numbers
typenum_alpha = node.inputs[0].type.dtype_specs()[-1] # retrieve dtype number typenum_alpha = node.inputs[0].type.dtype_specs()[-1]
typenum_x_val = node.inputs[1].type.dtype_specs()[-1] # retrieve dtype number typenum_x_val = node.inputs[1].type.dtype_specs()[-1]
typenum_y = node.inputs[5].type.dtype_specs()[-1] # retrieve dtype number typenum_y = node.inputs[5].type.dtype_specs()[-1]
typenum_z = node.inputs[6].type.dtype_specs()[-1] # retrieve dtype number typenum_z = node.inputs[6].type.dtype_specs()[-1]
typenum_zn = node.outputs[0].type.dtype_specs()[-1] # retrieve dtype number typenum_zn = node.outputs[0].type.dtype_specs()[-1]
inplace = int(self.inplace) inplace = int(self.inplace)
rval = """ rval = """
if (%(x_val)s->nd != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(x_val) != 1"); %(fail)s;} if (%(x_val)s->nd != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(x_val) != 1"); %(fail)s;}
if (%(x_ind)s->nd != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(x_ind) != 1"); %(fail)s;} if (%(x_ind)s->nd != 1) {PyErr_SetString(PyExc_NotImplementedError, "rank(x_ind) != 1"); %(fail)s;}
...@@ -1756,10 +1778,10 @@ class UsmmCscDense(gof.Op): ...@@ -1756,10 +1778,10 @@ class UsmmCscDense(gof.Op):
if (%(x_ptr)s->dimensions[0] != %(y)s->dimensions[0]+1) if (%(x_ptr)s->dimensions[0] != %(y)s->dimensions[0]+1)
{PyErr_SetString(PyExc_NotImplementedError, "x's number of columns doesn't match y's rows"); %(fail)s;} {PyErr_SetString(PyExc_NotImplementedError, "x's number of columns doesn't match y's rows"); %(fail)s;}
if (%(z)s->dimensions[0] != ((npy_int32 *)%(x_nrows)s->data)[0] || %(z)s->dimensions[1] != %(y)s->dimensions[1]) if (%(z)s->dimensions[0] != ((npy_int32 *)%(x_nrows)s->data)[0] || %(z)s->dimensions[1] != %(y)s->dimensions[1])
{PyErr_SetString(PyExc_NotImplementedError, "The dimension of the allocated output doesn't match the correct output size."); %(fail)s;} {PyErr_SetString(PyExc_NotImplementedError, "The dimension of the allocated output doesn't match the correct output size."); %(fail)s;}
if (PyArray_SIZE(%(alpha)s) != 1) if (PyArray_SIZE(%(alpha)s) != 1)
{PyErr_SetString(PyExc_NotImplementedError, "The number of element in alpha must be 1"); %(fail)s;} {PyErr_SetString(PyExc_NotImplementedError, "The number of element in alpha must be 1"); %(fail)s;}
...@@ -1783,7 +1805,7 @@ class UsmmCscDense(gof.Op): ...@@ -1783,7 +1805,7 @@ class UsmmCscDense(gof.Op):
Py_XDECREF(%(zn)s); Py_XDECREF(%(zn)s);
%(zn)s = %(z)s; %(zn)s = %(z)s;
Py_INCREF(%(zn)s); Py_INCREF(%(zn)s);
} }
else if (!%(zn)s else if (!%(zn)s
|| (%(zn)s->dimensions[0] != ((npy_int32 *)%(x_nrows)s->data)[0]) || (%(zn)s->dimensions[0] != ((npy_int32 *)%(x_nrows)s->data)[0])
|| (%(zn)s->dimensions[1] != %(y)s->dimensions[1]) || (%(zn)s->dimensions[1] != %(y)s->dimensions[1])
...@@ -1795,13 +1817,13 @@ class UsmmCscDense(gof.Op): ...@@ -1795,13 +1817,13 @@ class UsmmCscDense(gof.Op):
dims[1] = %(y)s->dimensions[1]; dims[1] = %(y)s->dimensions[1];
%(zn)s = (PyArrayObject*) PyArray_SimpleNew(2, dims, %(typenum_zn)s); %(zn)s = (PyArrayObject*) PyArray_SimpleNew(2, dims, %(typenum_zn)s);
} }
{ {
// sparse array has size MxK, dense KxN, output MxN // sparse array has size MxK, dense KxN, output MxN
npy_intp M = %(zn)s->dimensions[0]; npy_intp M = %(zn)s->dimensions[0];
npy_intp N = %(zn)s->dimensions[1]; npy_intp N = %(zn)s->dimensions[1];
npy_intp K = %(y)s->dimensions[0]; npy_intp K = %(y)s->dimensions[0];
// pointers to access actual data in the arrays passed as params. // pointers to access actual data in the arrays passed as params.
dtype_%(z)s* __restrict__ Dz = (dtype_%(z)s*)%(z)s->data; dtype_%(z)s* __restrict__ Dz = (dtype_%(z)s*)%(z)s->data;
dtype_%(zn)s* __restrict__ Dzn = (dtype_%(zn)s*)%(zn)s->data; dtype_%(zn)s* __restrict__ Dzn = (dtype_%(zn)s*)%(zn)s->data;
...@@ -1809,45 +1831,54 @@ class UsmmCscDense(gof.Op): ...@@ -1809,45 +1831,54 @@ class UsmmCscDense(gof.Op):
const npy_int32 * __restrict__ Dind = (npy_int32*)%(x_ind)s->data; const npy_int32 * __restrict__ Dind = (npy_int32*)%(x_ind)s->data;
const npy_int32 * __restrict__ Dptr = (npy_int32*)%(x_ptr)s->data; const npy_int32 * __restrict__ Dptr = (npy_int32*)%(x_ptr)s->data;
const dtype_%(alpha)s alpha = ((dtype_%(alpha)s*)%(alpha)s->data)[0]; const dtype_%(alpha)s alpha = ((dtype_%(alpha)s*)%(alpha)s->data)[0];
npy_intp Sz = %(z)s->strides[1] / %(z)s->descr->elsize; npy_intp Sz = %(z)s->strides[1] / %(z)s->descr->elsize;
npy_intp Szn = %(zn)s->strides[1] / %(zn)s->descr->elsize; npy_intp Szn = %(zn)s->strides[1] / %(zn)s->descr->elsize;
npy_intp Sval = %(x_val)s->strides[0] / %(x_val)s->descr->elsize; npy_intp Sval = %(x_val)s->strides[0] / %(x_val)s->descr->elsize;
npy_intp Sind = %(x_ind)s->strides[0] / %(x_ind)s->descr->elsize; npy_intp Sind = %(x_ind)s->strides[0] / %(x_ind)s->descr->elsize;
npy_intp Sptr = %(x_ptr)s->strides[0] / %(x_ptr)s->descr->elsize; npy_intp Sptr = %(x_ptr)s->strides[0] / %(x_ptr)s->descr->elsize;
npy_intp Sy = %(y)s->strides[1] / %(y)s->descr->elsize; npy_intp Sy = %(y)s->strides[1] / %(y)s->descr->elsize;
if (!(%(inplace)s)) if (!(%(inplace)s))
{ {
memcpy(Dzn, Dz, M*N*sizeof(dtype_%(zn)s)); memcpy(Dzn, Dz, M*N*sizeof(dtype_%(zn)s));
} }
for (npy_int32 k = 0; k < K; ++k) for (npy_int32 k = 0; k < K; ++k)
{ {
for (npy_int32 m_idx = Dptr[k * Sptr]; m_idx < Dptr[(k+1)*Sptr]; ++m_idx) for (npy_int32 m_idx = Dptr[k * Sptr]; m_idx < Dptr[(k+1)*Sptr]; ++m_idx)
{ {
const npy_int32 m = Dind[m_idx * Sind]; // row index of non-null value for column K const npy_int32 m = Dind[m_idx * Sind]; // row index of non-null value for column K
const dtype_%(x_val)s Amk = alpha * Dval[m_idx * Sval]; // actual value at that location const dtype_%(x_val)s Amk = alpha * Dval[m_idx * Sval]; // actual value at that location
const dtype_%(y)s* y_row = (dtype_%(y)s*)(%(y)s->data + %(y)s->strides[0] * k); const dtype_%(y)s* y_row = (dtype_%(y)s*)(%(y)s->data + %(y)s->strides[0] * k);
const dtype_%(zn)s* z_row = (dtype_%(zn)s*)(%(zn)s->data + %(zn)s->strides[0] * m); const dtype_%(zn)s* z_row = (dtype_%(zn)s*)(%(zn)s->data + %(zn)s->strides[0] * m);
%(axpy)s((int*)&N, (%(conv_type)s*)&Amk, (%(conv_type)s*)y_row, (int*)&Sy, (%(conv_type)s*)z_row, (int*)&Szn); %(axpy)s((int*)&N, (%(conv_type)s*)&Amk, (%(conv_type)s*)y_row, (int*)&Sy, (%(conv_type)s*)z_row, (int*)&Szn);
} }
} }
} }
"""% dict(locals(), **sub) """ % dict(locals(), **sub)
return rval return rval
usmm_csc_dense = UsmmCscDense(inplace=False) usmm_csc_dense = UsmmCscDense(inplace=False)
usmm_csc_dense_inplace = UsmmCscDense(inplace=True) usmm_csc_dense_inplace = UsmmCscDense(inplace=True)
local_usmm = gof.opt.PatternSub((tensor.sub, 'z', (tensor.mul, {'pattern' : 'alpha', 'constraint' : lambda expr: numpy.all(expr.type.broadcastable) },
(_dot, 'x', 'y'))), local_usmm = gof.opt.PatternSub(
(usmm, (tensor.neg, 'alpha'), 'x', 'y', 'z')) (tensor.sub, 'z',
(tensor.mul,
{'pattern': 'alpha',
'constraint': lambda expr: numpy.all(expr.type.broadcastable)},
(_dot, 'x', 'y'))),
(usmm, (tensor.neg, 'alpha'), 'x', 'y', 'z'))
register_specialize(local_usmm, name="local_usmm") register_specialize(local_usmm, name="local_usmm")
...@@ -1855,23 +1886,26 @@ register_specialize(local_usmm, name="local_usmm") ...@@ -1855,23 +1886,26 @@ register_specialize(local_usmm, name="local_usmm")
def local_usmm_csx(node): def local_usmm_csx(node):
if node.op == usmm: if node.op == usmm:
alpha, x, y, z = node.inputs alpha, x, y, z = node.inputs
x_is_sparse_variable = _is_sparse_variable(x) x_is_sparse_variable = _is_sparse_variable(x)
y_is_sparse_variable = _is_sparse_variable(y) y_is_sparse_variable = _is_sparse_variable(y)
if x_is_sparse_variable and not y_is_sparse_variable: if x_is_sparse_variable and not y_is_sparse_variable:
if x.type.format == 'csc': if x.type.format == 'csc':
x_val, x_ind, x_ptr, x_shape = csm_properties(x) x_val, x_ind, x_ptr, x_shape = csm_properties(x)
x_nsparse = x_shape[0] x_nsparse = x_shape[0]
dtype_out = scalar.upcast(alpha.type.dtype, x.type.dtype, y.type.dtype, z.type.dtype) dtype_out = scalar.upcast(alpha.type.dtype, x.type.dtype,
y.type.dtype, z.type.dtype)
# Sparse cast is not implemented. # Sparse cast is not implemented.
if y.type.dtype != dtype_out: if y.type.dtype != dtype_out:
return False return False
return [usmm_csc_dense(alpha, x_val, x_ind, x_ptr, x_nsparse, y, z)] return [usmm_csc_dense(alpha, x_val, x_ind, x_ptr,
x_nsparse, y, z)]
return False return False
register_specialize(local_usmm_csx) register_specialize(local_usmm_csx)
@gof.local_optimizer([usmm_csc_dense]) @gof.local_optimizer([usmm_csc_dense])
def local_usmm_csc_dense_inplace(node): def local_usmm_csc_dense_inplace(node):
if node.op == usmm_csc_dense: if node.op == usmm_csc_dense:
......
...@@ -12,6 +12,8 @@ except ImportError: ...@@ -12,6 +12,8 @@ except ImportError:
import theano import theano
from theano import compile, config from theano import compile, config
from theano.sparse import enable_sparse from theano.sparse import enable_sparse
from theano.gof.python25 import product
if enable_sparse == False: if enable_sparse == False:
raise SkipTest('Optional package sparse disabled') raise SkipTest('Optional package sparse disabled')
...@@ -527,31 +529,32 @@ class DotTests(unittest.TestCase): ...@@ -527,31 +529,32 @@ class DotTests(unittest.TestCase):
def setUp(self): def setUp(self):
x_size = (10, 1000) x_size = (10, 1000)
y_size = (1000, 10000) y_size = (1000, 10000)
self.x_csr = scipy.sparse.csr_matrix(numpy.random.binomial(1, 0.5, x_size), dtype=theano.config.floatX) self.x_csr = scipy.sparse.csr_matrix(numpy.random.binomial(1, 0.5, x_size), dtype=theano.config.floatX)
self.x_csc = scipy.sparse.csc_matrix(numpy.random.binomial(1, 0.5, x_size), dtype=theano.config.floatX) self.x_csc = scipy.sparse.csc_matrix(numpy.random.binomial(1, 0.5, x_size), dtype=theano.config.floatX)
self.y = numpy.asarray(numpy.random.uniform(-1, 1, y_size), dtype=theano.config.floatX) self.y = numpy.asarray(numpy.random.uniform(-1, 1, y_size), dtype=theano.config.floatX)
self.y_csr = scipy.sparse.csr_matrix(numpy.random.binomial(1, 0.5, y_size), dtype=theano.config.floatX) self.y_csr = scipy.sparse.csr_matrix(numpy.random.binomial(1, 0.5, y_size), dtype=theano.config.floatX)
self.y_csc = scipy.sparse.csc_matrix(numpy.random.binomial(1, 0.5, y_size), dtype=theano.config.floatX) self.y_csc = scipy.sparse.csc_matrix(numpy.random.binomial(1, 0.5, y_size), dtype=theano.config.floatX)
def test_csr_dense(self): def test_csr_dense(self):
x = theano.sparse.csr_matrix('x') x = theano.sparse.csr_matrix('x')
y = theano.tensor.matrix('y') y = theano.tensor.matrix('y')
f_a = theano.function([x, y], theano.sparse.dot(x, y)) f_a = theano.function([x, y], theano.sparse.dot(x, y))
f_b = lambda x, y: x * y f_b = lambda x, y: x * y
assert abs(f_a(self.x_csr, self.y) - f_b(self.x_csr, self.y)).max() < 10**-4 assert abs(f_a(self.x_csr, self.y) - f_b(self.x_csr, self.y)).max() < 1e-4
def test_csc_dense(self): def test_csc_dense(self):
x = theano.sparse.csc_matrix('x') x = theano.sparse.csc_matrix('x')
y = theano.tensor.matrix('y') y = theano.tensor.matrix('y')
f_a = theano.function([x, y], theano.sparse.dot(x, y)) f_a = theano.function([x, y], theano.sparse.dot(x, y))
f_b = lambda x, y: x * y f_b = lambda x, y: x * y
assert abs(f_a(self.x_csc, self.y) - f_b(self.x_csc, self.y)).max() < 10**-4 assert (abs(f_a(self.x_csc, self.y) - f_b(self.x_csc, self.y)).max()
< 1e-4)
def test_sparse_sparse(self): def test_sparse_sparse(self):
for d1, d2 in [('float32', 'float32'), for d1, d2 in [('float32', 'float32'),
('float32', 'float64'), ('float32', 'float64'),
...@@ -571,7 +574,7 @@ class DotTests(unittest.TestCase): ...@@ -571,7 +574,7 @@ class DotTests(unittest.TestCase):
vx = getattr(self,'x_'+x_f).astype(d1) vx = getattr(self,'x_'+x_f).astype(d1)
vy = getattr(self,'y_'+y_f).astype(d2) vy = getattr(self,'y_'+y_f).astype(d2)
assert abs(f_a(vx, vy) - f_b(vx, vy)).max() < 10**-4 assert abs(f_a(vx, vy) - f_b(vx, vy)).max() < 1e-4
class UsmmTests(unittest.TestCase): class UsmmTests(unittest.TestCase):
...@@ -579,11 +582,11 @@ class UsmmTests(unittest.TestCase): ...@@ -579,11 +582,11 @@ class UsmmTests(unittest.TestCase):
x_size = (10, 200) x_size = (10, 200)
y_size = (200, 2000) y_size = (200, 2000)
z_size = (x_size[0], y_size[1]) z_size = (x_size[0], y_size[1])
self.x = numpy.asarray(numpy.random.binomial(1, 0.5, x_size), dtype=theano.config.floatX) self.x = numpy.asarray(numpy.random.binomial(1, 0.5, x_size), dtype=theano.config.floatX)
self.y = numpy.asarray(numpy.random.uniform(-1, 1, y_size), dtype=theano.config.floatX) self.y = numpy.asarray(numpy.random.uniform(-1, 1, y_size), dtype=theano.config.floatX)
self.z = numpy.asarray(numpy.random.uniform(-1, 1, z_size), dtype=theano.config.floatX) self.z = numpy.asarray(numpy.random.uniform(-1, 1, z_size), dtype=theano.config.floatX)
def test(self): def test(self):
def mat(format, name, dtype): def mat(format, name, dtype):
if format == 'dense': if format == 'dense':
...@@ -591,69 +594,80 @@ class UsmmTests(unittest.TestCase): ...@@ -591,69 +594,80 @@ class UsmmTests(unittest.TestCase):
else: else:
return theano.sparse.matrix(format, name, dtype=dtype) return theano.sparse.matrix(format, name, dtype=dtype)
for dtype1 in ['float32', 'float64']: params = product(*([['float32', 'float64']] * 4 +
for dtype2 in ['float32', 'float64']: [['dense', 'csc', 'csr']] * 2))
for dtype3 in ['float32', 'float64']:
for dtype4 in ['float32', 'float64']: for dtype1, dtype2, dtype3, dtype4, format1, format2 in params:
for format1 in ['dense', 'csc','csr']: if format1 == 'dense' and format2 == 'dense':
for format2 in ['dense', 'csc','csr']: # Usmm won't be used!
if format1 == 'dense' and format2 == 'dense': continue
# Usmm won't be used! x = mat(format1, 'x', dtype1)
continue y = mat(format2, 'y', dtype2)
x = mat(format1, 'x', dtype1) a = theano.tensor.scalar('a', dtype=dtype3)
y = mat(format2, 'y', dtype2) z = theano.tensor.shared(
a = theano.tensor.scalar('a', dtype=dtype3) numpy.asarray(self.z, dtype=dtype4).copy()
z = theano.tensor.shared(numpy.asarray(self.z,dtype=dtype4).copy()) )
f_b = lambda z, a, x, y: z - a * (x * y) f_b = lambda z, a, x, y: z - a * (x * y)
x_data = numpy.asarray(self.x, dtype = dtype1) x_data = numpy.asarray(self.x, dtype=dtype1)
if format1 != 'dense': if format1 != 'dense':
x_data = as_sparse_format(x_data, format1) x_data = as_sparse_format(x_data, format1)
y_data = numpy.asarray(self.y, dtype = dtype2) y_data = numpy.asarray(self.y, dtype=dtype2)
if format2 != 'dense': if format2 != 'dense':
y_data = as_sparse_format(y_data, format2) y_data = as_sparse_format(y_data, format2)
z_data = numpy.asarray(self.z, dtype = dtype3) z_data = numpy.asarray(self.z, dtype=dtype3)
f_b_out = f_b(z_data, 1, x_data, y_data) f_b_out = f_b(z_data, 1, x_data, y_data)
# Can it work inplace? # Can it work inplace?
inplace = dtype4 == theano.scalar.upcast(dtype1, dtype2, dtype3) inplace = dtype4 == theano.scalar.upcast(dtype1, dtype2, dtype3)
# To make it easier to check the toposort # To make it easier to check the toposort
mode = theano.compile.mode.get_default_mode().excluding('fusion') mode = theano.compile.mode.get_default_mode().excluding('fusion')
if inplace: if inplace:
f_a = theano.function([a, x, y], [], updates = {z: z - a * theano.sparse.dot(x, y)}
updates={ z : z - a * theano.sparse.dot(x, y)}, f_a = theano.function([a, x, y], [],
mode = mode) updates=updates,
f_a(1, x_data, y_data) mode=mode)
assert abs(z.get_value(borrow=True) - f_b_out).max() < 10**-4 f_a(1, x_data, y_data)
else: assert abs(z.get_value(borrow=True) - f_b_out).max() < 1e-4
f_a = theano.function([a, x, y], z - a * theano.sparse.dot(x, y), else:
mode = mode) f_a = theano.function([a, x, y],
f_a_out = f_a(1, x_data, y_data) z - a * theano.sparse.dot(x, y),
assert abs(f_a_out - f_b_out).max() < 10**-4 mode=mode)
topo = f_a.maker.env.toposort() f_a_out = f_a(1, x_data, y_data)
if (y.type.dtype == theano.scalar.upcast(dtype1, dtype2, dtype3, dtype4) assert abs(f_a_out - f_b_out).max() < 1e-4
and format1=='csc' and format2=='dense'): topo = f_a.maker.env.toposort()
up = theano.scalar.upcast(dtype1, dtype2, dtype3, dtype4)
assert sum([isinstance(node.op, tensor.Elemwise) and isinstance(node.op.scalar_op, theano.scalar.basic.Cast) for node in topo])==len(topo)-5 if y.type.dtype == up and format1 == 'csc' and format2 == 'dense':
topo = [node for node in topo if not(isinstance(node.op, tensor.Elemwise) and isinstance(node.op.scalar_op, theano.scalar.basic.Cast))] assert (sum([isinstance(node.op, tensor.Elemwise) and
assert len(topo)==5, topo isinstance(node.op.scalar_op,
# Usmm is tested at the same time in debugmode theano.scalar.basic.Cast)
# Check if the optimization local_usmm and local_usmm_csx is applied for node in topo]) == len(topo) - 5)
assert isinstance(topo[0].op, theano.sparse.basic.CSMProperties) new_topo = []
assert isinstance(topo[1].op, theano.tensor.DimShuffle) for node in topo:
assert isinstance(topo[2].op, theano.tensor.Subtensor) if not isinstance(node.op, tensor.Elemwise) and \
assert topo[3].op == theano.tensor.neg isinstance(node.op.scalar_op, theano.scalar.basic.Cast):
assert isinstance(topo[4].op, theano.sparse.UsmmCscDense) new_topo.append(node)
if inplace: topo = new_topo
assert topo[4].op.inplace assert len(topo) == 5, topo
else: # Usmm is tested at the same time in debugmode
assert len(topo)==3, topo # Check if the optimization local_usmm and local_usmm_csx is
assert isinstance(topo[0].op, theano.tensor.DimShuffle) # applied
assert topo[1].op == theano.tensor.neg assert isinstance(topo[0].op,
assert isinstance(topo[2].op, theano.sparse.Usmm) theano.sparse.basic.CSMProperties)
assert isinstance(topo[1].op, theano.tensor.DimShuffle)
assert isinstance(topo[2].op, theano.tensor.Subtensor)
assert topo[3].op == theano.tensor.neg
assert isinstance(topo[4].op, theano.sparse.UsmmCscDense)
if inplace:
assert topo[4].op.inplace
else:
assert len(topo)==3, topo
assert isinstance(topo[0].op, theano.tensor.DimShuffle)
assert topo[1].op == theano.tensor.neg
assert isinstance(topo[2].op, theano.sparse.Usmm)
def test_shape_i(): def test_shape_i():
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论