Commit 9a038652 authored by James Bergstra

moved sparse/true_dot to sandbox because perform doesnt always produce sparse…

moved sparse/true_dot to sandbox because perform doesnt always produce sparse results and no one uses it
Parent 5786d6bf
@@ -652,82 +652,6 @@ def mul(x,y):
elif y_is_sparse_result and not x_is_sparse_result: return mul_s_d(y,x)
else: raise NotImplementedError()
###############
#
# TrueDot
#
class TrueDot(gof.op.Op):
    """Matrix product whose left operand is sparse.

    The output is a SparseResult; its storage format is derived from the
    left operand's format (csc/coo -> csc, csr -> csr), matching the
    conversions performed by scipy.sparse.dot.

    Attributes:
    grad_preserves_dense - a boolean flag [default: True].

    grad_preserves_dense controls whether gradients with respect to inputs
    are converted to dense matrices when the corresponding input y is
    dense (not in a L{SparseResult} wrapper). This is generally a good idea
    when L{Dot} is in the middle of a larger graph, because the types
    of gy will match that of y. This conversion might be inefficient if
    the gradients are graph outputs though, hence this mask.
    @todo: Simplify code by splitting into DotSS and DotSD.
    """
    def __init__(self, grad_preserves_dense=True):
        self.grad_preserves_dense = grad_preserves_dense
    def make_node(self, x, y):
        """
        Because of trickiness of implementing, we assume that the left
        argument x is SparseResult (not dense).
        """
        if x.type.dtype != y.type.dtype:
            raise NotImplementedError()
        # Raise rather than assert: asserts vanish under `python -O`,
        # and a dense x is a caller error, not an internal invariant.
        if not _is_sparse_result(x):
            raise TypeError('TrueDot requires a sparse left operand', x)
        # These are the conversions performed by scipy.sparse.dot
        if x.type.format == "csc" or x.type.format == "coo":
            myformat = "csc"
        elif x.type.format == "csr":
            myformat = "csr"
        else:
            raise NotImplementedError()
        inputs = [x, y]    # Need to convert? e.g. assparse
        outputs = [Sparse(dtype=x.type.dtype, format=myformat).make_result()]
        return gof.Apply(self, inputs, outputs)
    def perform(self, node, inputs, outputs):
        """Compute the product with scipy's `dot`.

        Unpacking is done inside the body instead of in the signature:
        tuple parameter unpacking is Python-2-only syntax (PEP 3113).

        @todo: Verify that output is sufficiently sparse, and raise a warning if it is not
        @todo: Also determine that we are storing the output in the best storage format?
        """
        (x, y) = inputs
        (out,) = outputs
        out[0] = x.dot(y)
    def grad(self, inputs, g_outputs):
        # For Z = X dot Y the input gradients are gZ dot Y' and X' dot gZ.
        (x, y) = inputs
        (gz,) = g_outputs
        assert _is_sparse_result(gz)
        assert _is_sparse_result(x)
        rval = [true_dot(gz, y.T), true_dot(x.T, gz)]
        if _is_dense_result(y):
            if self.grad_preserves_dense:
                rval[1] = dense_from_sparse(rval[1])
        return rval
    def __eq__(self, other):
        return type(self) == type(other) and self.grad_preserves_dense == other.grad_preserves_dense
    def __ne__(self, other):
        # Python 2 does not derive __ne__ from __eq__; keep them consistent.
        return not (self == other)
    def __hash__(self):
        return hash(self.grad_preserves_dense)
def true_dot(x, y, grad_preserves_dense=True):
    """Return a graph node for the matrix product x * y, where at least
    one of x, y is sparse.

    Raw scipy.sparse matrices (anything exposing `getnnz`) are first
    wrapped into SparseResults.  When only y is sparse, the product is
    computed as (y.T dot x.T).T so that TrueDot always receives a sparse
    left operand.

    @todo: Maybe the triple-transposition formulation (when x is dense)
    is slow. See if there is a direct way to do this.
    """
    # `getnnz` is the duck-typing test for a raw scipy.sparse matrix.
    if hasattr(x, 'getnnz'):
        x = as_sparse(x)
    if hasattr(y, 'getnnz'):
        y = as_sparse(y)
    x_is_sparse_result = _is_sparse_result(x)
    y_is_sparse_result = _is_sparse_result(y)
    if not x_is_sparse_result and not y_is_sparse_result:
        # Explain the failure instead of raising a bare TypeError().
        raise TypeError('true_dot requires at least one sparse argument',
                        (x, y))
    if x_is_sparse_result:
        return TrueDot(grad_preserves_dense)(x, y)
    else:
        assert y_is_sparse_result
        return transpose(TrueDot(grad_preserves_dense)(y.T, x.T))
###############
#
# StructuredDot
......
###############
#
# TrueDot
#
class TrueDot(gof.op.Op):
"""
Matrix product whose left operand is sparse; the output is a
SparseResult whose format is derived from the left operand's format.
Attributes:
grad_preserves_dense - a boolean flag [default: True].
grad_preserves_dense controls whether gradients with respect to inputs
are converted to dense matrices when the corresponding input y is
dense (not in a L{SparseResult} wrapper). This is generally a good idea
when L{Dot} is in the middle of a larger graph, because the types
of gy will match that of y. This conversion might be inefficient if
the gradients are graph outputs though, hence this mask.
@todo: Simplify code by splitting into DotSS and DotSD.
"""
def __init__(self, grad_preserves_dense=True):
self.grad_preserves_dense = grad_preserves_dense
# Op identity is determined solely by grad_preserves_dense, so two
# TrueDot instances with equal flags compare (and hash) equal.
def __eq__(self, other):
return type(self) == type(other) and self.grad_preserves_dense == other.grad_preserves_dense
def __hash__(self):
return hash(self.grad_preserves_dense)
# Python 2 does not derive __ne__ from __eq__; define it explicitly.
def __ne__(self, other):
return not (self == other)
def make_node(self, x, y):
"""
:note: Because of trickiness of implementing, we assume that the left argument x is SparseResult (not dense)
"""
# Mixed dtypes are not supported.
if x.type.dtype != y.type.dtype:
raise NotImplementedError()
if not _is_sparse_result(x):
raise TypeError(x)
# These are the conversions performed by scipy.sparse.dot
if x.type.format == "csc" or x.type.format == "coo":
myformat = "csc"
elif x.type.format == "csr":
myformat = "csr"
else:
raise NotImplementedError()
inputs = [x, y]    # Need to convert? e.g. assparse
outputs = [Sparse(dtype = x.type.dtype, format = myformat).make_result()]
return gof.Apply(self, inputs, outputs)
# NOTE: tuple parameter unpacking below is Python-2-only syntax (PEP 3113).
def perform(self, node, (x, y), (out, )):
"""
Compute the product via scipy's x.dot(y) and store it in out[0].
@todo: Verify that output is sufficiently sparse, and raise a warning if it is not
@todo: Also determine that we are storing the output in the best storage format?
"""
rval = x.dot(y)
out[0] = rval
def grad(self, (x, y), (gz,)):
# For Z = X dot Y the input gradients are gZ dot Y' and X' dot gZ.
assert _is_sparse_result(gz)
assert _is_sparse_result(x)
rval = [true_dot(gz, y.T), true_dot(x.T, gz)]
# Optionally densify the gradient w.r.t. y so its type matches y's.
if _is_dense_result(y):
if self.grad_preserves_dense:
rval[1] = dense_from_sparse(rval[1])
return rval
def true_dot(x, y, grad_preserves_dense=True):
    """Build a graph node for x * y where at least one operand is sparse.

    @todo: Maybe the triple-transposition formulation (when x is dense)
    is slow. See if there is a direct way to do this.
    """
    # Promote raw scipy matrices (they expose getnnz) to SparseResults.
    if hasattr(x, 'getnnz'):
        x = as_sparse(x)
    if hasattr(y, 'getnnz'):
        y = as_sparse(y)
    sparse_x = _is_sparse_result(x)
    sparse_y = _is_sparse_result(y)
    if sparse_x:
        return TrueDot(grad_preserves_dense)(x, y)
    if sparse_y:
        # Only y is sparse: compute (y.T dot x.T).T so that TrueDot
        # always receives a sparse left operand.
        return transpose(TrueDot(grad_preserves_dense)(y.T, x.T))
    # Neither operand is sparse -- use the dense dot instead.
    raise TypeError()
class test_true_dot(unittest.TestCase):
# Unit tests for true_dot / TrueDot: sparse*sparse (SS), sparse*dense
# (SD), dense*sparse (DS) products, and gradient back-propagation.
def setUp(self):
# Fixed seed so randomized test data is reproducible across runs.
numpy.random.seed(44)
def test_basicSS(self):
# Sparse x sparse: z = x dot x.T for a 500x3 matrix with two nonzeros.
for mtype in _mtypes:
x = as_sparse(mtype((500,3)))
x.data[(10, 1)] = 1
x.data[(20, 2)] = 2
self.failUnless(_is_sparse_result(x))
xT = x.T
self.failUnless(_is_sparse_result(xT))
zop = true_dot(x,xT)
self.failUnless(_is_sparse_result(zop))
z = eval_outputs([zop])
self.failUnless(_is_sparse(z))
self.failUnless(z.shape == (500,500))
self.failUnless(type(z) is mtype)
# Hand-built expected result: the two nonzeros land on the diagonal.
w = mtype((500,500))
w[(10, 10)] = 1
w[(20, 20)] = 4
self.failUnless(z.shape == w.shape)
self.failUnless(type(z) == type(w))
self.failUnless(z.dtype == w.dtype)
#self.failUnless(z == w)
# Direct sparse equality is disabled above; compare via nnz of the
# difference, then densify and compare element-wise.
self.failUnless(abs(z-w).nnz == 0)
z = z.todense()
w = w.todense()
self.failUnless((z == w).all() == True)
def test_basicSD(self):
# Sparse x dense: z = x dot y for a dense 3x2 right operand.
for mtype in _mtypes:
x = as_sparse(mtype((500,3)))
x.data[(10, 1)] = 1
x.data[(20, 2)] = 2
self.failUnless(_is_sparse_result(x))
y = tensor.as_tensor([[1., 2], [3, 4], [2, 1]])
self.failUnless(_is_dense_result(y))
zop = true_dot(x,y)
self.failUnless(_is_sparse_result(zop))
z = eval_outputs([zop])
self.failUnless(_is_sparse(z))
self.failUnless(z.shape == (500,2))
self.failUnless(type(z) is mtype)
# Expected values computed by hand from the two nonzeros of x.
w = mtype((500,2))
w[(10, 0)] = 3.
w[(20, 0)] = 4
w[(10, 1)] = 4
w[(20, 1)] = 2
self.failUnless(z.shape == w.shape)
self.failUnless(type(z) == type(w))
self.failUnless(z.dtype == w.dtype)
#self.failUnless(z == w)
self.failUnless(abs(z-w).nnz == 0)
z = z.todense()
w = w.todense()
self.failUnless((z == w).all() == True)
def test_basicDS(self):
# Dense x sparse: exercises the triple-transposition path in true_dot.
for mtype in _mtypes:
x = as_sparse(mtype((500,3)))
x.data[(10, 1)] = 1
x.data[(20, 2)] = 2
self.failUnless(_is_sparse_result(x))
y = tensor.as_tensor([[1., 2], [3, 4], [2, 1]])
self.failUnless(_is_dense_result(y))
# Transpose the underlying data in place so y (dense) is on the left.
x.data = x.data.T
y.data = y.data.T
zop = true_dot(y, x)
zop = transpose(true_dot(y, x))
self.failUnless(_is_sparse_result(zop))
z = eval_outputs([zop])
self.failUnless(_is_sparse(z))
self.failUnless(z.shape == (500,2))
# self.failUnless(type(z) is mtype)
w = mtype((500,2))
w[(10, 0)] = 3.
w[(20, 0)] = 4
w[(10, 1)] = 4
w[(20, 1)] = 2
self.failUnless(z.shape == w.shape)
# Type should switch from csr to csc and vice-versa, so don't perform this test
#self.failUnless(type(z) == type(w))
self.failUnless(z.dtype == w.dtype)
# Type should switch from csr to csc and vice-versa, so don't perform this test
#self.failUnless(z == w)
self.failUnless(abs(z-w).nnz == 0)
z = z.todense()
w = w.todense()
self.failUnless((z == w).all() == True)
def test_graph_bprop0(self):
# Gradient-descent smoke test: loss on a sparse autoencoder-like graph
# must decrease after 50 update steps.
for mtype in _mtypes:
x = tensor.matrix('x') #Tensor('float64', broadcastable=[False,False], name='x')
w = Sparse(dtype = 'float64', format = _mtype_to_str[mtype]).make_result()
xw = dense_from_sparse(true_dot(w, x))
y = dense_from_sparse(true_dot(w.T, xw))
diff = x-y
loss = tensor.sum(tensor.sqr(diff))
gw = tensor.grad(loss, w)
trainfn = compile.function([x, w], [y, loss, gw])
x = numpy.asarray([[1., 2], [3, 4], [2, 1]])
w = mtype((500,3))
w[(10, 1)] = 1
w[(20, 2)] = 2
lr = 0.001
y, origloss, gw = trainfn(x, w)
for epoch in xrange(50):
y, loss, gw = trainfn(x, w)
w = w - (lr * gw)
print loss
self.failUnless(origloss > loss)
# NOTE(review): comparing against an exact float string couples the
# test to platform/str formatting -- fragile; confirm intent.
self.failUnless('1.05191241115' == str(loss))
def test_graph_bprop_rand(self):
# Same back-prop check as above, but on 10 random dense inputs; only
# asserts that the loss decreased (no exact value).
for i in range(10):
xorig = numpy.random.rand(3,2)
for mtype in _mtypes:
x = tensor.matrix('x')
w = Sparse(dtype = 'float64', format = _mtype_to_str[mtype]).make_result()
xw = dense_from_sparse(true_dot(w, x))
y = dense_from_sparse(true_dot(w.T, xw))
diff = x-y
loss = tensor.sum(tensor.sqr(diff))
gw = tensor.grad(loss, w)
trainfn = compile.function([x, w], [y, loss, gw])
x = xorig
w = mtype((500,3))
w[(10, 1)] = 1
w[(20, 2)] = 2
lr = 0.001
y, origloss, gw = trainfn(x, w)
for epoch in xrange(50):
y, loss, gw = trainfn(x, w)
w = w - (lr * gw)
self.failUnless(origloss > loss)
@@ -19,7 +19,7 @@ class T_transpose(unittest.TestCase):
def setUp(self):
numpy.random.seed(44)
def test_transpose_csc(self):
sp = sparse.csc_matrix(sparse.eye(5,3))
a = as_sparse(sp)
self.failUnless(a.data is sp)
self.failUnless(a.data.shape == (5,3))
@@ -32,7 +32,7 @@ class T_transpose(unittest.TestCase):
vta = eval_outputs([ta])
self.failUnless(vta.shape == (3,5))
def test_transpose_csr(self):
a = as_sparse(sparse.csr_matrix(sparse.eye(5,3)))
self.failUnless(a.data.shape == (5,3))
self.failUnless(a.type.dtype == 'float64')
self.failUnless(a.type.format == 'csr')
@@ -149,163 +149,6 @@ class T_conversion(unittest.TestCase):
self.failUnless(numpy.all(val[0] == [1,0,0,0,0]))
class test_true_dot(unittest.TestCase):
# Tests for the sparse true_dot op: covers SS, SD, DS operand
# combinations and two gradient back-propagation scenarios.
def setUp(self):
# Deterministic seed for any randomized fixtures.
numpy.random.seed(44)
def test_basicSS(self):
# z = x dot x.T with x a 500x3 sparse matrix holding two nonzeros.
for mtype in _mtypes:
x = as_sparse(mtype((500,3)))
x.data[(10, 1)] = 1
x.data[(20, 2)] = 2
self.failUnless(_is_sparse_result(x))
xT = x.T
self.failUnless(_is_sparse_result(xT))
zop = true_dot(x,xT)
self.failUnless(_is_sparse_result(zop))
z = eval_outputs([zop])
self.failUnless(_is_sparse(z))
self.failUnless(z.shape == (500,500))
self.failUnless(type(z) is mtype)
# Expected product: nonzeros of x land on the diagonal of z.
w = mtype((500,500))
w[(10, 10)] = 1
w[(20, 20)] = 4
self.failUnless(z.shape == w.shape)
self.failUnless(type(z) == type(w))
self.failUnless(z.dtype == w.dtype)
#self.failUnless(z == w)
# Sparse equality is checked indirectly: no nonzeros in the
# difference, then an element-wise dense comparison.
self.failUnless(abs(z-w).nnz == 0)
z = z.todense()
w = w.todense()
self.failUnless((z == w).all() == True)
def test_basicSD(self):
# Sparse left operand times a dense 3x2 right operand.
for mtype in _mtypes:
x = as_sparse(mtype((500,3)))
x.data[(10, 1)] = 1
x.data[(20, 2)] = 2
self.failUnless(_is_sparse_result(x))
y = tensor.as_tensor([[1., 2], [3, 4], [2, 1]])
self.failUnless(_is_dense_result(y))
zop = true_dot(x,y)
self.failUnless(_is_sparse_result(zop))
z = eval_outputs([zop])
self.failUnless(_is_sparse(z))
self.failUnless(z.shape == (500,2))
self.failUnless(type(z) is mtype)
# Hand-computed expected values from the two nonzeros of x.
w = mtype((500,2))
w[(10, 0)] = 3.
w[(20, 0)] = 4
w[(10, 1)] = 4
w[(20, 1)] = 2
self.failUnless(z.shape == w.shape)
self.failUnless(type(z) == type(w))
self.failUnless(z.dtype == w.dtype)
#self.failUnless(z == w)
self.failUnless(abs(z-w).nnz == 0)
z = z.todense()
w = w.todense()
self.failUnless((z == w).all() == True)
def test_basicDS(self):
# Dense left operand: goes through true_dot's transpose fallback.
for mtype in _mtypes:
x = as_sparse(mtype((500,3)))
x.data[(10, 1)] = 1
x.data[(20, 2)] = 2
self.failUnless(_is_sparse_result(x))
y = tensor.as_tensor([[1., 2], [3, 4], [2, 1]])
self.failUnless(_is_dense_result(y))
# In-place transposition of the underlying data puts the dense
# operand on the left of the product.
x.data = x.data.T
y.data = y.data.T
zop = true_dot(y, x)
zop = transpose(true_dot(y, x))
self.failUnless(_is_sparse_result(zop))
z = eval_outputs([zop])
self.failUnless(_is_sparse(z))
self.failUnless(z.shape == (500,2))
# self.failUnless(type(z) is mtype)
w = mtype((500,2))
w[(10, 0)] = 3.
w[(20, 0)] = 4
w[(10, 1)] = 4
w[(20, 1)] = 2
self.failUnless(z.shape == w.shape)
# Type should switch from csr to csc and vice-versa, so don't perform this test
#self.failUnless(type(z) == type(w))
self.failUnless(z.dtype == w.dtype)
# Type should switch from csr to csc and vice-versa, so don't perform this test
#self.failUnless(z == w)
self.failUnless(abs(z-w).nnz == 0)
z = z.todense()
w = w.todense()
self.failUnless((z == w).all() == True)
def test_graph_bprop0(self):
# Trains w by gradient descent for 50 epochs; the reconstruction loss
# must decrease from its initial value.
for mtype in _mtypes:
x = tensor.matrix('x') #Tensor('float64', broadcastable=[False,False], name='x')
w = Sparse(dtype = 'float64', format = _mtype_to_str[mtype]).make_result()
xw = dense_from_sparse(true_dot(w, x))
y = dense_from_sparse(true_dot(w.T, xw))
diff = x-y
loss = tensor.sum(tensor.sqr(diff))
gw = tensor.grad(loss, w)
trainfn = compile.function([x, w], [y, loss, gw])
x = numpy.asarray([[1., 2], [3, 4], [2, 1]])
w = mtype((500,3))
w[(10, 1)] = 1
w[(20, 2)] = 2
lr = 0.001
y, origloss, gw = trainfn(x, w)
for epoch in xrange(50):
y, loss, gw = trainfn(x, w)
w = w - (lr * gw)
print loss
self.failUnless(origloss > loss)
# NOTE(review): exact string match on a float is brittle across
# platforms and numpy versions -- confirm this is intentional.
self.failUnless('1.05191241115' == str(loss))
def test_graph_bprop_rand(self):
# Same descent check on 10 random inputs; only asserts improvement.
for i in range(10):
xorig = numpy.random.rand(3,2)
for mtype in _mtypes:
x = tensor.matrix('x')
w = Sparse(dtype = 'float64', format = _mtype_to_str[mtype]).make_result()
xw = dense_from_sparse(true_dot(w, x))
y = dense_from_sparse(true_dot(w.T, xw))
diff = x-y
loss = tensor.sum(tensor.sqr(diff))
gw = tensor.grad(loss, w)
trainfn = compile.function([x, w], [y, loss, gw])
x = xorig
w = mtype((500,3))
w[(10, 1)] = 1
w[(20, 2)] = 2
lr = 0.001
y, origloss, gw = trainfn(x, w)
for epoch in xrange(50):
y, loss, gw = trainfn(x, w)
w = w - (lr * gw)
self.failUnless(origloss > loss)
import scipy.sparse as sp
class test_structureddot(unittest.TestCase):
...
Markdown is supported
0%
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Register or sign in to comment