* fixed gradient of CSM to support "kernel map"

* bug fix: force todense of sparse matrices in check_equal, as two sparse matrices are never equal in scipy 0.6
* numeric_grad no longer cares that all parameters are of the same type
* new Reshape optimization (@todo: reshape-flatten optimization)
Parent commit: 5c05a90c
import numpy import numpy
import scipy.sparse as sp
from .. import gof from .. import gof
def check_equal(x, y): def check_equal(x, y):
...@@ -8,6 +9,13 @@ def check_equal(x, y): ...@@ -8,6 +9,13 @@ def check_equal(x, y):
shape if x and y are numpy.ndarray instances). Used internally. shape if x and y are numpy.ndarray instances). Used internally.
""" """
x, y = x[0], y[0] x, y = x[0], y[0]
# TODO: bug in current scipy, two sparse matrices are never equal, remove when moving to 0.7
if sp.issparse(x):
x = x.todense()
if sp.issparse(y):
y = y.todense()
if isinstance(x, numpy.ndarray) and isinstance(y, numpy.ndarray): if isinstance(x, numpy.ndarray) and isinstance(y, numpy.ndarray):
if x.dtype != y.dtype or x.shape != y.shape or numpy.any(abs(x - y) > 1e-10): if x.dtype != y.dtype or x.shape != y.shape or numpy.any(abs(x - y) > 1e-10):
raise Exception("Output mismatch.", {'performlinker': x, 'clinker': y}) raise Exception("Output mismatch.", {'performlinker': x, 'clinker': y})
......
...@@ -205,20 +205,12 @@ class CSMProperties(gof.Op): ...@@ -205,20 +205,12 @@ class CSMProperties(gof.Op):
self.map = map self.map = map
def make_node(self, csm):
    """Build an Apply extracting (data, indices, indptr, shape) from `csm`."""
    csm = as_sparse(csm)
    # The data output keeps the sparse matrix's dtype and is a plain 1-d
    # (non-broadcastable) tensor; the three structural outputs are int vectors.
    data = tensor.Tensor(dtype=csm.type.dtype, broadcastable=(False,)).make_result()
    outputs = [data, tensor.ivector(), tensor.ivector(), tensor.ivector()]
    return gof.Apply(self, [csm], outputs)
def perform(self, node, (csm,), out): def perform(self, node, (csm,), out):
if 0:
print '******* sp:CSMProperties:perform *******'
print 'self.map = ', self.map
print 'csm.data = ', csm.data
print 'size(csm.data) = ', numpy.size(csm.data)
print 'csm.todense.shape = ', csm.todense().shape
print 'type(csm) = ', type(csm)
out[0][0] = csm.data if self.map is None else csm.data[self.map] out[0][0] = csm.data if self.map is None else csm.data[self.map]
out[1][0] = numpy.asarray(csm.indices, dtype='int32') out[1][0] = numpy.asarray(csm.indices, dtype='int32')
out[2][0] = numpy.asarray(csm.indptr, dtype='int32') out[2][0] = numpy.asarray(csm.indptr, dtype='int32')
...@@ -226,16 +218,15 @@ class CSMProperties(gof.Op): ...@@ -226,16 +218,15 @@ class CSMProperties(gof.Op):
def grad(self, inputs, g):
    """Gradient wrt the sparse input.

    Rebuilds a sparse matrix with the same structure (indices, indptr,
    shape) as the input, carrying the gradient on the data vector.  The
    structural outputs are discrete and must receive no gradient.
    """
    csm, = inputs
    # Bug fix: the original `assert [gg is None for gg in g[1:]]` asserted a
    # non-empty list, which is always true; use all() to actually check.
    assert all(gg is None for gg in g[1:])
    # Bug fix: `g_data` was previously undefined; the gradient on the data
    # output is the first element of g.
    g_data = g[0]
    _data, indices, indptr, shape = csm_properties(csm)
    # Bug fix: the csr branch called CSR('csm')(...), i.e. invoked the CSR op
    # *instance* with the string 'csm' as its data input; construct the op
    # with the matching format instead.
    fmt = 'csc' if csm.format == 'csc' else 'csr'
    return [CSM(fmt)(g_data, indices, indptr, shape)]
def csm_properties(csm):
    """Return the (data, indices, indptr, shape) results of sparse result `csm`."""
    return CSMProperties()(csm)


def csm_data(csm):
    """The data vector of sparse result `csm`."""
    return csm_properties(csm)[0]


def csm_indices(csm):
    """The indices vector of sparse result `csm`."""
    return csm_properties(csm)[1]


def csm_indptr(csm):
    """The indptr vector of sparse result `csm`."""
    return csm_properties(csm)[2]


def csm_shape(csm):
    """The shape vector of sparse result `csm`."""
    return csm_properties(csm)[3]
...@@ -251,7 +242,7 @@ class CSM(gof.Op): ...@@ -251,7 +242,7 @@ class CSM(gof.Op):
self.format = format self.format = format
# for efficiency, if remap does nothing, then do not apply it # for efficiency, if remap does nothing, then do not apply it
if map is not None and all(map==N.arange(N.size(map))): if map is not None and all(map==numpy.arange(numpy.size(map))):
map = None map = None
self.map = map self.map = map
...@@ -296,15 +287,7 @@ class CSM(gof.Op): ...@@ -296,15 +287,7 @@ class CSM(gof.Op):
def perform(self, node, (data, indices, indptr, shape), (out,)): def perform(self, node, (data, indices, indptr, shape), (out,)):
"""Build a csc_matrix""" """Build a csc_matrix"""
#assert len(data.flatten()) == len(indices.flatten()) #assert len(data.flatten()) == len(indices.flatten())
if 0:
print '********** sp:CSM:perform ***********'
print 'data =', data.__repr__()
print 'size(data) = ', numpy.size(data)
print 'kmap =', self.map.__repr__()
data = data[self.map] if self.map!=None else data data = data[self.map] if self.map!=None else data
if 0:
print 'data[kmap] =', data.__repr__()
if len(shape) != 2: if len(shape) != 2:
raise ValueError('Shape should be an array of length 2') raise ValueError('Shape should be an array of length 2')
...@@ -321,18 +304,30 @@ class CSM(gof.Op): ...@@ -321,18 +304,30 @@ class CSM(gof.Op):
shape.copy(), shape.copy(),
copy = False #1000*len(data.flatten()) copy = False #1000*len(data.flatten())
) )
if 0:
print 'out[0] = ', out[0].todense().__repr__()
def grad(self, inputs, g_outputs):
    """Return a gradient on the data vector (None for the structural inputs)."""
    data, indices, indptr, shape = inputs
    g_out, = g_outputs
    # Unpack the data vector of the output gradient and map it back onto the
    # input data vector via CSMGrad (honouring this op's kernel map).
    g_data = csm_grad(self.map)(data, csm_data(g_out), csm_indices(g_out))
    return [g_data, None, None, None]
# Convenience op instances producing CSC / CSR matrices (no kernel map).
CSC = CSM('csc')
CSR = CSM('csr')
class CSMGrad(gof.op.Op):
    """Gradient of CSM with respect to its data vector.

    Scatters the gradient on the (possibly kernel-mapped) output data back
    onto a zero vector shaped like the input data: positions selected by
    `map` receive the corresponding output gradients, every other position
    stays zero.
    """
    def __init__(self, map=None):
        # map: index array ("kernel map") selecting which entries of the data
        # vector CSM used, or None for the identity mapping.
        self.map = map

    def make_node(self, data, gout_data, gout_indices):
        # The gradient result has the same type as the input data vector.
        # NOTE(review): gout_indices is carried as an input but not read in
        # perform below — presumably kept for future structural handling.
        g_data = data.type()
        return gof.Apply(self, [data, gout_data, gout_indices], [g_data])

    def perform(self, node, inputs, outputs):
        data, gout_data, gout_indices = inputs
        g_data, = outputs
        grad = numpy.zeros_like(data)
        if self.map is None:
            # Bug fix: with map=None the original executed
            # `grad[None] = gout_data`, which indexes with a new axis and only
            # works through accidental broadcasting. Assign explicitly (this
            # also casts gout_data to data's dtype, as before).
            grad[...] = gout_data
        else:
            grad[self.map] = gout_data
        g_data[0] = grad

csm_grad = CSMGrad
@gof.local_optimizer([csm_properties]) @gof.local_optimizer([csm_properties])
def skip_pack_csc01(node): def skip_pack_csc01(node):
"""if we find csm_properties(CSM(*args)), then we can replace that with the *args """if we find csm_properties(CSM(*args)), then we can replace that with the *args
......
...@@ -2156,8 +2156,10 @@ class numeric_grad: ...@@ -2156,8 +2156,10 @@ class numeric_grad:
shapes = [p.shape for p in apt] shapes = [p.shape for p in apt]
dtypes = [str(p.dtype) for p in apt] dtypes = [str(p.dtype) for p in apt]
if not dtypes == [dtypes[0]] * len(apt): # TODO: remove this eventually (why was this here in the first place ?)
raise TypeError('All function arguments must have same dtype') # In the case of CSM, the arguments are a mixture of floats and integers...
#if not dtypes == [dtypes[0]] * len(apt):
#raise TypeError('All function arguments must have same dtype')
total_size = __builtin__.sum(prod(sh) for sh in shapes) total_size = __builtin__.sum(prod(sh) for sh in shapes)
...@@ -2214,8 +2216,8 @@ def verify_grad(testcase, op, pt, n_tests=1, rng=numpy.random, eps=1.0e-7, tol=0 ...@@ -2214,8 +2216,8 @@ def verify_grad(testcase, op, pt, n_tests=1, rng=numpy.random, eps=1.0e-7, tol=0
testcase.failUnless(analytic gradient matches finite-diff gradient) testcase.failUnless(analytic gradient matches finite-diff gradient)
:param pt: the list of numpy.ndarrays to use as inputs to the op :param pt: the list of numpy.ndarrays to use as inputs to the op
:param op: something that behaves like an Op instance with a single output (can be a :param op: something that behaves like an Op instance with a single output
function) (can be a python function combining multiple ops)
:param testcase: the thing to call `fail` on if things go awry. :param testcase: the thing to call `fail` on if things go awry.
""" """
...@@ -2264,7 +2266,7 @@ def verify_grad(testcase, op, pt, n_tests=1, rng=numpy.random, eps=1.0e-7, tol=0 ...@@ -2264,7 +2266,7 @@ def verify_grad(testcase, op, pt, n_tests=1, rng=numpy.random, eps=1.0e-7, tol=0
#print "PT D", pt #print "PT D", pt
analytic_grad = grad_fn(*pt) analytic_grad = grad_fn(*pt)
#print "PT Z", pt #print "PT Z", pt
if not isinstance(analytic_grad, (list, tuple)): if not isinstance(analytic_grad, (list, tuple)):
analytic_grad = [analytic_grad] analytic_grad = [analytic_grad]
...@@ -2277,4 +2279,3 @@ def verify_grad(testcase, op, pt, n_tests=1, rng=numpy.random, eps=1.0e-7, tol=0 ...@@ -2277,4 +2279,3 @@ def verify_grad(testcase, op, pt, n_tests=1, rng=numpy.random, eps=1.0e-7, tol=0
verify_grad.E_grad = 'gradient error exceeded tolerance' verify_grad.E_grad = 'gradient error exceeded tolerance'
"""This error is raised when a gradient is calculated, but incorrect.""" """This error is raised when a gradient is calculated, but incorrect."""
...@@ -285,6 +285,23 @@ def local_inplace_setsubtensor(node): ...@@ -285,6 +285,23 @@ def local_inplace_setsubtensor(node):
return False return False
compile.optdb.register('inplace_setsubtensor', TopoOptimizer(local_inplace_setsubtensor), 60, 'fast_run', 'inplace') #DEBUG compile.optdb.register('inplace_setsubtensor', TopoOptimizer(local_inplace_setsubtensor), 60, 'fast_run', 'inplace') #DEBUG
##################
# Reshape opts   #
##################

@gof.local_optimizer([None, None])
def local_reshape_chain(node):
    """Collapse nested reshapes.

    Reshape(Reshape(x, shape1), shape2) -> Reshape(x, shape2): only the
    outermost target shape matters, so the inner reshape can be skipped.
    """
    if not opt.check_chain(node, T.Reshape, T.Reshape):
        return False
    inner_input = node.inputs[0].owner.inputs[0]
    target_shape = node.inputs[1]
    return [node.op(inner_input, target_shape)]

register_canonicalize(local_reshape_chain)
################## ##################
# Middleman cuts # # Middleman cuts #
################## ##################
......
Markdown format
0%
You added 0 people to this discussion. Please proceed with caution.
Please finish editing this comment first!
Register or sign in to comment