提交 357efa53 · 作者: James Bergstra

replaced MakeVector, VerticalStack and horizontal stacking ops with Join and Split

replaced MakeVector, VerticalStack and horizontal stacking ops with Join and Split. Rewrote numeric_grad to work properly with in-place operations and arbitrary-rank tensors
上级 fcc8197e
......@@ -533,64 +533,6 @@ DotTester = make_tester(name = 'DotTester',
# rationale: it's tricky, and necessary everytime you want to verify
# gradient numerically
# NOTE(review): this listing comes from a diff view -- leading indentation
# and +/- markers were stripped, so the block is not directly runnable as-is.
# Checks that the symbolic (analytic) gradient of `op` at the point `pt`
# matches a one-sided finite-difference estimate (gradient.numeric_grad).
# NOTE(review): the `eps` and `tol` parameters are accepted but never used
# below -- the comparison threshold is hard-coded to 1.0e-4.
def verify_grad(testcase, op, pt, n_tests=1, rng=numpy.random, eps=0.0000001, tol=0.0001,
linker='c&py'):
"""testcase.failUnless(analytic gradient matches finite-diff gradient)"""
# coerce every numeric input point to an ndarray
pt = [numpy.asarray(p) for p in pt]
for test_num in xrange(n_tests):
# tensor_pt = [as_tensor(p,name='input %i'%i) for i,p in enumerate(pt)]
# build one symbolic variable per numeric input
tensor_pt = [constant(p).type('input %i'%i) for i,p in enumerate(pt)]
#o = op.make_node(*[tpt.copy() for tpt in tensor_pt])
o = safe_make_node(op, *[tpt.copy() for tpt in tensor_pt])
# `o` may be an Apply node (with .outputs) or already a list of outputs
if hasattr(o, 'outputs'):
o_outputs = o.outputs
else:
o_outputs = o
if len(o_outputs) > 1:
raise NotImplementedError('cant (yet) autotest gradient of op with multiple outputs')
# we could make loop over outputs making random projections R for each,
# but this doesn't handle the case where not all the outputs are
# differentiable... so I leave this as TODO for now -JB.
# compile and evaluate the op itself, without optimization
o_fn = function(tensor_pt, o_outputs[0], mode=compile.Mode(optimizer = None, linker = linker))
o_fn_out = o_fn(*pt)
# a random projection reduces the (possibly tensor-valued) output to a
# scalar cost whose gradient we can check
random_projection = rng.rand(*o_fn_out.shape)
t_r = as_tensor(random_projection)
#random projection of o onto t_r
cost = sum(t_r * o_outputs[0])
cost_fn = function(tensor_pt, cost, mode=compile.Mode(optimizer = None, linker = linker))
# finite-difference gradient of the scalar cost at pt
num_grad = gradient.numeric_grad(cost_fn, pt)
symbolic_grad = grad(cost, tensor_pt,as_tensor(1.0,name='g_cost'))
# debugging aid (disabled): dump the gradient graph in topological order
if 0:
print '-------'
print '----------'
for op in gof.graph.io_toposort(tensor_pt, symbolic_grad):
print op
grad_fn = function(tensor_pt, symbolic_grad, mode=compile.Mode(optimizer = None, linker = linker))
analytic_grad = grad_fn(*pt)
# normalize to a list so num_grad.max_err can zip over it
if not isinstance(analytic_grad, (list, tuple)):
analytic_grad = [analytic_grad]
# if num_grad.max_err(analytic_grad) > 1.0e-4:
# print "aaaaaaaaaa"
# print gof.Env(tensor_pt, [cost])
# print gof.Env(tensor_pt, symbolic_grad)
# print analytic_grad
# print num_grad.gf
# print num_grad.max_err(analytic_grad)
# print "bbbbbbbbbb"
if num_grad.max_err(analytic_grad) > 1.0e-4:
raise Exception(verify_grad.E_grad)
# error message stored on the function object so callers can match on it
verify_grad.E_grad = 'gradient error exceeded tolerance'
#useful mostly for unit tests
......@@ -945,29 +887,101 @@ class T_subtensor(unittest.TestCase):
class T_Stack(unittest.TestCase):
    """Tests for horizontal_stack / vertical_stack of 2-d matrices."""

    def test_hstack(self):
        # stacking a 2x3 and a 2x1 matrix side by side yields a 2x4 matrix
        left = as_tensor(numpy.array([[1, 2, 3], [4, 5, 6]]))
        right = as_tensor(numpy.array([[7], [8]]))
        expected = numpy.array([[1, 2, 3, 7], [4, 5, 6, 8]])
        got = eval_outputs([horizontal_stack(left, right)])
        self.failUnless((got == expected).all())

    def test_vstack(self):
        # stacking a 2x3 matrix on top of a 1x3 matrix yields a 3x3 matrix
        top = as_tensor(numpy.array([[1, 2, 3], [4, 5, 6]]))
        bottom = as_tensor(numpy.array([[7, 8, 9]]))
        expected = numpy.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
        got = eval_outputs([vertical_stack(top, bottom)])
        self.failUnless((got == expected).all())
# NOTE(review): diff-view listing -- indentation and +/- markers were
# stripped, and removed/added lines are interleaved in several tests below.
class T_Join_and_Split(unittest.TestCase):
"""
Split is tested by each verify_grad method.
"""
# Minimal multi-output Op: returns an lscalar plus a copy of each input.
# Used here as a helper with one output per input plus a length output.
class Join1(Op):
def make_node(self, *inputs):
inputs = [as_tensor(t) for t in inputs]
outputs = [lscalar()] + [i.type() for i in inputs]
return Apply(self, inputs, outputs)
def perform(self, node, inputs, outputs):
# first output is a constant 1; the rest are copies of the inputs
outputs[0][0] = 1
for i,o in zip(inputs, outputs[1:]):
o[0] = i.copy()
def grad(self, inputs, g_outputs):
# gradient wrt each input is the gradient of its copied output
return g_outputs[1:]
def setUp(self):
Join.debug = False
def test_join_scalar(self):
# joining 0-d tensors must raise; passing means the test fails
a = as_tensor(1)
b = as_tensor(2)
try:
s = join(0, a, b)
except:
return
self.fail()
def test_stack_scalar(self):
# stack of scalars produces a vector
a = as_tensor(1)
b = as_tensor(2)
c = as_tensor(3)
s = stack(a, b, c)
want = numpy.array([1, 2, 3])
self.failUnless((eval_outputs([s]) == want).all())
# NOTE(review): the `test_vstack_grad` header below is a leftover removed
# line from the diff; the body that follows belongs to test_join_vector.
def test_vstack_grad(self):
def test_join_vector(self):
a = as_tensor(numpy.array([1, 2, 3]))
b = as_tensor(numpy.array([7, 8, 9]))
s = join(0, a, b)
want = numpy.array([1, 2, 3, 7, 8, 9])
self.failUnless((eval_outputs([s]) == want).all())
def test_stack_vector(self):
# stack of vectors produces a matrix (one row per input)
a = as_tensor(numpy.array([1, 2, 3]))
b = as_tensor(numpy.array([7, 8, 9]))
s = stack(a, b)
want = numpy.array([[1, 2, 3],[ 7, 8, 9]])
self.failUnless((eval_outputs([s]) == want).all())
def test_join_matrix0(self):
a = as_tensor(numpy.array([[1, 2, 3], [4, 5, 6]]))
b = as_tensor(numpy.array([[7, 8, 9]]))
# NOTE(review): the two vertical_stack lines are the removed (old) body;
# the commit replaces them with the join(0, ...) call below.
s = vertical_stack(a, b)
ga,gb = grad(sum(vertical_stack(a,b)), [a,b])
s = join(0, a, b)
want = numpy.array([[1, 2, 3],[4,5,6],[7, 8, 9]])
self.failUnless((eval_outputs([s]) == want).all())
def test_join_matrix1(self):
av=numpy.array([[1, 2, 3], [4, 5, 6]], dtype='float32')
bv= numpy.array([[7], [8]],dtype='float32')
a = as_tensor(av)
b = as_tensor(bv)
s = join(1, a, b)
want = numpy.array([[1, 2, 3, 7], [4, 5, 6, 8]], dtype='float32')
self.failUnless((eval_outputs([s]) == want).all())
# also check the analytic gradient of join along axis 1
verify_grad(self, lambda a, b: join(1,a,b), [av, bv], eps=1.0e-4, tol=1.0e-3)
def test_join_matrixV(self):
"""variable join axis"""
v = numpy.array([[1., 2., 3.], [4., 5., 6.]])
a = as_tensor(v.copy())
b = as_tensor(v.copy())
ax = lscalar()
s = join(ax, a, b)
f = function([ax], [s])
# axis 0: rows of b appended below rows of a
want = numpy.array([[1, 2, 3], [4, 5, 6] ,[1, 2, 3], [4, 5, 6]])
got = f(0)
self.failUnless((got == want).all(), (got, want))
# axis 1: columns of b appended to the right of a
want = numpy.array([[ 1, 2, 3, 1, 2, 3], [4, 5, 6, 4, 5, 6]])
got = f(1)
self.failUnless((got == want).all(), (got, want))
verify_grad(self, lambda a, b: join(0,a,b), [v, 2*v])
verify_grad(self, lambda a, b: join(1,a,b), [v, 2*v])
# NOTE(review): the three lines below are the orphaned tail of the removed
# vstack-gradient test (they reference ga/gb from test_join_matrix0's
# removed lines) -- diff interleaving artifact, not part of this method.
gval = eval_outputs([ga, gb])
self.failUnless(numpy.all(gval[0] == 1.0))
self.failUnless(numpy.all(gval[1] == 1.0))
class _test_comparison(unittest.TestCase):
......@@ -1761,10 +1775,10 @@ class T_op_cache(unittest.TestCase):
self.failUnless(numpy.all(fn_py(a) == fn_c_or_py(a)))
# Test driver.
# NOTE(review): the diff shows old and new driver lines interleaved --
# `if 1:` / `testcase = t_dot` were removed by the commit and
# `if 0:` / `testcase = AbsInplaceTester` were added in their place.
if __name__ == '__main__':
if 1:
if 0:
unittest.main()
else:
# run only a single TestCase instead of the whole module
testcase = t_dot
testcase = AbsInplaceTester
suite = unittest.TestLoader()
suite = suite.loadTestsFromTestCase(testcase)
......
"""Convenient driver of graph construction, optimization, and linking."""
import copy_reg
import cPickle
from functools import partial
import numpy
import gof
import sys
from copy import copy
import tensor_opt
def check_equal(x, y):
"""
......@@ -57,6 +60,12 @@ predefined_linkers = {
default_linker = 'c|py'
def register_linker(name, linker):
    """Add a `Linker` which can be referred to by `name` in `Mode`.

    Raises ValueError when `name` is already registered.
    """
    taken = name in predefined_linkers
    if taken:
        raise ValueError('Linker name already taken: %s' % name)
    predefined_linkers[name] = linker
# If a string is passed as the optimizer argument in the constructor
# for Mode, it will be used as the key to retrieve the real optimizer
......@@ -64,13 +73,15 @@ default_linker = 'c|py'
# Maps optimizer names to optimizer objects; `None` means "no optimization".
predefined_optimizers = {
None : lambda env: None,
'merge' : gof.MergeOptimizer(),
# NOTE(review): the 'math' entry below is the removed (pre-commit) version;
# the commit moves this registration into tensor_opt via
# compile.register_optimizer('math', ...) to break the dependency cycle.
'math' : gof.MergeOptMerge(
gof.PureThenInplaceOptimizer(tensor_opt.math_optimizer,
tensor_opt.inplace_optimizer))
}
default_optimizer = 'merge'
def register_optimizer(name, opt):
    """Add a `Optimizer` which can be referred to by `name` in `Mode`.

    Raises ValueError when `name` is already registered.
    """
    if name not in predefined_optimizers:
        predefined_optimizers[name] = opt
    else:
        raise ValueError('Optimizer name already taken: %s' % name)
class Mode(object):
"""
......@@ -110,15 +121,14 @@ class Mode(object):
# If a string is passed as the mode argument in function or
# FunctionMaker, the Mode will be taken from this dictionary using the
# string as the key
# NOTE(review): both the removed (multi-entry) and the added (FAST_COMPILE
# only) versions of this dict appear here -- the commit moves the
# 'math'-based modes into tensor_opt via compile.register_mode.
predefined_modes = {
'SANITY_CHECK' : Mode('c&py', 'math'),
'FAST_COMPILE' : Mode('py', 'merge'),
'FAST_RUN' : Mode('c|py', 'math'),
'EXPENSIVE_OPTIMIZATIONS' : Mode('c|py', 'math'),
}
default_mode = 'FAST_RUN'
predefined_modes = {'FAST_COMPILE': Mode('py', 'merge')}
default_mode = 'FAST_COMPILE'
def register_mode(name, mode):
    """Add a `Mode` which can be referred to by `name` in `function`.

    Raises ValueError when `name` is already registered.
    """
    if name in predefined_modes:
        raise ValueError('Mode name already taken: %s' % name)
    else:
        predefined_modes[name] = mode
......@@ -508,9 +518,6 @@ class FunctionMaker(object):
return fn
import copy_reg
import cPickle
def _pickle_FunctionMaker(fm):
    """copy_reg reduce helper: (reconstructor, constructor-args) for a FunctionMaker."""
    ctor_args = (fm.inputs, fm.outputs, fm.mode, fm.accept_inplace)
    return (_constructor_FunctionMaker, ctor_args)
......@@ -527,8 +534,6 @@ copy_reg.pickle(slice, _pickle_slice)
from functools import partial
DUPLICATE = ['DUPLICATE'] # unique id object used as a placeholder for duplicate entries
class Function(object):
......
......@@ -110,62 +110,4 @@ def grad_sources_inputs(sources, graph_inputs):
gmap[r] = g_r
return gmap
class numeric_grad:
    """One-sided finite-difference gradient of a scalar-valued function.

    Generalized to inputs of arbitrary rank (including 0-d scalars) by
    iterating with `numpy.ndindex`; the previous implementation raised
    NotImplementedError for rank > 2.
    """

    def __init__(self, f, pt, eps=1.0e-7):
        """Compute the gradient of f at pt.

        :param f: callable; f(*pt) is assumed to return a scalar
        :param pt: list of numpy.ndarray arguments to `f`; they are
            perturbed in place during differentiation but restored to
            their original values before returning
        :param eps: fixed step size of the one-sided finite difference

        The estimates are stored in `self.gf`, one float array per
        element of `pt`, each with the same shape as its input.
        """
        # NOTE(review): assumes float-dtype inputs -- perturbing an int
        # array in place by eps would silently truncate. TODO confirm
        # callers (verify_grad) always pass float arrays.
        gf = [numpy.zeros(p.shape) for p in pt]
        f_pt = f(*pt)
        # Copy the baseline output so in-place operations inside f cannot
        # corrupt it.  (Bug fix: the original tested isinstance(f, ...)
        # instead of isinstance(f_pt, ...), so the list branch was dead.)
        if isinstance(f_pt, (list, tuple)):
            f_pt = [numpy.copy(x) for x in f_pt]
        else:
            f_pt = numpy.copy(f_pt)
        for i, p in enumerate(pt):
            # ndindex enumerates every coordinate of any rank; for a 0-d
            # array it yields the single empty index (), and p[()] is a
            # valid element access, so scalars need no special case.
            for index in numpy.ndindex(*p.shape):
                orig = p[index]
                p[index] = orig + eps
                f_eps = f(*pt)
                gf[i][index] = numpy.asarray((f_eps - f_pt) / eps)
                p[index] = orig  # restore the perturbed element
        self.gf = gf

    @staticmethod
    def abs_rel_err(a, b, eps=1.0e-10):
        """Return a small number when a and b are close, relative to how big they are"""
        return abs(a - b) / (abs(a) + abs(b) + eps)

    def max_err(self, g_pt):
        """Return the biggest relative error between g_pt and self.gf"""
        assert len(g_pt) == len(self.gf)
        errs = [numpy.max(numeric_grad.abs_rel_err(a, b))
                for a, b in zip(g_pt, self.gf)]
        return max(errs)
差异被折叠。
......@@ -8,6 +8,7 @@ import numpy as N
import operator
import itertools
import sys
import compile #to register the optimizer built by this file
# Utilities
......@@ -32,9 +33,10 @@ gemm_pattern_1 = gof.PatternSub((T._sub_inplace,
# gemm: (d,a,b,c,s) -> d = d*s + a*dot(b,c)
# Transforms dot(a, b) into gemm(zeros(2)(hstack(shape(a)[:1], shape(b)[1:])), 1.0, a, b, 1.0)
# The construction of the 'gemm' node may fail if, for example, a and b are not both matrices.
dot_to_gemm = gof.PatternSub((T.dot, 'a', 'b'),
(T.gemm, (T.Zeros(2),
(T.vertical_stack,
(T.stack,
(T.Subtensor([slice(0, 1)]), (T.shape, 'a')),
(T.Subtensor([slice(1, 2)]), (T.shape, 'b')))),
T.constant(1.0), 'a', 'b', T.constant(1.0)),
......@@ -231,7 +233,15 @@ def local_subtensor_make_vector(node):
If the index or slice is constant.
"""
if not opt.check_chain(node, T.Subtensor, T.MakeVector):
if not opt.check_chain(node, T.Subtensor, T.Join):
return False
joined_r = node.inputs[0]
try:
#check that join is being used to join scalars
veclen = T.join.vec_length(joined_r)
except:
return False
idxlist = node.op.idx_list
......@@ -644,6 +654,16 @@ def _math_optimizer():
# Build the module-level math optimizer once at import time.
math_optimizer = _math_optimizer()
# Register the tensor math optimizations with the compile module so that
# the string 'math' resolves to this optimizer in Mode lookups.
compile.register_optimizer('math',
gof.MergeOptMerge(
gof.PureThenInplaceOptimizer(
math_optimizer,
inplace_optimizer)))
# These modes previously lived in compile.predefined_modes; registering
# them here keeps the compile module free of a dependency on tensor_opt.
compile.register_mode('SANITY_CHECK', compile.Mode('c&py', 'math'))
compile.register_mode('FAST_RUN', compile.Mode('c|py', 'math'))
compile.register_mode('EXPENSIVE_OPTIMIZATIONS', compile.Mode('c|py', 'math'))
# @gof.local_optimizer
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论