提交 357efa53 authored 作者: James Bergstra's avatar James Bergstra

replaced MakeVector, VerticalStack and horizontal stacking ops with Join and…

replaced MakeVector, VerticalStack and horizontal stacking ops with Join and Split. Rewrote numeric_grad to work properly with in-place operations and arbitrary-rank tensors
上级 fcc8197e
......@@ -533,64 +533,6 @@ DotTester = make_tester(name = 'DotTester',
# rationale: it's tricky, and necessary everytime you want to verify
# gradient numerically
def verify_grad(testcase, op, pt, n_tests=1, rng=numpy.random, eps=0.0000001, tol=0.0001,
linker='c&py'):
"""testcase.failUnless(analytic gradient matches finite-diff gradient)"""
pt = [numpy.asarray(p) for p in pt]
for test_num in xrange(n_tests):
# tensor_pt = [as_tensor(p,name='input %i'%i) for i,p in enumerate(pt)]
tensor_pt = [constant(p).type('input %i'%i) for i,p in enumerate(pt)]
#o = op.make_node(*[tpt.copy() for tpt in tensor_pt])
o = safe_make_node(op, *[tpt.copy() for tpt in tensor_pt])
if hasattr(o, 'outputs'):
o_outputs = o.outputs
else:
o_outputs = o
if len(o_outputs) > 1:
raise NotImplementedError('cant (yet) autotest gradient of op with multiple outputs')
# we could make loop over outputs making random projections R for each,
# but this doesn't handle the case where not all the outputs are
# differentiable... so I leave this as TODO for now -JB.
o_fn = function(tensor_pt, o_outputs[0], mode=compile.Mode(optimizer = None, linker = linker))
o_fn_out = o_fn(*pt)
random_projection = rng.rand(*o_fn_out.shape)
t_r = as_tensor(random_projection)
#random projection of o onto t_r
cost = sum(t_r * o_outputs[0])
cost_fn = function(tensor_pt, cost, mode=compile.Mode(optimizer = None, linker = linker))
num_grad = gradient.numeric_grad(cost_fn, pt)
symbolic_grad = grad(cost, tensor_pt,as_tensor(1.0,name='g_cost'))
if 0:
print '-------'
print '----------'
for op in gof.graph.io_toposort(tensor_pt, symbolic_grad):
print op
grad_fn = function(tensor_pt, symbolic_grad, mode=compile.Mode(optimizer = None, linker = linker))
analytic_grad = grad_fn(*pt)
if not isinstance(analytic_grad, (list, tuple)):
analytic_grad = [analytic_grad]
# if num_grad.max_err(analytic_grad) > 1.0e-4:
# print "aaaaaaaaaa"
# print gof.Env(tensor_pt, [cost])
# print gof.Env(tensor_pt, symbolic_grad)
# print analytic_grad
# print num_grad.gf
# print num_grad.max_err(analytic_grad)
# print "bbbbbbbbbb"
if num_grad.max_err(analytic_grad) > 1.0e-4:
raise Exception(verify_grad.E_grad)
verify_grad.E_grad = 'gradient error exceeded tolerance'
#useful mostly for unit tests
......@@ -945,29 +887,101 @@ class T_subtensor(unittest.TestCase):
class T_Stack(unittest.TestCase):
def test_hstack(self):
a = as_tensor(numpy.array([[1, 2, 3], [4, 5, 6]]))
b = as_tensor(numpy.array([[7], [8]]))
s = horizontal_stack(a, b)
c = numpy.array([[1, 2, 3, 7], [4, 5, 6, 8]])
self.failUnless((eval_outputs([s]) == c).all())
def test_vstack(self):
a = as_tensor(numpy.array([[1, 2, 3], [4, 5, 6]]))
b = as_tensor(numpy.array([[7, 8, 9]]))
s = vertical_stack(a, b)
c = numpy.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
self.failUnless((eval_outputs([s]) == c).all())
class T_Join_and_Split(unittest.TestCase):
"""
Split is tested by each verify_grad method.
"""
class Join1(Op):
def make_node(self, *inputs):
inputs = [as_tensor(t) for t in inputs]
outputs = [lscalar()] + [i.type() for i in inputs]
return Apply(self, inputs, outputs)
def perform(self, node, inputs, outputs):
outputs[0][0] = 1
for i,o in zip(inputs, outputs[1:]):
o[0] = i.copy()
def grad(self, inputs, g_outputs):
return g_outputs[1:]
def setUp(self):
Join.debug = False
def test_join_scalar(self):
a = as_tensor(1)
b = as_tensor(2)
try:
s = join(0, a, b)
except:
return
self.fail()
def test_stack_scalar(self):
a = as_tensor(1)
b = as_tensor(2)
c = as_tensor(3)
s = stack(a, b, c)
want = numpy.array([1, 2, 3])
self.failUnless((eval_outputs([s]) == want).all())
def test_vstack_grad(self):
def test_join_vector(self):
a = as_tensor(numpy.array([1, 2, 3]))
b = as_tensor(numpy.array([7, 8, 9]))
s = join(0, a, b)
want = numpy.array([1, 2, 3, 7, 8, 9])
self.failUnless((eval_outputs([s]) == want).all())
def test_stack_vector(self):
a = as_tensor(numpy.array([1, 2, 3]))
b = as_tensor(numpy.array([7, 8, 9]))
s = stack(a, b)
want = numpy.array([[1, 2, 3],[ 7, 8, 9]])
self.failUnless((eval_outputs([s]) == want).all())
def test_join_matrix0(self):
a = as_tensor(numpy.array([[1, 2, 3], [4, 5, 6]]))
b = as_tensor(numpy.array([[7, 8, 9]]))
s = vertical_stack(a, b)
ga,gb = grad(sum(vertical_stack(a,b)), [a,b])
s = join(0, a, b)
want = numpy.array([[1, 2, 3],[4,5,6],[7, 8, 9]])
self.failUnless((eval_outputs([s]) == want).all())
def test_join_matrix1(self):
av=numpy.array([[1, 2, 3], [4, 5, 6]], dtype='float32')
bv= numpy.array([[7], [8]],dtype='float32')
a = as_tensor(av)
b = as_tensor(bv)
s = join(1, a, b)
want = numpy.array([[1, 2, 3, 7], [4, 5, 6, 8]], dtype='float32')
self.failUnless((eval_outputs([s]) == want).all())
verify_grad(self, lambda a, b: join(1,a,b), [av, bv], eps=1.0e-4, tol=1.0e-3)
def test_join_matrixV(self):
"""variable join axis"""
v = numpy.array([[1., 2., 3.], [4., 5., 6.]])
a = as_tensor(v.copy())
b = as_tensor(v.copy())
ax = lscalar()
s = join(ax, a, b)
f = function([ax], [s])
want = numpy.array([[1, 2, 3], [4, 5, 6] ,[1, 2, 3], [4, 5, 6]])
got = f(0)
self.failUnless((got == want).all(), (got, want))
want = numpy.array([[ 1, 2, 3, 1, 2, 3], [4, 5, 6, 4, 5, 6]])
got = f(1)
self.failUnless((got == want).all(), (got, want))
verify_grad(self, lambda a, b: join(0,a,b), [v, 2*v])
verify_grad(self, lambda a, b: join(1,a,b), [v, 2*v])
gval = eval_outputs([ga, gb])
self.failUnless(numpy.all(gval[0] == 1.0))
self.failUnless(numpy.all(gval[1] == 1.0))
class _test_comparison(unittest.TestCase):
......@@ -1761,10 +1775,10 @@ class T_op_cache(unittest.TestCase):
self.failUnless(numpy.all(fn_py(a) == fn_c_or_py(a)))
if __name__ == '__main__':
if 1:
if 0:
unittest.main()
else:
testcase = t_dot
testcase = AbsInplaceTester
suite = unittest.TestLoader()
suite = suite.loadTestsFromTestCase(testcase)
......
"""Convenient driver of graph construction, optimization, and linking."""
import copy_reg
import cPickle
from functools import partial
import numpy
import gof
import sys
from copy import copy
import tensor_opt
def check_equal(x, y):
"""
......@@ -57,6 +60,12 @@ predefined_linkers = {
default_linker = 'c|py'
def register_linker(name, linker):
"""Add a `Linker` which can be referred to by `name` in `Mode`."""
if name in predefined_linkers:
raise ValueError('Linker name already taken: %s' % name)
predefined_linkers[name] = linker
# If a string is passed as the optimizer argument in the constructor
# for Mode, it will be used as the key to retrieve the real optimizer
......@@ -64,13 +73,15 @@ default_linker = 'c|py'
predefined_optimizers = {
None : lambda env: None,
'merge' : gof.MergeOptimizer(),
'math' : gof.MergeOptMerge(
gof.PureThenInplaceOptimizer(tensor_opt.math_optimizer,
tensor_opt.inplace_optimizer))
}
default_optimizer = 'merge'
def register_optimizer(name, opt):
"""Add a `Optimizer` which can be referred to by `name` in `Mode`."""
if name in predefined_optimizers:
raise ValueError('Optimizer name already taken: %s' % name)
predefined_optimizers[name] = opt
class Mode(object):
"""
......@@ -110,15 +121,14 @@ class Mode(object):
# If a string is passed as the mode argument in function or
# FunctionMaker, the Mode will be taken from this dictionary using the
# string as the key
predefined_modes = {
'SANITY_CHECK' : Mode('c&py', 'math'),
'FAST_COMPILE' : Mode('py', 'merge'),
'FAST_RUN' : Mode('c|py', 'math'),
'EXPENSIVE_OPTIMIZATIONS' : Mode('c|py', 'math'),
}
default_mode = 'FAST_RUN'
predefined_modes = {'FAST_COMPILE': Mode('py', 'merge')}
default_mode = 'FAST_COMPILE'
def register_mode(name, mode):
"""Add a `Mode` which can be referred to by `name` in `function`."""
if name in predefined_modes:
raise ValueError('Mode name already taken: %s' % name)
predefined_modes[name] = mode
......@@ -508,9 +518,6 @@ class FunctionMaker(object):
return fn
import copy_reg
import cPickle
def _pickle_FunctionMaker(fm):
return (_constructor_FunctionMaker, (fm.inputs, fm.outputs, fm.mode, fm.accept_inplace))
......@@ -527,8 +534,6 @@ copy_reg.pickle(slice, _pickle_slice)
from functools import partial
DUPLICATE = ['DUPLICATE'] # unique id object used as a placeholder for duplicate entries
class Function(object):
......
......@@ -110,62 +110,4 @@ def grad_sources_inputs(sources, graph_inputs):
gmap[r] = g_r
return gmap
class numeric_grad:
def __init__(self, f, pt, eps=1.0e-7):
"""Return the gradient of f at pt.
This function computes the gradient by a one-sided finite differences of a
fixed step size (eps).
It is assumed that f(...) will return a scalar.
It is assumed that all f's inputs are numpy.ndarray objects.
"""
gf = [numpy.ndarray(x.shape) for x in pt]
f_pt = f(*pt)
if isinstance(f, (list, tuple)):
f_pt = [numpy.copy(x) for x in f_pt]
else:
f_pt = numpy.copy(f_pt)
for idx in xrange(len(gf)):
if len(pt[idx].shape) == 0:
orig = pt[idx]
pt[idx] = numpy.asarray(pt[idx] + eps)
f_eps = f(*pt)
gf[idx] = numpy.asarray((f_eps - f_pt)/eps)
pt[idx] = orig
elif len(pt[idx].shape) == 1:
for i in xrange(pt[idx].shape[0]):
orig = pt[idx][i]
pt[idx][i] = pt[idx][i] + eps
f_eps = f(*pt)
gf[idx][i] = numpy.asarray((f_eps - f_pt)/eps)
pt[idx][i] = orig
elif len(pt[idx].shape) == 2:
for i in xrange(pt[idx].shape[0]):
for j in xrange(pt[idx].shape[1]):
orig = pt[idx][i,j]
pt[idx][i,j] = pt[idx][i,j] + eps
f_eps = f(*pt)
gf[idx][i,j] = numpy.asarray((f_eps - f_pt)/eps)
pt[idx][i,j] = orig
else:
raise NotImplementedError()
self.gf = gf
@staticmethod
def abs_rel_err(a,b,eps=1.0e-10):
"""Return a small number when a and b are close, relative to how big they are"""
return abs(a-b) / (abs(a)+abs(b)+eps)
def max_err(self, g_pt):
"""Return the biggest relative error between g_pt and self.gf"""
assert len(g_pt) == len(self.gf)
errs = []
for a, b in zip(g_pt, self.gf):
errs.append(numpy.max(numeric_grad.abs_rel_err(a,b)))
return max(errs)
......@@ -2,6 +2,7 @@
__docformat__ = "restructuredtext en"
import __builtin__
import sys # for sys.maxint
import inspect
import functools
......@@ -20,6 +21,8 @@ import elemwise
import scalar as scal
from gof.python25 import partial
import compile
### set up the external interface
from elemwise import Elemwise, DimShuffle, CAReduce, Sum
......@@ -91,7 +94,7 @@ def constant(x):
except:
raise TypeError("Could not convert %s to Tensor" % x, type(x))
def value(x):
def value(x, name=None):
"""Return a symbolic `Value` with default value `x`
:Exceptions:
......@@ -102,8 +105,12 @@ def value(x):
else:
x_ = numpy.asarray(x)
try:
return TensorValue(Tensor(dtype = x_.dtype,
if name is None:
return TensorValue(Tensor(dtype = x_.dtype,
broadcastable = [d == 1 for d in x_.shape]), x_)
else:
return TensorValue(Tensor(dtype = x_.dtype,
broadcastable = [d == 1 for d in x_.shape]), x_, name=name)
except:
raise TypeError("Could not convert %s to Tensor" % x, type(x))
......@@ -203,7 +210,7 @@ class Tensor(Type):
- `name`: str
A pretty name to identify this `Result` when printing and debugging
"""
"""
return TensorResult(self, name = name)
def __str__(self):
......@@ -478,6 +485,18 @@ class _tensor_py_operators:
raise TypeError('Tensor does not support iteration. '
'Maybe you are using builtin.sum instead of theano.tensor.sum? (Maybe .max?)')
# CONVENIENT ACCESS TO TYPE PROPERTIES
ndim = property(lambda self: self.type.ndim)
"""The rank of this tensor."""
broadcastable = property(lambda self: self.type.broadcastable)
"""The broadcastable signature of this tensor.
See :doc:`broadcasting` for details.
"""
dtype = property(lambda self: self.type.dtype)
""" The dtype of this tensor. """
class TensorResult(Result, _tensor_py_operators):
......@@ -1311,32 +1330,101 @@ class SetSubtensor(Subtensor):
x.__setitem__(cdata, y)
out[0] = x
class Split(Op):
"""Partition a `TensorResult` along some axis.
class MakeVector(Op):
"""WRITEME"""
def __init__(self, stype):
self.stype = stype
def make_node(self, *inputs):
inputs = map(as_tensor, inputs)
assert all(a.type == self.stype for a in inputs)
return Apply(self, inputs, [Tensor(broadcastable = (False,),
dtype = self.stype.dtype)()])
def perform(self, node, inputs, (out,)):
out[0] = numpy.asarray(inputs)
def grad(self, inputs, (gout,)):
return [None]*len(inputs)
make_lvector = MakeVector(lscalar)
"""WRITEME"""
.. python::
x = vector()
splits = lvector()
# you have to declare right away how many split_points there will be.
ra, rb, rc = split(x, axis=0, points=splits, n_splits=3)
f = compile([x, splits], [ra, rb, rc])
a, b, c = f([0,1,2,3,4,5,6], [3, 2, 1])
class Concatenate(Op):
#a == [0,1,2]
#b == [3, 4]
#c == [5]
"""
len_splits = None
"""A Split instance will have this many outputs, and require that the splits argument to
`perform` have exactly this many elements.
"""
def __init__(self, len_splits):
self.len_splits = int(len_splits)
def make_node(self, x, axis, splits):
"""WRITEME"""
x = as_tensor(x)
axis = as_tensor(axis)
splits = as_tensor(splits)
if splits.type != lvector:
raise TypeError('splits must have type tensor.lvector', splits.type)
if axis.type != lscalar:
raise TypeError('axis must have type lscalar', axis.type)
inputs = [x, axis, splits]
outputs = [x.type() for i in xrange(self.len_splits)]
return Apply(self, inputs, outputs)
def perform(self, node, (x, axis, splits), outputs):
"""WRITEME"""
try:
len_along_axis = x.shape[axis]
except :
raise ValueError('Split.perform() with axis=(%s) is invalid for x.shape==(%s)'
%(axis, x.shape))
if len(splits) != self.len_splits:
raise ValueError('In Split.perform(), len(splits) != len_splits.',
(len(splits), self.len_splits))
# Checking is done, lets roll the splitting algorithm!
# Basically we step along the given axis of x, extracting subtensors of size splits[i]
# as we go along.
general_key = [slice(None, None, None) for s in x.shape]
lower_idx = 0
for i in xrange(self.len_splits):
upper_idx = lower_idx + splits[i]
general_key[axis] = slice(lower_idx, upper_idx, None)
outputs[i][0] = x.__getitem__(general_key).copy()
lower_idx = upper_idx
def grad(self, (x, axis, splits), g_outputs):
"""Join the gradients along the axis that was used to split x."""
return [join(axis, *g_outputs), None, None]
class Join(Op):
"""
Concatenate two L{Tensor}s along the given axis.
These L{Tensor}s must have the same shape along all dimensions other than
this axis.
Concatenate two `TensorResult`s along some axis.
These tensors must have the same shape along all dimensions other than this axis.
Of course, TensorResult instances don't have a shape, so this error can't be caught until
runtime. See `perform()`.
.. python::
x, y, z = tensor.matrix(), tensor.matrix(), tensor.matrix()
u = tensor.vector()
r = join(0, x, y, z)
c = join(1, x, y, z)
join(2, x, y, z) # WRONG: the axis has to be an index into the shape
join(0, x, y) # WRONG: tensors have to have the same rank to be joined
"""
def make_node(self, *axis_and_tensors):
"""
WRITEME
"""
axis, tensors = axis_and_tensors[0], axis_and_tensors[1:]
as_tensor_args= [as_tensor(x) for x in tensors]
dtypes = [x.type.dtype for x in as_tensor_args]
......@@ -1364,17 +1452,36 @@ class Concatenate(Op):
bcastable[:] = as_tensor_args[0].type.broadcastable
bcastable[axis] = False
inputs = [scal.as_scalar(axis)] + as_tensor_args
inputs = [as_tensor(axis)] + as_tensor_args
if inputs[0].type != lscalar:
raise TypeError('Axis could not be cast to lscalar', axis)
outputs = [tensor(dtype = dtypes[0],
broadcastable = bcastable)]
return Apply(self, inputs, outputs)
def perform(self, node, axis_and_tensors, (out, )):
"""
WRITEME
"""
axis, tensors = axis_and_tensors[0], axis_and_tensors[1:]
out[0] = numpy.concatenate(tensors, axis = axis)
def grad(self, axis_and_tensors, (gz,)):
""" The gradient wrt a join op is a `Split`, used to partition the gradient along the
`axis` which was used for joining.
"""
axis, tensors = axis_and_tensors[0], axis_and_tensors[1:]
if 'float' in tensors[0].dtype or 'complex' in tensors[0].dtype:
# assume that this isn't differentiable
split = Split(len(tensors))
return [None] + split(gz, axis, stack(*[shape(x)[axis] for x in tensors]))
else:
# assume that this isn't differentiable
return [None] * (1 + len(tensors))
def _native_grad(self, axis_and_tensors, (gz,)):
"""WRITEME"""
axis, tensors = axis_and_tensors[0], axis_and_tensors[1:]
n_dims = len(shape(tensors[0]))
sizes_along_axis = [shape(x)[axis] for x in tensors]
......@@ -1387,91 +1494,172 @@ class Concatenate(Op):
[slice(None)] * (n_dims - axis - 1)] \
for k in range(len(sizes_along_axis))]
def get_vector_length(v):
if isinstance(v, gof.Constant) and v.type.ndim == 1:
return len(v.data)
elif v.owner and isinstance(v.owner.op, MakeVector):
return len(v.owner.inputs)
elif v.owner and v.owner.op == shape:
return v.owner.inputs[0].type.ndim
else:
return None
def concatenate(tensors, axis=0):
def vec_length(self, node):
assert isinstance(node.owner.op, Join)
if node.ndim != 1:
raise TypeError('argument must be symbolic vector')
inputs = node.owner.inputs
axis, tensors = inputs[0], inputs[1]
# if v is a vector, axis must be 0
# the question is whether all the inputs are broadcastable.
if all(i.broadcastable[0] for i in tensors):
return len(tensors)
@_redefine_asRoutine(Join())
def join(axis, *tensors):
"""
Convenience function to concatenate `Tensor`s along the given axis.
The `axis` parameter may either be an integer or an object that can be
converted to a scalar using `as_scalar`(`axis`). In the former case,
the axis is fixed at construction, while in the latter it may vary over
time depending on the value of the `axis` variable.
:Parameters:
- `tensors` : list of tensors (or list-like)
A list of tensors to be concatenated along the given axis.
- `axis` : int (symbolic or literal)
On which dimension should the tensors be joined? The `axis` must be a valid index into
the shape of the tensors to be concatenated.
The `axis` parameter may either be an integer or an object that can be converted to a
scalar using `as_scalar`(`axis`). In the former case, the axis is fixed at construction,
while in the latter it may vary over time depending on the value of the `axis` variable.
The shapes of the tensors to be concatenated must be all identical, except in the dimension
(`axis`) on which they are to be joined.
"""
@constructor
def leftpad_shape(tensor, n_ones):
"""Reshape `tensor` by left-padding the shape with `n_ones` 1s"""
pattern = ['x']*n_ones + [i for i in range(tensor.type.ndim)]
return DimShuffle(tensor.broadcastable, pattern)(tensor)
@constructor
def stack(*tensors):
"""Insert the arguments as slices into a tensor of 1 rank greater.
EXAMPLE
"""
return join(0, *[leftpad_shape(t, 1) for t in tensors])
@constructor
def concatenate(tensor_list, axis=0):
"""Alias for `join`(axis, *tensor_list).
This function is similar to `join`, but uses the signature of numpy's concatenate function.
This function
:Exceptions:
- `TypeError` : the tensor_list must be a tuple or list
"""
# Check someone did not make the common mistake to do something like:
# c = concatenate(x, y)
# instead of
# c = concatenate((x, y))
if not isinstance(tensors, (tuple, list)):
if not isinstance(tensor_list, (tuple, list)):
raise TypeError("The 'tensors' argument must be either a tuple "
"or a list, make sure you did not forget () or [] around "
"arguments of concatenate.", tensors)
# Ensure we only create one instance of 'Concatenate', to simplify the
# merging job.
if not hasattr(concatenate, 'obj'):
concatenate.obj = Concatenate()
return concatenate.obj(axis, *tensors)
return join(axis, *tensor_list)
class VerticalStack(Op):
"""
Vertically stack two L{Tensor}s.
Stack two L{Tensor}s along the first axis (row wise). These
L{Tensor}s must have the same shape along all dimensions but the
first.
def get_vector_length(v):
"""Return the run-time length of a symbolic vector.
:Parameters:
- `v` : A rank-1 Tensor result.
:Exceptions:
- `TypeError` : `v` hasn't the proper type.
- `ValueError` : No special case applies, the length is not known.
In general this is not possible, but for a number of special cases the length can be
determined at compile / graph-construction time. This function implements these special
cases.
@attention: Because we use vstack as the implementation, if the
inputs have 1-dimension, the output will have 2-dimensions.
"""
def make_node(self, x, y):
x = as_tensor(x)
y = as_tensor(y)
assert x.type.dtype == y.type.dtype
if x.type.broadcastable[1:] != y.type.broadcastable[1:]:
raise NotImplementedError
inputs = [x, y]
bcastable = (False, ) + x.type.broadcastable[1:]
outputs = [tensor(dtype = x.type.dtype,
broadcastable = bcastable)]
return Apply(self, inputs, outputs)
def perform(self, node, (x, y), (out, )):
assert x.ndim == y.ndim
# Make sure every dimension (save the first) is the same
for i in range(x.ndim): assert i == 0 or x.shape[i] == y.shape[i]
out[0] = numpy.vstack([x, y])
def grad(self, (x, y), (gz,)):
if v.ndim != 1:
raise TypeError('argument must be symbolic vector')
if isinstance(v, gof.Constant) and v.type.ndim == 1:
return len(v.data)
if v.owner and isinstance(v.owner.op, join):
try:
return join.vec_length(v)
except:
pass
if v.owner and v.owner.op == shape:
return v.owner.inputs[0].type.ndim
raise ValueError("length not known")
if 0: #vertical and horizontal stacking are deprecated. Better to use stack() and join().
class VerticalStack(Op):
"""
@todo: Make VSplit (or this grad implementation) its own L{Op},
that way we can do more sanity-checking::
Vertically stack two L{Tensor}s.
Stack two L{Tensor}s along the first axis (row wise). These
L{Tensor}s must have the same shape along all dimensions but the
first.
@attention: Because we use vstack as the implementation, if the
inputs have 1-dimension, the output will have 2-dimensions.
"""
def make_node(self, x, y):
x = as_tensor(x)
y = as_tensor(y)
assert x.type.dtype == y.type.dtype
if x.type.broadcastable[1:] != y.type.broadcastable[1:]:
raise NotImplementedError
inputs = [x, y]
bcastable = (False, ) + x.type.broadcastable[1:]
outputs = [tensor(dtype = x.type.dtype,
broadcastable = bcastable)]
return Apply(self, inputs, outputs)
def perform(self, node, (x, y), (out, )):
assert x.ndim == y.ndim
# Make sure every dimension (save the first) is the same
for i in range(x.data.ndim): assert i == 0 or x.data.shape[i] == y.shape[i]
etc...
for i in range(x.ndim): assert i == 0 or x.shape[i] == y.shape[i]
out[0] = numpy.vstack([x, y])
def grad(self, (x, y), (gz,)):
"""
@todo: Make VSplit (or this grad implementation) its own L{Op},
that way we can do more sanity-checking::
assert x.ndim == y.ndim
# Make sure every dimension (save the first) is the same
for i in range(x.data.ndim): assert i == 0 or x.data.shape[i] == y.shape[i]
etc...
"""
xs = shape(x)
ys = shape(y)
return gz[:xs[0]], gz[xs[0]:]
vertical_stack = VerticalStack()
def horizontal_stack(x, y):
"""
xs = shape(x)
ys = shape(y)
return gz[:xs[0]], gz[xs[0]:]
vertical_stack = VerticalStack()
Horizontally stack two L{Tensor}s.
Stack two L{Tensor}s along the second axis (column wise). These
L{Tensor}s must have the same shape along all dimensions but the
second.
def horizontal_stack(x, y):
"""
Horizontally stack two L{Tensor}s.
Stack two L{Tensor}s along the second axis (column wise). These
L{Tensor}s must have the same shape along all dimensions but the
second.
@note: Unlike VerticalStack, we assume that the L{Tensor}s have
two dimensions.
"""
assert x.type.ndim == 2
assert y.type.ndim == 2
return transpose(vertical_stack(x.T, y.T))
class MakeVector(Op):
"""WRITEME"""
def __init__(self, stype):
self.stype = stype
def make_node(self, *inputs):
inputs = map(as_tensor, inputs)
assert all(a.type == self.stype for a in inputs)
return Apply(self, inputs, [Tensor(broadcastable = (False,),
dtype = self.stype.dtype)()])
def perform(self, node, inputs, (out,)):
out[0] = numpy.asarray(inputs)
def grad(self, inputs, (gout,)):
return [None]*len(inputs)
make_lvector = MakeVector(lscalar)
"""WRITEME"""
@note: Unlike VerticalStack, we assume that the L{Tensor}s have
two dimensions.
"""
assert x.type.ndim == 2
assert y.type.ndim == 2
return transpose(vertical_stack(x.T, y.T))
else:
pass
#########################
......@@ -1842,3 +2030,146 @@ def grad(cost, wrt, g_cost=None):
else:
return gmap.get(wrt, zero(wrt))
class numeric_grad:
"""WRITEME"""
def __init__(self, f, pt, eps=1.0e-7):
"""Return the gradient of f at pt.
This function computes the gradient by a one-sided finite differences of a
fixed step size (eps).
It is assumed that f(...) will return a scalar.
It is assumed that all f's inputs are numpy.ndarray objects.
"""
def prod(inputs):
rval = 1
for i in inputs:
rval *= i
return rval
packed_pt = False
if not isinstance(pt, (list, tuple)):
pt = [pt]
packed_pt = True
apt = [numpy.array(p) for p in pt]
shapes = [p.shape for p in apt]
dtypes = [str(p.dtype) for p in apt]
if not dtypes == [dtypes[0]] * len(apt):
raise TypeError('All function arguments must have same dtype')
total_size = __builtin__.sum(prod(sh) for sh in shapes)
#create un-initialized memory
x = numpy.ndarray((total_size,), dtype=dtypes[0])
gx = numpy.ndarray((total_size,), dtype=dtypes[0])
#set up aliases so that apt[i] is backed by memory in x
# and self.gf is backed by memory in gx
cur_pos = 0
self.gf = []
for i,p in enumerate(apt):
p_size = prod(p.shape)
# set up alias
apt[i] = x[cur_pos:cur_pos+p_size].reshape(p.shape)
self.gf.append(gx[cur_pos:cur_pos+p_size].reshape(p.shape))
# initialize with p's value
apt[i][:] = p
cur_pos += p_size
f_x = f(*[p.copy() for p in apt])
# now iterate over the elements of x, and call f on apt.
x_copy = x.copy()
for i in xrange(total_size):
x[:] = x_copy
x[i] += eps
f_eps = f(*apt)
gx[i] = numpy.asarray((f_eps - f_x)/eps)
if packed_pt:
self.gf = self.gf[0]
@staticmethod
def abs_rel_err(a,b,eps=1.0e-10):
"""Return a small number when a and b are close, relative to how big they are"""
return abs(a-b) / (abs(a)+abs(b)+eps)
def max_err(self, g_pt):
"""Return the biggest relative error between g_pt and self.gf"""
assert len(g_pt) == len(self.gf)
errs = []
for a, b in zip(g_pt, self.gf):
errs.append(numpy.max(numeric_grad.abs_rel_err(a,b)))
return numpy.max(errs)
def verify_grad(testcase, op, pt, n_tests=1, rng=numpy.random, eps=1.0e-7, tol=0.0001,
linker='c&py'):
""" WRITEME
testcase.failUnless(analytic gradient matches finite-diff gradient)
"""
pt = [numpy.array(p) for p in pt]
#print "PT", pt
def function(inputs, output):
return compile.function(inputs, output,
mode=compile.Mode(optimizer = None, linker = linker),
accept_inplace=True)
for test_num in xrange(n_tests):
tensor_pt = [value(p.copy(), name='input %i'%i) for i,p in enumerate(pt)]
#op can be either a function or an actual Op instance
#print "OP", op
#print "TENSOR PT", tensor_pt
o_output = op(*tensor_pt)
if isinstance(o_output,list) > 1:
raise NotImplementedError('cant (yet) autotest gradient of op with multiple outputs')
# we could make loop over outputs making random projections R for each,
# but this doesn't handle the case where not all the outputs are
# differentiable... so I leave this as TODO for now -JB.
o_fn = function(tensor_pt, o_output)
#print "PT B", pt
o_fn_out = o_fn(*[p.copy() for p in pt])
#print "PT C", pt
random_projection = rng.rand(*o_fn_out.shape)
t_r = as_tensor(random_projection)
#random projection of o onto t_r
cost = sum(t_r * o_output)
cost_fn = function(tensor_pt, cost)
num_grad = numeric_grad(cost_fn, [p.copy() for p in pt], eps)
symbolic_grad = grad(cost, tensor_pt,as_tensor(1.0,name='g_cost'))
if 0:
print '----------'
for op in gof.graph.io_toposort(tensor_pt, symbolic_grad):
print op
grad_fn = function(tensor_pt, symbolic_grad)
#print "PT D", pt
analytic_grad = grad_fn(*pt)
#print "PT Z", pt
if not isinstance(analytic_grad, (list, tuple)):
analytic_grad = [analytic_grad]
max_err = num_grad.max_err(analytic_grad)
if max_err > tol:
#print 'analytic grad', analytic_grad
#print 'numeric grad', num_grad.gf
raise Exception(verify_grad.E_grad, (max_err, tol))
verify_grad.E_grad = 'gradient error exceeded tolerance'
"""This error is raised when a gradient is calculated, but incorrect."""
......@@ -8,6 +8,7 @@ import numpy as N
import operator
import itertools
import sys
import compile #to register the optimizer built by this file
# Utilities
......@@ -32,9 +33,10 @@ gemm_pattern_1 = gof.PatternSub((T._sub_inplace,
# gemm: (d,a,b,c,s) -> d = d*s + a*dot(b,c)
# Transforms dot(a, b) into gemm(zeros(2)(hstack(shape(a)[:1], shape(b)[1:])), 1.0, a, b, 1.0)
# The construction of the 'gemm' node may fail if, for example, a and b are not both matrices.
dot_to_gemm = gof.PatternSub((T.dot, 'a', 'b'),
(T.gemm, (T.Zeros(2),
(T.vertical_stack,
(T.stack,
(T.Subtensor([slice(0, 1)]), (T.shape, 'a')),
(T.Subtensor([slice(1, 2)]), (T.shape, 'b')))),
T.constant(1.0), 'a', 'b', T.constant(1.0)),
......@@ -231,7 +233,15 @@ def local_subtensor_make_vector(node):
If the index or slice is constant.
"""
if not opt.check_chain(node, T.Subtensor, T.MakeVector):
if not opt.check_chain(node, T.Subtensor, T.Join):
return False
joined_r = node.inputs[0]
try:
#check that join is being used to join scalars
veclen = T.join.vec_length(joined_r)
except:
return False
idxlist = node.op.idx_list
......@@ -644,6 +654,16 @@ def _math_optimizer():
math_optimizer = _math_optimizer()
compile.register_optimizer('math',
gof.MergeOptMerge(
gof.PureThenInplaceOptimizer(
math_optimizer,
inplace_optimizer)))
compile.register_mode('SANITY_CHECK', compile.Mode('c&py', 'math'))
compile.register_mode('FAST_RUN', compile.Mode('c|py', 'math'))
compile.register_mode('EXPENSIVE_OPTIMIZATIONS', compile.Mode('c|py', 'math'))
# @gof.local_optimizer
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论