提交 357efa53 authored 作者: James Bergstra's avatar James Bergstra

replaced MakeVector, VerticalStack and horizontal stacking ops with Join and…

replaced MakeVector, VerticalStack and horizontal stacking ops with Join and Split. Rewrote numeric_grad to work properly with in-place operations and arbitrary-rank tensors
上级 fcc8197e
...@@ -533,64 +533,6 @@ DotTester = make_tester(name = 'DotTester', ...@@ -533,64 +533,6 @@ DotTester = make_tester(name = 'DotTester',
# rationale: it's tricky, and necessary everytime you want to verify # rationale: it's tricky, and necessary everytime you want to verify
# gradient numerically # gradient numerically
def verify_grad(testcase, op, pt, n_tests=1, rng=numpy.random, eps=0.0000001, tol=0.0001,
        linker='c&py'):
    """Check that the symbolic gradient of `op` matches a finite-difference estimate.

    :param testcase: a unittest.TestCase (kept for signature compatibility;
        failures are reported by raising, not through testcase methods)
    :param op: the Op (or op-like callable) whose gradient is verified
    :param pt: list of numpy-convertible values at which to evaluate the gradient
    :param n_tests: number of random projections to test
    :param rng: random number generator used to draw the projection
    :param eps: finite-difference step size (forwarded implicitly via numeric_grad default)
    :param tol: maximum allowed relative error between analytic and numeric gradients
    :param linker: linker name forwarded to compile.Mode

    :raises Exception: with message `verify_grad.E_grad` when the relative
        error exceeds `tol`.
    """
    pt = [numpy.asarray(p) for p in pt]
    for test_num in xrange(n_tests):
        # symbolic inputs mirroring the given points
        tensor_pt = [constant(p).type('input %i'%i) for i,p in enumerate(pt)]
        o = safe_make_node(op, *[tpt.copy() for tpt in tensor_pt])
        o_outputs = o.outputs if hasattr(o, 'outputs') else o
        if len(o_outputs) > 1:
            # we could make loop over outputs making random projections R for each,
            # but this doesn't handle the case where not all the outputs are
            # differentiable... so I leave this as TODO for now -JB.
            raise NotImplementedError('cant (yet) autotest gradient of op with multiple outputs')
        o_fn = function(tensor_pt, o_outputs[0], mode=compile.Mode(optimizer = None, linker = linker))
        o_fn_out = o_fn(*pt)
        # random projection of o onto t_r: turns a (possibly tensor-valued)
        # output into a scalar cost whose gradient can be checked numerically
        random_projection = rng.rand(*o_fn_out.shape)
        t_r = as_tensor(random_projection)
        cost = sum(t_r * o_outputs[0])
        cost_fn = function(tensor_pt, cost, mode=compile.Mode(optimizer = None, linker = linker))
        num_grad = gradient.numeric_grad(cost_fn, pt)
        symbolic_grad = grad(cost, tensor_pt, as_tensor(1.0, name='g_cost'))
        grad_fn = function(tensor_pt, symbolic_grad, mode=compile.Mode(optimizer = None, linker = linker))
        analytic_grad = grad_fn(*pt)
        if not isinstance(analytic_grad, (list, tuple)):
            analytic_grad = [analytic_grad]
        # BUGFIX: the threshold was hard-coded to 1.0e-4, silently ignoring
        # the `tol` argument; compare against `tol` instead.
        if num_grad.max_err(analytic_grad) > tol:
            raise Exception(verify_grad.E_grad)
verify_grad.E_grad = 'gradient error exceeded tolerance'
#useful mostly for unit tests #useful mostly for unit tests
...@@ -945,29 +887,101 @@ class T_subtensor(unittest.TestCase): ...@@ -945,29 +887,101 @@ class T_subtensor(unittest.TestCase):
class T_Stack(unittest.TestCase): class T_Join_and_Split(unittest.TestCase):
def test_hstack(self): """
a = as_tensor(numpy.array([[1, 2, 3], [4, 5, 6]])) Split is tested by each verify_grad method.
b = as_tensor(numpy.array([[7], [8]])) """
s = horizontal_stack(a, b)
c = numpy.array([[1, 2, 3, 7], [4, 5, 6, 8]]) class Join1(Op):
self.failUnless((eval_outputs([s]) == c).all()) def make_node(self, *inputs):
def test_vstack(self): inputs = [as_tensor(t) for t in inputs]
a = as_tensor(numpy.array([[1, 2, 3], [4, 5, 6]])) outputs = [lscalar()] + [i.type() for i in inputs]
b = as_tensor(numpy.array([[7, 8, 9]])) return Apply(self, inputs, outputs)
s = vertical_stack(a, b) def perform(self, node, inputs, outputs):
c = numpy.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]) outputs[0][0] = 1
self.failUnless((eval_outputs([s]) == c).all()) for i,o in zip(inputs, outputs[1:]):
o[0] = i.copy()
def grad(self, inputs, g_outputs):
return g_outputs[1:]
def setUp(self):
Join.debug = False
def test_join_scalar(self):
a = as_tensor(1)
b = as_tensor(2)
try:
s = join(0, a, b)
except:
return
self.fail()
def test_stack_scalar(self):
a = as_tensor(1)
b = as_tensor(2)
c = as_tensor(3)
s = stack(a, b, c)
want = numpy.array([1, 2, 3])
self.failUnless((eval_outputs([s]) == want).all())
def test_vstack_grad(self):
def test_join_vector(self):
a = as_tensor(numpy.array([1, 2, 3]))
b = as_tensor(numpy.array([7, 8, 9]))
s = join(0, a, b)
want = numpy.array([1, 2, 3, 7, 8, 9])
self.failUnless((eval_outputs([s]) == want).all())
def test_stack_vector(self):
a = as_tensor(numpy.array([1, 2, 3]))
b = as_tensor(numpy.array([7, 8, 9]))
s = stack(a, b)
want = numpy.array([[1, 2, 3],[ 7, 8, 9]])
self.failUnless((eval_outputs([s]) == want).all())
def test_join_matrix0(self):
a = as_tensor(numpy.array([[1, 2, 3], [4, 5, 6]])) a = as_tensor(numpy.array([[1, 2, 3], [4, 5, 6]]))
b = as_tensor(numpy.array([[7, 8, 9]])) b = as_tensor(numpy.array([[7, 8, 9]]))
s = vertical_stack(a, b) s = join(0, a, b)
ga,gb = grad(sum(vertical_stack(a,b)), [a,b])
want = numpy.array([[1, 2, 3],[4,5,6],[7, 8, 9]])
self.failUnless((eval_outputs([s]) == want).all())
def test_join_matrix1(self):
av=numpy.array([[1, 2, 3], [4, 5, 6]], dtype='float32')
bv= numpy.array([[7], [8]],dtype='float32')
a = as_tensor(av)
b = as_tensor(bv)
s = join(1, a, b)
want = numpy.array([[1, 2, 3, 7], [4, 5, 6, 8]], dtype='float32')
self.failUnless((eval_outputs([s]) == want).all())
verify_grad(self, lambda a, b: join(1,a,b), [av, bv], eps=1.0e-4, tol=1.0e-3)
def test_join_matrixV(self):
"""variable join axis"""
v = numpy.array([[1., 2., 3.], [4., 5., 6.]])
a = as_tensor(v.copy())
b = as_tensor(v.copy())
ax = lscalar()
s = join(ax, a, b)
f = function([ax], [s])
want = numpy.array([[1, 2, 3], [4, 5, 6] ,[1, 2, 3], [4, 5, 6]])
got = f(0)
self.failUnless((got == want).all(), (got, want))
want = numpy.array([[ 1, 2, 3, 1, 2, 3], [4, 5, 6, 4, 5, 6]])
got = f(1)
self.failUnless((got == want).all(), (got, want))
verify_grad(self, lambda a, b: join(0,a,b), [v, 2*v])
verify_grad(self, lambda a, b: join(1,a,b), [v, 2*v])
gval = eval_outputs([ga, gb])
self.failUnless(numpy.all(gval[0] == 1.0))
self.failUnless(numpy.all(gval[1] == 1.0))
class _test_comparison(unittest.TestCase): class _test_comparison(unittest.TestCase):
...@@ -1761,10 +1775,10 @@ class T_op_cache(unittest.TestCase): ...@@ -1761,10 +1775,10 @@ class T_op_cache(unittest.TestCase):
self.failUnless(numpy.all(fn_py(a) == fn_c_or_py(a))) self.failUnless(numpy.all(fn_py(a) == fn_c_or_py(a)))
if __name__ == '__main__': if __name__ == '__main__':
if 1: if 0:
unittest.main() unittest.main()
else: else:
testcase = t_dot testcase = AbsInplaceTester
suite = unittest.TestLoader() suite = unittest.TestLoader()
suite = suite.loadTestsFromTestCase(testcase) suite = suite.loadTestsFromTestCase(testcase)
......
"""Convenient driver of graph construction, optimization, and linking.""" """Convenient driver of graph construction, optimization, and linking."""
import copy_reg
import cPickle
from functools import partial
import numpy import numpy
import gof import gof
import sys import sys
from copy import copy from copy import copy
import tensor_opt
def check_equal(x, y): def check_equal(x, y):
""" """
...@@ -57,6 +60,12 @@ predefined_linkers = { ...@@ -57,6 +60,12 @@ predefined_linkers = {
default_linker = 'c|py' default_linker = 'c|py'
def register_linker(name, linker):
    """Expose a `Linker` instance so that `Mode` can look it up by `name`."""
    name_is_taken = name in predefined_linkers
    if name_is_taken:
        raise ValueError('Linker name already taken: %s' % name)
    predefined_linkers[name] = linker
# If a string is passed as the optimizer argument in the constructor # If a string is passed as the optimizer argument in the constructor
# for Mode, it will be used as the key to retrieve the real optimizer # for Mode, it will be used as the key to retrieve the real optimizer
...@@ -64,13 +73,15 @@ default_linker = 'c|py' ...@@ -64,13 +73,15 @@ default_linker = 'c|py'
predefined_optimizers = { predefined_optimizers = {
None : lambda env: None, None : lambda env: None,
'merge' : gof.MergeOptimizer(), 'merge' : gof.MergeOptimizer(),
'math' : gof.MergeOptMerge(
gof.PureThenInplaceOptimizer(tensor_opt.math_optimizer,
tensor_opt.inplace_optimizer))
} }
default_optimizer = 'merge' default_optimizer = 'merge'
def register_optimizer(name, opt):
    """Expose an `Optimizer` instance so that `Mode` can look it up by `name`."""
    name_is_taken = name in predefined_optimizers
    if name_is_taken:
        raise ValueError('Optimizer name already taken: %s' % name)
    predefined_optimizers[name] = opt
class Mode(object): class Mode(object):
""" """
...@@ -110,15 +121,14 @@ class Mode(object): ...@@ -110,15 +121,14 @@ class Mode(object):
# If a string is passed as the mode argument in function or # If a string is passed as the mode argument in function or
# FunctionMaker, the Mode will be taken from this dictionary using the # FunctionMaker, the Mode will be taken from this dictionary using the
# string as the key # string as the key
predefined_modes = { predefined_modes = {'FAST_COMPILE': Mode('py', 'merge')}
'SANITY_CHECK' : Mode('c&py', 'math'), default_mode = 'FAST_COMPILE'
'FAST_COMPILE' : Mode('py', 'merge'),
'FAST_RUN' : Mode('c|py', 'math'),
'EXPENSIVE_OPTIMIZATIONS' : Mode('c|py', 'math'),
}
default_mode = 'FAST_RUN'
def register_mode(name, mode):
    """Expose a `Mode` instance so that `function` can look it up by `name`."""
    name_is_taken = name in predefined_modes
    if name_is_taken:
        raise ValueError('Mode name already taken: %s' % name)
    predefined_modes[name] = mode
...@@ -508,9 +518,6 @@ class FunctionMaker(object): ...@@ -508,9 +518,6 @@ class FunctionMaker(object):
return fn return fn
import copy_reg
import cPickle
def _pickle_FunctionMaker(fm): def _pickle_FunctionMaker(fm):
return (_constructor_FunctionMaker, (fm.inputs, fm.outputs, fm.mode, fm.accept_inplace)) return (_constructor_FunctionMaker, (fm.inputs, fm.outputs, fm.mode, fm.accept_inplace))
...@@ -527,8 +534,6 @@ copy_reg.pickle(slice, _pickle_slice) ...@@ -527,8 +534,6 @@ copy_reg.pickle(slice, _pickle_slice)
from functools import partial
DUPLICATE = ['DUPLICATE'] # unique id object used as a placeholder for duplicate entries DUPLICATE = ['DUPLICATE'] # unique id object used as a placeholder for duplicate entries
class Function(object): class Function(object):
......
...@@ -110,62 +110,4 @@ def grad_sources_inputs(sources, graph_inputs): ...@@ -110,62 +110,4 @@ def grad_sources_inputs(sources, graph_inputs):
gmap[r] = g_r gmap[r] = g_r
return gmap return gmap
class numeric_grad:
    """One-sided finite-difference estimate of the gradient of a function.

    After construction, `self.gf` holds, for each input array in `pt`, an
    array of the same shape containing the estimated partial derivatives.
    """
    def __init__(self, f, pt, eps=1.0e-7):
        """Return the gradient of f at pt.

        This function computes the gradient by a one-sided finite differences of a
        fixed step size (eps).

        It is assumed that f(...) will return a scalar.
        It is assumed that all f's inputs are numpy.ndarray objects.

        :param f: callable returning a scalar
        :param pt: list of numpy arrays; entries are perturbed in place and
            restored, so float dtypes are expected (integer arrays would
            truncate the perturbation)
        :param eps: finite-difference step size
        """
        # gf[i] mirrors the shape of pt[i]; entries are filled in below
        gf = [numpy.ndarray(x.shape) for x in pt]
        f_pt = f(*pt)
        # BUGFIX: the original tested isinstance(f, (list, tuple)) -- the
        # function object itself -- instead of its return value f_pt.
        if isinstance(f_pt, (list, tuple)):
            f_pt = [numpy.copy(x) for x in f_pt]
        else:
            f_pt = numpy.copy(f_pt)
        # Generalized from rank <= 2 to arbitrary rank: ndindex enumerates
        # every element position (for a 0-d array it yields the single
        # empty index ()).
        for idx in range(len(gf)):
            for pos in numpy.ndindex(*pt[idx].shape):
                orig = pt[idx][pos]
                pt[idx][pos] = orig + eps
                f_eps = f(*pt)
                gf[idx][pos] = numpy.asarray((f_eps - f_pt) / eps)
                pt[idx][pos] = orig
        self.gf = gf

    @staticmethod
    def abs_rel_err(a, b, eps=1.0e-10):
        """Return a small number when a and b are close, relative to how big they are"""
        return abs(a-b) / (abs(a)+abs(b)+eps)

    def max_err(self, g_pt):
        """Return the biggest relative error between g_pt and self.gf"""
        assert len(g_pt) == len(self.gf)
        errs = []
        for a, b in zip(g_pt, self.gf):
            errs.append(numpy.max(numeric_grad.abs_rel_err(a, b)))
        return max(errs)
...@@ -2,6 +2,7 @@ ...@@ -2,6 +2,7 @@
__docformat__ = "restructuredtext en" __docformat__ = "restructuredtext en"
import __builtin__
import sys # for sys.maxint import sys # for sys.maxint
import inspect import inspect
import functools import functools
...@@ -20,6 +21,8 @@ import elemwise ...@@ -20,6 +21,8 @@ import elemwise
import scalar as scal import scalar as scal
from gof.python25 import partial from gof.python25 import partial
import compile
### set up the external interface ### set up the external interface
from elemwise import Elemwise, DimShuffle, CAReduce, Sum from elemwise import Elemwise, DimShuffle, CAReduce, Sum
...@@ -91,7 +94,7 @@ def constant(x): ...@@ -91,7 +94,7 @@ def constant(x):
except: except:
raise TypeError("Could not convert %s to Tensor" % x, type(x)) raise TypeError("Could not convert %s to Tensor" % x, type(x))
def value(x): def value(x, name=None):
"""Return a symbolic `Value` with default value `x` """Return a symbolic `Value` with default value `x`
:Exceptions: :Exceptions:
...@@ -102,8 +105,12 @@ def value(x): ...@@ -102,8 +105,12 @@ def value(x):
else: else:
x_ = numpy.asarray(x) x_ = numpy.asarray(x)
try: try:
return TensorValue(Tensor(dtype = x_.dtype, if name is None:
return TensorValue(Tensor(dtype = x_.dtype,
broadcastable = [d == 1 for d in x_.shape]), x_) broadcastable = [d == 1 for d in x_.shape]), x_)
else:
return TensorValue(Tensor(dtype = x_.dtype,
broadcastable = [d == 1 for d in x_.shape]), x_, name=name)
except: except:
raise TypeError("Could not convert %s to Tensor" % x, type(x)) raise TypeError("Could not convert %s to Tensor" % x, type(x))
...@@ -203,7 +210,7 @@ class Tensor(Type): ...@@ -203,7 +210,7 @@ class Tensor(Type):
- `name`: str - `name`: str
A pretty name to identify this `Result` when printing and debugging A pretty name to identify this `Result` when printing and debugging
""" """
return TensorResult(self, name = name) return TensorResult(self, name = name)
def __str__(self): def __str__(self):
...@@ -478,6 +485,18 @@ class _tensor_py_operators: ...@@ -478,6 +485,18 @@ class _tensor_py_operators:
raise TypeError('Tensor does not support iteration. ' raise TypeError('Tensor does not support iteration. '
'Maybe you are using builtin.sum instead of theano.tensor.sum? (Maybe .max?)') 'Maybe you are using builtin.sum instead of theano.tensor.sum? (Maybe .max?)')
# CONVENIENT ACCESS TO TYPE PROPERTIES
ndim = property(lambda self: self.type.ndim)
"""The rank of this tensor."""
broadcastable = property(lambda self: self.type.broadcastable)
"""The broadcastable signature of this tensor.
See :doc:`broadcasting` for details.
"""
dtype = property(lambda self: self.type.dtype)
""" The dtype of this tensor. """
class TensorResult(Result, _tensor_py_operators): class TensorResult(Result, _tensor_py_operators):
...@@ -1311,32 +1330,101 @@ class SetSubtensor(Subtensor): ...@@ -1311,32 +1330,101 @@ class SetSubtensor(Subtensor):
x.__setitem__(cdata, y) x.__setitem__(cdata, y)
out[0] = x out[0] = x
class Split(Op):
"""Partition a `TensorResult` along some axis.
class MakeVector(Op): .. python::
"""WRITEME"""
def __init__(self, stype): x = vector()
self.stype = stype splits = lvector()
def make_node(self, *inputs): # you have to declare right away how many split_points there will be.
inputs = map(as_tensor, inputs) ra, rb, rc = split(x, axis=0, points=splits, n_splits=3)
assert all(a.type == self.stype for a in inputs)
return Apply(self, inputs, [Tensor(broadcastable = (False,), f = compile([x, splits], [ra, rb, rc])
dtype = self.stype.dtype)()])
def perform(self, node, inputs, (out,)): a, b, c = f([0,1,2,3,4,5,6], [3, 2, 1])
out[0] = numpy.asarray(inputs)
def grad(self, inputs, (gout,)):
return [None]*len(inputs)
make_lvector = MakeVector(lscalar)
"""WRITEME"""
class Concatenate(Op): #a == [0,1,2]
#b == [3, 4]
#c == [5]
"""
len_splits = None
"""A Split instance will have this many outputs, and require that the splits argument to
`perform` have exactly this many elements.
"""
def __init__(self, len_splits):
    # Number of outputs this Op produces; fixed at construction time
    # because make_node must declare the output count up front.
    self.len_splits = int(len_splits)
def make_node(self, x, axis, splits):
    """Build an Apply node that splits `x` into `self.len_splits` pieces.

    :param x: tensor to split
    :param axis: lscalar -- dimension along which to split
    :param splits: lvector -- size of each output piece along `axis`
    :raises TypeError: if `splits` is not an lvector or `axis` is not an lscalar
    """
    x = as_tensor(x)
    axis = as_tensor(axis)
    splits = as_tensor(splits)
    if splits.type != lvector:
        raise TypeError('splits must have type tensor.lvector', splits.type)
    if axis.type != lscalar:
        raise TypeError('axis must have type lscalar', axis.type)
    inputs = [x, axis, splits]
    # every output piece has the same type (dtype/broadcastable) as x
    outputs = [x.type() for i in xrange(self.len_splits)]
    return Apply(self, inputs, outputs)
def perform(self, node, (x, axis, splits), outputs):
"""WRITEME"""
try:
len_along_axis = x.shape[axis]
except :
raise ValueError('Split.perform() with axis=(%s) is invalid for x.shape==(%s)'
%(axis, x.shape))
if len(splits) != self.len_splits:
raise ValueError('In Split.perform(), len(splits) != len_splits.',
(len(splits), self.len_splits))
# Checking is done, lets roll the splitting algorithm!
# Basically we step along the given axis of x, extracting subtensors of size splits[i]
# as we go along.
general_key = [slice(None, None, None) for s in x.shape]
lower_idx = 0
for i in xrange(self.len_splits):
upper_idx = lower_idx + splits[i]
general_key[axis] = slice(lower_idx, upper_idx, None)
outputs[i][0] = x.__getitem__(general_key).copy()
lower_idx = upper_idx
def grad(self, (x, axis, splits), g_outputs):
    """Join the gradients along the axis that was used to split x."""
    # axis and splits are integer-valued inputs, hence not differentiable:
    # their gradient slots are None.
    return [join(axis, *g_outputs), None, None]
class Join(Op):
""" """
Concatenate two L{Tensor}s along the given axis. Concatenate two `TensorResult`s along some axis.
These L{Tensor}s must have the same shape along all dimensions other than
this axis. These tensors must have the same shape along all dimensions other than this axis.
Of course, TensorResult instances don't have a shape, so this error can't be caught until
runtime. See `perform()`.
.. python::
x, y, z = tensor.matrix(), tensor.matrix(), tensor.matrix()
u = tensor.vector()
r = join(0, x, y, z)
c = join(1, x, y, z)
join(2, x, y, z) # WRONG: the axis has to be an index into the shape
join(0, x, y) # WRONG: tensors have to have the same rank to be joined
""" """
def make_node(self, *axis_and_tensors): def make_node(self, *axis_and_tensors):
"""
WRITEME
"""
axis, tensors = axis_and_tensors[0], axis_and_tensors[1:] axis, tensors = axis_and_tensors[0], axis_and_tensors[1:]
as_tensor_args= [as_tensor(x) for x in tensors] as_tensor_args= [as_tensor(x) for x in tensors]
dtypes = [x.type.dtype for x in as_tensor_args] dtypes = [x.type.dtype for x in as_tensor_args]
...@@ -1364,17 +1452,36 @@ class Concatenate(Op): ...@@ -1364,17 +1452,36 @@ class Concatenate(Op):
bcastable[:] = as_tensor_args[0].type.broadcastable bcastable[:] = as_tensor_args[0].type.broadcastable
bcastable[axis] = False bcastable[axis] = False
inputs = [scal.as_scalar(axis)] + as_tensor_args inputs = [as_tensor(axis)] + as_tensor_args
if inputs[0].type != lscalar:
raise TypeError('Axis could not be cast to lscalar', axis)
outputs = [tensor(dtype = dtypes[0], outputs = [tensor(dtype = dtypes[0],
broadcastable = bcastable)] broadcastable = bcastable)]
return Apply(self, inputs, outputs) return Apply(self, inputs, outputs)
def perform(self, node, axis_and_tensors, (out, )): def perform(self, node, axis_and_tensors, (out, )):
"""
WRITEME
"""
axis, tensors = axis_and_tensors[0], axis_and_tensors[1:] axis, tensors = axis_and_tensors[0], axis_and_tensors[1:]
out[0] = numpy.concatenate(tensors, axis = axis) out[0] = numpy.concatenate(tensors, axis = axis)
def grad(self, axis_and_tensors, (gz,)): def grad(self, axis_and_tensors, (gz,)):
""" The gradient wrt a join op is a `Split`, used to partition the gradient along the
`axis` which was used for joining.
"""
axis, tensors = axis_and_tensors[0], axis_and_tensors[1:]
if 'float' in tensors[0].dtype or 'complex' in tensors[0].dtype:
# assume that this isn't differentiable
split = Split(len(tensors))
return [None] + split(gz, axis, stack(*[shape(x)[axis] for x in tensors]))
else:
# assume that this isn't differentiable
return [None] * (1 + len(tensors))
def _native_grad(self, axis_and_tensors, (gz,)):
"""WRITEME"""
axis, tensors = axis_and_tensors[0], axis_and_tensors[1:] axis, tensors = axis_and_tensors[0], axis_and_tensors[1:]
n_dims = len(shape(tensors[0])) n_dims = len(shape(tensors[0]))
sizes_along_axis = [shape(x)[axis] for x in tensors] sizes_along_axis = [shape(x)[axis] for x in tensors]
...@@ -1387,91 +1494,172 @@ class Concatenate(Op): ...@@ -1387,91 +1494,172 @@ class Concatenate(Op):
[slice(None)] * (n_dims - axis - 1)] \ [slice(None)] * (n_dims - axis - 1)] \
for k in range(len(sizes_along_axis))] for k in range(len(sizes_along_axis))]
def get_vector_length(v): def vec_length(self, node):
if isinstance(v, gof.Constant) and v.type.ndim == 1: assert isinstance(node.owner.op, Join)
return len(v.data) if node.ndim != 1:
elif v.owner and isinstance(v.owner.op, MakeVector): raise TypeError('argument must be symbolic vector')
return len(v.owner.inputs) inputs = node.owner.inputs
elif v.owner and v.owner.op == shape: axis, tensors = inputs[0], inputs[1]
return v.owner.inputs[0].type.ndim # if v is a vector, axis must be 0
else: # the question is whether all the inputs are broadcastable.
return None if all(i.broadcastable[0] for i in tensors):
return len(tensors)
def concatenate(tensors, axis=0):
@_redefine_asRoutine(Join())
def join(axis, *tensors):
""" """
Convenience function to concatenate `Tensor`s along the given axis. Convenience function to concatenate `Tensor`s along the given axis.
The `axis` parameter may either be an integer or an object that can be
converted to a scalar using `as_scalar`(`axis`). In the former case, :Parameters:
the axis is fixed at construction, while in the latter it may vary over - `tensors` : list of tensors (or list-like)
time depending on the value of the `axis` variable. A list of tensors to be concatenated along the given axis.
- `axis` : int (symbolic or literal)
On which dimension should the tensors be joined? The `axis` must be a valid index into
the shape of the tensors to be concatenated.
The `axis` parameter may either be an integer or an object that can be converted to a
scalar using `as_scalar`(`axis`). In the former case, the axis is fixed at construction,
while in the latter it may vary over time depending on the value of the `axis` variable.
The shapes of the tensors to be concatenated must be all identical, except in the dimension
(`axis`) on which they are to be joined.
"""
@constructor
def leftpad_shape(tensor, n_ones):
"""Reshape `tensor` by left-padding the shape with `n_ones` 1s"""
pattern = ['x']*n_ones + [i for i in range(tensor.type.ndim)]
return DimShuffle(tensor.broadcastable, pattern)(tensor)
@constructor
def stack(*tensors):
"""Insert the arguments as slices into a tensor of 1 rank greater.
EXAMPLE
"""
return join(0, *[leftpad_shape(t, 1) for t in tensors])
@constructor
def concatenate(tensor_list, axis=0):
"""Alias for `join`(axis, *tensor_list).
This function is similar to `join`, but uses the signature of numpy's concatenate function.
This function
:Exceptions:
- `TypeError` : the tensor_list must be a tuple or list
""" """
# Check someone did not make the common mistake to do something like: # Check someone did not make the common mistake to do something like:
# c = concatenate(x, y) # c = concatenate(x, y)
# instead of # instead of
# c = concatenate((x, y)) # c = concatenate((x, y))
if not isinstance(tensors, (tuple, list)): if not isinstance(tensor_list, (tuple, list)):
raise TypeError("The 'tensors' argument must be either a tuple " raise TypeError("The 'tensors' argument must be either a tuple "
"or a list, make sure you did not forget () or [] around " "or a list, make sure you did not forget () or [] around "
"arguments of concatenate.", tensors) "arguments of concatenate.", tensors)
# Ensure we only create one instance of 'Concatenate', to simplify the return join(axis, *tensor_list)
# merging job.
if not hasattr(concatenate, 'obj'):
concatenate.obj = Concatenate()
return concatenate.obj(axis, *tensors)
class VerticalStack(Op): def get_vector_length(v):
""" """Return the run-time length of a symbolic vector.
Vertically stack two L{Tensor}s.
Stack two L{Tensor}s along the first axis (row wise). These :Parameters:
L{Tensor}s must have the same shape along all dimensions but the - `v` : A rank-1 Tensor result.
first.
:Exceptions:
- `TypeError` : `v` hasn't the proper type.
- `ValueError` : No special case applies, the length is not known.
In general this is not possible, but for a number of special cases the length can be
determined at compile / graph-construction time. This function implements these special
cases.
@attention: Because we use vstack as the implementation, if the
inputs have 1-dimension, the output will have 2-dimensions.
""" """
def make_node(self, x, y): if v.ndim != 1:
x = as_tensor(x) raise TypeError('argument must be symbolic vector')
y = as_tensor(y) if isinstance(v, gof.Constant) and v.type.ndim == 1:
assert x.type.dtype == y.type.dtype return len(v.data)
if x.type.broadcastable[1:] != y.type.broadcastable[1:]: if v.owner and isinstance(v.owner.op, join):
raise NotImplementedError try:
inputs = [x, y] return join.vec_length(v)
bcastable = (False, ) + x.type.broadcastable[1:] except:
outputs = [tensor(dtype = x.type.dtype, pass
broadcastable = bcastable)] if v.owner and v.owner.op == shape:
return Apply(self, inputs, outputs) return v.owner.inputs[0].type.ndim
def perform(self, node, (x, y), (out, )): raise ValueError("length not known")
assert x.ndim == y.ndim
# Make sure every dimension (save the first) is the same if 0: #vertical and horizontal stacking are deprecated. Better to use stack() and join().
for i in range(x.ndim): assert i == 0 or x.shape[i] == y.shape[i] class VerticalStack(Op):
out[0] = numpy.vstack([x, y])
def grad(self, (x, y), (gz,)):
""" """
@todo: Make VSplit (or this grad implementation) its own L{Op}, Vertically stack two L{Tensor}s.
that way we can do more sanity-checking:: Stack two L{Tensor}s along the first axis (row wise). These
L{Tensor}s must have the same shape along all dimensions but the
first.
@attention: Because we use vstack as the implementation, if the
inputs have 1-dimension, the output will have 2-dimensions.
"""
def make_node(self, x, y):
x = as_tensor(x)
y = as_tensor(y)
assert x.type.dtype == y.type.dtype
if x.type.broadcastable[1:] != y.type.broadcastable[1:]:
raise NotImplementedError
inputs = [x, y]
bcastable = (False, ) + x.type.broadcastable[1:]
outputs = [tensor(dtype = x.type.dtype,
broadcastable = bcastable)]
return Apply(self, inputs, outputs)
def perform(self, node, (x, y), (out, )):
assert x.ndim == y.ndim assert x.ndim == y.ndim
# Make sure every dimension (save the first) is the same # Make sure every dimension (save the first) is the same
for i in range(x.data.ndim): assert i == 0 or x.data.shape[i] == y.shape[i] for i in range(x.ndim): assert i == 0 or x.shape[i] == y.shape[i]
etc... out[0] = numpy.vstack([x, y])
def grad(self, (x, y), (gz,)):
"""
@todo: Make VSplit (or this grad implementation) its own L{Op},
that way we can do more sanity-checking::
assert x.ndim == y.ndim
# Make sure every dimension (save the first) is the same
for i in range(x.data.ndim): assert i == 0 or x.data.shape[i] == y.shape[i]
etc...
"""
xs = shape(x)
ys = shape(y)
return gz[:xs[0]], gz[xs[0]:]
vertical_stack = VerticalStack()
def horizontal_stack(x, y):
""" """
xs = shape(x) Horizontally stack two L{Tensor}s.
ys = shape(y) Stack two L{Tensor}s along the second axis (column wise). These
return gz[:xs[0]], gz[xs[0]:] L{Tensor}s must have the same shape along all dimensions but the
vertical_stack = VerticalStack() second.
def horizontal_stack(x, y): @note: Unlike VerticalStack, we assume that the L{Tensor}s have
""" two dimensions.
Horizontally stack two L{Tensor}s. """
Stack two L{Tensor}s along the second axis (column wise). These assert x.type.ndim == 2
L{Tensor}s must have the same shape along all dimensions but the assert y.type.ndim == 2
second. return transpose(vertical_stack(x.T, y.T))
class MakeVector(Op):
    """Build a rank-1 tensor (vector) from scalar arguments of one fixed type."""
    def __init__(self, stype):
        # stype: the scalar Tensor type every input must match exactly
        self.stype = stype
    def make_node(self, *inputs):
        # assumes Python-2 map (returns a list) -- this file is py2 code
        inputs = map(as_tensor, inputs)
        assert all(a.type == self.stype for a in inputs)
        # output is a vector (single non-broadcastable dim) of stype's dtype
        return Apply(self, inputs, [Tensor(broadcastable = (False,),
            dtype = self.stype.dtype)()])
    def perform(self, node, inputs, (out,)):
        out[0] = numpy.asarray(inputs)
    def grad(self, inputs, (gout,)):
        # not differentiable with respect to the scalar inputs
        return [None]*len(inputs)
make_lvector = MakeVector(lscalar)
"""Make a vector of lscalar arguments; an instance of `MakeVector`."""
@note: Unlike VerticalStack, we assume that the L{Tensor}s have else:
two dimensions. pass
"""
assert x.type.ndim == 2
assert y.type.ndim == 2
return transpose(vertical_stack(x.T, y.T))
######################### #########################
...@@ -1842,3 +2030,146 @@ def grad(cost, wrt, g_cost=None): ...@@ -1842,3 +2030,146 @@ def grad(cost, wrt, g_cost=None):
else: else:
return gmap.get(wrt, zero(wrt)) return gmap.get(wrt, zero(wrt))
class numeric_grad:
    """Finite-difference gradient estimator robust to in-place functions.

    All inputs are flattened into one contiguous buffer `x`; each apt[i] is
    a reshaped *view* into that buffer, so perturbing one scalar of `x`
    perturbs the corresponding argument.  The gradients in `self.gf` alias
    a second buffer `gx` the same way, which makes the implementation work
    for arbitrary-rank inputs.
    """
    def __init__(self, f, pt, eps=1.0e-7):
        """Return the gradient of f at pt.

        This function computes the gradient by a one-sided finite differences of a
        fixed step size (eps).

        It is assumed that f(...) will return a scalar.
        It is assumed that all f's inputs are numpy.ndarray objects.

        :param f: callable returning a scalar
        :param pt: a numpy array, or a list/tuple of numpy arrays
        :param eps: finite-difference step size
        """
        def prod(inputs):
            # product of an iterable of ints (shape -> element count)
            rval = 1
            for i in inputs:
                rval *= i
            return rval
        packed_pt = False
        if not isinstance(pt, (list, tuple)):
            # accept a single array as well as a list of arrays
            pt = [pt]
            packed_pt = True
        apt = [numpy.array(p) for p in pt]
        shapes = [p.shape for p in apt]
        dtypes = [str(p.dtype) for p in apt]
        # all arguments share one flat buffer, so they must agree on dtype
        if not dtypes == [dtypes[0]] * len(apt):
            raise TypeError('All function arguments must have same dtype')
        total_size = __builtin__.sum(prod(sh) for sh in shapes)
        # create un-initialized memory
        x = numpy.ndarray((total_size,), dtype=dtypes[0])
        gx = numpy.ndarray((total_size,), dtype=dtypes[0])
        # set up aliases so that apt[i] is backed by memory in x
        # and self.gf is backed by memory in gx
        cur_pos = 0
        self.gf = []
        for i,p in enumerate(apt):
            p_size = prod(p.shape)
            # set up alias
            apt[i] = x[cur_pos:cur_pos+p_size].reshape(p.shape)
            self.gf.append(gx[cur_pos:cur_pos+p_size].reshape(p.shape))
            # initialize with p's value
            apt[i][:] = p
            cur_pos += p_size
        # f may modify its arguments in place, so the baseline is computed
        # on copies; the views in apt stay pristine
        f_x = f(*[p.copy() for p in apt])
        # now iterate over the elements of x, and call f on apt.
        x_copy = x.copy()
        for i in xrange(total_size):
            # restore the whole buffer first: an in-place f may have
            # clobbered apt (and hence x) on the previous iteration
            x[:] = x_copy
            x[i] += eps
            f_eps = f(*apt)
            gx[i] = numpy.asarray((f_eps - f_x)/eps)
        if packed_pt:
            # mirror the packing of the input: return a bare array
            self.gf = self.gf[0]

    @staticmethod
    def abs_rel_err(a,b,eps=1.0e-10):
        """Return a small number when a and b are close, relative to how big they are"""
        return abs(a-b) / (abs(a)+abs(b)+eps)

    def max_err(self, g_pt):
        """Return the biggest relative error between g_pt and self.gf"""
        assert len(g_pt) == len(self.gf)
        errs = []
        for a, b in zip(g_pt, self.gf):
            errs.append(numpy.max(numeric_grad.abs_rel_err(a,b)))
        return numpy.max(errs)
def verify_grad(testcase, op, pt, n_tests=1, rng=numpy.random, eps=1.0e-7, tol=0.0001,
        linker='c&py'):
    """Check that the symbolic gradient of `op` matches a finite-difference estimate.

    testcase.failUnless(analytic gradient matches finite-diff gradient)

    :param testcase: a unittest.TestCase (kept for signature compatibility;
        failures are reported by raising, not through testcase methods)
    :param op: an Op instance or a callable building the expression to test
    :param pt: list of numpy-convertible values at which to evaluate the gradient
    :param n_tests: number of random projections to test
    :param rng: random number generator used to draw the projection
    :param eps: finite-difference step size forwarded to numeric_grad
    :param tol: maximum allowed relative error between analytic and numeric gradients
    :param linker: linker name forwarded to compile.Mode

    :raises Exception: with message `verify_grad.E_grad` and (max_err, tol)
        when the relative error exceeds `tol`.
    """
    pt = [numpy.array(p) for p in pt]

    def function(inputs, output):
        # compile without optimizations so the graph under test is unchanged;
        # accept_inplace because ops under test may work in place
        return compile.function(inputs, output,
                mode=compile.Mode(optimizer = None, linker = linker),
                accept_inplace=True)

    for test_num in xrange(n_tests):
        tensor_pt = [value(p.copy(), name='input %i'%i) for i,p in enumerate(pt)]
        # op can be either a function or an actual Op instance
        o_output = op(*tensor_pt)
        # BUGFIX: the original wrote `isinstance(o_output, list) > 1`, which
        # compares a bool with 1 and is never true, so the multiple-output
        # guard never fired.  Detect it properly and unwrap a 1-element list.
        if isinstance(o_output, (list, tuple)):
            if len(o_output) > 1:
                # we could make loop over outputs making random projections R for each,
                # but this doesn't handle the case where not all the outputs are
                # differentiable... so I leave this as TODO for now -JB.
                raise NotImplementedError('cant (yet) autotest gradient of op with multiple outputs')
            o_output = o_output[0]

        o_fn = function(tensor_pt, o_output)
        o_fn_out = o_fn(*[p.copy() for p in pt])

        # random projection of o onto t_r: turns a (possibly tensor-valued)
        # output into a scalar cost whose gradient can be estimated numerically
        random_projection = rng.rand(*o_fn_out.shape)
        t_r = as_tensor(random_projection)
        cost = sum(t_r * o_output)
        cost_fn = function(tensor_pt, cost)

        num_grad = numeric_grad(cost_fn, [p.copy() for p in pt], eps)

        symbolic_grad = grad(cost, tensor_pt, as_tensor(1.0, name='g_cost'))
        grad_fn = function(tensor_pt, symbolic_grad)
        analytic_grad = grad_fn(*pt)
        if not isinstance(analytic_grad, (list, tuple)):
            analytic_grad = [analytic_grad]

        max_err = num_grad.max_err(analytic_grad)
        if max_err > tol:
            raise Exception(verify_grad.E_grad, (max_err, tol))

verify_grad.E_grad = 'gradient error exceeded tolerance'
"""This error is raised when a gradient is calculated, but incorrect."""
...@@ -8,6 +8,7 @@ import numpy as N ...@@ -8,6 +8,7 @@ import numpy as N
import operator import operator
import itertools import itertools
import sys import sys
import compile #to register the optimizer built by this file
# Utilities # Utilities
...@@ -32,9 +33,10 @@ gemm_pattern_1 = gof.PatternSub((T._sub_inplace, ...@@ -32,9 +33,10 @@ gemm_pattern_1 = gof.PatternSub((T._sub_inplace,
# gemm: (d,a,b,c,s) -> d = d*s + a*dot(b,c) # gemm: (d,a,b,c,s) -> d = d*s + a*dot(b,c)
# Transforms dot(a, b) into gemm(zeros(2)(hstack(shape(a)[:1], shape(b)[1:])), 1.0, a, b, 1.0) # Transforms dot(a, b) into gemm(zeros(2)(hstack(shape(a)[:1], shape(b)[1:])), 1.0, a, b, 1.0)
# The construction of the 'gemm' node may fail if, for example, a and b are not both matrices.
dot_to_gemm = gof.PatternSub((T.dot, 'a', 'b'), dot_to_gemm = gof.PatternSub((T.dot, 'a', 'b'),
(T.gemm, (T.Zeros(2), (T.gemm, (T.Zeros(2),
(T.vertical_stack, (T.stack,
(T.Subtensor([slice(0, 1)]), (T.shape, 'a')), (T.Subtensor([slice(0, 1)]), (T.shape, 'a')),
(T.Subtensor([slice(1, 2)]), (T.shape, 'b')))), (T.Subtensor([slice(1, 2)]), (T.shape, 'b')))),
T.constant(1.0), 'a', 'b', T.constant(1.0)), T.constant(1.0), 'a', 'b', T.constant(1.0)),
...@@ -231,7 +233,15 @@ def local_subtensor_make_vector(node): ...@@ -231,7 +233,15 @@ def local_subtensor_make_vector(node):
If the index or slice is constant. If the index or slice is constant.
""" """
if not opt.check_chain(node, T.Subtensor, T.MakeVector): if not opt.check_chain(node, T.Subtensor, T.Join):
return False
joined_r = node.inputs[0]
try:
#check that join is being used to join scalars
veclen = T.join.vec_length(joined_r)
except:
return False return False
idxlist = node.op.idx_list idxlist = node.op.idx_list
...@@ -644,6 +654,16 @@ def _math_optimizer(): ...@@ -644,6 +654,16 @@ def _math_optimizer():
math_optimizer = _math_optimizer() math_optimizer = _math_optimizer()
compile.register_optimizer('math',
gof.MergeOptMerge(
gof.PureThenInplaceOptimizer(
math_optimizer,
inplace_optimizer)))
compile.register_mode('SANITY_CHECK', compile.Mode('c&py', 'math'))
compile.register_mode('FAST_RUN', compile.Mode('c|py', 'math'))
compile.register_mode('EXPENSIVE_OPTIMIZATIONS', compile.Mode('c|py', 'math'))
# @gof.local_optimizer # @gof.local_optimizer
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论