merged

e2202c3f · James Bergstra · 8dd464d2 · 357efa53 · e2202c3f · e2202c3f
--- a/_test_tensor.py
+++ b/_test_tensor.py
@@ -533,64 +533,6 @@ DotTester = make_tester(name = 'DotTester',
 #      rationale: it's tricky, and necessary every time you want to verify
 #      a gradient numerically
-def verify_grad(testcase, op, pt, n_tests=1, rng=numpy.random, eps=0.0000001, tol=0.0001,
-        linker='c&py'):
-    """testcase.failUnless(analytic gradient matches finite-diff gradient)"""
-    pt = [numpy.asarray(p) for p in pt]
-    for test_num in xrange(n_tests):
-#        tensor_pt = [as_tensor(p,name='input %i'%i) for i,p in enumerate(pt)]
-        tensor_pt = [constant(p).type('input %i'%i) for i,p in enumerate(pt)]
-        #o = op.make_node(*[tpt.copy() for tpt in tensor_pt])
-        o = safe_make_node(op, *[tpt.copy() for tpt in tensor_pt])
-        if hasattr(o, 'outputs'):
-            o_outputs = o.outputs
-        else:
-            o_outputs = o
-        if len(o_outputs) > 1:
-            raise NotImplementedError('cant (yet) autotest gradient of op with multiple outputs')
-            # we could make loop over outputs making random projections R for each,
-            # but this doesn't handle the case where not all the outputs are
-            # differentiable... so I leave this as TODO for now -JB.
-        o_fn = function(tensor_pt, o_outputs[0], mode=compile.Mode(optimizer = None, linker = linker))
-        o_fn_out = o_fn(*pt)
-        random_projection = rng.rand(*o_fn_out.shape)
-        t_r = as_tensor(random_projection)
-        #random projection of o onto t_r
-        cost = sum(t_r * o_outputs[0])
-        cost_fn = function(tensor_pt, cost, mode=compile.Mode(optimizer = None, linker = linker))
-        num_grad = gradient.numeric_grad(cost_fn, pt)
-        symbolic_grad = grad(cost, tensor_pt,as_tensor(1.0,name='g_cost'))
-        if 0:
-            print '-------'
-            print '----------'
-            for op in gof.graph.io_toposort(tensor_pt, symbolic_grad):
-                print op
-        grad_fn = function(tensor_pt, symbolic_grad, mode=compile.Mode(optimizer = None, linker = linker))
-        analytic_grad = grad_fn(*pt)
-        if not isinstance(analytic_grad, (list, tuple)):
-            analytic_grad = [analytic_grad]
-#         if num_grad.max_err(analytic_grad) > 1.0e-4:
-#             print "aaaaaaaaaa"
-#             print gof.Env(tensor_pt, [cost])
-#             print gof.Env(tensor_pt, symbolic_grad)
-#             print analytic_grad
-#             print num_grad.gf
-#             print num_grad.max_err(analytic_grad)
-#             print "bbbbbbbbbb"
-        if num_grad.max_err(analytic_grad) > 1.0e-4:
-            raise Exception(verify_grad.E_grad)
-verify_grad.E_grad = 'gradient error exceeded tolerance'
 #useful mostly for unit tests
@@ -945,29 +887,101 @@ class T_subtensor(unittest.TestCase):
-class T_Stack(unittest.TestCase):
+class T_Join_and_Split(unittest.TestCase):
-    def test_hstack(self):
+    """
-        a = as_tensor(numpy.array([[1, 2, 3], [4, 5, 6]]))
+    Split is tested by each verify_grad method.
-        b = as_tensor(numpy.array([[7], [8]]))
+    """
-        s = horizontal_stack(a, b)
-        c = numpy.array([[1, 2, 3, 7], [4, 5, 6, 8]])
+    class Join1(Op):
-        self.failUnless((eval_outputs([s]) == c).all())
+        def make_node(self, *inputs):
-    def test_vstack(self):
+            inputs = [as_tensor(t) for t in inputs]
-        a = as_tensor(numpy.array([[1, 2, 3], [4, 5, 6]]))
+            outputs = [lscalar()] + [i.type() for i in inputs]
-        b = as_tensor(numpy.array([[7, 8, 9]]))
+            return Apply(self, inputs, outputs)
-        s = vertical_stack(a, b)
+        def perform(self, node, inputs, outputs):
-        c = numpy.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
+            outputs[0][0] = 1
-        self.failUnless((eval_outputs([s]) == c).all())
+            for i,o in zip(inputs, outputs[1:]):
+                o[0] = i.copy()
+        def grad(self, inputs, g_outputs):
+            return g_outputs[1:]
+    def setUp(self):
+        Join.debug = False
+    def test_join_scalar(self):
+        a = as_tensor(1)
+        b = as_tensor(2)
+        try:
+            s = join(0, a, b)
+        except:
+            return
+        self.fail()
+    def test_stack_scalar(self):
+        a = as_tensor(1)
+        b = as_tensor(2)
+        c = as_tensor(3)
+        s = stack(a, b, c)
+        want = numpy.array([1, 2, 3])
+        self.failUnless((eval_outputs([s]) == want).all())
-    def test_vstack_grad(self):
+    def test_join_vector(self):
+        a = as_tensor(numpy.array([1, 2, 3]))
+        b = as_tensor(numpy.array([7, 8, 9]))
+        s = join(0, a, b)
+        want = numpy.array([1, 2, 3, 7, 8, 9])
+        self.failUnless((eval_outputs([s]) == want).all())
+    def test_stack_vector(self):
+        a = as_tensor(numpy.array([1, 2, 3]))
+        b = as_tensor(numpy.array([7, 8, 9]))
+        s = stack(a, b)
+        want = numpy.array([[1, 2, 3],[ 7, 8, 9]])
+        self.failUnless((eval_outputs([s]) == want).all())
+    def test_join_matrix0(self):
        a = as_tensor(numpy.array([[1, 2, 3], [4, 5, 6]]))
        b = as_tensor(numpy.array([[7, 8, 9]]))
-        s = vertical_stack(a, b)
+        s = join(0, a, b)
-        ga,gb = grad(sum(vertical_stack(a,b)), [a,b])
+        want = numpy.array([[1, 2, 3],[4,5,6],[7, 8, 9]])
+        self.failUnless((eval_outputs([s]) == want).all())
+    def test_join_matrix1(self):
+        av=numpy.array([[1, 2, 3], [4, 5, 6]], dtype='float32')
+        bv= numpy.array([[7], [8]],dtype='float32')
+        a = as_tensor(av)
+        b = as_tensor(bv)
+        s = join(1, a, b)
+        want = numpy.array([[1, 2, 3, 7], [4, 5, 6, 8]], dtype='float32')
+        self.failUnless((eval_outputs([s]) == want).all())
+        verify_grad(self, lambda a, b: join(1,a,b), [av, bv], eps=1.0e-4, tol=1.0e-3)
+    def test_join_matrixV(self):
+        """variable join axis"""
+        v = numpy.array([[1., 2., 3.], [4., 5., 6.]])
+        a = as_tensor(v.copy())
+        b = as_tensor(v.copy())
+        ax = lscalar()
+        s = join(ax, a, b)
+        f = function([ax], [s])
+        want = numpy.array([[1, 2, 3], [4, 5, 6] ,[1, 2, 3], [4, 5, 6]])
+        got = f(0)
+        self.failUnless((got == want).all(), (got, want))
+        want = numpy.array([[ 1, 2, 3, 1, 2, 3], [4, 5, 6, 4, 5, 6]])
+        got = f(1)
+        self.failUnless((got == want).all(), (got, want))
+        verify_grad(self, lambda a, b: join(0,a,b), [v, 2*v])
+        verify_grad(self, lambda a, b: join(1,a,b), [v, 2*v])
-        gval = eval_outputs([ga, gb])
-        self.failUnless(numpy.all(gval[0] == 1.0))
-        self.failUnless(numpy.all(gval[1] == 1.0))
 class _test_comparison(unittest.TestCase):
@@ -1761,10 +1775,10 @@ class T_op_cache(unittest.TestCase):
        self.failUnless(numpy.all(fn_py(a) == fn_c_or_py(a)))
 if __name__ == '__main__':
-    if 1:
+    if 0:
        unittest.main()
    else:
-        testcase =  t_dot
+        testcase =  AbsInplaceTester
        suite = unittest.TestLoader()
        suite = suite.loadTestsFromTestCase(testcase)

--- a/compile.py
+++ b/compile.py
 """Convenient driver of graph construction, optimization, and linking."""
+import copy_reg
+import cPickle
+from functools import partial
 import numpy
 import gof
 import sys
 from copy import copy
-import tensor_opt
 def check_equal(x, y):
    """
@@ -57,6 +60,12 @@ predefined_linkers = {
 default_linker = 'c|py'
+def register_linker(name, linker):
+    """Add a `Linker` which can be referred to by `name` in `Mode`."""
+    if name in predefined_linkers:
+        raise ValueError('Linker name already taken: %s' % name)
+    predefined_linkers[name] = linker
 # If a string is passed as the optimizer argument in the constructor
 # for Mode, it will be used as the key to retrieve the real optimizer
@@ -64,13 +73,15 @@ default_linker = 'c|py'
 predefined_optimizers = {
    None    : lambda env: None,
    'merge' : gof.MergeOptimizer(),
-    'math'  : gof.MergeOptMerge(
-        gof.PureThenInplaceOptimizer(tensor_opt.math_optimizer,
-                                     tensor_opt.inplace_optimizer))
    }
 default_optimizer = 'merge'
+def register_optimizer(name, opt):
+    """Add an `Optimizer` which can be referred to by `name` in `Mode`."""
+    if name in predefined_optimizers:
+        raise ValueError('Optimizer name already taken: %s' % name)
+    predefined_optimizers[name] = opt
 class Mode(object):
    """
@@ -110,15 +121,14 @@ class Mode(object):
 # If a string is passed as the mode argument in function or
 # FunctionMaker, the Mode will be taken from this dictionary using the
 # string as the key
-predefined_modes = {
+predefined_modes = {'FAST_COMPILE': Mode('py', 'merge')} 
-    'SANITY_CHECK'            : Mode('c&py', 'math'),
+default_mode = 'FAST_COMPILE'
-    'FAST_COMPILE'            : Mode('py', 'merge'),
-    'FAST_RUN'                : Mode('c|py', 'math'),
-    'EXPENSIVE_OPTIMIZATIONS' : Mode('c|py', 'math'),
-    }
-default_mode = 'FAST_RUN'
+def register_mode(name, mode):
+    """Add a `Mode` which can be referred to by `name` in `function`."""
+    if name in predefined_modes:
+        raise ValueError('Mode name already taken: %s' % name)
+    predefined_modes[name] = mode
@@ -508,9 +518,6 @@ class FunctionMaker(object):
        return fn
-import copy_reg
-import cPickle
 def _pickle_FunctionMaker(fm):
    return (_constructor_FunctionMaker, (fm.inputs, fm.outputs, fm.mode, fm.accept_inplace))
@@ -527,8 +534,6 @@ copy_reg.pickle(slice, _pickle_slice)
-from functools import partial
 DUPLICATE = ['DUPLICATE'] # unique id object used as a placeholder for duplicate entries
 class Function(object):

--- a/gradient.py
+++ b/gradient.py
@@ -110,62 +110,4 @@ def grad_sources_inputs(sources, graph_inputs):
                    gmap[r] = g_r
    return gmap
-class numeric_grad:
-    def __init__(self, f, pt, eps=1.0e-7):
-        """Return the gradient of f at pt.
-        This function computes the gradient by a one-sided finite differences of a
-        fixed step size (eps).
-        It is assumed that f(...) will return a scalar.
-        It is assumed that all f's inputs are numpy.ndarray objects.
-        """
-        gf = [numpy.ndarray(x.shape) for x in pt]
-        f_pt = f(*pt)
-        if isinstance(f, (list, tuple)):
-            f_pt = [numpy.copy(x) for x in f_pt]
-        else:
-            f_pt = numpy.copy(f_pt)
-        for idx in xrange(len(gf)):
-            if len(pt[idx].shape) == 0:
-                orig = pt[idx]
-                pt[idx] = numpy.asarray(pt[idx] + eps)
-                f_eps = f(*pt)
-                gf[idx] = numpy.asarray((f_eps - f_pt)/eps)
-                pt[idx] = orig
-            elif len(pt[idx].shape) == 1:
-                for i in xrange(pt[idx].shape[0]):
-                    orig = pt[idx][i]
-                    pt[idx][i] = pt[idx][i] + eps
-                    f_eps = f(*pt)
-                    gf[idx][i] = numpy.asarray((f_eps - f_pt)/eps)
-                    pt[idx][i] = orig
-            elif len(pt[idx].shape) == 2:
-                for i in xrange(pt[idx].shape[0]):
-                    for j in xrange(pt[idx].shape[1]):
-                        orig = pt[idx][i,j]
-                        pt[idx][i,j] = pt[idx][i,j] + eps
-                        f_eps = f(*pt)
-                        gf[idx][i,j] = numpy.asarray((f_eps - f_pt)/eps)
-                        pt[idx][i,j] = orig
-            else:
-                raise NotImplementedError()
-        self.gf = gf
-    @staticmethod
-    def abs_rel_err(a,b,eps=1.0e-10):
-        """Return a small number when a and b are close, relative to how big they are"""
-        return abs(a-b) / (abs(a)+abs(b)+eps)
-    def max_err(self, g_pt):
-        """Return the biggest relative error between g_pt and self.gf"""
-        assert len(g_pt) == len(self.gf)
-        errs = []
-        for a, b in zip(g_pt, self.gf):
-            errs.append(numpy.max(numeric_grad.abs_rel_err(a,b)))
-        return max(errs)
--- a/tensor.py
+++ b/tensor.py
--- a/tensor_opt.py
+++ b/tensor_opt.py
@@ -8,6 +8,7 @@ import numpy as N
 import operator
 import itertools
 import sys
+import compile  #to register the optimizer built by this file
 # Utilities
@@ -32,9 +33,10 @@ gemm_pattern_1 = gof.PatternSub((T._sub_inplace,
 # gemm: (d,a,b,c,s) -> d = d*s + a*dot(b,c)
 # Transforms dot(a, b) into gemm(zeros(2)(stack(shape(a)[:1], shape(b)[1:])), 1.0, a, b, 1.0)
+# The construction of the 'gemm' node may fail if, for example, a and b are not both matrices.
 dot_to_gemm = gof.PatternSub((T.dot, 'a', 'b'),
                             (T.gemm, (T.Zeros(2),
-                                       (T.vertical_stack,
+                                       (T.stack,
                                        (T.Subtensor([slice(0, 1)]), (T.shape, 'a')),
                                        (T.Subtensor([slice(1, 2)]), (T.shape, 'b')))),
                              T.constant(1.0), 'a', 'b', T.constant(1.0)),
@@ -231,7 +233,15 @@ def local_subtensor_make_vector(node):
    If the index or slice is constant.
    """
-    if not opt.check_chain(node, T.Subtensor, T.MakeVector):
+    if not opt.check_chain(node, T.Subtensor, T.Join):
+        return False
+    joined_r = node.inputs[0]
+    try: 
+        #check that join is being used to join scalars
+        veclen = T.join.vec_length(joined_r)
+    except:
        return False
    idxlist = node.op.idx_list
@@ -644,6 +654,16 @@ def _math_optimizer():
 math_optimizer = _math_optimizer()
+compile.register_optimizer('math', 
+        gof.MergeOptMerge(
+            gof.PureThenInplaceOptimizer(
+                math_optimizer,
+                inplace_optimizer)))
+compile.register_mode('SANITY_CHECK', compile.Mode('c&py', 'math'))
+compile.register_mode('FAST_RUN', compile.Mode('c|py', 'math'))
+compile.register_mode('EXPENSIVE_OPTIMIZATIONS', compile.Mode('c|py', 'math'))
 # @gof.local_optimizer