Merged new ops from my sandbox

b6e2459d · Joseph Turian · 5e9fdfd3 · b6e2459d · b6e2459d · b6e2459d
--- a/_test_sparse.py
+++ b/_test_sparse.py
@@ -7,6 +7,8 @@ import gradient
 from sparse import _is_dense, _is_sparse, _is_dense_result, _is_sparse_result
 from sparse import _mtypes, _mtype_to_str
+import random
 class T_transpose(unittest.TestCase):
    def setUp(self):
        numpy.random.seed(44)
@@ -297,5 +299,59 @@ class _testCase_dot(unittest.TestCase):
                self.failUnless(origloss > loss)
+class T_RowRandomTransformation(unittest.TestCase):
+    # Randomized checks of row_random_transformation. Both RNGs are seeded
+    # in setUp, so the order of the random draws below is part of the test.
+    def setUp(self):
+        for rng in (random, numpy.random):
+            rng.seed(44)
+    def test_length(self):
+        """ Test that if length is increased, we obtain the same results
+        (except longer). """
+        for _ in range(10):
+            sparse_format = random.choice(_mtypes)
+            n_rows = random.randint(1, 20)
+            n_cols = random.randint(1, 20)
+            seed = random.randint(0, 100)
+            short_len = random.randint(1, 10)
+            long_len = short_len + random.randint(1, 10)
+            dense = numpy.random.rand(n_rows, n_cols)
+            sparse_in = assparse(sparse_format(dense))
+            short_op = row_random_transformation(sparse_in, short_len, initial_seed=seed)
+            long_op = row_random_transformation(sparse_in, long_len, initial_seed=seed)
+            short_out = compile.eval_outputs([short_op])
+            long_out = compile.eval_outputs([long_op])
+            # The first short_len columns of the longer output must match.
+            prefix = long_out[:, :short_len]
+            self.failUnless((short_out == prefix).all())
+    def test_permute(self):
+        """ Test that if the order of the rows is permuted, we obtain the same results. """
+        for _ in range(10):
+            sparse_format = random.choice(_mtypes)
+            n_rows = random.randint(2, 20)
+            n_cols = random.randint(1, 20)
+            seed = random.randint(0, 100)
+            out_len = random.randint(1, 10)
+            order = numpy.random.permutation(n_rows)
+            dense = numpy.random.rand(n_rows, n_cols)
+            shuffled = dense[order]
+            # Sanity check: row r of the shuffled matrix is row order[r]
+            # of the original.
+            for r, src in enumerate(order):
+                self.failUnless((shuffled[r] == dense[src]).all())
+            sparse_plain = assparse(sparse_format(dense))
+            sparse_shuffled = assparse(sparse_format(shuffled))
+            plain_op = row_random_transformation(sparse_plain, out_len, initial_seed=seed)
+            shuffled_op = row_random_transformation(sparse_shuffled, out_len, initial_seed=seed)
+            plain_out = compile.eval_outputs([plain_op])
+            shuffled_out = compile.eval_outputs([shuffled_op])
+            self.failUnless(plain_out.shape == shuffled_out.shape)
+            # The outputs must be related by the same row permutation.
+            for r, src in enumerate(order):
+                self.failUnless((shuffled_out[r] == plain_out[src]).all())
 if __name__ == '__main__':
    unittest.main()
--- a/_test_tensor.py
+++ b/_test_tensor.py
@@ -566,6 +566,17 @@ def check_eq2_both(self, inputs, output, args_in, arg_out):
    val = fn(*args_in)
    self.failUnless( numpy.all(val == arg_out), (val, arg_out))
+class T_Shape(unittest.TestCase):
+    # The shape op must report the dimensions of 2-d, 1-d and 3-d inputs.
+    def test_basic0(self):
+        # 2-d input
+        shape_op = shape(numpy.ones((5, 3)))
+        observed = eval_outputs([shape_op])
+        self.failUnless((observed == [5, 3]).all())
+    def test_basic1(self):
+        # 1-d input
+        shape_op = shape(numpy.ones(2))
+        observed = eval_outputs([shape_op])
+        self.failUnless((observed == [2]).all())
+    def test_basic2(self):
+        # 3-d input
+        shape_op = shape(numpy.ones((5, 3, 10)))
+        observed = eval_outputs([shape_op])
+        self.failUnless((observed == [5, 3, 10]).all())
 class T_argmax(unittest.TestCase):
    def setUp(self):
        numpy.random.seed(123784)
@@ -819,6 +830,21 @@ class T_subtensor(unittest.TestCase):
        self.failUnless(numpy.all(tval == 0))
+class T_Stack(unittest.TestCase):
+    # Check horizontal_stack / vertical_stack against hand-written results.
+    def test_hstack(self):
+        # A 2x3 block with a 2x1 column appended on the right gives 2x4.
+        left = astensor(numpy.array([[1, 2, 3], [4, 5, 6]]), broadcastable=[False,False])
+        right = astensor(numpy.array([[7], [8]]), broadcastable=[False,False])
+        stacked = horizontal_stack(left, right)
+        expected = numpy.array([[1, 2, 3, 7], [4, 5, 6, 8]])
+        observed = eval_outputs([stacked])
+        self.failUnless((observed == expected).all())
+    def test_vstack(self):
+        # A 2x3 block with a 1x3 row appended below gives 3x3.
+        top = astensor(numpy.array([[1, 2, 3], [4, 5, 6]]), broadcastable=[False,False])
+        bottom = astensor(numpy.array([[7, 8, 9]]), broadcastable=[False,False])
+        stacked = vertical_stack(top, bottom)
+        expected = numpy.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
+        observed = eval_outputs([stacked])
+        self.failUnless((observed == expected).all())
 class T_add(unittest.TestCase):
    def test_complex_all_ops(self):

--- a/_test_tensor_opt.py
+++ b/_test_tensor_opt.py
@@ -25,37 +25,37 @@ class _test_inplace_opt(unittest.TestCase):
        x, y, z = inputs()
        e = x + y + z
        g = Env([x, y], [e])
-        assert str(g) == "[Broadcast{Add}(Broadcast{Add}(x, y), z)]"
+        self.failUnless(str(g) == "[Broadcast{Add}(Broadcast{Add}(x, y), z)]")
        inplace_optimizer.optimize(g)
-        assert str(g) == "[Broadcast{Add}{0: 0}(Broadcast{Add}{0: 0}(x, y), z)]"
+        self.failUnless(str(g) == "[Broadcast{Add}{0: 0}(Broadcast{Add}{0: 0}(x, y), z)]")
    def test_multiple_uses(self):
        x, y, z = inputs()
        e0 = x + y
        e1 = x * y
        g = Env([x, y], [e0, e1])
-        assert str(g) == "[Broadcast{Add}(x, y), Broadcast{Mul}(x, y)]"
+        self.failUnless(str(g) == "[Broadcast{Add}(x, y), Broadcast{Mul}(x, y)]")
        inplace_optimizer.optimize(g)
-        assert str(g) == "[Broadcast{Add}{0: 0}(x, y), Broadcast{Mul}(x, y)]" \
+        self.failUnless(str(g) == "[Broadcast{Add}{0: 0}(x, y), Broadcast{Mul}(x, y)]" \
-            or str(g) == "[Broadcast{Add}(x, y), Broadcast{Mul}{0: 0}(x, y)]"
+            or str(g) == "[Broadcast{Add}(x, y), Broadcast{Mul}{0: 0}(x, y)]")
    def test_user_inplace(self):
        x, y, z = inputs()
        e0 = x + y
        e1 = tensor.mul_inplace(x, y)
        g = Env([x, y], [e0, e1])
-        assert str(g) == "[Broadcast{Add}(x, y), Broadcast{Mul}{0: 0}(x, y)]"
+        self.failUnless(str(g) == "[Broadcast{Add}(x, y), Broadcast{Mul}{0: 0}(x, y)]")
        inplace_optimizer.optimize(g)
-        assert str(g) == "[Broadcast{Add}(x, y), Broadcast{Mul}{0: 0}(x, y)]"
+        self.failUnless(str(g) == "[Broadcast{Add}(x, y), Broadcast{Mul}{0: 0}(x, y)]")
    def test_inplace_on_second_argument(self):
        x, y, z = inputs()
        e0 = x + y
        e1 = tensor.mul_inplace(x, z)
        g = Env([x, y], [e0, e1])
-        assert str(g) == "[Broadcast{Add}(x, y), Broadcast{Mul}{0: 0}(x, z)]"
+        self.failUnless(str(g) == "[Broadcast{Add}(x, y), Broadcast{Mul}{0: 0}(x, z)]")
        inplace_optimizer.optimize(g)
-        assert str(g) == "[Broadcast{Add}{0: 1}(x, y), Broadcast{Mul}{0: 0}(x, z)]"
+        self.failUnless(str(g) == "[Broadcast{Add}{0: 1}(x, y), Broadcast{Mul}{0: 0}(x, z)]")
 class _test_dimshuffle_lift(unittest.TestCase):
@@ -64,9 +64,9 @@ class _test_dimshuffle_lift(unittest.TestCase):
        x, y, z = inputs()
        e = ds(ds(x, (1, 0)), (1, 0))
        g = Env([x], [e])
-        assert str(g) == "[InplaceDimShuffle{1,0}(InplaceDimShuffle{1,0}(x))]"
+        self.failUnless(str(g) == "[InplaceDimShuffle{1,0}(InplaceDimShuffle{1,0}(x))]")
        lift_dimshuffle.optimize(g)
-        assert str(g) == "[x]"
+        self.failUnless(str(g) == "[x]")
    def test_merge2(self):
        x, y, z = inputs()
@@ -103,10 +103,10 @@ class _test_cliques(unittest.TestCase):
        e = x + y + d
        g = Env([x, y, z], [e])
        cliques = find_cliques(g)
-        assert len(cliques) == 2
+        self.failUnless(len(cliques) == 2)
        (i1, o1), (i2, o2) = cliques
-        assert str(Env(i1, o1)) == "[Broadcast{Add}(Broadcast{Add}(x, y), d)]"
+        self.failUnless(str(Env(i1, o1)) == "[Broadcast{Add}(Broadcast{Add}(x, y), d)]")
-        assert str(Env(i2, o2)) == "[Broadcast{Mul}(y, z)]"
+        self.failUnless(str(Env(i2, o2)) == "[Broadcast{Mul}(y, z)]")
 #         print g
 #         for i, o in find_cliques(g):
 #             print "-->", Env(i, [o])
@@ -116,8 +116,8 @@ class _test_cliques(unittest.TestCase):
        e = x + y + z
        g = Env([x, y, z], [e])
        lift_dimshuffle.optimize(g)
-        assert len(find_cliques(g, through_broadcast = True)) == 1
+        self.failUnless(len(find_cliques(g, through_broadcast = True)) == 1)
-        assert len(find_cliques(g, through_broadcast = False)) == 2
+        self.failUnless(len(find_cliques(g, through_broadcast = False)) == 2)
 #         print g
 #         for i, o in find_cliques(g, True):
 #             print "-->", Env(i, [o])

--- a/sparse.py
+++ b/sparse.py
@@ -320,3 +320,93 @@ def dot(x, y, grad_preserves_dense=True):
    else:
        assert y_is_sparse_result
        return transpose(Dot(y.T, x.T, grad_preserves_dense).outputs[0])
+class RowRandomTransformation(gof.op.Op):
+    """
+    Given C{x}, a (sparse) matrix with shape (exmpls, dimensions), we
+    multiply it by a deterministic random matrix of shape (dimensions,
+    length) to obtain random transformation output of shape (exmpls,
+    length).
+    Each element of the deterministic random matrix is selected uniformly
+    from [-1, +1).
+    @todo: Use another random distribution?
+    @note: This function should be written such that if length is
+    increased, we obtain the same results (except longer). Similarly,
+    the rows should be able to be permuted and get the same result.
+    @todo: This may be slow?
+    @todo: Rewrite for dense matrices too?
+    @todo: Is there any way to verify the convention that each row is
+    an example? Should I rename the variables in the code to make the
+    semantics more explicit?
+    @todo: AUTOTEST: This function should be written such that if length
+    is increased, we obtain the same results (except longer). Similarly,
+    the rows should be able to be permuted and get the same result. Also,
+    autotest that dense and sparse versions of this are identical.
+    @todo: Rename? Is Row the correct name? Maybe column-wise?
+    @type  x: L{scipy.sparse.spmatrix}
+    @param x: Sparse matrix to be randomly transformed with shape (exmpls, dimensions)
+    @type  length: int
+    @param length: The number of transformations of C{x} to be performed.
+    @type  initial_seed: int
+    @param initial_seed: Initial seed for the RNG.
+    @rtype: L{numpy.ndarray}
+    @return: Array with C{length} random transformations, with shape (exmpls, length)
+    """
+    import random
+    # RNG used for random transformations. It is a separate Random
+    # instance (so it does not share state with the module-level random
+    # functions), stored on the class and reseeded before every draw in
+    # impl().
+    # @todo: Make STATIC and private. Ask James or Olivier how to make this more Pythonic.
+    _trng = random.Random()
+    def __init__(self, x, length, initial_seed=0, **kwargs):
+        """
+        Build the op for input C{x}, producing C{length} output columns.
+        @todo: Which broadcastable values should I use?
+        """
+        gof.op.Op.__init__(self, **kwargs)
+        x = assparse(x)
+        self.initial_seed = initial_seed
+        self.length = length
+        self.inputs = [x]
+        self.outputs = [tensor.Tensor(x.dtype, broadcastable=[False, False])]
+#        self.outputs = [tensor.Tensor(x.dtype, broadcastable=[True, True])]
+    def impl(self, x):
+        """
+        Compute the random transformation of the sparse matrix C{x}.
+        @param x: Sparse matrix with shape (rows, cols).
+        @rtype: L{numpy.ndarray}
+        @return: Array with shape (rows, C{self.length}).
+        """
+        assert _is_sparse(x)
+        assert len(x.shape) == 2
+        (rows, cols) = x.shape
+        out = numpy.zeros((rows, self.length))
+        for l in range(self.length):
+            for i in range(x.getnnz()):
+                (r, c) = x.rowcol(i)
+                assert c < cols
+                assert r < rows
+                # Choose the random entry at (l, c)
+                rngidx = l * cols + c
+                # Set the random number state for this random entry.
+                # The seed depends only on (l, c) and initial_seed, never
+                # on the row or on self.length, which is what makes the
+                # result stable under row permutation and under
+                # increasing length.
+                # Note: This may be slow
+                self._trng.seed(rngidx + self.initial_seed)
+                # Determine the value for this entry
+                val = self._trng.uniform(-1, +1)
+                out[r][l] += val * x.getdata(i)
+        return out
+    def grad(self, (x,), (gz,)):
+        """
+        Gradient is not implemented.
+        This op has a single input, so exactly one input is unpacked here.
+        """
+        raise NotImplementedError
+    def __copy__(self):
+        return self.__class__(self.inputs[0], self.length, self.initial_seed)
+    def clone_with_new_inputs(self, *new_inputs):
+        return self.__class__(new_inputs[0], self.length, self.initial_seed)
+    def desc(self, *new_inputs):
+        return (self.__class__, self.length, self.initial_seed)
+row_random_transformation = gof.op.constructor(RowRandomTransformation)
--- a/tensor.py
+++ b/tensor.py
@@ -460,6 +460,22 @@ def broadcast(scalar_opclass, name, module_name = None, inplace_versions = True)
 def _broadcast(scalar_opclass, name, inplace_versions = True):
    return broadcast(scalar_opclass, name, 'tensor', inplace_versions)
+class Shape(Op):
+    """
+    L{Op} to return the shape of its input as a one-dimensional array.
+    @note: Non-differentiable.
+    """
+    def __init__(self, x, **kwargs):
+        Op.__init__(self, **kwargs)
+        # The single output is declared as a non-broadcastable "int64"
+        # tensor with one dimension.
+        self.inputs = [astensor(x)]
+        self.outputs = [Tensor("int64", [False])]
+    def impl(self, x):
+        dims = x.shape
+        return numpy.asarray(dims)
+    def grad(self, (x,), (gz,)):
+        # No gradient is defined for this op.
+        raise ValueError
+shape = gof.op.constructor(Shape)
 class Argmax(Op):
    """Calculate the max and argmax over a given axis"""
@@ -623,6 +639,61 @@ class Subtensor(Op, Viewer):
 subtensor = gof.op.constructor(Subtensor)
+class VerticalStack(Op):
+    """
+    Vertically stack two L{Tensor}s.
+    Stack two L{Tensor}s along the first axis (row wise). These
+    L{Tensor}s must have the same shape along all dimensions but the
+    first.
+    @attention: Because we use vstack as the implementation, if the
+    inputs have 1-dimension, the output will have 2-dimensions.
+    """
+    def __init__(self, x, y, **kwargs):
+        """
+        Build the op stacking C{y} below C{x}.
+        Both inputs must have the same dtype; inputs whose broadcastable
+        patterns differ past the first dimension are not supported
+        (NotImplementedError).
+        """
+        Op.__init__(self, **kwargs)
+        x = astensor(x)
+        y = astensor(y)
+        assert x.dtype == y.dtype
+        if x.broadcastable[1:] != y.broadcastable[1:]:
+            raise NotImplementedError
+        self.inputs = [x, y]
+        # The stacked axis is declared non-broadcastable; the remaining
+        # axes keep the pattern of x.
+        bcastable = (False, ) + x.broadcastable[1:]
+        self.outputs = [Tensor(x.dtype, bcastable)]
+    def impl(self, x, y):
+        """
+        Stack the arrays C{x} and C{y} with numpy.vstack.
+        """
+        assert x.ndim == y.ndim
+        # Make sure every dimension (save the first) is the same
+        assert x.shape[1:] == y.shape[1:]
+        return numpy.vstack([x, y])
+    def grad(self, (x, y), (gz,)):
+        """
+        Split C{gz} back into the rows that came from C{x} and the rows
+        that came from C{y}.
+        @todo: Make VSplit (or this grad implementation) its own L{Op},
+        that way we can do more sanity-checking::
+            assert x.ndim == y.ndim
+            # Make sure every dimension (save the first) is the same
+            for i in range(x.data.ndim): assert i == 0 or x.data.shape[i] == y.shape[i]
+            etc...
+        """
+        # Only the row count of x is needed to locate the split point.
+        xs = shape(x)
+        return gz[:xs[0]], gz[xs[0]:]
+vertical_stack = gof.op.constructor(VerticalStack)
+def horizontal_stack(x, y, **kwargs):
+    """
+    Horizontally stack two L{Tensor}s.
+    Stack two L{Tensor}s along the second axis (column wise). These
+    L{Tensor}s must have the same shape along all dimensions but the
+    second.
+    Implemented by vertically stacking the transposes of C{x} and C{y}
+    and transposing the result back.
+    @note: Unlike VerticalStack, we assume that the L{Tensor}s have
+    two dimensions.
+    """
+    for operand in (x, y):
+        assert operand.ndim == 2
+    stacked_transposed = vertical_stack(x.T, y.T, **kwargs)
+    return transpose(stacked_transposed)
 #########################
 # Linalg : Dot
 #########################