Commit eec75e98 authored by Olivier Breuleux

merge

......@@ -7,6 +7,8 @@ import gradient
from sparse import _is_dense, _is_sparse, _is_dense_result, _is_sparse_result
from sparse import _mtypes, _mtype_to_str
import random
class T_transpose(unittest.TestCase):
def setUp(self):
numpy.random.seed(44)
......
......@@ -572,6 +572,17 @@ def check_eq2_both(self, inputs, output, args_in, arg_out):
val = fn(*args_in)
self.failUnless( numpy.all(val == arg_out), (val, arg_out))
class T_Shape(unittest.TestCase):
    """Tests for the shape op: its output should equal numpy's ndarray.shape."""

    def test_basic0(self):
        # 2-d input
        s = shape(numpy.ones((5, 3)))
        self.failUnless((eval_outputs([s]) == [5, 3]).all())

    def test_basic1(self):
        # 1-d input
        s = shape(numpy.ones((2)))
        self.failUnless((eval_outputs([s]) == [2]).all())

    def test_basic2(self):
        # 3-d input
        s = shape(numpy.ones((5, 3, 10)))
        self.failUnless((eval_outputs([s]) == [5, 3, 10]).all())
class T_argmax(unittest.TestCase):
def setUp(self):
numpy.random.seed(123784)
......@@ -680,149 +691,197 @@ class T_transpose(unittest.TestCase):
verify_grad(self, transpose_inplace, [numpy.random.rand(2, 3)])
verify_grad(self, transpose_inplace, [numpy.ones(3)])
# class T_subtensor(unittest.TestCase):
# def test0_err_invalid(self):
# #it is impossible to retrieve a view of a 0-d tensor
# n = astensor(numpy.ones(()))
# try:
# t = n[0]
# except ValueError, e:
# self.failUnless(e[0] is Subtensor.e_invalid)
# return
# self.fail()
# def test1_err_bounds(self):
# n = astensor(numpy.ones(3))
# t = n[7]
# self.failUnless(t.owner.__class__ is Subtensor)
# try:
# tval = eval_outputs([t])
# except Exception, e:
# if e[0] != 'index out of bounds':
# raise
# return
# self.fail()
# def test1_ok_range_finite(self):
# n = astensor(numpy.ones(3)*5)
# t = n[0:2]
# self.failUnless(t.owner.__class__ is Subtensor)
# tval = eval_outputs([t])
# self.failUnless(tval.shape == (2,))
# self.failUnless(tval[1] == 5.0)
# def test2_ok_range_finite(self):
# n = astensor(numpy.ones((3,4))*5)
# t = n[0:2,3]
# self.failUnless(t.owner.__class__ is Subtensor)
# tval = eval_outputs([t])
# self.failUnless(tval.shape == (2,))
# self.failUnless(tval[1] == 5.0)
# def test1_err_invalid(self):
# n = astensor(numpy.ones(1))
# try:
# t = n[0,0]
# except ValueError, e:
# self.failUnless(e[0] is Subtensor.e_invalid)
# return
# self.fail()
# def test1_ok_elem(self):
# n = astensor(numpy.ones(1)*5)
# t = n[0]
# self.failUnless(t.owner.__class__ is Subtensor)
# tval = eval_outputs([t])
# self.failUnless(tval.shape == ())
# self.failUnless(tval == 5.0)
# def test1_ok_range_infinite(self):
# n = astensor(numpy.ones(3)*5)
# t = n[1:]
# self.failUnless(t.owner.__class__ is Subtensor)
# tval = eval_outputs([t])
# self.failUnless(tval.shape == (2,))
# self.failUnless(tval[1] == 5.0)
# def test1_ok_strided(self):
# n = astensor(numpy.ones(5)*5)
# t = n[1::2]
# self.failUnless(t.owner.__class__ is Subtensor)
# tval = eval_outputs([t])
# self.failUnless(tval.shape == (2,))
# self.failUnless(tval[1] == 5.0)
# tval = eval_outputs([n[0:-1:2]]) #0 to 1 from the end stepping by 2
# self.failUnless(tval.shape == (2,))
# self.failUnless(tval[1] == 5.0)
# def test2_err_bounds0(self):
# n = astensor(numpy.ones((2,3))*5)
# t = n[0,4]
# self.failUnless(t.owner.__class__ is Subtensor)
# try:
# tval = eval_outputs([t])
# except IndexError, e:
# return
# self.fail()
# def test2_err_bounds1(self):
# n = astensor(numpy.ones((2,3))*5)
# t = n[4:5,2]
# self.failUnless(t.owner.__class__ is Subtensor)
# try:
# tval = eval_outputs([t])
# except Exception, e:
# if e[0] != 'index out of bounds':
# raise
# def test2_ok_elem(self):
# n = astensor(numpy.asarray(range(6)).reshape((2,3)))
# t = n[0,2]
# self.failUnless(t.owner.__class__ is Subtensor)
# tval = eval_outputs([t])
# self.failUnless(tval.shape == ())
# self.failUnless(numpy.all(tval == 2))
# def test2_ok_row(self):
# n = astensor(numpy.asarray(range(6)).reshape((2,3)))
# t = n[1]
# self.failIf(any(n.broadcastable))
# self.failUnless(t.owner.__class__ is Subtensor)
# tval = eval_outputs([t])
# self.failUnless(tval.shape == (3,))
# self.failUnless(numpy.all(tval == [3,4,5]))
# def test2_ok_col(self):
# n = astensor(numpy.ones((2,3))*5)
# t = n[:,0]
# self.failUnless(t.owner.__class__ is Subtensor)
# self.failIf(any(n.broadcastable))
# tval = eval_outputs([t])
# self.failUnless(tval.shape == (2,))
# self.failUnless(numpy.all(tval == 5.0))
# def test2_ok_rows_finite(self):
# n = astensor(numpy.ones((4,3))*5)
# t = n[1:3,0]
# self.failUnless(t.owner.__class__ is Subtensor)
# tval = eval_outputs([t])
# self.failUnless(tval.shape == (2,))
# self.failUnless(numpy.all(tval == 5.0))
# def test2_ok_cols_infinite(self):
# n = astensor(numpy.asarray(range(12)).reshape((4,3)))
# t = n[1,2:]
# self.failUnless(t.owner.__class__ is Subtensor)
# tval = eval_outputs([t])
# self.failUnless(tval.shape == (1,))
# self.failUnless(numpy.all(tval == 5))
# def test2_ok_strided(self):
# n = astensor(numpy.asarray(range(20)).reshape((4,5)))
# t = n[1:4:2,1:5:2]
# self.failUnless(t.owner.__class__ is Subtensor)
# tval = eval_outputs([t])
# self.failUnless(tval.shape == (2,2))
# self.failUnless(numpy.all(tval == [[6, 8],[16, 18]]))
# def test3_ok_mat(self):
# n = astensor(numpy.asarray(range(24)).reshape((2,3,4)))
# t = n[0,0,0]
# self.failUnless(t.owner.__class__ is Subtensor)
# tval = eval_outputs([t])
# self.failUnless(tval.shape == ())
# self.failUnless(numpy.all(tval == 0))
class T_subtensor(unittest.TestCase):
    """Tests for the Subtensor op: indexing/slicing of tensors, error cases,
    and gradients through indexing.

    NOTE(review): uses the portable ``except X as e`` / ``e.args[0]`` forms
    instead of the Python-2-only ``except X, e`` / ``e[0]``.
    """

    def setUp(self):
        Subtensor.debug = False
        numpy.random.seed(12353123)

    def test0_err_invalid(self):
        # it is impossible to retrieve a view of a 0-d tensor
        n = astensor(numpy.ones(()))
        try:
            t = n[0]
        except ValueError as e:
            self.failUnless(e.args[0] is Subtensor.e_invalid)
            return
        self.fail()

    def test1_err_bounds(self):
        n = astensor(numpy.ones(3))
        t = n[7]
        self.failUnless(t.owner.__class__ is Subtensor)
        try:
            tval = eval_outputs([t])
        except Exception as e:
            if e.args[0] != 'index out of bounds':
                raise
            return
        self.fail()

    def test1_ok_range_finite(self):
        n = astensor(numpy.ones(3) * 5)
        t = n[0:2]
        self.failUnless(t.owner.__class__ is Subtensor)
        tval = eval_outputs([t])
        self.failUnless(tval.shape == (2,))
        self.failUnless(tval[1] == 5.0)

    def test2_ok_range_finite(self):
        n = astensor(numpy.ones((3, 4)) * 5)
        t = n[0:2, 3]
        self.failUnless(t.owner.__class__ is Subtensor)
        tval = eval_outputs([t])
        self.failUnless(tval.shape == (2,))
        self.failUnless(tval[1] == 5.0)

    def test1_err_invalid(self):
        # too many indices for a 1-d tensor
        n = astensor(numpy.ones(1))
        try:
            t = n[0, 0]
        except ValueError as e:
            self.failUnless(e.args[0] is Subtensor.e_invalid)
            return
        self.fail()

    def test1_ok_elem(self):
        n = astensor(numpy.ones(1) * 5)
        t = n[0]
        self.failUnless(t.owner.__class__ is Subtensor)
        tval = eval_outputs([t])
        self.failUnless(tval.shape == ())
        self.failUnless(tval == 5.0)

    def test1_ok_range_infinite(self):
        #Subtensor.debug = True
        n = astensor(numpy.ones(3) * 5)
        t = n[1:]
        self.failUnless(t.owner.__class__ is Subtensor)
        tval = eval_outputs([t])
        self.failUnless(tval.shape == (2,))
        self.failUnless(tval[1] == 5.0)

    def test1_ok_strided(self):
        n = astensor(numpy.ones(5) * 5)
        t = n[1::2]
        self.failUnless(t.owner.__class__ is Subtensor)
        tval = eval_outputs([t])
        self.failUnless(tval.shape == (2,))
        self.failUnless(tval[1] == 5.0)
        tval = eval_outputs([n[0:-1:2]])  # 0 to 1 from the end stepping by 2
        self.failUnless(tval.shape == (2,))
        self.failUnless(tval[1] == 5.0)

    def test2_err_bounds0(self):
        n = astensor(numpy.ones((2, 3)) * 5)
        t = n[0, 4]
        self.failUnless(t.owner.__class__ is Subtensor)
        try:
            tval = eval_outputs([t])
        except IndexError as e:
            return
        self.fail()

    def test2_err_bounds1(self):
        n = astensor(numpy.ones((2, 3)) * 5)
        t = n[4:5, 2]
        self.failUnless(t.owner.__class__ is Subtensor)
        try:
            tval = eval_outputs([t])
        except Exception as e:
            if e.args[0] != 'index out of bounds':
                raise
        # NOTE(review): unlike the other err tests, this one does not call
        # self.fail() when no exception is raised — confirm this is intended.

    def test2_ok_elem(self):
        n = astensor(numpy.asarray(range(6)).reshape((2, 3)))
        t = n[0, 2]
        self.failUnless(t.owner.__class__ is Subtensor)
        tval = eval_outputs([t])
        self.failUnless(tval.shape == ())
        self.failUnless(numpy.all(tval == 2))

    def test2_ok_row(self):
        n = astensor(numpy.asarray(range(6)).reshape((2, 3)))
        t = n[1]
        self.failIf(any(n.broadcastable))
        self.failUnless(t.owner.__class__ is Subtensor)
        tval = eval_outputs([t])
        self.failUnless(tval.shape == (3,))
        self.failUnless(numpy.all(tval == [3, 4, 5]))

    def test2_ok_col(self):
        n = astensor(numpy.ones((2, 3)) * 5)
        t = n[:, 0]
        self.failUnless(t.owner.__class__ is Subtensor)
        self.failIf(any(n.broadcastable))
        tval = eval_outputs([t])
        self.failUnless(tval.shape == (2,))
        self.failUnless(numpy.all(tval == 5.0))

    def test2_ok_rows_finite(self):
        n = astensor(numpy.ones((4, 3)) * 5)
        t = n[1:3, 0]
        self.failUnless(t.owner.__class__ is Subtensor)
        tval = eval_outputs([t])
        self.failUnless(tval.shape == (2,))
        self.failUnless(numpy.all(tval == 5.0))

    def test2_ok_cols_infinite(self):
        n = astensor(numpy.asarray(range(12)).reshape((4, 3)))
        t = n[1, 2:]
        self.failUnless(t.owner.__class__ is Subtensor)
        tval = eval_outputs([t])
        self.failUnless(tval.shape == (1,))
        self.failUnless(numpy.all(tval == 5))

    def test2_ok_strided(self):
        n = astensor(numpy.asarray(range(20)).reshape((4, 5)))
        t = n[1:4:2, 1:5:2]
        self.failUnless(t.owner.__class__ is Subtensor)
        tval = eval_outputs([t])
        self.failUnless(tval.shape == (2, 2))
        self.failUnless(numpy.all(tval == [[6, 8], [16, 18]]))

    def test3_ok_mat(self):
        n = astensor(numpy.asarray(range(24)).reshape((2, 3, 4)))
        t = n[0, 0, 0]
        self.failUnless(t.owner.__class__ is Subtensor)
        tval = eval_outputs([t])
        self.failUnless(tval.shape == ())
        self.failUnless(numpy.all(tval == 0))

    def test_grad_1d(self):
        n = astensor(numpy.random.rand(2, 3))
        z = scal.constant(0)
        t = n[z:, z]
        gn = gradient.grad(sum(exp(t)), n)
        gval = eval_outputs([gn])
        # NOTE(review): comparing numpy reprs is fragile — repr spacing
        # varies across numpy versions.
        s0 = 'array([ 2.05362099,  0.        ,  0.        ])'
        s1 = 'array([ 1.55009327,  0.        ,  0.        ])'
        self.failUnless(repr(gval[0, :]) == s0)
        self.failUnless(repr(gval[1, :]) == s1)

    def test_grad_0d(self):
        n = astensor(numpy.random.rand(2, 3))
        t = n[1, 0]
        gn = gradient.grad(sum(exp(t)), n)
        gval = eval_outputs([gn])
        g0 = repr(gval[0, :])
        g1 = repr(gval[1, :])
        s0 = 'array([ 0.,  0.,  0.])'
        s1 = 'array([ 1.55009327,  0.        ,  0.        ])'
        self.failUnless(g0 == s0, (g0, s0))
        self.failUnless(g1 == s1, (g1, s1))
class T_Stack(unittest.TestCase):
    """Tests for horizontal_stack / vertical_stack against numpy results."""

    def test_hstack(self):
        a = astensor(numpy.array([[1, 2, 3], [4, 5, 6]]), broadcastable=[False, False])
        b = astensor(numpy.array([[7], [8]]), broadcastable=[False, False])
        s = horizontal_stack(a, b)
        c = numpy.array([[1, 2, 3, 7], [4, 5, 6, 8]])
        self.failUnless((eval_outputs([s]) == c).all())

    def test_vstack(self):
        a = astensor(numpy.array([[1, 2, 3], [4, 5, 6]]), broadcastable=[False, False])
        b = astensor(numpy.array([[7, 8, 9]]), broadcastable=[False, False])
        s = vertical_stack(a, b)
        c = numpy.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
        self.failUnless((eval_outputs([s]) == c).all())
# class T_add(unittest.TestCase):
......@@ -964,7 +1023,6 @@ class T_transpose(unittest.TestCase):
# self.fail()
# except ValueError, e:
# self.failUnless('shape mismatch' in str(e))
# try:
# check_eq2_c(self, [a,b], Mul(a,b).out,
# [numpy.ones(3), numpy.ones(4)], 1.0)
......@@ -1284,7 +1342,34 @@ class t_gemm(unittest.TestCase):
return
self.fail()
class T_tensorfromscalar(unittest.TestCase):
    """Tests for TensorFromScalar: wrapping a scalar as a 0-d tensor."""

    def test0(self):
        # explicit conversion via tensor_from_scalar
        s = scal.constant(56)
        t = tensor_from_scalar(s)
        self.failUnless(t.owner.__class__ is TensorFromScalar)
        self.failUnless(t.broadcastable == (), t.broadcastable)
        self.failUnless(t.ndim == 0, t.ndim)
        self.failUnless(t.dtype == s.dtype)
        v = eval_outputs([t])
        self.failUnless(v == 56, v)
        self.failUnless(isinstance(v, numpy.ndarray))
        self.failUnless(v.shape == (), v.shape)

    def test1(self):
        # astensor on a scalar should insert the same conversion
        s = scal.constant(56)
        t = astensor(s)
        self.failUnless(t.owner.__class__ is TensorFromScalar)
        self.failUnless(t.broadcastable == (), t.broadcastable)
        self.failUnless(t.ndim == 0, t.ndim)
        self.failUnless(t.dtype == s.dtype)
        v = eval_outputs([t])
        self.failUnless(v == 56, v)
        self.failUnless(isinstance(v, numpy.ndarray))
        self.failUnless(v.shape == (), v.shape)
# def _tensor(data, broadcastable=None, name=None):
......@@ -1424,4 +1509,8 @@ class t_gemm(unittest.TestCase):
if __name__ == '__main__':
unittest.main()
#AddTester('test_grad').debug()
......@@ -25,37 +25,37 @@ class _test_inplace_opt(unittest.TestCase):
x, y, z = inputs()
e = x + y + z
g = Env([x, y], [e])
assert str(g) == "[Broadcast{Add}(Broadcast{Add}(x, y), z)]"
self.failUnless(str(g) == "[Broadcast{Add}(Broadcast{Add}(x, y), z)]")
inplace_optimizer.optimize(g)
assert str(g) == "[Broadcast{Add}{0: 0}(Broadcast{Add}{0: 0}(x, y), z)]"
self.failUnless(str(g) == "[Broadcast{Add}{0: 0}(Broadcast{Add}{0: 0}(x, y), z)]")
def test_multiple_uses(self):
    # when a result feeds two ops, only one of them may become inplace
    x, y, z = inputs()
    e0 = x + y
    e1 = x * y
    g = Env([x, y], [e0, e1])
    self.failUnless(str(g) == "[Broadcast{Add}(x, y), Broadcast{Mul}(x, y)]")
    inplace_optimizer.optimize(g)
    # either the Add or the Mul may have been made inplace, not both
    self.failUnless(str(g) == "[Broadcast{Add}{0: 0}(x, y), Broadcast{Mul}(x, y)]" \
                    or str(g) == "[Broadcast{Add}(x, y), Broadcast{Mul}{0: 0}(x, y)]")
def test_user_inplace(self):
    # a user-requested inplace op must be preserved by the optimizer
    x, y, z = inputs()
    e0 = x + y
    e1 = tensor.mul_inplace(x, y)
    g = Env([x, y], [e0, e1])
    self.failUnless(str(g) == "[Broadcast{Add}(x, y), Broadcast{Mul}{0: 0}(x, y)]")
    inplace_optimizer.optimize(g)
    self.failUnless(str(g) == "[Broadcast{Add}(x, y), Broadcast{Mul}{0: 0}(x, y)]")
def test_inplace_on_second_argument(self):
    # x is destroyed by mul_inplace, so Add can only overwrite y ({0: 1})
    x, y, z = inputs()
    e0 = x + y
    e1 = tensor.mul_inplace(x, z)
    g = Env([x, y], [e0, e1])
    self.failUnless(str(g) == "[Broadcast{Add}(x, y), Broadcast{Mul}{0: 0}(x, z)]")
    inplace_optimizer.optimize(g)
    self.failUnless(str(g) == "[Broadcast{Add}{0: 1}(x, y), Broadcast{Mul}{0: 0}(x, z)]")
class _test_dimshuffle_lift(unittest.TestCase):
......@@ -64,23 +64,23 @@ class _test_dimshuffle_lift(unittest.TestCase):
x, y, z = inputs()
e = ds(ds(x, (1, 0)), (1, 0))
g = Env([x], [e])
assert str(g) == "[DimShuffle{10}(DimShuffle{10}(x))]"
self.failUnless(str(g) == "[InplaceDimShuffle{1,0}(InplaceDimShuffle{1,0}(x))]")
lift_dimshuffle.optimize(g)
assert str(g) == "[x]"
self.failUnless(str(g) == "[x]")
def test_merge2(self):
    # two composable dimshuffles collapse into one
    x, y, z = inputs()
    e = ds(ds(x, (1, 'x', 0)), (2, 0, 'x', 1))
    g = Env([x], [e])
    self.failUnless(str(g) == "[InplaceDimShuffle{2,0,x,1}(InplaceDimShuffle{1,x,0}(x))]", str(g))
    lift_dimshuffle.optimize(g)
    self.failUnless(str(g) == "[InplaceDimShuffle{0,1,x,x}(x)]", str(g))
def test_elim3(self):
    # a chain of three dimshuffles that composes to the identity is removed
    x, y, z = inputs()
    e = ds(ds(ds(x, (0, 'x', 1)), (2, 0, 'x', 1)), (1, 0))
    g = Env([x], [e])
    self.failUnless(str(g) == "[InplaceDimShuffle{1,0}(InplaceDimShuffle{2,0,x,1}(InplaceDimShuffle{0,x,1}(x)))]", str(g))
    lift_dimshuffle.optimize(g)
    self.failUnless(str(g) == "[x]", str(g))
......@@ -88,9 +88,9 @@ class _test_dimshuffle_lift(unittest.TestCase):
x, y, z = inputs([0]*1, [0]*2, [0]*3)
e = x + y + z
g = Env([x, y, z], [e])
self.failUnless(str(g) == "[Broadcast{Add}(DimShuffle{x01}(Broadcast{Add}(DimShuffle{x0}(x), y)), z)]", str(g))
self.failUnless(str(g) == "[Broadcast{Add}(InplaceDimShuffle{x,0,1}(Broadcast{Add}(InplaceDimShuffle{x,0}(x), y)), z)]", str(g))
lift_dimshuffle.optimize(g)
self.failUnless(str(g) == "[Broadcast{Add}(Broadcast{Add}(DimShuffle{xx0}(x), DimShuffle{x01}(y)), z)]", str(g))
self.failUnless(str(g) == "[Broadcast{Add}(Broadcast{Add}(InplaceDimShuffle{x,x,0}(x), InplaceDimShuffle{x,0,1}(y)), z)]", str(g))
class _test_cliques(unittest.TestCase):
......@@ -103,10 +103,10 @@ class _test_cliques(unittest.TestCase):
e = x + y + d
g = Env([x, y, z], [e])
cliques = find_cliques(g)
assert len(cliques) == 2
self.failUnless(len(cliques) == 2)
(i1, o1), (i2, o2) = cliques
assert str(Env(i1, o1)) == "[Broadcast{Add}(Broadcast{Add}(x, y), d)]"
assert str(Env(i2, o2)) == "[Broadcast{Mul}(y, z)]"
self.failUnless(str(Env(i1, o1)) == "[Broadcast{Add}(Broadcast{Add}(x, y), d)]")
self.failUnless(str(Env(i2, o2)) == "[Broadcast{Mul}(y, z)]")
# print g
# for i, o in find_cliques(g):
# print "-->", Env(i, [o])
......@@ -116,8 +116,8 @@ class _test_cliques(unittest.TestCase):
e = x + y + z
g = Env([x, y, z], [e])
lift_dimshuffle.optimize(g)
assert len(find_cliques(g, through_broadcast = True)) == 1
assert len(find_cliques(g, through_broadcast = False)) == 2
self.failUnless(len(find_cliques(g, through_broadcast = True)) == 1)
self.failUnless(len(find_cliques(g, through_broadcast = False)) == 2)
# print g
# for i, o in find_cliques(g, True):
# print "-->", Env(i, [o])
......
......@@ -9,6 +9,9 @@ import gof
from gof.python25 import all
# tensor depends on elemwise to provide definitions for several ops
# but elemwise needs to make Tensor instances, so we have these as
# placeholders and the tensor module fills them
def as_tensor(data):
    # Placeholder: tensor depends on elemwise for several ops, but elemwise
    # needs to build Tensor instances, so the tensor module replaces this
    # function at import time.  Calling the placeholder is always an error.
    raise Exception("Circular dependencies prevent using this here. import tensor before elemwise")
......@@ -30,11 +33,11 @@ class DimShuffle(Op):
"""
Usage: DimShuffle(new_order, inplace = True)
* new_order: a list representing the relationship between the
input's dimensions and the output's dimensions. Each
element of the list can either be an index or 'x'.
* inplace: if True, the output will be a view of the input.
If False, the output will be a copy of the input.
- new_order: a list representing the relationship between the
input's dimensions and the output's dimensions. Each
element of the list can either be an index or 'x'.
- inplace: if True, the output will be a view of the input.
If False, the output will be a copy of the input.
If j = new_order[i] is an index, the output's ith dimension
will be the input's jth dimension.
......@@ -47,6 +50,7 @@ class DimShuffle(Op):
Examples:
# t<n> represents a n-d tensor
DimShuffle(t0, ['x']) -> make a 0d (scalar) into a 1d vector
DimShuffle(t2, [0, 1]) -> identity
DimShuffle(t2, [1, 0]) -> inverts the first and second dimensions
DimShuffle(t1, ['x', 0]) -> make a row out of a 1d vector
......@@ -54,6 +58,8 @@ class DimShuffle(Op):
DimShuffle(t3, [2, 0, 1]) -> like doing t3.transpose((2, 0, 1)) in numpy
DimShuffle(t2, [0, 'x', 1]) -> like doing t3.reshape((t3.shape[0], 1, t3.shape[1])) in numpy
DimShuffle(t2, [1, 'x', 0]) -> like doing t3.T.reshape((t3.shape[0], 1, t3.shape[1])) in numpy
@todo: Default value for inplace should be False! Unsafe optimizations should be explicitly enabled.
"""
def __init__(self, input_broadcastable, new_order, inplace = True):
......@@ -113,7 +119,10 @@ class DimShuffle(Op):
return hash(self.inplace) ^ hash(self.new_order) ^ hash(self.input_broadcastable)
def __str__(self):
    """Render as e.g. ``InplaceDimShuffle{1,0}`` / ``DimShuffle{0,x,1}``.

    The Inplace prefix distinguishes view-returning instances; entries of
    new_order are comma-separated.
    """
    if self.inplace:
        return "InplaceDimShuffle{%s}" % ",".join(str(x) for x in self.new_order)
    else:
        return "DimShuffle{%s}" % ",".join(str(x) for x in self.new_order)
def perform(self, node, (input, ), (storage, )):
# drop
......
from collections import deque
import unittest
from graph import *
......@@ -7,6 +7,30 @@ from op import Op
from type import Type
from graph import Result
def inputs(result_list):
    """
    @type result_list: list of L{Result}
    @param result_list: output L{Result}s (from which to search backward through owners)
    @returns: the list of L{Result}s with no owner, in the order found by a
    left-recursive depth-first search started at the L{Result}s in result_list.
    """
    def expand(r):
        # visit a node's inputs right-to-left so the stack-based dfs
        # pops them back left-to-right
        if r.owner:
            l = list(r.owner.inputs)
            l.reverse()
            return l
    dfs_results = stack_search(deque(result_list), expand, 'dfs')
    rval = [r for r in dfs_results if r.owner is None]
    return rval
# Toggle for quickly disabling the TestCases below: flip the condition to 0
# and `testcase` becomes a plain object, so unittest will not collect them.
if 1:
    testcase = unittest.TestCase
else:
    testcase = object

# always a real TestCase, for suites that must never be disabled
realtestcase = unittest.TestCase
class MyType(Type):
......@@ -18,10 +42,10 @@ class MyType(Type):
return isinstance(other, MyType) and other.thingy == self.thingy
def __str__(self):
    # 'R' prefix makes Results recognizable in printed graphs, e.g. "R1"
    return 'R%s' % str(self.thingy)
def __repr__(self):
    # same 'R'-prefixed form as __str__, e.g. "R1"
    return 'R%s' % str(self.thingy)
def MyResult(thingy):
    """Return a fresh, ownerless Result of MyType(thingy)."""
    return Result(MyType(thingy), None, None)
......@@ -75,43 +99,44 @@ MyOp = MyOp()
# self.outputs = [MyResult(sum([input.thingy for input in inputs]))]
class _test_inputs(testcase):
    """inputs() returns ownerless Results as an ordered list (not a set)."""

    def test_straightforward(self):
        r1, r2 = MyResult(1), MyResult(2)
        node = MyOp.make_node(r1, r2)
        assert inputs(node.outputs) == [r1, r2]

    def test_deep(self):
        r1, r2, r5 = MyResult(1), MyResult(2), MyResult(5)
        node = MyOp.make_node(r1, r2)
        node2 = MyOp.make_node(node.outputs[0], r5)
        i = inputs(node2.outputs)
        self.failUnless(i == [r1, r2, r5], i)
# def test_unreached_inputs(self):
# r1, r2, r5 = MyResult(1), MyResult(2), MyResult(5)
# node = MyOp.make_node(r1, r2)
# node2 = MyOp.make_node(node.outputs[0], r5)
# op = MyOp(r1, r2)
# op2 = MyOp(op.outputs[0], r5)
# try:
# # function doesn't raise if we put False instead of True
# ro = results_and_orphans([r1, r2, node2.outputs[0]], node.outputs, True)
# self.fail()
# ro = results_and_orphans([r1, r2, op2.outputs[0]], op.outputs, True)
# except Exception, e:
# if e[0] is results_and_orphans.E_unreached:
# return
# raise
# self.fail()
class _test_orphans(testcase):
    """orphans() returns results needed by the outputs but not reachable
    from the given inputs, as an ordered list."""

    def test_straightforward(self):
        r1, r2, r5 = MyResult(1), MyResult(2), MyResult(5)
        node = MyOp.make_node(r1, r2)
        node2 = MyOp.make_node(node.outputs[0], r5)
        orph = orphans([r1, r2], node2.outputs)
        self.failUnless(orph == [r5], orph)
class _test_as_string(unittest.TestCase):
class _test_as_string(testcase):
leaf_formatter = lambda self, leaf: str(leaf.type)
node_formatter = lambda self, node, argstrings: "%s(%s)" % (node.op,
......@@ -125,29 +150,31 @@ class _test_as_string(unittest.TestCase):
def test_straightforward(self):
    r1, r2 = MyResult(1), MyResult(2)
    node = MyOp.make_node(r1, r2)
    # leaves print with MyType's 'R' prefix
    s = self.str([r1, r2], node.outputs)
    self.failUnless(s == ["MyOp(R1, R2)"], s)
def test_deep(self):
    r1, r2, r5 = MyResult(1), MyResult(2), MyResult(5)
    node = MyOp.make_node(r1, r2)
    node2 = MyOp.make_node(node.outputs[0], r5)
    s = self.str([r1, r2, r5], node2.outputs)
    self.failUnless(s == ["MyOp(MyOp(R1, R2), R5)"], s)
def test_multiple_references(self):
    r1, r2, r5 = MyResult(1), MyResult(2), MyResult(5)
    node = MyOp.make_node(r1, r2)
    node2 = MyOp.make_node(node.outputs[0], node.outputs[0])
    # "*1 ->" tags the first occurrence of a result referenced twice
    assert self.str([r1, r2, r5], node2.outputs) == ["MyOp(*1 -> MyOp(R1, R2), *1)"]
def test_cutoff(self):
    r1, r2, r5 = MyResult(1), MyResult(2), MyResult(5)
    node = MyOp.make_node(r1, r2)
    node2 = MyOp.make_node(node.outputs[0], node.outputs[0])
    # printing stops at the given inputs: node's output prints as a leaf (R3)
    assert self.str(node.outputs, node2.outputs) == ["MyOp(R3, R3)"]
    assert self.str(node2.inputs, node2.outputs) == ["MyOp(R3, R3)"]
class _test_clone(unittest.TestCase):
class _test_clone(testcase):
leaf_formatter = lambda self, leaf: str(leaf.type)
node_formatter = lambda self, node, argstrings: "%s(%s)" % (node.op,
......@@ -162,7 +189,7 @@ class _test_clone(unittest.TestCase):
r1, r2 = MyResult(1), MyResult(2)
node = MyOp.make_node(r1, r2)
_, new = clone([r1, r2], node.outputs, False)
assert self.str([r1, r2], new) == ["MyOp(1, 2)"]
assert self.str([r1, r2], new) == ["MyOp(R1, R2)"]
def test_copy(self):
r1, r2, r5 = MyResult(1), MyResult(2), MyResult(5)
......@@ -181,14 +208,89 @@ class _test_clone(unittest.TestCase):
_, new = clone([r1, r2, r5], node.outputs, False)
new_node = new[0].owner
new_node.inputs = MyResult(7), MyResult(8)
assert self.str(inputs(new_node.outputs), new_node.outputs) == ["MyOp(R7, R8)"]
assert self.str(inputs(node.outputs), node.outputs) == ["MyOp(MyOp(R1, R2), R5)"]
def prenode(obj):
    """Return the graph predecessors of obj for toposort:
    the owner (if any) for a Result, the inputs for an Op, else None."""
    if isinstance(obj, Result):
        if obj.owner:
            return [obj.owner]
    if isinstance(obj, Op):
        return obj.inputs
class _test_toposort(testcase):
    """Tests for general_toposort and io_toposort ordering guarantees."""

    def test0(self):
        """Test a simple graph"""
        r1, r2, r5 = MyResult(1), MyResult(2), MyResult(5)
        o = MyOp(r1, r2)
        o2 = MyOp(o.outputs[0], r5)
        all = general_toposort(o2.outputs, prenode)
        self.failUnless(all == [r5, r2, r1, o, o.outputs[0], o2, o2.outputs[0]], all)
        all = io_toposort([r5], o2.outputs)
        self.failUnless(all == [o, o2], all)

    def test1(self):
        """Test a graph with double dependencies"""
        r1, r2, r5 = MyResult(1), MyResult(2), MyResult(5)
        o = MyOp(r1, r1)
        o2 = MyOp(o.outputs[0], r5)
        all = general_toposort(o2.outputs, prenode)
        self.failUnless(all == [r5, r1, o, o.outputs[0], o2, o2.outputs[0]], all)

    def test2(self):
        """Test a graph where the inputs have owners"""
        r1, r2, r5 = MyResult(1), MyResult(2), MyResult(5)
        o = MyOp(r1, r1)
        r2b = o.outputs[0]
        o2 = MyOp(r2b, r2b)
        all = io_toposort([r2b], o2.outputs)
        self.failUnless(all == [o2], all)
        o2 = MyOp(r2b, r5)
        all = io_toposort([r2b], o2.outputs)
        self.failUnless(all == [o2], all)

    def test3(self):
        """Test a graph which is not connected"""
        r1, r2, r3, r4 = MyResult(1), MyResult(2), MyResult(3), MyResult(4)
        o0 = MyOp(r1, r2)
        o1 = MyOp(r3, r4)
        all = io_toposort([r1, r2, r3, r4], o0.outputs + o1.outputs)
        self.failUnless(all == [o1, o0], all)

    def test4(self):
        """Test inputs and outputs mixed together in a chain graph"""
        r1, r2, r3, r4 = MyResult(1), MyResult(2), MyResult(3), MyResult(4)
        o0 = MyOp(r1, r2)
        o1 = MyOp(o0.outputs[0], r1)
        all = io_toposort([r1, o0.outputs[0]], [o0.outputs[0], o1.outputs[0]])
        self.failUnless(all == [o1], all)

    def test5(self):
        """Test when outputs have clients"""
        r1, r2, r3, r4 = MyResult(1), MyResult(2), MyResult(3), MyResult(4)
        o0 = MyOp(r1, r2)
        o1 = MyOp(o0.outputs[0], r4)
        all = io_toposort([], o0.outputs)
        self.failUnless(all == [o0], all)
if __name__ == '__main__':
    if 1:
        # run all tests
        unittest.main()
    elif 1:
        # load some TestCase classes and run just those
        suite = unittest.TestLoader()
        suite = suite.loadTestsFromTestCase(_test_toposort)
        unittest.TextTestRunner(verbosity=2).run(suite)
    else:
        # run just a single test
        _test_toposort('test0').debug()
from copy import copy
from collections import deque
import utils
from utils import object2
......@@ -160,8 +161,6 @@ def as_apply(x):
return node
else:
raise TypeError("Cannot map %s to Apply" % x)
@deprecated
def inputs(o):
......@@ -184,55 +183,105 @@ def inputs(o):
seek(output)
return results
def stack_search(start, expand, mode='bfs', build_inv=False):
    """Search through L{Result}s, either breadth- or depth-first.

    @type start: deque
    @param start: search from these nodes
    @type expand: function
    @param expand: when we get to a node, add expand(node) to the list of
        nodes to visit.  This function should return a list, or None.
    @param build_inv: if True, also return a dict mapping each expanded node
        to the list of nodes that produced it.
    @rtype: list of L{Result}
    @return: the list of L{Result}s in order of traversal.

    @note: a L{Result} will appear at most once in the return value, even if
        it appears multiple times in the start parameter.
    @postcondition: every element of start is transferred to the returned list.
    @postcondition: start is empty.
    """
    if mode not in ('bfs', 'dfs'):
        raise ValueError('mode should be bfs or dfs', mode)
    rval_set = set()
    rval_list = []
    # bfs consumes from the left, dfs from the right (stack behavior);
    # compare with == — `is` on string literals only works by interning luck
    start_pop = start.popleft if mode == 'bfs' else start.pop
    expand_inv = {}
    while start:
        l = start_pop()
        # dedupe by id() so unhashable nodes are supported
        if id(l) not in rval_set:
            rval_list.append(l)
            rval_set.add(id(l))
            expand_l = expand(l)
            if expand_l:
                if build_inv:
                    for r in expand_l:
                        expand_inv.setdefault(r, []).append(l)
                start.extend(expand_l)
    assert len(rval_list) == len(rval_set)
    if build_inv:
        return rval_list, expand_inv
    return rval_list
@utils.deprecated('gof.graph', 'is this function ever used?')
def inputs(result_list):
    """
    @type result_list: list of L{Result}
    @param result_list: output L{Result}s (from which to search backward through owners)
    @returns: the list of L{Result}s with no owner, in the order found by a
    left-recursive depth-first search started at the L{Result}s in result_list.
    """
    def expand(r):
        # visit a node's inputs right-to-left so the stack-based dfs
        # pops them back left-to-right
        if r.owner:
            l = list(r.owner.inputs)
            l.reverse()
            return l
    dfs_results = stack_search(deque(result_list), expand, 'dfs')
    rval = [r for r in dfs_results if r.owner is None]
    return rval
# def results_and_orphans(r_in, r_out, except_unreachable_input=False):
# r_in_set = set(r_in)
# class Dummy(object): pass
# dummy = Dummy()
# dummy.inputs = r_out
# def expand_inputs(io):
# if io in r_in_set:
# return None
# try:
# return [io.owner] if io.owner != None else None
# except AttributeError:
# return io.inputs
# ops_and_results, dfsinv = stack_search(
# deque([dummy]),
# expand_inputs, 'dfs', True)
# if except_unreachable_input:
# for r in r_in:
# if r not in dfsinv:
# raise Exception(results_and_orphans.E_unreached)
# clients = stack_search(
# deque(r_in),
# lambda io: dfsinv.get(io,None), 'dfs')
# ops_to_compute = [o for o in clients if is_op(o) and o is not dummy]
# results = []
# for o in ops_to_compute:
# results.extend(o.inputs)
# results.extend(r_out)
# op_set = set(ops_to_compute)
# assert len(ops_to_compute) == len(op_set)
# orphans = [r for r in results \
# if (r.owner not in op_set) and (r not in r_in_set)]
# return results, orphans
# results_and_orphans.E_unreached = 'there were unreachable inputs'
def results_and_orphans(i, o):
results = set()
orphans = set()
......@@ -249,7 +298,6 @@ def results_and_orphans(i, o):
for output in o:
helper(output)
return results, orphans
def ops(i, o):
......@@ -370,61 +418,70 @@ def clone_get_equiv(i, o, copy_inputs_and_orphans = False):
return d
# d = {}
# for input in i:
# if copy_inputs_and_orphans:
# d[input] = copy(input)
# else:
# d[input] = input
# def clone_helper(result):
# if result in d:
# return d[result]
# op = result.owner
# if not op: # result is an orphan
# if copy_inputs_and_orphans:
# d[result] = copy(result)
# else:
# d[result] = result
# return d[result]
# else:
# new_op = op.clone_with_new_inputs(*[clone_helper(input) for input in op.inputs])
# d[op] = new_op
# for output, new_output in zip(op.outputs, new_op.outputs):
# d[output] = new_output
# return d[result]
# for output in o:
# clone_helper(output)
# return d
def general_toposort(r_out, deps):
    """Topologically sort every node reachable from r_out through deps.

    @param r_out: sequence (tuple/list/deque) of output nodes
    @param deps: function mapping a node to the list of its dependencies
    @return: list of all reachable nodes, dependencies before dependents

    @note: deps(i) should behave like a pure function (no funny business with
    internal state)
    @note: deps(i) can/should be cached by the deps function to be fast
    """
    deps_cache = {}

    def _deps(io):
        # memoize deps(); keep a private list copy we can destructively prune
        if io not in deps_cache:
            d = deps(io)
            if d:
                deps_cache[io] = list(d)
            else:
                deps_cache[io] = d
            return d
        else:
            return deps_cache[io]

    assert isinstance(r_out, (tuple, list, deque))
    reachable, clients = stack_search(deque(r_out), _deps, 'dfs', True)
    # Kahn's algorithm: start from nodes with no remaining dependencies
    sources = deque([r for r in reachable if not deps_cache.get(r, None)])
    rset = set()
    rlist = []
    while sources:
        node = sources.popleft()
        if node not in rset:
            rlist.append(node)
            rset.add(node)
            for client in clients.get(node, []):
                deps_cache[client] = [a for a in deps_cache[client] if a is not node]
                if not deps_cache[client]:
                    sources.append(client)
    if len(rlist) != len(reachable):
        # a cycle prevented completion; raising a bare string is invalid in
        # modern Python, so raise a real exception carrying the debug info
        raise ValueError('failed to complete topological sort of given nodes',
                         reachable, rlist)
    return rlist
def io_toposort(i, o, orderings = {}):
    """
    @type i: list
    @param i: input L{Result}s
    @type o: list
    @param o: output L{Result}s
    @param orderings: {op: [requirements for op]} (defaults to {})
    @rtype: ordered list
    @return: L{Op}s that belong in the subgraph between i and o which
    respects the following constraints:
     - all inputs in i are assumed to be already computed
     - the L{Op}s that compute an L{Op}'s inputs must be computed before it
     - the orderings specified in the optional orderings parameter must be satisfied

    Note that this function does not take into account ordering information
    related to destructive operations or other special behavior.
    """
    # NOTE(review): the diff viewer had interleaved this body with its
    # docstring and left the superseded implementation dangling below it;
    # this is the reconstructed post-merge function.
    iset = set(i)
    def deps(obj):
        # Dependencies of a node: a Result depends on its owner op, an Op
        # depends on its inputs; both also depend on any extra orderings.
        # Declared inputs (iset) are treated as already computed.
        rval = []
        if obj not in iset:
            if isinstance(obj, result.Result):
                if obj.owner:
                    rval = [obj.owner]
            if isinstance(obj, op.Op):
                rval = list(obj.inputs)
            rval.extend(orderings.get(obj, []))
        else:
            # It makes no sense to impose orderings on a precomputed input.
            assert not orderings.get(obj, [])
        return rval
    topo = general_toposort(o, deps)
    return [o for o in topo if isinstance(o, op.Op)]
default_leaf_formatter = str
......@@ -459,6 +516,8 @@ def as_string(i, o,
exist for viewing convenience).
"""
i = set(i)
orph = orphans(i, o)
multi = set()
......@@ -546,4 +605,82 @@ class Graph:
if 0:
#these were the old implementations
# they were replaced out of a desire that graph search routines would not
# depend on the hash or id of any node, so that it would be deterministic
# and consistent between program executions.
@utils.deprecated('gof.graph', 'preserving only for review')
def _results_and_orphans(i, o, except_unreachable_input=False):
    """
    @type i: list
    @param i: input L{Result}s
    @type o: list
    @param o: output L{Result}s

    Returns the pair (results, orphans). The former is the set of
    L{Result}s that are involved in the subgraph that lies between i and
    o. This includes i, o, orphans(i, o) and all results of all
    intermediary steps from i to o. The second element of the returned
    pair is orphans(i, o).
    """
    results = set()
    i = set(i)
    results.update(i)
    # Root-to-output paths that terminated at an owner-less result which is
    # not one of the declared inputs (candidate orphan paths).
    incomplete_paths = []
    # Declared inputs actually encountered while walking back from o.
    reached = set()
    def helper(r, path):
        # Depth-first walk from r toward the inputs; `path` accumulates the
        # results visited from the originating output down to r.
        if r in i:
            reached.add(r)
            results.update(path)
        elif r.owner is None:
            incomplete_paths.append(path)
        else:
            op = r.owner
            for r2 in op.inputs:
                helper(r2, path + [r2])
    for output in o:
        helper(output, [output])
    orphans = set()
    # The first result on each incomplete path that is not already in the
    # subgraph is the orphan: a value needed but produced outside i..o.
    for path in incomplete_paths:
        for r in path:
            if r not in results:
                orphans.add(r)
                break
    if except_unreachable_input and len(i) != len(reached):
        # NOTE(review): E_unreached was an attribute of the old (commented
        # out) results_and_orphans; confirm it still exists on the current
        # implementation before relying on this branch.
        raise Exception(results_and_orphans.E_unreached)
    results.update(orphans)
    return results, orphans
def _io_toposort(i, o, orderings = {}):
    """Topologically sort the L{Op}s lying between inputs i and outputs o.

    @type i: list
    @param i: input L{Result}s
    @type o: list
    @param o: output L{Result}s
    @param orderings: {op: [requirements for op]} (defaults to {})
    @rtype: ordered list
    @return: L{Op}s between i and o, ordered so that every op comes after
        the ops producing its inputs and after any requirements listed for
        it in orderings.

    Note that this function does not take into account ordering information
    related to destructive operations or other special behavior.
    """
    # Work on a copy so the caller's orderings mapping is never mutated.
    prereqs = copy(orderings)
    op_list = ops(i, o)
    for node in op_list:
        # An op must wait for the ops producing its inputs, provided those
        # producers belong to the i..o subgraph themselves.
        producers = set(inp.owner for inp in node.inputs
                        if inp.owner and inp.owner in op_list)
        prereqs.setdefault(node, set()).update(producers)
    return utils.toposort(prereqs)
......@@ -35,13 +35,14 @@ class Op(object2):
# Python implementation #
#########################
def impl(self, node, inputs, output_storage):
def perform(self, node, inputs, output_storage):
"""
Calculate the function on the inputs and put the results in the
output storage.
- inputs: sequence of inputs (immutable)
- outputs: mutable list
- output_storage: list of mutable 1-element lists (do not change
the length of these lists)
The output_storage list might contain data. If an element of
output_storage is not None, it is guaranteed that it was produced
......@@ -50,36 +51,10 @@ class Op(object2):
"""
raise AbstractFunctionError()
#####################
# C code generation #
#####################
# def c_validate_update(self, inputs, outputs, sub):
# """
# Returns templated C code that checks that the inputs to this
# function can be worked on. If a failure occurs, set an
# Exception and insert "%(fail)s".
# You may use the variable names defined by c_var_names() in
# the template.
# Note: deprecated!!
# @todo: Merge this with c_code.
# """
# raise AbstractFunctionError()
# def c_validate_update_cleanup(self, inputs, outputs, sub):
# """
# Clean up things allocated by L{c_validate}().
# Note: deprecated!!
# @todo: Merge this with c_code.
# """
# raise AbstractFunctionError()
# raise AbstractFunctionError('%s.c_validate_update_cleanup ' \
# % self.__class__.__name__)
def c_code(self, node, name, inputs, outputs, sub):
"""Return the C implementation of an Op.
......@@ -151,28 +126,3 @@ class PropertiedOp(Op):
return "%s{%s}" % (self.__class__.__name__, ", ".join("%s=%s" % (k, v) for k, v in self.__dict__.items() if k != "name"))
# #TODO: consider adding a flag to the base class that toggles this behaviour
# class GuardedOp(Op):
# """An Op that disallows input properties to change after construction"""
# def set_input(self, i, new):
# old = self._inputs[i]
# if old is new:
# return
# try:
# if not old.same_properties(new):
# raise TypeError("The new input must have the same properties as the previous one.")
# except AbstractFunctionError:
# pass
# Op.set_input(self, i, new)
# def set_inputs(self, new):
# if not hasattr(self, '_inputs') or self_inputs is None:
# Op.set_inputs(self, new)
# else:
# if not len(new) == len(self._inputs):
# raise TypeError("The new inputs are not as many as the previous ones.")
# for i, new in enumerate(new):
# self.set_input(i, new)
......@@ -38,6 +38,31 @@ class scratchpad:
def deprecated(filename, msg=''):
    """Decorator which will print a warning message on the first call.

    Use it like this::

        @deprecated('myfile', 'do something different...')
        def fn_name(...):
            ...

    And it will print::

        WARNING myfile.fn_name deprecated. do something different...

    @param filename: name of the module the deprecated function lives in
    @param msg: extra guidance appended after the warning
    @return: a decorator wrapping f so that the first invocation prints the
        warning and every invocation delegates to f unchanged
    """
    def _deprecated(f):
        # One-element list used as a mutable flag: the closure can rebind
        # printme[0] without `nonlocal`, which Python 2 lacks.
        printme = [True]
        def g(*args, **kwargs):
            if printme[0]:
                # Parenthesized print is valid under both Python 2 and 3
                # (the original py2-only `print x, y` statement is not).
                print('WARNING: %s.%s deprecated. %s'
                      % (filename, f.__name__, msg))
                printme[0] = False
            return f(*args, **kwargs)
        return g
    return _deprecated
def uniq(seq):
    """Return the unique elements of seq, preserving first-occurrence order.

    @param seq: a sequence
    @rtype: list
    @return: seq's elements with duplicates (by ==) removed, keeping the
        first occurrence of each

    Hashable elements are tracked in a set, making this O(n) instead of the
    old O(n**2) `seq.index` scan (resolving the block's TODO).  If any
    element is unhashable, fall back to the original quadratic scan so
    behaviour on lists of lists, etc. is preserved.
    """
    try:
        seen = set()
        rval = []
        for x in seq:
            if x not in seen:
                seen.add(x)
                rval.append(x)
        return rval
    except TypeError:
        # Unhashable element encountered: original O(n**2) implementation.
        return [x for i, x in enumerate(seq) if seq.index(x) == i]
......@@ -55,6 +80,7 @@ def difference(seq1, seq2):
# -> use O(len(seq1) * len(seq2)) algo
return [x for x in seq1 if x not in seq2]
def partition(f, seq):
seqt = []
seqf = []
......
......@@ -368,368 +368,3 @@ def dot(x, y, grad_preserves_dense=True):
else:
assert y_is_sparse_result
return transpose(Dot(grad_preserves_dense)(y.T, x.T))
# """
# Classes for handling sparse matrices.
# To read about different sparse formats, see U{http://www-users.cs.umn.edu/~saad/software/SPARSKIT/paper.ps}.
# @todo: Automatic methods for determining best sparse format?
# """
# import copy #for __copy__
# import numpy
# from scipy import sparse
# import gof.op, gof.result
# import tensor
# """ Types of sparse matrices to use for testing """
# _mtypes = [sparse.csc_matrix, sparse.csr_matrix]
# #_mtypes = [sparse.csc_matrix, sparse.csr_matrix, sparse.dok_matrix, sparse.lil_matrix, sparse.coo_matrix]
# _mtype_to_str = {sparse.csc_matrix: "csc", sparse.csr_matrix: "csr"}
# ## Type checking
# def _is_sparse_result(x):
# """
# @rtype: boolean
# @return: True iff x is a L{SparseResult} (and not a L{tensor.Tensor})
# """
# if not isinstance(x, SparseResult) and not isinstance(x, tensor.Tensor):
# raise NotImplementedError("_is_sparse should only be called on sparse.SparseResult or tensor.Tensor, not,", x)
# return isinstance(x, SparseResult)
# def _is_dense_result(x):
# """
# @rtype: boolean
# @return: True unless x is a L{SparseResult} (and not a L{tensor.Tensor})
# """
# if not isinstance(x, SparseResult) and not isinstance(x, tensor.Tensor):
# raise NotImplementedError("_is_sparse should only be called on sparse.SparseResult or tensor.Tensor, not,", x)
# return isinstance(x, tensor.Tensor)
# def _is_sparse(x):
# """
# @rtype: boolean
# @return: True iff x is a L{scipy.sparse.spmatrix} (and not a L{numpy.ndarray})
# """
# if not isinstance(x, sparse.spmatrix) and not isinstance(x, numpy.ndarray):
# raise NotImplementedError("_is_sparse should only be called on sparse.scipy.sparse.spmatrix or numpy.ndarray, not,", x)
# return isinstance(x, sparse.spmatrix)
# def _is_dense(x):
# """
# @rtype: boolean
# @return: True unless x is a L{scipy.sparse.spmatrix} (and not a L{numpy.ndarray})
# """
# if not isinstance(x, sparse.spmatrix) and not isinstance(x, numpy.ndarray):
# raise NotImplementedError("_is_sparse should only be called on sparse.scipy.sparse.spmatrix or numpy.ndarray, not,", x)
# return isinstance(x, numpy.ndarray)
# # Wrapper type
# def assparse(sp, **kwargs):
# """
# Wrapper around SparseResult constructor.
# @param sp: A sparse matrix. assparse reads dtype and format properties
# out of this sparse matrix.
# @return: SparseResult version of sp.
# @todo Verify that sp is sufficiently sparse, and raise a warning if it is not
# """
# if isinstance(sp, SparseResult):
# rval = sp
# else:
# # @todo Verify that sp is sufficiently sparse, and raise a
# # warning if it is not
# rval = SparseResult(str(sp.dtype), sp.format, **kwargs)
# rval.data = sp
# assert _is_sparse_result(rval)
# return rval
# class SparseResult(gof.result.Result):
# """
# @type _dtype: numpy dtype string such as 'int64' or 'float64' (among others)
# @type _format: string
# @ivar _format: The sparse storage strategy.
# @note As far as I can tell, L{scipy.sparse} objects must be matrices, i.e. have dimension 2.
# """
# format_cls = {
# 'csr' : sparse.csr_matrix,
# 'csc' : sparse.csc_matrix
# }
# dtype_set = set(['int', 'int32', 'int64', 'float32', 'float64'])
# def __init__(self, dtype, format, **kwargs):
# """
# Fundamental way to create a sparse node.
# @param dtype: Type of numbers in the matrix.
# @param format: The sparse storage strategy.
# @return An empty SparseResult instance.
# """
# gof.Result.__init__(self, **kwargs)
# if dtype in SparseResult.dtype_set:
# self._dtype = dtype
# assert isinstance(format, str)
# #print format, type(format), SparseResult.format_cls.keys(), format in SparseResult.format_cls
# if format in SparseResult.format_cls:
# self._format = format
# else:
# raise NotImplementedError('unsupported format "%s" not in list' % format, SparseResult.format_cls.keys())
# def filter(self, value):
# if isinstance(value, SparseResult.format_cls[self.format])\
# and value.dtype == self.dtype:
# return value
# #print 'pass-through failed', type(value)
# sp = SparseResult.format_cls[self.format](value)
# if str(sp.dtype) != self.dtype:
# raise NotImplementedError()
# if sp.format != self.format:
# raise NotImplementedError()
# return sp
# def __copy__(self):
# if self.name is not None:
# rval = SparseResult(self._dtype, self._format, name=self.name)
# else:
# rval = SparseResult(self._dtype, self._format)
# rval.data = copy.copy(self.data)
# return rval
# dtype = property(lambda self: self._dtype)
# format = property(lambda self: self._format)
# T = property(lambda self: transpose(self), doc = "Return aliased transpose of self (read-only)")
# def __add__(left, right): return add(left, right)
# def __radd__(right, left): return add(left, right)
# #
# # Conversion
# #
# # convert a sparse matrix to an ndarray
# class DenseFromSparse(gof.op.Op):
# def __init__(self, x, **kwargs):
# gof.op.Op.__init__(self, **kwargs)
# self.inputs = [assparse(x)]
# self.outputs = [tensor.Tensor(x.dtype,[0,0])]
# def impl(self, x):
# assert _is_sparse(x)
# return numpy.asarray(x.todense())
# def grad(self, (x,), (gz,)):
# assert _is_sparse_result(x) and _is_dense_result(gz)
# return sparse_from_dense(gz, x.format),
# dense_from_sparse = gof.op.constructor(DenseFromSparse)
# class SparseFromDense(gof.op.Op):
# def __init__(self, x, format, **kwargs):
# gof.op.Op.__init__(self, **kwargs)
# if isinstance(format, gof.result.Result):
# self.inputs = [tensor.astensor(x), format]
# else:
# self.inputs = [tensor.astensor(x), gof.result.PythonResult()]
# self.inputs[1].data = format
# self.outputs = [SparseResult(x.dtype, self.inputs[1].data)]
# def impl(self, x, fmt):
# # this would actually happen anyway when we try to assign to
# # self.outputs[0].data, but that seems hackish -JB
# assert _is_dense(x)
# return SparseResult.format_cls[fmt](x)
# def grad(self, (x, fmt), (gz,)):
# assert _is_dense_result(x) and _is_sparse_result(gz)
# return dense_from_sparse(gz), None
# sparse_from_dense = gof.op.constructor(SparseFromDense)
# # Linear Algebra
# class Transpose(gof.op.Op):
# format_map = {
# 'csr' : 'csc',
# 'csc' : 'csr'}
# def __init__(self, x, **kwargs):
# gof.op.Op.__init__(self, **kwargs)
# x = assparse(x)
# self.inputs = [x]
# self.outputs = [SparseResult(x.dtype, Transpose.format_map[x.format])]
# def impl(self, x):
# assert _is_sparse(x)
# return x.transpose()
# def grad(self, (x,), (gz,)):
# assert _is_sparse_result(x) and _is_sparse_result(gz)
# return transpose(gz),
# transpose = gof.op.constructor(Transpose)
# class AddSS(gof.op.Op):
# ''' Add two sparse matrices '''
# def __init__(self, x, y, **kwargs):
# gof.op.Op.__init__(self, **kwargs)
# x, y = [assparse(x), assparse(y)]
# self.inputs = [x, y]
# if x.dtype != y.dtype:
# raise NotImplementedError()
# if x.format != y.format:
# raise NotImplementedError()
# self.outputs = [SparseResult(x.dtype, x.format)]
# def impl(self, x,y):
# assert _is_sparse(x) and _is_sparse(y)
# return x + y
# def grad(self, (x, y), (gz,)):
# assert _is_sparse_result(x) and _is_sparse_result(y)
# assert _is_sparse_result(gz)
# return gz, gz
# add_s_s = gof.op.constructor(AddSS)
# class AddSD(gof.op.Op):
# ''' Add a sparse and a dense matrix '''
# def __init__(self, x, y, **kwargs):
# gof.op.Op.__init__(self, **kwargs)
# x, y = [assparse(x), tensor.astensor(y)]
# self.inputs = [x, y]
# if x.dtype != y.dtype:
# raise NotImplementedError()
# # The magic number two here arises because L{scipy.sparse}
# # objects must be matrices (have dimension 2)
# assert len(y.broadcastable) == 2
# self.outputs = [tensor.Tensor(y.dtype, y.broadcastable)]
# def impl(self, x,y):
# assert _is_sparse(x) and _is_dense(y)
# return x + y
# def grad(self, (x, y), (gz,)):
# assert _is_sparse_result(x) and _is_dense_result(y)
# assert _is_dense_result(gz)
# return SparseFromDense(gz), gz
# add_s_d = gof.op.constructor(AddSD)
# def add(x,y):
# """
# Add two matrices, at least one of which is sparse.
# """
# if hasattr(x, 'getnnz'): x = assparse(x)
# if hasattr(y, 'getnnz'): y = assparse(y)
# x_is_sparse_result = _is_sparse_result(x)
# y_is_sparse_result = _is_sparse_result(y)
# assert x_is_sparse_result or y_is_sparse_result
# if x_is_sparse_result and y_is_sparse_result: return add_s_s(x,y)
# elif x_is_sparse_result and not y_is_sparse_result: return add_s_d(x,y)
# elif y_is_sparse_result and not x_is_sparse_result: return add_s_d(y,x)
# else: raise NotImplementedError()
# class Dot(gof.op.Op):
# """
# Attributes:
# grad_preserves_dense - a boolean flags [default: True].
# grad_preserves_dense controls whether gradients with respect to inputs
# are converted to dense matrices when the corresponding input y is
# dense (not in a L{SparseResult} wrapper). This is generally a good idea
# when L{Dot} is in the middle of a larger graph, because the types
# of gy will match that of y. This conversion might be inefficient if
# the gradients are graph outputs though, hence this mask.
# @todo: Simplify code by splitting into DotSS and DotSD.
# """
# def __init__(self, x, y, grad_preserves_dense=True):
# """
# Because of trickiness of implementing, we assume that the left argument x is SparseResult (not dense)
# """
# if x.dtype != y.dtype:
# raise NotImplementedError()
# assert _is_sparse_result(x)
# # These are the conversions performed by scipy.sparse.dot
# if x.format == "csc" or x.format == "coo":
# myformat = "csc"
# elif x.format == "csr":
# myformat = "csr"
# else:
# raise NotImplementedError()
# self.inputs = [x, y] # Need to convert? e.g. assparse
# self.outputs = [SparseResult(x.dtype, myformat)]
# self.grad_preserves_dense = grad_preserves_dense
# def perform(self):
# """
# @todo: Verify that output is sufficiently sparse, and raise a warning if it is not
# @todo: Also determine that we are storing the output in the best storage format?
# """
# self.outputs[0].data = self.inputs[0].data.dot(self.inputs[1].data)
# def grad(self, (x, y), (gz,)):
# assert _is_sparse_result(gz)
# rval = [dot(gz, y.T), dot(x.T, gz)]
# assert _is_sparse_result(x)
# if _is_dense_result(y):
# if self.grad_preserves_dense:
# rval[1] = dense_from_sparse(rval[1])
# return rval
# def __copy__(self):
# return self.__class__(self.inputs[0], self.inputs[1], self.grad_preserves_dense)
# def clone_with_new_inputs(self, *new_inputs):
# return self.__class__(new_inputs[0], new_inputs[1], self.grad_preserves_dense)
# def dot(x, y, grad_preserves_dense=True):
# """
# @todo: Maybe the triple-transposition formulation (when x is dense)
# is slow. See if there is a direct way to do this.
# """
# if hasattr(x, 'getnnz'): x = assparse(x)
# if hasattr(y, 'getnnz'): y = assparse(y)
# x_is_sparse_result = _is_sparse_result(x)
# y_is_sparse_result = _is_sparse_result(y)
# if not x_is_sparse_result and not y_is_sparse_result:
# raise TypeError()
# if x_is_sparse_result:
# return Dot(x, y, grad_preserves_dense).outputs[0]
# else:
# assert y_is_sparse_result
# return transpose(Dot(y.T, x.T, grad_preserves_dense).outputs[0])
......@@ -334,8 +334,10 @@ class _tensor_py_operators:
T = property(lambda self: transpose(self))
#SLICING
def __getitem__(self, item): return subtensor(self, item)
def __getslice__(self, *args): return subtensor(self, slice(*args))
def __getitem__(self, args): return Subtensor.from_idxs(self,
args).outputs[0]
def __getslice__(self, *args): return Subtensor.from_idxs(self,
(slice(*args),)).outputs[0]
#COPYING
def copy(self): return tensor_copy(self)
......@@ -356,15 +358,43 @@ s2t.TensorConstant = TensorConstant
s2t.TensorValue = TensorValue
#########################
# Casting Operations
#########################
class TensorFromScalar(Op):
    """Convert a 0-d scalar result into a 0-d tensor (broadcastable=())."""
    def make_node(self, s):
        # Only scalar-typed results are accepted; the output tensor keeps
        # the scalar's dtype and has rank 0.
        assert isinstance(s.type, scal.Scalar)
        return Apply(self,
                     [s],
                     [tensor(dtype = s.type.dtype,
                             broadcastable = ())])
    def perform(self, node, (s, ), (out, )):
        # numpy.asarray on a python scalar yields a 0-d ndarray.
        out[0] = numpy.asarray(s)
    def grad(self, (s,), (dt,)):
        # Needs a ScalarFromTensor op to map the gradient back to a scalar.
        raise NotImplementedError('todo: ScalarFromTensor')
tensor_from_scalar = TensorFromScalar()
############################
# Supporting Ops
############################
##########################
# Unary Operations
##########################
class Shape(Op):
    """
    L{Op} to return the shape of a matrix.

    @note: Non-differentiable.
    """
    def make_node(self, x):
        x = as_tensor(x)
        # The shape is returned as a 1-d integer vector.
        return Apply(self, [x], [ivector()])
    def perform(self, node, (x, ), (out, )):
        out[0] = numpy.asarray(x.shape)
    def grad(self, (x,), (gz,)):
        # Shape is not differentiable with respect to its input.
        raise ValueError
shape = Shape()
class Argmax(Op):
"""Calculate the max and argmax over a given axis"""
nin=2 # tensor, axis
......@@ -470,50 +500,223 @@ transpose_inplace = TransposeInplace()
def transpose(x, **kwargs):
    """Return the transpose of x as a new tensor; x itself is left intact."""
    x_copy = tensor_copy(x)
    return transpose_inplace(x_copy, **kwargs)
# class Subtensor(Op):
# nin = 2
# nout = 1
# e_invalid = 'invalid index'
# view_map = {0: [0]}
# def make_node(self, *inputs):
# def as_tuple_result(obj):
# if isinstance(obj, gof.Result):
# return obj
# assert isinstance(obj, (list, tuple))
# r = gof.Constant(gof.generic, obj)
# return r
# # def pad(tplR, N):
# # l = list(tplR.data)
# # for i in range(len(l), N):
# # l.append(slice(0,sys.maxint,1))
# # tplR.data = tuple(l)
# t, coord = args
# t = _as_tensor(t)
# coord = as_tuple_result(coord)
# if len(coord.data) > len(t.broadcastable):
# raise ValueError(Subtensor.e_invalid)
# # add the implicit extra unbounded slices
# # e.g. n[0] on a 3d tensor pads to n[0,:,:]
# ###pad(coord, len(t.broadcastable))
# broadcastable = [False for c in coord.data if isinstance(c, slice)]
# self.inputs = [t, coord]
# self.outputs = [Tensor(t.dtype, broadcastable)]
# def view_map(self):
# return {self.out: [self.inputs[0]]}
# def perform(self, node, (x, c), (out, )):
# if len(c) == 1:
# out[0] = x.__getitem__(c[0])
# else:
# out[0] = x.__getitem__(c)
# def grad(self, (x,), (gz,)):
# # - option: allocate a potentially large matrix of zeros, and fill in
# # the appropriate elements from gz
# # - option: return a sparse matrix
# # - option: return gz, but think about how to include a special addition
# # function that works on a corresponding view of the original data
# raise NotImplementedError()
# subtensor = Subtensor()
class Subtensor_dx(Op, Viewer):
    """Return a tensor full of zeros, except for what was sliced from x by
    Subtensor.

    @todo: pass the shape of x, rather than x itself.
    @todo: add support for advanced tensor indexing (breaks current perform
    implementation).
    """
    def __init__(self, inputs, idx_list, **kwargs):
        # inputs is Subtensor's input list with the output gradient gz
        # appended at the end (see Subtensor.grad); the output has the same
        # dtype/broadcastable pattern as the sliced tensor inputs[0].
        Op.__init__(self, **kwargs)
        self.inputs = inputs
        self.outputs = [Tensor(inputs[0].dtype, inputs[0].broadcastable)]
        self.idx_list = idx_list
    def perform(self):
        x = self.inputs[0]
        gz = self.inputs[-1]
        cdata = []
        # Rebuild the concrete index expression: each entry of idx_list is
        # either a slice whose start/stop/step are positions into
        # self.inputs, or directly such a position (integer index case).
        for c in self.idx_list:
            if isinstance(c, slice):
                cdata.append(slice(
                    None if c.start is None else self.inputs[c.start].data,
                    None if c.stop is None else self.inputs[c.stop].data,
                    None if c.step is None else self.inputs[c.step].data))
            else:
                d = self.inputs[c].data
                assert 'int' in str(d.dtype)
                cdata.append(d)
        if len(cdata) > 1:
            cdata = tuple(cdata) #there's a diff between tuple and list here...
        else:
            cdata = cdata[0]
        #print cdata
        #print gz.data
        # Scatter gz into a zero tensor shaped like x, at the region that
        # Subtensor extracted.
        gx = numpy.zeros_like(x.data)
        gx[cdata] = gz.data
        #print gx
        self.outputs[0].data = gx
    def clone_with_new_inputs(self, *new_inputs):
        assert len(self.inputs) == len(new_inputs)
        return Subtensor_dx(new_inputs, self.idx_list)
class Subtensor(Op, Viewer):
    """Return a subtensor view

    This class uses a relatively complex internal representation of the inputs
    to remember how the input tensor x should be sliced. The instance variable
    idxlist is a list whose elements are either integers, or slices. The
    integers are indexes into the inputs array, and the start/stop/step members
    of each slice are also integer indexes into the inputs array (or None). The
    inputs array is the tensor x, followed by scalar integer results.

    @todo: add support for advanced tensor indexing (in Subtensor_dx too).
    """
    e_invalid = 'invalid index'
    # Set truthy to print tracing information in from_idxs/perform.
    debug = 0
    @staticmethod
    def from_idxs(x, idxs, **kwargs):
        # Factory: translate a user-level index expression (int, integer
        # Scalar, slice, or a tuple/list of these) into the flattened
        # inputs/idx_list representation described in the class docstring.
        if Subtensor.debug:
            print idxs, sys.maxint
        def asidx(i):
            # Accept python ints (promoted to constant Scalars) and
            # integer-typed Scalar results; anything else is a TypeError.
            if isinstance(i, int): return scal.constant(i)
            if isinstance(i, scal.Scalar) and ('int' in i.dtype): return i
            raise TypeError(Subtensor.e_invalid, i)
        x = _as_tensor(x)
        idx_list = [] # like args, but with int -> scalar.constant
        inputs = [x] # like args, but with slices flattened
        if not isinstance(idxs, (list, tuple)):
            idxs = (idxs,)
        for idx in idxs:
            try:
                # Plain (scalar) index: record its position in inputs.
                ai = asidx(idx)
                idx_list.append(len(inputs))
                inputs.append(ai)
            except TypeError:
                if isinstance(idx, slice):
                    # Slice index: convert each bound to a Scalar (or None)
                    # and record each bound's position in inputs.
                    start = None if idx.start is None else asidx(idx.start)
                    stop = None if idx.stop is None else asidx(idx.stop)
                    step = None if idx.step is None else asidx(idx.step)
                    # If we get here, then everything got turned (successfully)
                    # into a scal.Scalar (with integer dtype) or None
                    if start:
                        startpos = len(inputs)
                        inputs.append(start)
                    else:
                        startpos = None
                    if stop:
                        stoppos = len(inputs)
                        inputs.append(stop)
                    else:
                        stoppos = None
                    if step:
                        steppos = len(inputs)
                        inputs.append(step)
                    else:
                        steppos = None
                    idx_list.append(slice(startpos, stoppos, steppos))
                else:
                    # Neither scalar-like nor slice: propagate the TypeError.
                    raise
        assert len(idxs) == len(idx_list)
        return Subtensor( inputs, idx_list, **kwargs)
    def __init__(self, inputs, idx_list, **kwargs):
        # inputs[0] is the tensor being sliced; the rest are integer scalars
        # referenced by position from idx_list.
        if len(idx_list) > len(inputs[0].broadcastable):
            raise ValueError(Subtensor.e_invalid,
                    (len(idx_list), len(inputs[0].broadcastable)))
        #infer the broadcasting pattern
        # Pad unindexed trailing dimensions with full slices; each sliced
        # dimension survives (non-broadcastable) and each integer-indexed
        # dimension is dropped from the output.
        padded = list(idx_list) \
            + [slice(0,sys.maxint,1)] * (len(inputs[0].broadcastable) - len(idx_list))
        broadcastable = [False for p in padded if isinstance(p, slice)]
        Op.__init__(self, **kwargs)
        self.inputs = inputs
        self.outputs = [Tensor(self.inputs[0].dtype, broadcastable)]
        self.idx_list = idx_list
    def view_map(self):
        # The output aliases (is a view of) the first input.
        return {self.out: [self.inputs[0]]}
    def perform(self):
        x = self.inputs[0].data
        cdata = []
        # Reconstitute the concrete index expression: positions stored in
        # idx_list are looked up in self.inputs for their runtime .data.
        for c in self.idx_list:
            if isinstance(c, slice):
                cdata.append(slice(
                    None if c.start is None else self.inputs[c.start].data,
                    None if c.stop is None else self.inputs[c.stop].data,
                    None if c.step is None else self.inputs[c.step].data))
            else:
                d = self.inputs[c].data
                assert 'int' in str(d.dtype)
                cdata.append(d)
        if len(cdata) > 1:
            cdata = tuple(cdata) #there's a diff between tuple and list here...
        else:
            cdata = cdata[0]
        self.outputs[0].data = x.__getitem__(cdata)
        if Subtensor.debug:
            print self.inputs[0].data, cdata, self.outputs[0].data
    def grad(self, inputs, (gz,)):
        # d/dx is a zero tensor with gz scattered into the sliced region
        # (Subtensor_dx); the scalar index inputs get no gradient.
        return [Subtensor_dx(self.inputs + [gz], self.idx_list).outputs[0]]\
                + [None] * (len(inputs)-1)
    def clone_with_new_inputs(self, *new_inputs):
        assert len(self.inputs) == len(new_inputs)
        return Subtensor(new_inputs, self.idx_list)
class VerticalStack(Op):
    """
    Vertically stack two L{Tensor}s.

    Stack two L{Tensor}s along the first axis (row wise). These
    L{Tensor}s must have the same shape along all dimensions but the
    first.

    @attention: Because we use vstack as the implementation, if the
    inputs have 1-dimension, the output will have 2-dimensions.
    """
    def make_node(self, x, y):
        x = as_tensor(x)
        y = as_tensor(y)
        assert x.type.dtype == y.type.dtype
        # All dimensions except the first must broadcast identically.
        if x.type.broadcastable[1:] != y.type.broadcastable[1:]:
            raise NotImplementedError
        inputs = [x, y]
        # The concatenated first dimension is never broadcastable.
        bcastable = (False, ) + x.type.broadcastable[1:]
        outputs = [tensor(dtype = x.type.dtype,
                          broadcastable = bcastable)]
        return Apply(self, inputs, outputs)
    def perform(self, node, (x, y), (out, )):
        assert x.ndim == y.ndim
        # Make sure every dimension (save the first) is the same
        for i in range(x.ndim): assert i == 0 or x.shape[i] == y.shape[i]
        out[0] = numpy.vstack([x, y])
    def grad(self, (x, y), (gz,)):
        """
        @todo: Make VSplit (or this grad implementation) its own L{Op},
        that way we can do more sanity-checking::

           assert x.ndim == y.ndim
           # Make sure every dimension (save the first) is the same
           for i in range(x.data.ndim): assert i == 0 or x.data.shape[i] == y.shape[i]
           etc...
        """
        # Split gz back along the first axis at x's first-dimension length.
        # NOTE(review): ys is computed but unused here; presumably kept for
        # symmetry/debugging -- confirm before removing.
        xs = shape(x)
        ys = shape(y)
        return gz[:xs[0]], gz[xs[0]:]
vertical_stack = VerticalStack()
def horizontal_stack(x, y):
    """
    Horizontally stack two L{Tensor}s.

    Stack two L{Tensor}s along the second axis (column wise). These
    L{Tensor}s must have the same shape along all dimensions but the
    second.

    @note: Unlike VerticalStack, we assume that the L{Tensor}s have
    two dimensions.
    """
    assert x.type.ndim == 2
    assert y.type.ndim == 2
    # Stacking columns of x and y is stacking rows of their transposes.
    stacked = vertical_stack(x.T, y.T)
    return transpose(stacked)
#########################
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论