Commit 1ddd6c38 authored by Olivier Breuleux

doc, small fixes to tests, defined more gradients

Parent: 2d60d3e3
@@ -7,6 +7,7 @@ import gof
 from scalar import *
+import tensor
 from elemwise import *
@@ -40,13 +41,6 @@ class _test_DimShuffle(unittest.TestCase):
         self.with_linker(gof.PerformLinker)
-#     def test_straightforward(self):
-#         x, y, z = inputs()
-#         e0 = DimShuffle(x, [1, 'x', 0]).out
-#         f = gof.PerformLinker(env([x], [e0])).make_function(inplace=True)
-#         assert f(numpy.ones((2, 3))).shape == (3, 1, 2)
 class _test_Broadcast(unittest.TestCase):
     def with_linker(self, linker):
@@ -164,27 +158,42 @@ class _test_CAReduce(unittest.TestCase):
 if __name__ == '__main__':
     unittest.main()
-# x = modes.build(Tensor('int32', [0, 0], name = 'x'))
-# y = modes.build(Tensor('int32', [0, 0], name = 'y'))
-# # x = modes.build(Tensor('float64', [0, 0], name = 'x'))
-# # y = modes.build(Tensor('float64', [0, 0], name = 'y'))
-# e = Broadcast(Pow, (x, y)).out
+# # x = modes.build(Tensor('int32', [0, 0], name = 'x'))
+# # y = modes.build(Tensor('int32', [0, 0], name = 'y'))
+# from scalar import Scalar, composite
+# x = modes.build(Tensor('float64', [0, 0], name = 'x'))
+# y = modes.build(Tensor('float64', [0, 0], name = 'y'))
+# xs, ys = Scalar('float64'), Scalar('float64')
+# e = Broadcast(composite([xs, ys], [(xs * ys) + (xs / ys) * 7.0]), (x, y)).out
 # f = gof.CLinker(env([x, y], [e])).make_function(inplace = False)
-# # xv = numpy.random.rand(1000, 1000)
-# # yv = numpy.random.rand(1000, 1000)
-# # zv = numpy.random.rand(1000, 1000)
-# xv = numpy.random.randint(1, 5, (1000, 1000))
-# yv = numpy.random.randint(1, 5, (1000, 1000))
-# add = numpy.frompyfunc(lambda x, y: x + y, 2, 1)
+# size = 2000
+# xv = numpy.random.rand(size, size)
+# yv = numpy.random.rand(size, size)
+# zv = numpy.random.rand(size, size)
+# # xv = numpy.random.randint(1, 5, (1000, 1000))
+# # yv = numpy.random.randint(1, 5, (1000, 1000))
 # # t0 = time.time()
 # # for i in xrange(100):
 # #     xv / yv
 # # print time.time() - t0
+# # t0 = time.time()
+# # for i in xrange(10):
+# #     f(xv, yv)
+# # print time.time() - t0
+# # t0 = time.time()
+# # for i in xrange(10):
+# #     (xv * yv) + (xv / yv) * 7.0
+# # print time.time() - t0
+# from scipy import weave
+# import numpy
 # t0 = time.time()
-# for i in xrange(100):
-#     f(xv, yv)
+# for i in xrange(10):
+#     weave.blitz("zv = dot(xv, yv)", locals())
 # print time.time() - t0
 # speed ratios:
@@ -281,7 +281,7 @@ PowTester = make_broadcast_tester(op_class = Pow,
                                   row = (rand_ranged(1, 5, (2, 3)), rand_ranged(-3, 3, (1, 3))),
                                   column = (rand_ranged(1, 5, (2, 3)), rand_ranged(-3, 3, (2, 1))))
                                   )
-PowTester = make_broadcast_tester(op_class = PowInplace,
+PowInplaceTester = make_broadcast_tester(op_class = PowInplace,
                                          expected = lambda x, y: x ** y,
                                          good = dict(same_shapes = (rand_ranged(1, 5, (2, 3)), rand_ranged(-3, 3, (2, 3))),
                                                      scalar = (rand_ranged(1, 5, (2, 3)), rand_ranged(-3, 3, (1, 1))),
@@ -417,28 +417,36 @@ CosInplaceTester = make_broadcast_tester(op_class = CosInplace,
 TanTester = make_broadcast_tester(op_class = Tan,
                                   expected = numpy.tan,
                                   good = dict(normal = (rand_ranged(-3.14, 3.14, (2, 3)),),
+                                              shifted = (rand_ranged(3.15, 6.28, (2, 3)),)),
+                                  grad = dict(normal = (rand_ranged(-3.14, 3.14, (2, 3)),),
                                               shifted = (rand_ranged(3.15, 6.28, (2, 3)),)))
-TanInplaceTester = make_broadcast_tester(op_class = CosInplace,
-                                         expected = numpy.cos,
+TanInplaceTester = make_broadcast_tester(op_class = TanInplace,
+                                         expected = numpy.tan,
                                          good = dict(normal = (rand_ranged(-3.14, 3.14, (2, 3)),),
                                                      shifted = (rand_ranged(3.15, 6.28, (2, 3)),)),
+                                         grad = dict(normal = (rand_ranged(-3.14, 3.14, (2, 3)),),
+                                                     shifted = (rand_ranged(3.15, 6.28, (2, 3)),)),
                                          inplace = True)
 CoshTester = make_broadcast_tester(op_class = Cosh,
                                    expected = numpy.cosh,
-                                   good = _good_broadcast_unary_normal)
+                                   good = _good_broadcast_unary_normal,
+                                   grad = _grad_broadcast_unary_normal)
 CoshInplaceTester = make_broadcast_tester(op_class = CoshInplace,
                                           expected = numpy.cosh,
                                           good = _good_broadcast_unary_normal,
+                                          grad = _grad_broadcast_unary_normal,
                                           inplace = True)
 SinhTester = make_broadcast_tester(op_class = Sinh,
                                    expected = numpy.sinh,
-                                   good = _good_broadcast_unary_normal)
+                                   good = _good_broadcast_unary_normal,
+                                   grad = _grad_broadcast_unary_normal)
 SinhInplaceTester = make_broadcast_tester(op_class = SinhInplace,
                                           expected = numpy.sinh,
                                           good = _good_broadcast_unary_normal,
+                                          grad = _grad_broadcast_unary_normal,
                                           inplace = True)
 TanhTester = make_broadcast_tester(op_class = Tanh,
@@ -3,15 +3,18 @@ import elemwise_cgen as cgen
 import numpy
 from gof import Op, Viewer, Destroyer
-from base_tensor import BaseTensor as Tensor
+#from base_tensor import BaseTensor as Tensor
 import scalar
 from scalar import upcast, Scalar
 import gof
 from gof.python25 import all

 def astensor(data):
-    assert isinstance(data, Tensor)
-    return data
+    raise Exception("Circular dependencies prevent using this here. import tensor before elemwise")
+
+def Tensor(*inputs, **kwargs):
+    raise Exception("Circular dependencies prevent using this here. import tensor before elemwise")
 ##################
@@ -20,8 +23,35 @@ def astensor(data):
 class DimShuffle(Op, Viewer):
     """
-    @todo: DOCUMENTATION? --jpt
+    Usage: DimShuffle(input, new_order, inplace = True)
+
+    * input: a Tensor instance
+    * new_order: a list representing the relationship between the
+      input's dimensions and the output's dimensions. Each element
+      of the list can either be an index or 'x'.
+    * inplace: if True, the output will be a view of the input.
+      If False, the output will be a copy of the input.
+
+    If j = new_order[i] is an index, the output's ith dimension
+    will be the input's jth dimension.
+    If new_order[i] is 'x', the output's ith dimension will
+    be 1 and Broadcast operations will be allowed to broadcast
+    over that dimension.
+
+    If input.broadcastable[i] == False then i must be found in
+    new_order. Broadcastable dimensions, on the other hand, can
+    be discarded.
+
+    Examples:
+      # t<n> represents an n-d tensor
+      DimShuffle(t2, [0, 1]) -> identity
+      DimShuffle(t2, [1, 0]) -> swaps the first and second dimensions
+      DimShuffle(t1, ['x', 0]) -> make a row out of a 1d vector
+      DimShuffle(t1, [0, 'x']) -> make a column out of a 1d vector
+      DimShuffle(t3, [2, 0, 1]) -> like doing t3.transpose((2, 0, 1)) in numpy
+      DimShuffle(t2, [0, 'x', 1]) -> like doing t2.reshape((t2.shape[0], 1, t2.shape[1])) in numpy
+      DimShuffle(t2, [1, 'x', 0]) -> like doing t2.T.reshape((t2.shape[1], 1, t2.shape[0])) in numpy
     """
     def __init__(self, input, new_order, inplace = True):
         input = astensor(input)
@@ -44,21 +74,26 @@ class DimShuffle(Op, Viewer):
         self.inplace = inplace

+        # list of dimensions of the input to drop
         self.drop = []
-        self.augment = []
-        i2j = {}
+        # i2j maps i before dropping dimensions to j after dropping
+        # dimensions, so self.shuffle can be set properly later on
+        i2j = {}
         j = 0
         for i, b in enumerate(ib):
             if i not in new_order:
+                # we want to drop this dimension because it's not a value in new_order
                 if b == 1:
                     self.drop.append(i)
                 else:
+                    # we cannot drop non-broadcastable dimensions
                     raise NotImplementedError("You cannot drop a non-broadcastable dimension.")
             else:
                 i2j[i] = j
                 j += 1

+        # transposition of non-broadcastable dimensions
         self.shuffle = [i2j[x] for x in new_order if x != 'x']

+        # list of dimensions of the output that are broadcastable
+        # and were not in the original input
         self.augment = [i for i, x in enumerate(new_order) if x == 'x']

     def clone_with_new_inputs(self, *new_inputs):
@@ -77,19 +112,23 @@ class DimShuffle(Op, Viewer):
         return "DimShuffle{%s}" % "".join(str(x) for x in self.new_order)

     def perform(self):
+        # drop
         res = self.inputs[0].data
         shape = list(res.shape)
         for drop in reversed(self.drop):
             shape.pop(drop)
         res = res.reshape(shape)

+        # transpose
         res = res.transpose(self.shuffle)

+        # augment
         shape = list(res.shape)
         for augm in self.augment:
             shape.insert(augm, 1)
         res = res.reshape(shape)

+        # copy (if not inplace)
         if not self.inplace:
             res = numpy.copy(res)
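In NumPy terms, perform is three reshape/transpose steps in sequence. A minimal standalone sketch of the same drop/transpose/augment logic (the function and test values below are illustrative, not from this commit):

    import numpy

    def dimshuffle(arr, new_order):
        # drop: remove dimensions that new_order does not mention
        # (reshape fails unless the dropped dimension has size 1)
        kept = [i for i in range(arr.ndim) if i in new_order]
        arr = arr.reshape([arr.shape[i] for i in kept])
        # transpose: reorder the kept dimensions
        i2j = {i: j for j, i in enumerate(kept)}
        arr = arr.transpose([i2j[x] for x in new_order if x != 'x'])
        # augment: insert a size-1 dimension wherever new_order says 'x'
        shape = list(arr.shape)
        for i, x in enumerate(new_order):
            if x == 'x':
                shape.insert(i, 1)
        return arr.reshape(shape)

    assert dimshuffle(numpy.ones((2, 3)), [1, 'x', 0]).shape == (3, 1, 2)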
@@ -106,18 +145,6 @@ class DimShuffle(Op, Viewer):
         return "%s(%s, %s)" % (self.__class__.__name__, str(self.inputs[0]), self.new_order)

-class Transpose(DimShuffle):
-    def __init__(self, input):
-        DimShuffle.__init__(self, input, range(len(input.broadcastable)-1, -1, -1), False)
-
-    def clone_with_new_inputs(self, *new_inputs):
-        return Transpose(new_inputs[0])
-
-    def __str__(self):
-        return "%s(%s)" % (self.__class__.__name__, str(self.inputs[0]))

 #################
 ### Broadcast ###
@@ -125,7 +152,36 @@ class Transpose(DimShuffle):
 class Broadcast(Op, Destroyer):
     """
-    @todo: DOCUMENTATION? --jpt
+    Generalizes a scalar op to tensors.
+
+    Usage: Broadcast(scalar_opclass, inputs, inplace_pattern = {})
+
+    * scalar_opclass: a class that extends scalar.ScalarOp, operates
+      exclusively on scalars and can be instantiated from the list of
+      its inputs
+    * inputs: a list of Tensor instances
+    * inplace_pattern: a dictionary that maps the index of an output to
+      the index of an input, so that the output is calculated inplace
+      using the input's storage.
+
+    All the inputs must have the same number of dimensions. When the
+    Op is performed, each input's size along each dimension must be
+    the same as every other input's. As a special case, a size can
+    also be 1, but only if the input's broadcastable flag is True for
+    that dimension. In that case, the tensor is (virtually) replicated
+    along that dimension to match the size of the others.
+
+    The dtypes of the outputs mirror those of the scalar Op that is
+    being generalized to tensors. However, if the calculations for an
+    output are done inplace on an input, it will keep the same dtype
+    as the input (in a nutshell, int + float -> float, but int += float -> int).
+
+    Examples:
+      Broadcast(Add, (rand(10, 5), rand(10, 5)), {0: 0}) # this does input0 += input1
+      Broadcast(Add, (rand(10, 5), rand(10, 5)), {0: 1}) # this does input1 += input0
+      Broadcast(Mul, (rand(10, 5), rand(1, 5))) # the second input is replicated along the first dimension to match the first input
+      Broadcast(Div, (rand(10, 5), rand(10, 1))) # same but along the second dimension
+      Broadcast(Div, (rand(1, 5), rand(10, 1))) # the output has size (10, 5)
+      Broadcast(Log, (rand(3, 4, 5),))
     """
     def __init__(self, scalar_opclass, inputs, inplace_pattern = {}):
@@ -137,6 +193,7 @@ class Broadcast(Op, Destroyer):
         except (AssertionError, AttributeError):
             raise TypeError("All inputs to a Broadcast subclass must be Tensor instances and their broadcastable fields must all have the same length.", self.__class__)

+        # self.shadow is an instance of scalar_opclass used to get values
+        # for all the properties we need (dtypes, gradient, etc.)
         self.shadow = scalar_opclass(*[Scalar(dtype = t.dtype) for t in inputs])
         self.nin = self.shadow.nin
@@ -151,6 +208,9 @@ class Broadcast(Op, Destroyer):
         out_dtypes = [t.dtype for t in self.shadow.outputs]

         def get_dtype(i):
+            # If an operation is done inplace, the dtype of the output
+            # will be the same as the dtype of the input it overwrites,
+            # e.g. int + float -> float, but int += float -> int
             input_idx = inplace_pattern.get(i, None)
             if input_idx is not None:
                 return inputs[input_idx].dtype
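Modern NumPy enforces safe casting on in-place ufuncs, so reproducing the int += float -> int behaviour there takes an explicit unsafe cast; a sketch, not from this commit:

    import numpy

    a = numpy.ones(3, dtype='int32')
    b = numpy.full(3, 0.5, dtype='float64')
    print((a + b).dtype)                      # float64: out-of-place upcasts
    numpy.add(a, b, out=a, casting='unsafe')  # inplace: lands in a's int32 storage
    print(a.dtype, a)                         # int32 [1 1 1]: fractions truncated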
@@ -185,14 +245,18 @@ class Broadcast(Op, Destroyer):
         shadow = self.shadow
         scalar_ograds = [Scalar(dtype = ograd.dtype) for ograd in ograds]
         scalar_igrads = shadow.grad(shadow.inputs, scalar_ograds)
+        nd = len(inputs[0].broadcastable) # this is the same for everyone

         def transform(r):
+            # From a graph of ScalarOps, make a graph of Broadcast ops.
             if r in shadow.inputs:
                 return inputs[shadow.inputs.index(r)]
             if r in scalar_ograds:
                 return ograds[scalar_ograds.index(r)]
             op = r.owner
             if op is None:
-                b = [1] * len(inputs[0].broadcastable)
+                # the gradient contains a constant, translate it as an
+                # equivalent Tensor of size 1 and the proper number of dimensions
+                b = [1] * nd
                 res = astensor(numpy.asarray(r.data).reshape(b),
                                broadcastable = b)
                 return res
@@ -202,10 +266,16 @@ class Broadcast(Op, Destroyer):
         ret = []
         for scalar_igrad, input in zip(scalar_igrads, inputs):
             if scalar_igrad is None:
+                # undefined gradient
                 ret.append(None)
                 continue
             r = transform(scalar_igrad)

+            # list of all the dimensions that are broadcastable for that
+            # input, so we can sum over them
+            # todo: only count dimensions that were effectively broadcasted
             to_sum = [i for i, bcast in enumerate(input.broadcastable) if bcast]

             if to_sum:
                 shuffle = []
                 j = 0
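The reason for summing: when an input was broadcasted, every virtual replica contributes its own piece of the upstream gradient, and the single stored row (or column) must accumulate all of them. In NumPy terms (illustrative names):

    import numpy

    x = numpy.random.rand(10, 5)
    y = numpy.random.rand(1, 5)          # broadcasted along dimension 0 in z = x + y
    gz = numpy.random.rand(10, 5)        # upstream gradient on z
    gy = gz.sum(axis=0, keepdims=True)   # gradient wrt y: sum over the broadcasted dim
    assert gy.shape == y.shape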
@@ -229,6 +299,7 @@ class Broadcast(Op, Destroyer):
             odat = output.data
             shape = [max(values) for values in zip(*[input.data.shape for input in self.inputs])]
             if odat is not None:
+                # reuse storage if we can
                 odat.resize(shape, refcheck = 0)
             else:
                 odat = numpy.ndarray(shape, dtype = output.dtype)
@@ -325,7 +396,34 @@ class Broadcast(Op, Destroyer):

 def make_broadcast(scalar_opclass, inplace_pattern = {}, name = None):
+    if name is None:
+        name = "Tensor" + scalar_opclass.__name__
+    scalar_name = scalar_opclass.__name__
+    previous_doc = Broadcast.__doc__
+    scalar_doc = scalar_opclass.__doc__
+    if scalar_doc:
+        scalar_doc = """
+        %(scalar_name)s documentation:
+        %(scalar_doc)s
+        """ % locals()
+    doc = """
+    Usage: %(name)s(*inputs)
+    Equivalent to: Broadcast(%(scalar_name)s, inputs, %(inplace_pattern)s)
+
+    Performs Scalar %(scalar_name)s on each element of the input tensors.
+    %(scalar_doc)s
+    Documentation for Broadcast:
+    ==================================================
+    %(previous_doc)s
+    ==================================================
+    """ % locals()
+
     class New(Broadcast):
+        __doc__ = doc
         def __init__(self, *inputs):
             Broadcast.__init__(self, scalar_opclass, inputs, inplace_pattern)
         def clone_with_new_inputs(self, *new_inputs):
@@ -333,10 +431,7 @@ def make_broadcast(scalar_opclass, inplace_pattern = {}, name = None):
         @classmethod
         def desc(cls):
             return (Broadcast, scalar_opclass, tuple(inplace_pattern.items()))
-    if name is not None:
-        New.__name__ = name
-    else:
-        New.__name__ = "Tensor" + scalar_opclass.__name__
+    New.__name__ = name
     return New
 def wrap_broadcast(op):
@@ -353,6 +448,8 @@ def wrap_broadcast(op):
         else:
             args.append(DimShuffle(input, ['x']*difference + range(length)).out)
         return op(*args)
+    instantiate.__name__ = "instantiate{%s}" % op.__name__
+    instantiate.__doc__ = op.__doc__
     return instantiate
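The DimShuffle(input, ['x']*difference + range(length)) call pads inputs that have fewer dimensions with leading broadcastable dimensions; the NumPy analogue is prepending size-1 axes (illustrative):

    import numpy

    a = numpy.random.rand(10, 5)
    b = numpy.random.rand(5)              # 1-d: needs one leading 'x'
    b_padded = b.reshape((1,) + b.shape)  # like DimShuffle(b, ['x', 0])
    assert (a + b_padded).shape == (10, 5)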
@@ -363,26 +460,26 @@ def wrap_broadcast(op):

 class CAReduce(Op):
     """
-    CAReduce(scalar_op, inputs, dimensions_to_reduce = None, init = None, shortcut = False)
-
-    The number of inputs must be the difference between the number of
-    outputs of scalar_op and its number of inputs. L{CAReduce} holds
-    scalar states, the accumulators, in proportion to the number of
-    outputs of scalar_op and it updates them iteratively::
-
-      for x, y, ... in input0, input1, ...
-          scalar_state <- scalar_op(scalar_state, x, y, ...)}
-
-    The initial states are init if provided (they must be scalars),
-    else if there are as many states as inputs, a sample from each
-    input will be taken as initialization, else an error will be
-    raised.
-
-    If shortcut is True and the scalar op has a 'tbd' field, the
-    iteration will try to stop as soon as it encounters the value
-    specified for that field and will return it immediately, eg
-    multiply/and will return 0 at first sight of 0 and 'or' will
-    return 1 at first sight of 1.
+    Usage: CAReduce(scalar_opclass, inputs, dimensions_to_reduce = None)
+
+    * scalar_opclass: a binary scalar op with only one output.
+      It will be instantiated with one Scalar per input:
+      scalar_opclass(*[Scalar(t.dtype) for t in inputs])
+      It must be commutative and associative.
+    * inputs: a list of Tensor instances
+    * dimensions_to_reduce: the list of dimensions that we want to
+      reduce; if None, all dimensions are reduced
+
+    The output will have the same shape as the input minus the reduced
+    dimensions. It will contain the result of accumulating all values
+    over the reduced dimensions using the specified scalar op.
+
+    Examples:
+      CAReduce(Add, inputs) -> sum(inputs)
+      CAReduce(Mul, inputs) -> product(inputs)
+      CAReduce(Or, inputs) -> any(inputs) # not lazy
+      CAReduce(And, inputs) -> all(inputs) # not lazy
+      CAReduce(Xor, inputs) -> sum(inputs != 0) % 2

     In order to optimize memory usage patterns, L{CAReduce} makes zero
     guarantees on the order in which it iterates over the dimensions
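The accumulation the docstring describes matches a NumPy ufunc reduction over the chosen axes; an illustrative sketch for the Add case:

    import numpy

    x = numpy.random.rand(3, 4, 5)
    r = numpy.add.reduce(x, axis=(0, 2))   # analogue of CAReduce(Add, x, [0, 2])
    assert r.shape == (4,)
    assert numpy.allclose(r, x.sum(axis=(0, 2)))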
@@ -520,13 +617,31 @@ class CAReduce(Op):

 def make_reduce(scalar_opclass, name = None):
-    if getattr(scalar_opclass, 'commutative', True) \
-            and getattr(scalar_opclass, 'associative', True):
+    if getattr(scalar_opclass, 'commutative', False) \
+            and getattr(scalar_opclass, 'associative', False):
         reducer = CAReduce
     else:
         raise NotImplementedError("The scalar op class to reduce must be commutative and associative.")
+
+    scalar_name = scalar_opclass.__name__
+    if name is None:
+        name = "Reduce" + scalar_name
+    previous_doc = reducer.__doc__
+    doc = """
+    Usage: %(name)s(input, axis)
+    Equivalent to: CAReduce(%(scalar_name)s, input, axis)
+
+    Reduces the input over the specified axis.
+
+    Documentation for CAReduce:
+    ==================================================
+    %(previous_doc)s
+    ==================================================
+    """ % locals()
+
     class New(reducer):
+        __doc__ = doc
         def __init__(self, *inputs, **kwargs):
             reducer.__init__(self, scalar_opclass, inputs, kwargs.get('axis', None))
         def clone_with_new_inputs(self, *new_inputs):
@@ -540,13 +655,12 @@ def make_reduce(scalar_opclass, name = None):
             return "%s(%s, axis = %s)" % (self.__class__.__name__,
                                           str(input),
                                           self.dimensions_to_reduce)
-    if name is not None:
-        New.__name__ = name
-    else:
-        New.__name__ = "Reduce" + scalar_opclass.__name__
+    New.__name__ = name
     return New
-class Sum(make_reduce(scalar.Add)):
+_Sum = make_reduce(scalar.Add, '_Sum')
+
+class Sum(_Sum):
+    __doc__ = _Sum.__doc__
     def grad(self, (x, ), (gz, )):
         if self.dimensions_to_reduce == ():
             return gz,
@@ -16,7 +16,7 @@ __all__ = ['Op',
            ]

-def constructor(op_cls):
+def constructor(op_cls, name = None):
     """Make an Op look like a L{Result}-valued function."""
     def f(*args, **kwargs):
         op = op_cls(*args, **kwargs)
@@ -24,6 +24,17 @@ def constructor(op_cls):
             return op.outputs
         else:
             return op.outputs[0]
+    opname = op_cls.__name__
+    if name is None:
+        name = "constructor{%s}" % opname
+    f.__name__ = name
+    doc = op_cls.__doc__
+    f.__doc__ = """
+    Constructor for %(opname)s:
+    %(doc)s
+    """ % locals()
     return f
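Stripped of the gof specifics, the wrapper just instantiates the Op and hands back its outputs as if the class were a plain function; a self-contained sketch with a hypothetical Op:

    def constructor(op_cls, name=None):
        def f(*args, **kwargs):
            op = op_cls(*args, **kwargs)
            outs = op.outputs
            return outs if len(outs) > 1 else outs[0]
        f.__name__ = name or "constructor{%s}" % op_cls.__name__
        f.__doc__ = op_cls.__doc__
        return f

    class Double(object):
        """Doubles its input."""
        def __init__(self, x):
            self.outputs = [2 * x]

    double = constructor(Double)
    print(double(21))       # 42
    print(double.__name__)  # constructor{Double}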
 class Op(object):
@@ -246,6 +246,8 @@ class FloatUnaryScalarOp(UnaryScalarOp):

 class Add(ScalarOp):
     identity = 0
+    commutative = True
+    associative = True
     def impl(self, *inputs):
         return sum(inputs)
     def c_code(self, inputs, (z, ), sub):
@@ -258,6 +260,8 @@ class Add(ScalarOp):

 class Mul(ScalarOp):
     identity = 1
+    commutative = True
+    associative = True
     def impl(self, *inputs):
         return numpy.product(inputs)
     def c_code(self, inputs, (z, ), sub):
@@ -424,27 +428,37 @@ class Tan(FloatUnaryScalarOp):
     def impl(self, x):
         return math.tan(x)
     def grad(self, (x, ), (gz, )):
-        raise NotImplementedError()
+        return gz / (cos(x) ** 2),
     def c_code(self, (x, ), (z, ), sub):
         return "%(z)s = tan(%(x)s);" % locals()
 class Cosh(FloatUnaryScalarOp):
+    """
+    cosh(x) = (exp(x) + exp(-x)) / 2
+    """
     def impl(self, x):
         return math.cosh(x)
     def grad(self, (x, ), (gz, )):
-        raise NotImplementedError()
+        return gz * sinh(x),
     def c_code(self, (x, ), (z, ), sub):
         return "%(z)s = cosh(%(x)s);" % locals()

 class Sinh(FloatUnaryScalarOp):
+    """
+    sinh(x) = (exp(x) - exp(-x)) / 2
+    """
     def impl(self, x):
         return math.sinh(x)
     def grad(self, (x, ), (gz, )):
-        raise NotImplementedError()
+        return gz * cosh(x),
     def c_code(self, (x, ), (z, ), sub):
         return "%(z)s = sinh(%(x)s);" % locals()

 class Tanh(FloatUnaryScalarOp):
+    """
+    tanh(x) = sinh(x) / cosh(x)
+            = (exp(2*x) - 1) / (exp(2*x) + 1)
+    """
     def impl(self, x):
         return math.tanh(x)
     def grad(self, (x, ), (gz, )):
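Each of the newly defined gradients can be sanity-checked against a central finite difference; the closed forms below are the standard identities, and the check itself is illustrative, not from this commit:

    import math

    def check(f, dfdx, x, eps=1e-6):
        numeric = (f(x + eps) - f(x - eps)) / (2 * eps)
        assert abs(numeric - dfdx(x)) < 1e-4, (numeric, dfdx(x))

    check(math.tan,  lambda x: 1.0 / math.cos(x) ** 2, 0.3)   # Tan.grad
    check(math.cosh, math.sinh, 0.3)                          # Cosh.grad
    check(math.sinh, math.cosh, 0.3)                          # Sinh.grad
    check(math.tanh, lambda x: 1.0 - math.tanh(x) ** 2, 0.3)  # 1 - tanh^2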
@@ -218,15 +218,14 @@ TensorCopy, tensor_copy = broadcast(scal.Identity, 'TensorCopy', False)
 # View Operations
 ##########################

-class TransposeInplace(_Op, Viewer):
-    def view_map(self):
-        return {self.out: [self.inputs[0]]}
-    def propagate_broadcastable(self, x):
-        rval = list(x)
-        rval.reverse()
-        return [rval]
-    def impl(self, x):
-        return x.T #numpy's transpose
+class TransposeInplace(s2t.DimShuffle):
+
+    def __init__(self, input):
+        s2t.DimShuffle.__init__(self, input, range(len(input.broadcastable)-1, -1, -1), True)
+
+    def perform(self):
+        self.outputs[0].data = self.inputs[0].data.T
+
     def grad(self, (x,), (gz,)):
         return transpose(gz),
@@ -238,6 +237,7 @@ class TransposeInplace(_Op, Viewer):
         }
         %(z)s = transposed;
         """ % locals()
+transpose_inplace = gof.op.constructor(TransposeInplace)
 def transpose(x, **kwargs):
     return transpose_inplace(tensor_copy(x), **kwargs)