* Moved Reshape, Flatten and Tile ops from hpu/conv/sp.py to tensor/basic.py

* Implemented the TensorDot op (see the numpy.tensordot documentation for details)
* Added the corresponding test cases under tests/test_basic.py
* The Print op now accepts a list of things to print, which can be attributes or member functions of the input (e.g. "__repr__"). By default it prints input.__str__(), as before.

* Moved Reshape, Flatten and Tile ops from hpu/conv/sp.py to tensor/basic.py
169a80b1 · desjagui@atchoum.iro.umontreal.ca · 19e4cfb9 · 169a80b1 · 169a80b1 · 169a80b1
--- a/theano/printing.py
+++ b/theano/printing.py
@@ -8,11 +8,19 @@ from gof import Op, Apply

 class Print(Op):
    """This identity-like Op has the side effect of printing a message followed by its inputs
-    when it runs.
+    when it runs. Default behaviour is to print the __str__ representation. Optionally, one 
+    can pass a list of the input member functions to execute, or attributes to print.
+    
+    @type message: String
+    @param message: string to preprend to the output
+    @type attrs: list of Strings
+    @param attrs: list of input node attributes or member functions to print. Functions are
+    identified through callable(), executed and their return value printed.
    """
    view_map={0:[0]}
-    def __init__(self,message=""):
+    def __init__(self,message="", attrs=("__str__",)):
        self.message=message
+        self.attrs=attrs

    def make_node(self,xin):
        xout = xin.type.make_result()
@@ -22,7 +30,9 @@ class Print(Op):
        xin, = inputs
        xout, = output_storage
        xout[0] = xin
-        print self.message,xin
+        for attr in self.attrs:
+            temp = getattr(xin, attr)
+            print self.message, attr,'=', temp() if callable(temp) else temp

    def grad(self,input,output_gradients):
        return output_gradients

--- a/theano/tensor/basic.py
+++ b/theano/tensor/basic.py
@@ -79,6 +79,7 @@ def as_tensor(x, name = None):
     - `TypeError`: raised if `x` cannot be converted to a Tensor Result

    """
+
    if isinstance(x, gof.Apply):
        #TODO: use Apply's default output mechanism
        if len(x.outputs) != 1:
@@ -1659,6 +1660,8 @@ def get_vector_length(v):
    cases.

    """
+    if not isinstance(v, gof.Result):
+        v = constant(v)
    if v.ndim != 1:
        raise TypeError('argument must be symbolic vector')
    if isinstance(v, gof.Constant) and v.type.ndim == 1:
@@ -1766,6 +1769,106 @@ class MakeVectorPrinter:
 pprint.assign(lambda pstate, r: r.owner and isinstance(r.owner.op, MakeVector), MakeVectorPrinter())


+class Reshape(Op):
+    """Perform a reshape operation of the input x to the new shape shp.
+    The number of dimensions to which to reshape to (ndim) must be known at graph 
+    build time."""
+    view_map = {0: [0]} #output 0 is potentially aliased to inputs [0]
+    def __init__(self, ndim):
+        self.ndim = ndim
+    def __eq__(self, other):
+        return (type(other) is Reshape) and (other.ndim == self.ndim)
+    def __hash__(self):
+        return hash(Reshape) ^ hash(self.ndim)
+    def make_node(self, x, shp):
+        x = as_tensor(x)
+        shp = as_tensor(shp)
+        return gof.Apply(self, [x, shp], [tensor(x.type.dtype, [False]*self.ndim)])
+    def perform(self, node, (x, shp), (out,)):
+        if (len(shp) != self.ndim):
+            raise ValueError('shape argument to Reshape.perform has incorrect length %i'
+                    ', should be %i' % (len(shp), self.ndim), shp)
+        try:
+            out[0] = numpy.reshape(x, shp)
+        except:
+            raise ValueError('Cannot reshape input of shape %s to shape %s' % (x.shape,shp))
+    def grad(self, (x, shp), (g_out,)):
+        return [reshape(g_out, shape(x), ndim=x.ndim), None]
+
+def reshape(x, newshape, ndim=None):
+    if not hasattr(reshape, 'op'):
+        reshape.op = {}
+    if ndim is None:
+        ndim = get_vector_length(newshape)
+    if ndim not in reshape.op:
+        reshape.op[ndim] = Reshape(ndim)
+    return reshape.op[ndim](x, newshape)
+
+
+class Flatten(Op):
+    """Flattens the input node"""
+    #Could be done as a reshape, but this is more direct.
+    #TODO: optimize reshape(x, prod(shape(x))) -> flatten(x)
+    def __init__(self, ldim=None):
+        self.ldim = ldim
+    def make_node(self, x):
+        x = as_tensor(x)
+        outdim = 1 if self.ldim is None else x.ndim - self.ldim +1
+        return gof.Apply(self, [x], [tensor(x.type.dtype, (False,)*outdim)])
+    def perform(self, node, (x,), (out,)):
+        # flatten the entire tensor or just the last ldim dimensions
+        out[0] = x.flatten() if self.ldim is None else\
+                 x.reshape(numpy.r_[x.shape[:-self.ldim],\
+                           numpy.prod(x.shape[-self.ldim:])])
+    def grad(self, (x,), (g_out,)):
+        return [reshape(g_out, shape(x), x.ndim)]
+
+def flatten(ldim=None): return Flatten(ldim)  # op factory: returns a Flatten op, to be applied as flatten(ldim)(x)
+
+class TileGrad(Op):
+    """Calculates the gradient of the Tile Op"""
+    #this is so weird, I can't think of how to make this a general thing.
+    def make_node(self, x, reps, g_out):
+        return gof.Apply(self, [x, reps, g_out], [x.type()])
+    def perform(self, node, (x, reps, g_out), (gx,)):
+        xsh = x.shape
+        if len(reps)==2 and reps[1] == 1 and len(x.shape) == 1:
+            gx[0] = numpy.sum(g_out, axis=0)
+        else:
+            raise NotImplementedError('x.shape, reps combination not supported',
+                    (x.shape, reps))
+tilegrad = TileGrad()
+
+
+class Tile(Op):
+    """Tiles its input according to reps. Reps is of same dimension as x
+    and contains the number of times to tile x in each dimension"""
+    def __init__(self, ndim):
+        self.ndim = ndim  # rank of the tiled output, fixed when the op is built
+    def __eq__(self, other):
+        return (type(other) is Tile) and (other.ndim == self.ndim)  # same rank => same op
+    def __hash__(self):
+        return hash(Tile) ^ hash(self.ndim)  # kept consistent with __eq__

+    def make_node(self, x, reps):
+        x = as_tensor(x)
+        reps = as_tensor(reps)
+        return gof.Apply(self, [x, reps], [tensor(x.type.dtype, [False,] * self.ndim)])
+    def perform(self, node, (x, reps), (out,)):
+        out[0] = numpy.tile(x, reps)
+        if len(out[0].shape) != self.ndim:  # result rank must match the declared output rank
+            raise ValueError('Tile.perform produced incorrect shape')
+    def grad(self, (x, reps), (g_out,)):
+        return [tilegrad(x, reps, g_out), None]  # None: no gradient w.r.t. reps
+
+def tile(x, reps, ndim=None):
+    if not hasattr(tile, 'op'):
+        tile.op = {}
+    ndim = len(reps) if ndim is None else ndim #not sure if len(shp) is going to work.
+    if ndim not in tile.op:
+        tile.op[ndim] = Tile(ndim)
+    return tile.op[ndim](x, reps)
+


 #########################
@@ -1834,13 +1937,89 @@ class Dot(Op):
        if x.type.ndim == 1 and y.type.ndim > 1:
            return dot(gz, y.T), outer(x.T, gz)
        if x.type.ndim > 1 and y.type.ndim == 1:
-            return outer(gz, y.T), dot(x.T, gz)
+            return outer(gz, y.T), dot(x.T, gz) 
        return dot(gz, y.T), dot(x.T, gz)
    def __str__(self):
        return "dot"
 dot = Dot()
 pprint.assign(dot, printing.OperatorPrinter(printing.special['middle_dot'], -1, 'left'))

+#########################
+# Linalg : TensorDot
+#########################
+class TensorDotGrad(Op):
+    def __init__(self, axes):
+        self.axes = axes;
+
+    def make_node(self, x, y, gz):
+        assert isinstance(x, Result)
+        assert isinstance(y, Result)
+        assert isinstance(gz, Result)
+        gx = x.type()
+        gy = y.type()
+        return Apply(self, [x,y,gz], [gx, gy])
+
+    def perform(self, node, (x, y, gz), (gx,gy)):
+
+        sum_over_y = range(y.ndim)
+        [sum_over_y.remove(q) for q in self.axes[1]]
+        sum_over_x = range(x.ndim)
+        [sum_over_x.remove(q) for q in self.axes[0]]
+
+        _gx = numpy.tensordot(gz, y, [range(x.ndim-len(self.axes[0]),gz.ndim), sum_over_y])
+        idx = numpy.hstack((sum_over_x, self.axes[0]))
+        newshapex = numpy.zeros(x.ndim)
+        newshapex[[newpos for newpos in idx]] = [i for i in range(x.ndim)]
+        gx[0] = numpy.transpose(_gx, newshapex)
+        assert str(gx[0].dtype) == 'float64'
+
+        _gy = numpy.tensordot(x, gz, [sum_over_x, range(x.ndim-len(self.axes[0]))])
+        idy = numpy.hstack((self.axes[1], sum_over_y))
+        newshapey = numpy.zeros(y.ndim)
+        newshapey[[newpos for newpos in idy]] = [i for i in range(y.ndim)]
+        gy[0] = numpy.transpose(_gy, newshapey)
+        assert str(gy[0].dtype) == 'float64'
+
+tensordot_grad = TensorDotGrad
+
+class TensorDot(Op):
+    """Compute tensor-tensor products over the given axes. See numpy documentation for details.
+
+    """
+
+    def __init__(self, axes):
+        self.axes = axes;
+
+    def make_node(self, x, y):
+
+        axesdim = numpy.size(self.axes)/2
+        x, y = map(as_tensor, [x, y])
+
+        if axesdim > x.type.ndim or axesdim > y.type.ndim:
+            raise TypeError('Cannot sum over more dimensions than input. %i > %i,%i' %
+                    axesdim, x.type.ndim, y.type.ndim)
+       
+        outdim = x.type.ndim + y.type.ndim - 2*axesdim
+        output = tensor(dtype=x.dtype, broadcastable=[None]*outdim);
+        return Apply(self, inputs=[x,y], outputs=[output,])
+
+    def perform(self, node, (x, y), (z,)):
+        try:
+            z[0] = numpy.asarray(numpy.tensordot(x, y, self.axes))
+            assert str(z[0].dtype) == 'float64'
+        except ValueError, e:
+            # The error raised by numpy has no shape information, we mean to add that
+            e.args = e.args + (x.shape, y.shape, self.axes)
+            raise
+
+    def grad(self, (x, y), (gz,)):
+        gx, gy = tensordot_grad(self.axes)(x, y, gz)
+        return [gx, gy]
+    
+    def __str__(self):
+        return "tensordot"
+tensordot = TensorDot
+
 class Outer(Op):
    """ Compute vector-vector outer product
    """
@@ -1988,11 +2167,17 @@ class numeric_grad:
            errs.append(numpy.max(numeric_grad.abs_rel_err(a,b)))
        return numpy.max(errs)

+# TODO: remove testcase parameter as it is not used... and useless (forces you to
+# run your testcases from a unittest.TestCase class = not necessary)
 def verify_grad(testcase, op, pt, n_tests=1, rng=numpy.random, eps=1.0e-7, tol=0.0001,
        mode=compile.Mode(optimizer=None, linker='c&py')):
    """ WRITEME
    
    testcase.failUnless(analytic gradient matches finite-diff gradient)
+
+    :param pt: the list of numpy.ndarrays to use as inputs to the op
+    :param op: something that behaves like an Op instance.
+    :param testcase: the thing to call `fail` on if things go awry.
    
    """
    pt = [numpy.array(p) for p in pt]
@@ -2050,6 +2235,7 @@ def verify_grad(testcase, op, pt, n_tests=1, rng=numpy.random, eps=1.0e-7, tol=0
            #print 'analytic grad', analytic_grad
            #print 'numeric grad', num_grad.gf
            raise Exception(verify_grad.E_grad, (max_err, tol))
+
 verify_grad.E_grad = 'gradient error exceeded tolerance'
 """This error is raised when a gradient is calculated, but incorrect."""

--- a/theano/tensor/tests/test_basic.py
+++ b/theano/tensor/tests/test_basic.py
@@ -1582,6 +1582,132 @@ class T_op_cache(unittest.TestCase):
        a = numpy.random.rand(5,2)
        self.failUnless(numpy.all(fn_py(a) == fn_c_or_py(a)))

+
+def test_reshape():
+    """Reshape: forward value, input left unmodified under two modes, and the gradient."""
+    a = dvector()
+    b = dmatrix()
+
+    c = reshape(a, [2,3])
+
+    #basic
+    f = function([a], c, mode='FAST_COMPILE')
+    assert numpy.all(f(numpy.asarray([0,1,2,3,4,5])) == numpy.asarray([[0,1,2], [3,4,5]]))
+
+    #test that it works without inplace operations
+    a_val = numpy.asarray([0,1,2,3,4,5])
+    a_val_copy = numpy.asarray([0,1,2,3,4,5])
+    b_val = numpy.asarray([[0,1,2],[3,4,5]])
+
+    f_sub = function([a,b], c-b, mode='FAST_COMPILE')
+    assert numpy.all(f_sub(a_val, b_val) == 0.0)
+    assert numpy.all(a_val == a_val_copy)  # the input buffer must be left untouched
+
+    #test that it works with inplace operations
+    a_val = numpy.asarray([0,1,2,3,4,5], dtype='float64')
+    a_val_copy = numpy.asarray([0,1,2,3,4,5], dtype='float64')
+    b_val = numpy.asarray([[0,1,2],[3,4,5]], dtype='float64')
+
+    f_sub = function([a,b], c-b, mode=compile.Mode(optimizer='fast_run', linker='c|py'))
+    assert numpy.all(f_sub(a_val, b_val) == 0.0)
+    assert numpy.all(a_val == a_val_copy)  # still untouched with the fast_run optimizer
+
+    # verify gradient
+    tensor.verify_grad(None, Reshape(2), [a_val,numpy.asarray([2,3], dtype='float64')])
+
+
+def test_flatten():
+    """ Flatten always returns a copy of the array. There is no danger with in-place
+    operations and thus no need to test it."""
+
+    a = dmatrix()
+    c = flatten(a)
+    f = function([a], c, mode='FAST_COMPILE')
+    a_val = numpy.asarray([[0,1,2],[3,4,5]], dtype='float64')
+    c_val = numpy.asarray([0,1,2,3,4,5], dtype='float64')
+    assert numpy.all(f(a_val)==c_val)
+    f = function([a], c, mode='FAST_RUN')
+    assert numpy.all(f(a_val)==c_val)
+
+    tensor.verify_grad(None, Flatten(), [a_val])
+
+
+# TODO: write test case for Tile Op
+def test_tile():
+    pass  # intentionally empty until the Tile test case is written
+
+
+class test_tensordot(unittest.TestCase):
+
+    def test0(self):
+
+        for mod in 'FAST_COMPILE', 'FAST_RUN', default_mode:
+
+            # test vector-vector
+            avec = dvector()
+            bvec = dvector()
+            axes = ((0,),(0,))
+            c = tensordot(axes)(avec, bvec)
+            f1 = function([avec,bvec],c, mode=mod)
+            aval = numpy.random.rand(5);
+            bval = numpy.random.rand(5);
+            self.failUnless(numpy.tensordot(aval,bval,axes) == \
+                            f1(aval,bval))
+            tensor.verify_grad(None, TensorDot(axes), [aval,bval])
+
+            # test matrix-vector
+            bmat = dmatrix()
+            axes = ((0,),(1,))
+            c = tensordot(axes)(avec, bmat)
+            f2 = function([avec,bmat],c, mode=mod)
+            aval = numpy.random.rand(5);
+            bval = numpy.random.rand(8,5);
+            self.failUnless(numpy.all(numpy.tensordot(aval,bval,axes) == \
+                                      f2(aval,bval)))
+            tensor.verify_grad(None, TensorDot(axes), [aval,bval])
+
+            # test matrix-matrix
+            amat = dmatrix()
+            axes = ((1,),(0,))
+            c = tensordot(axes)(amat, bmat)
+            f3 = function([amat,bmat],c, mode=mod)
+            aval = numpy.random.rand(4,7);
+            bval = numpy.random.rand(7,9);
+            self.failUnless(numpy.all(numpy.tensordot(aval,bval,axes) == \
+                                      f3(aval,bval)))
+            tensor.verify_grad(None, TensorDot(axes), [aval,bval])
+
+            # test ndarray-matrix, sum over one dim of matrix
+            atens = Tensor('float64', broadcastable=(False,)*4)()
+            axes = ((2,),(1,))
+            c = tensordot(axes)(atens, bmat)
+            f4 = function([atens,bmat],c, mode=mod)
+            aval = numpy.random.rand(1,2,3,4);
+            bval = numpy.random.rand(2,3);
+            self.failUnless(numpy.all(numpy.tensordot(aval,bval,axes) == \
+                                      f4(aval,bval)))
+            tensor.verify_grad(None, TensorDot(axes), [aval,bval])
+
+            # test ndarray-ndarray
+            atens = Tensor('float64', broadcastable=(False,)*4)()
+            btens = Tensor('float64', broadcastable=(False,)*3)()
+            axes = ((1,3),(0,2))
+            c = tensordot(axes)(atens, btens)
+            f5 = function([atens,btens],c, mode=mod)
+            aval = numpy.random.rand(4,3,5,2);
+            bval = numpy.random.rand(3,4,2);
+            self.failUnless(numpy.all(numpy.tensordot(aval,bval,axes) == \
+                                      f5(aval,bval)))
+            tensor.verify_grad(None, TensorDot(axes), [aval,bval])
+            
+            axes = (axes[1],axes[0])
+            c = tensordot(axes)(btens, atens)
+            f6 = function([btens,atens],c, mode=mod)
+            self.failUnless(numpy.all(numpy.tensordot(bval,aval,axes) == \
+                                      f6(bval,aval)))
+            tensor.verify_grad(None, TensorDot(axes), [bval,aval])
+
+
 if __name__ == '__main__':
    if len(sys.argv) >= 2 and sys.argv[1] == 'OPT':
        default_mode = compile.Mode(linker = 'c&py',