提交 ce5974fb authored 作者: Frederic's avatar Frederic

Speed up/generalize/GPU friendly the kron op.

The kron operation uses existing basic functions (available on the GPU). Reusing them is faster than the scipy version, as the scipy version ends up copying the data twice; we copy it only once, in the reshape. We now also support inputs whose ndim != 2, unlike scipy.
上级 46d97f35
import theano
from theano.gof import Op, Apply
from theano import tensor
try:
import scipy.linalg
imported_scipy = True
except ImportError:
imported_scipy = False
class Kron(Op):
    """Kronecker product of two matrices.

    For ``a`` of shape (M, N) and ``b`` of shape (P, Q), the output has
    shape (M*P, N*Q) and is the block matrix

        a[0, 0]*b   a[0, 1]*b   ...  a[0, -1]*b
        a[1, 0]*b   a[1, 1]*b   ...  a[1, -1]*b
        ...
        a[-1, 0]*b  a[-1, 1]*b   ...  a[-1, -1]*b

    (each ``a[i, j]*b`` is itself a matrix with the same shape as ``b``).
    Evaluation is delegated to ``scipy.linalg.kron``.
    """

    # The op carries no parameters, so all instances are interchangeable:
    # equality and hashing depend only on the type.
    def __eq__(self, other):
        return type(self) == type(other)

    def __hash__(self):
        return hash(type(self))

    def __str__(self):
        return "%s" % self.__class__.__name__

    def make_node(self, a, b):
        assert imported_scipy, (
            "Scipy not available. Scipy is needed for the Kron op")
        a = tensor.as_tensor_variable(a)
        b = tensor.as_tensor_variable(b)
        if (not a.ndim == 2 or not b.ndim == 2):
            raise TypeError('%s: inputs must have two dimensions' %
                            self.__class__.__name__)
        # BUG FIX: upcast() expects dtype strings, not variables; passing
        # the variables `a` and `b` themselves fails at graph-build time.
        out_var = tensor.TensorType(dtype=theano.scalar.upcast(a.dtype,
                                                               b.dtype),
                                    broadcastable=(False, False))()
        return Apply(self, [a, b], [out_var])

    def infer_shape(self, node, in_shapes):
        # Output shape is the elementwise product of the two input shapes.
        shape_a, shape_b = in_shapes
        return [[shape_a[0] * shape_b[0], shape_a[1] * shape_b[1]]]

    def perform(self, node, inputs, output_storage):
        a, b = inputs
        output_storage[0][0] = scipy.linalg.kron(a, b)

    def grad(self, inputs, cost_grad):
        raise NotImplementedError('%s: gradient is not currently'
                                  ' implemented' % self.__class__.__name__)
# Module-level instance of the op.
# NOTE(review): this name is immediately shadowed by the `kron` function
# defined just below — in the original diff this line was being removed;
# its presence here looks like a scrape artifact.
kron = Kron()
def kron(a, b):
    """Kronecker product of a and b.

    Built from existing Theano ops (outer, reshape, dimshuffle) instead
    of scipy.linalg.kron, so it can also run on the GPU.  Unlike scipy,
    the inputs are not restricted to ndim == 2; the only requirement is
    a.ndim + b.ndim >= 3.

    :param a: symbolic tensor (or value convertible to one)
    :param b: symbolic tensor (or value convertible to one)
    :return: symbolic tensor holding the Kronecker product
    :raises TypeError: if a.ndim + b.ndim <= 2
    """
    a = tensor.as_tensor_variable(a)
    b = tensor.as_tensor_variable(b)
    if (a.ndim + b.ndim <= 2):
        raise TypeError('kron: inputs dimensions must sum to 3 or more. '
                        'You passed %d and %d.' % (a.ndim, b.ndim))
    # outer() flattens its arguments, so restore one output axis per
    # input axis: shape is the concatenation of a.shape and b.shape.
    o = tensor.outer(a, b)
    o = o.reshape(tensor.concatenate((a.shape, b.shape)),
                  a.ndim + b.ndim)
    # Swap axes 1 and 2 so that merging axis pairs below yields the
    # Kronecker block layout.
    shf = o.dimshuffle(0, 2, 1, * range(3, o.ndim))
    if shf.ndim == 3:
        # 3-d case (one input is 1-d): NOTE(review) the `shf` computed
        # above is discarded and a different permutation is flattened —
        # presumably to match scipy's layout for vector inputs; confirm.
        shf = o.dimshuffle(1, 0, 2)
        o = shf.flatten()
    else:
        # Merge the leading axis pairs: (a0*b0, a1*b1, remaining axes...).
        o = shf.reshape((o.shape[0] * o.shape[2],
                         o.shape[1] * o.shape[3]) +
                        tuple([o.shape[i] for i in range(4, o.ndim)]))
    return o
from nose.plugins.skip import SkipTest
import numpy
import theano
from theano import tensor, function
from theano.tests import unittest_tools as utt
from theano.sandbox.linalg.kron import Kron, kron
from theano.sandbox.linalg.kron import kron
floatX = theano.config.floatX  # default floating-point dtype for test data
try:
import scipy.linalg
......@@ -21,32 +24,23 @@ class TestKron(utt.InferShapeTester):
    def setUp(self):
        # Standard InferShapeTester setup, plus the op instance / op class
        # that test_infer_shape passes to _compile_and_check.
        super(TestKron, self).setUp()
        self.op_class = Kron
        self.op = kron
def test_perform(self):
x = tensor.dmatrix()
y = tensor.dmatrix()
f = function([x, y], kron(x, y))
for shp0 in [(8, 6), (5, 8)]:
for shp1 in [(5, 7), (3, 3)]:
a = numpy.random.rand(*shp0)
b = numpy.random.rand(*shp1)
for shp0 in [(2,), (2, 3), (2, 3, 4), (2, 3, 4, 5)]:
for shp1 in [(6,), (6, 7), (6, 7, 8), (6, 7, 8, 9)]:
if len(shp0) + len(shp1) == 2:
continue
x = tensor.tensor(dtype='floatX',
broadcastable=(False,) * len(shp0))
y = tensor.tensor(dtype='floatX',
broadcastable=(False,) * len(shp1))
f = function([x, y], kron(x, y))
a = numpy.asarray(self.rng.rand(*shp0)).astype(floatX)
b = self.rng.rand(*shp1).astype(floatX)
out = f(a, b)
assert numpy.allclose(out, scipy.linalg.kron(a, b))
def test_infer_shape(self):
x = tensor.dmatrix()
y = tensor.dmatrix()
self._compile_and_check([x, y], [self.op(x, y)],
[numpy.random.rand(8, 5),
numpy.random.rand(3, 7)],
self.op_class)
self._compile_and_check([x, y], [self.op(x, y)],
[numpy.random.rand(2, 5),
numpy.random.rand(6, 3)],
self.op_class)
if __name__ == "__main__":
t = TestKron('setUp')
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论