提交 ce5974fb authored 作者: Frederic's avatar Frederic

Speed up/generalize/GPU friendly the kron op.

The kron operation uses existing basic functions (available on the GPU). Reusing them is faster than the scipy version, as the scipy version ends up copying the data twice; we copy it only once, in the reshape. We now also support inputs whose ndim != 2, unlike scipy.
上级 46d97f35
import theano
from theano.gof import Op, Apply
from theano import tensor
try:
import scipy.linalg
imported_scipy = True
except ImportError:
imported_scipy = False
class Kron(Op):
    """Kronecker product of two matrices.

    For ``a`` of shape (M, N) and ``b`` of shape (P, Q), the output has
    shape (M*P, N*Q) and is the block matrix

        a[0, 0]*b   a[0, 1]*b   ...  a[0, -1]*b
        a[1, 0]*b   a[1, 1]*b   ...  a[1, -1]*b
        ...
        a[-1, 0]*b  a[-1, 1]*b   ...  a[-1, -1]*b

    (each ``a[i, j]*b`` is itself a matrix with the same shape as ``b``).
    Evaluation is delegated to ``scipy.linalg.kron``.
    """

    # The op carries no parameters, so all instances are interchangeable:
    # equality and hashing depend only on the type.
    def __eq__(self, other):
        return type(self) == type(other)

    def __hash__(self):
        return hash(type(self))

    def __str__(self):
        return "%s" % self.__class__.__name__

    def make_node(self, a, b):
        assert imported_scipy, (
            "Scipy not available. Scipy is needed for the Kron op")
        a = tensor.as_tensor_variable(a)
        b = tensor.as_tensor_variable(b)
        if (not a.ndim == 2 or not b.ndim == 2):
            raise TypeError('%s: inputs must have two dimensions' %
                            self.__class__.__name__)
        # BUG FIX: upcast() expects dtype strings, not variables; passing
        # the variables `a` and `b` themselves fails at graph-build time.
        out_var = tensor.TensorType(dtype=theano.scalar.upcast(a.dtype,
                                                               b.dtype),
                                    broadcastable=(False, False))()
        return Apply(self, [a, b], [out_var])

    def infer_shape(self, node, in_shapes):
        # Output shape is the elementwise product of the two input shapes.
        shape_a, shape_b = in_shapes
        return [[shape_a[0] * shape_b[0], shape_a[1] * shape_b[1]]]

    def perform(self, node, inputs, output_storage):
        a, b = inputs
        output_storage[0][0] = scipy.linalg.kron(a, b)

    def grad(self, inputs, cost_grad):
        raise NotImplementedError('%s: gradient is not currently'
                                  ' implemented' % self.__class__.__name__)
# Module-level instance of the op.
# NOTE(review): this name is immediately shadowed by the `kron` function
# defined just below — in the original diff this line was being removed;
# its presence here looks like a scrape artifact.
kron = Kron()
def kron(a, b):
    """Kronecker product of a and b.

    Built from existing Theano ops (outer, reshape, dimshuffle) instead
    of scipy.linalg.kron, so it can also run on the GPU.  Unlike scipy,
    the inputs are not restricted to ndim == 2; the only requirement is
    a.ndim + b.ndim >= 3.

    :param a: symbolic tensor (or value convertible to one)
    :param b: symbolic tensor (or value convertible to one)
    :return: symbolic tensor holding the Kronecker product
    :raises TypeError: if a.ndim + b.ndim <= 2
    """
    a = tensor.as_tensor_variable(a)
    b = tensor.as_tensor_variable(b)
    if (a.ndim + b.ndim <= 2):
        raise TypeError('kron: inputs dimensions must sum to 3 or more. '
                        'You passed %d and %d.' % (a.ndim, b.ndim))
    # outer() flattens its arguments, so restore one output axis per
    # input axis: shape is the concatenation of a.shape and b.shape.
    o = tensor.outer(a, b)
    o = o.reshape(tensor.concatenate((a.shape, b.shape)),
                  a.ndim + b.ndim)
    # Swap axes 1 and 2 so that merging axis pairs below yields the
    # Kronecker block layout.
    shf = o.dimshuffle(0, 2, 1, * range(3, o.ndim))
    if shf.ndim == 3:
        # 3-d case (one input is 1-d): NOTE(review) the `shf` computed
        # above is discarded and a different permutation is flattened —
        # presumably to match scipy's layout for vector inputs; confirm.
        shf = o.dimshuffle(1, 0, 2)
        o = shf.flatten()
    else:
        # Merge the leading axis pairs: (a0*b0, a1*b1, remaining axes...).
        o = shf.reshape((o.shape[0] * o.shape[2],
                         o.shape[1] * o.shape[3]) +
                        tuple([o.shape[i] for i in range(4, o.ndim)]))
    return o
from nose.plugins.skip import SkipTest
import numpy
import theano
from theano import tensor, function
from theano.tests import unittest_tools as utt
from theano.sandbox.linalg.kron import Kron, kron
from theano.sandbox.linalg.kron import kron
floatX = theano.config.floatX  # default floating-point dtype for test data
try:
import scipy.linalg
......@@ -21,32 +24,23 @@ class TestKron(utt.InferShapeTester):
    def setUp(self):
        # Standard InferShapeTester setup, plus the op instance / op class
        # that test_infer_shape passes to _compile_and_check.
        super(TestKron, self).setUp()
        self.op_class = Kron
        self.op = kron
def test_perform(self):
x = tensor.dmatrix()
y = tensor.dmatrix()
f = function([x, y], kron(x, y))
for shp0 in [(8, 6), (5, 8)]:
for shp1 in [(5, 7), (3, 3)]:
a = numpy.random.rand(*shp0)
b = numpy.random.rand(*shp1)
for shp0 in [(2,), (2, 3), (2, 3, 4), (2, 3, 4, 5)]:
for shp1 in [(6,), (6, 7), (6, 7, 8), (6, 7, 8, 9)]:
if len(shp0) + len(shp1) == 2:
continue
x = tensor.tensor(dtype='floatX',
broadcastable=(False,) * len(shp0))
y = tensor.tensor(dtype='floatX',
broadcastable=(False,) * len(shp1))
f = function([x, y], kron(x, y))
a = numpy.asarray(self.rng.rand(*shp0)).astype(floatX)
b = self.rng.rand(*shp1).astype(floatX)
out = f(a, b)
assert numpy.allclose(out, scipy.linalg.kron(a, b))
def test_infer_shape(self):
x = tensor.dmatrix()
y = tensor.dmatrix()
self._compile_and_check([x, y], [self.op(x, y)],
[numpy.random.rand(8, 5),
numpy.random.rand(3, 7)],
self.op_class)
self._compile_and_check([x, y], [self.op(x, y)],
[numpy.random.rand(2, 5),
numpy.random.rand(6, 3)],
self.op_class)
if __name__ == "__main__":
t = TestKron('setUp')
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论