提交 1b773bb2 authored 作者: lamblin's avatar lamblin

Merge pull request #334 from benanne/gpu_tensordot

gpu accelerated tensordot by conversion to matrix product
...@@ -2059,6 +2059,33 @@ class GpuContiguous(Op): ...@@ -2059,6 +2059,33 @@ class GpuContiguous(Op):
gpu_contiguous = GpuContiguous()
def tensordot(a, b, axes=2):
    """
    Implementation of tensordot that reduces to a regular matrix product.
    This allows tensordot to be GPU accelerated, which isn't possible with
    the default Theano implementation (which is just a wrapper around
    numpy.tensordot).

    Based on code from Tijmen Tieleman's gnumpy,
    http://www.cs.toronto.edu/~tijmen/gnumpy.html

    :param a: symbolic tensor, the left operand.
    :param b: symbolic tensor, the right operand.
    :param axes: either a scalar number N of axes to sum over (the trailing
        N axes of `a` against the leading N axes of `b`), or a pair
        (a_axes, b_axes) where each element is an axis or list/tuple of
        axes, matched pairwise, as in numpy.tensordot.
    :returns: symbolic tensor holding the tensordot of `a` and `b`.
    :raises ValueError: if `axes` is neither a scalar nor a pair, or if
        the two axis lists have different lengths.
    """
    if numpy.isscalar(axes):
        # 'axes' is a number of axes to multiply and sum over (trailing axes
        # of a, leading axes of b): collapse each operand into a matrix with
        # reshape, use a single dot, then restore the remaining dimensions.
        outshape = tensor.concatenate([a.shape[:a.ndim - axes],
                                       b.shape[axes:]])
        outndim = a.ndim + b.ndim - 2 * axes
        a_reshaped = a.reshape((tensor.prod(a.shape[:a.ndim - axes]),
                                tensor.prod(a.shape[a.ndim - axes:])))
        b_reshaped = b.reshape((tensor.prod(b.shape[:axes]),
                                tensor.prod(b.shape[axes:])))
        return tensor.dot(a_reshaped, b_reshaped).reshape(outshape,
                                                          ndim=outndim)
    elif len(axes) == 2:
        # 'axes' is a pair of axis lists: shuffle the axes of a and b so
        # this reduces to the scalar case above (note the recursion).
        # Like numpy.tensordot, also accept scalar entries (e.g.
        # axes=[1, 0]) by promoting them to length-1 tuples.
        a_other = ((axes[0],) if numpy.isscalar(axes[0])
                   else tuple(axes[0]))
        b_other = ((axes[1],) if numpy.isscalar(axes[1])
                   else tuple(axes[1]))
        if len(a_other) != len(b_other):
            raise ValueError("The two axis lists/tuples must have the same "
                             "length.")
        num_axes = len(a_other)
        # Move the summed-over axes of a to the end and those of b to the
        # front, preserving the requested pairwise matching order.
        a_order = (tuple(x for x in xrange(a.ndim) if x not in a_other)
                   + a_other)
        b_order = (b_other
                   + tuple(x for x in xrange(b.ndim) if x not in b_other))
        a_shuffled = a.dimshuffle(a_order)
        b_shuffled = b.dimshuffle(b_order)
        return tensordot(a_shuffled, b_shuffled, num_axes)
    else:
        raise ValueError("Axes should be scalar valued or a list/tuple of len 2.")
# Those are predefined CudaNdarrayType as done in tensor.basic
# Useful mostly for test as the gpu op are inserted automatically...
......
...@@ -870,6 +870,36 @@ def test_shared_cudandarray(): ...@@ -870,6 +870,36 @@ def test_shared_cudandarray():
a = cuda.shared_constructor(cuda.CudaNdarray.zeros((2,3)))
assert isinstance(a.type, tcn.CudaNdarrayType)
def test_tensordot_reshape():
    '''Check that the tensordot implementation based on dimshuffle, reshape
    and dot produces the same results as the default (numpy) version.'''
    # Build small test tensors as outer products of 1-d ramps.
    a = numpy.arange(20, dtype=theano.config.floatX) / 20.0
    b = numpy.arange(10, dtype=theano.config.floatX) / 10.0
    c = numpy.arange(5, dtype=theano.config.floatX) / 5.0
    d = numpy.arange(8, dtype=theano.config.floatX) / 8.0
    tensor1 = numpy.tensordot(a, numpy.tensordot(b, numpy.tensordot(c, d, 0), 0), 0)
    tensor2 = numpy.tensordot(c, numpy.tensordot(d, a, 0), 0)
    tensor3 = tensor2.swapaxes(1, 2).swapaxes(0, 2)  # d, a, c

    x = T.tensor4('x')
    y = T.tensor3('y')

    def compare(axes, lhs, rhs):
        # Compile the reference (T) and reshape-based (B) graphs on the
        # same inputs and check that their outputs agree.
        expected = theano.function([x, y], T.tensordot(x, y, axes=axes))(lhs, rhs)
        obtained = theano.function([x, y], B.tensordot(x, y, axes=axes))(lhs, rhs)
        assert numpy.allclose(expected, obtained)

    # case 1: number of axes to sum over
    compare(2, tensor1, tensor2)
    # case 2: axis pairs
    compare([(0, 3), (1, 0)], tensor1, tensor3)
    compare([(0, 3, 2), (1, 0, 2)], tensor1, tensor3)
class test_size(unittest.TestCase):
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论