Commit 7748ec15, authored by Alexandre de Brebisson, committed by Xavier Bouthillier

Add tests for BlockSparse gemv and outer

Parent commit: 658bf2ef
import numpy import numpy
from numpy.random import randn
from unittest import TestCase
from nose.plugins.skip import SkipTest from nose.plugins.skip import SkipTest
import theano import theano
from theano import tensor from theano import tensor
import theano.tests.unittest_tools as utt import theano.tests.unittest_tools as utt
import theano.sandbox.tests.test_blocksparse
import theano.sandbox.cuda as cuda_ndarray import theano.sandbox.cuda as cuda_ndarray
if not cuda_ndarray.cuda_available: if not cuda_ndarray.cuda_available:
raise SkipTest('Optional package cuda disabled') raise SkipTest('Optional package cuda disabled')
from theano.sandbox.cuda.blocksparse import (GpuSparseBlockOuter,
from theano.sandbox.cuda.basic_ops import (GpuDimShuffle, gpu_sparse_block_gemv,
as_cuda_ndarray_variable) gpu_sparse_block_outer)
from theano.sandbox.cuda.blocksparse import (sparse_block_dot_SS,
sparse_block_gemv_ss,
sparse_block_outer_ss,
sparse_block_outer_ss_inplace,
SparseBlockOuterSS)
from theano.sandbox.cuda.var import float32_shared_constructor from theano.sandbox.cuda.var import float32_shared_constructor
...@@ -29,187 +21,56 @@ else: ...@@ -29,187 +21,56 @@ else:
mode_with_gpu = theano.compile.mode.get_default_mode().including('gpu') mode_with_gpu = theano.compile.mode.get_default_mode().including('gpu')
def setup():
    """Seed the test RNG; attached to ``test_blocksparse`` as its ``setup``."""
    utt.seed_rng()


def blocksparse_data():
    """Build random inputs for the block-sparse dot tests.

    Returns ``(weight, input, inputIndice, bias, outputIndice)`` where

    * ``weight`` is float32 of shape (128, 64, 40, 30),
    * ``input`` is float32 of shape (2, 7, 40),
    * ``inputIndice`` has shape (2, 7); each row holds distinct block ids
      drawn from ``range(128)``,
    * ``bias`` is float32 of shape (64, 30),
    * ``outputIndice`` has shape (2, 9); each row holds distinct block ids
      drawn from ``range(64)``.
    """
    nInputBlock = 128
    nOutputBlock = 64
    inputSize = 40
    outputSize = 30
    inputWindowSize = 7
    outputWindowSize = 9
    batchSize = 2

    hidden = randn(batchSize, inputWindowSize, inputSize).astype('float32')
    permutation = numpy.random.permutation
    # numpy.vstack needs a real sequence: generators are rejected by newer
    # NumPy releases, so the rows are collected in lists first.
    inputIndice = numpy.vstack(
        [permutation(nInputBlock)[:inputWindowSize]
         for _ in range(batchSize)])
    outputIndice = numpy.vstack(
        [permutation(nOutputBlock)[:outputWindowSize]
         for _ in range(batchSize)])
    weight = randn(nInputBlock, nOutputBlock,
                   inputSize, outputSize).astype('float32')
    bias = randn(nOutputBlock, outputSize).astype('float32')

    return weight, hidden, inputIndice, bias, outputIndice


class BlockSparse_Gemv_and_Outer(
        theano.sandbox.tests.test_blocksparse.BlockSparse_Gemv_and_Outer):
    """Re-run the inherited block-sparse tests with the GPU ops and mode."""

    def setUp(self):
        utt.seed_rng()
        self.mode = mode_with_gpu.excluding('constant_folding')
        self.gemv_op = gpu_sparse_block_gemv
        self.outer_op = gpu_sparse_block_outer
def blocksparse(W, h, iIdx, b, oIdx):
    """NumPy reference implementation of the block-sparse dot.

    The result starts as the bias rows selected by ``oIdx`` (a copy, so
    ``b`` itself is left untouched).  For every batch row ``n`` and output
    slot ``j`` it then accumulates
    ``dot(h[n, i], W[iIdx[n, i], oIdx[n, j]])`` over all input slots ``i``.
    """
    out = b.take(oIdx, axis=0)
    n_batch, n_out_slots = out.shape[0], out.shape[1]
    n_in_slots = h.shape[1]

    for n in range(n_batch):
        for j in range(n_out_slots):
            out_block = oIdx[n, j]
            for i in range(n_in_slots):
                # One vector-matrix product per (input block, output block).
                block = W[iIdx[n, i], out_block]
                out[n, j, :] += numpy.dot(h[n, i], block)
    return out
def test_blocksparse():
    """Check ``sparse_block_dot_SS`` against the NumPy ``blocksparse``."""
    bias = tensor.fmatrix()
    weight = tensor.ftensor4()
    hidden = tensor.ftensor3()
    in_idx = tensor.lmatrix()
    out_idx = tensor.lmatrix()
    sym_inputs = [weight, hidden, in_idx, bias, out_idx]

    fn = theano.function(sym_inputs, sparse_block_dot_SS(*sym_inputs),
                         mode=mode_with_gpu)

    values = blocksparse_data()
    computed = fn(*values)
    expected = blocksparse(*values)

    utt.assert_allclose(expected, computed)


# Seed the RNG before this test runs.
test_blocksparse.setup = setup
# Test the Fortran order for W (which can happen in the grad for some graphs).
def test_blocksparseF():
    """Check ``sparse_block_dot_SS`` when ``W`` arrives through a dimshuffle.

    The function is fed ``W`` with its last two axes swapped and the graph
    swaps them back with a ``GpuDimShuffle``, so the result must match the
    reference computed from the unswapped ``W``.
    """
    bias = tensor.fmatrix()
    weight = tensor.ftensor4()
    hidden = tensor.ftensor3()
    in_idx = tensor.lmatrix()
    out_idx = tensor.lmatrix()

    swap_last_two = GpuDimShuffle((False,) * 4, (0, 1, 3, 2))
    weight_swapped = swap_last_two(as_cuda_ndarray_variable(weight))
    out = sparse_block_dot_SS(weight_swapped, hidden, in_idx, bias, out_idx)

    fn = theano.function([weight, hidden, in_idx, bias, out_idx], out,
                         mode=mode_with_gpu)

    W_val, h_val, iIdx_val, b_val, oIdx_val = blocksparse_data()

    computed = fn(numpy.swapaxes(W_val, 2, 3), h_val, iIdx_val, b_val,
                  oIdx_val)
    expected = blocksparse(W_val, h_val, iIdx_val, b_val, oIdx_val)

    utt.assert_allclose(expected, computed)
def test_blocksparse_grad():
    """Verify the gradients of ``sparse_block_gemv_ss`` w.r.t. b, h and W."""
    hidden_val = randn(1, 2, 3).astype('float32')
    in_idx_val = numpy.random.permutation(3)[:2][numpy.newaxis, :]
    out_idx_val = numpy.random.permutation(3)[:2][numpy.newaxis, :]
    weight_val = randn(3, 3, 3, 4).astype('float32')
    bias_val = randn(3, 4).astype('float32')

    # The block indices are graph constants; only b, h and W are perturbed.
    in_idx = theano.tensor.constant(in_idx_val)
    out_idx = theano.tensor.constant(out_idx_val)

    def gemv(b, h, W):
        selected_bias = b.take(out_idx, axis=0)
        return sparse_block_gemv_ss(selected_bias, W, h, in_idx, out_idx)

    utt.verify_grad(gemv, [bias_val, hidden_val, weight_val],
                    mode=mode_with_gpu)
def test_blocksparse_grad_1():
    """Verify the gradients when every dimension has size 1."""
    hidden_val = randn(1, 1, 1).astype('float32')
    in_idx_val = numpy.random.permutation(1)[:1][numpy.newaxis, :]
    out_idx_val = numpy.random.permutation(1)[:1][numpy.newaxis, :]
    weight_val = randn(1, 1, 1, 1).astype('float32')
    bias_val = randn(1, 1).astype('float32')

    # The block indices are graph constants; only b, h and W are perturbed.
    in_idx = theano.tensor.constant(in_idx_val)
    out_idx = theano.tensor.constant(out_idx_val)

    def gemv(b, h, W):
        selected_bias = b.take(out_idx, axis=0)
        return sparse_block_gemv_ss(selected_bias, W, h, in_idx, out_idx)

    utt.verify_grad(gemv, [bias_val, hidden_val, weight_val],
                    mode=mode_with_gpu)
def test_blocksparse_grad_shape():
    """Check that the gradients w.r.t. b, W and h have their inputs' shapes."""
    bias = tensor.fmatrix()
    weight = tensor.ftensor4()
    hidden = tensor.ftensor3()
    in_idx = tensor.lmatrix()
    out_idx = tensor.lmatrix()

    out = sparse_block_gemv_ss(bias.take(out_idx, axis=0), weight, hidden,
                               in_idx, out_idx)
    grads = theano.grad(out.sum(), [bias, weight, hidden])
    fn = theano.function([weight, hidden, in_idx, bias, out_idx], grads,
                         mode=mode_with_gpu)

    W_val, h_val, iIdx_val, b_val, oIdx_val = blocksparse_data()

    # Only checks that the function runs and that the shapes line up.
    bias_grad, weight_grad, hidden_grad = fn(W_val, h_val, iIdx_val, b_val,
                                             oIdx_val)

    assert bias_grad.shape == b_val.shape
    assert hidden_grad.shape == h_val.shape
    assert weight_grad.shape == W_val.shape
# This test is temporarily disabled since we disabled the output_merge
# and alpha_merge optimizations for blocksparse due to brokenness.
# Re-enable when those are re-added.
def Xtest_blocksparse_grad_merge():
    """Check that the learning-rate update is merged into the outer op.

    Two functions apply the same SGD-style update to the shared ``W``: one
    compiled with the default GPU mode, one with the two blocksparse merge
    optimizations excluded.  Both must leave ``W`` with the same value.

    NOTE(review): this is the pre-commit, module-level form of the test.
    The post-commit revision shown beside it takes ``self``, obtains its
    data from ``self.gemv_data()``, builds the graph with
    ``gpu_sparse_block_gemv`` and checks for ``GpuSparseBlockOuter``
    instead of ``SparseBlockOuterSS``.
    """
    b = tensor.fmatrix()
    h = tensor.ftensor3()
    iIdx = tensor.lmatrix()
    oIdx = tensor.lmatrix()

    W_val, h_val, iIdx_val, b_val, oIdx_val = blocksparse_data()
    W = float32_shared_constructor(W_val)

    o = sparse_block_gemv_ss(b.take(oIdx, axis=0), W, h, iIdx, oIdx)
    gW = theano.grad(o.sum(), W)

    lr = numpy.asarray(0.05, dtype='float32')
    upd = W - lr * gW

    f1 = theano.function([h, iIdx, b, oIdx], updates=[(W, upd)],
                         mode=mode_with_gpu)
    # Make sure the lr update was merged.
    assert isinstance(f1.maker.fgraph.outputs[0].owner.op,
                      SparseBlockOuterSS)

    # Exclude the merge optimizations.
    mode = mode_with_gpu.excluding('local_merge_blocksparse_alpha')
    mode = mode.excluding('local_merge_blocksparse_output')

    f2 = theano.function([h, iIdx, b, oIdx], updates=[(W, upd)], mode=mode)
    # Make sure the lr update is not merged.
    assert not isinstance(f2.maker.fgraph.outputs[0].owner.op,
                          SparseBlockOuterSS)

    f2(h_val, iIdx_val, b_val, oIdx_val)
    W_ref = W.get_value()

    # Reset the shared variable before running the optimized function.
    W.set_value(W_val)
    f1(h_val, iIdx_val, b_val, oIdx_val)
    W_opt = W.get_value()

    utt.assert_allclose(W_ref, W_opt)
"""
Tests for block sparse dot
"""
import unittest
import time
import numpy
from numpy.random import randn
import theano
from theano import tensor
import theano.tests.unittest_tools as utt
from theano.sandbox.blocksparse import sparse_block_dot, cpu_sparse_block_gemv, \
cpu_sparse_block_outer
class BlockSparse_Gemv_and_Outer(unittest.TestCase):
    """Compare the block-sparse gemv and outer ops with NumPy references.

    ``setUp`` stores the compilation mode and the two ops under test in
    ``self.mode``, ``self.gemv_op`` and ``self.outer_op``; every test goes
    through those attributes, so a subclass only has to override ``setUp``
    to run the same tests against other implementations of the ops.
    """

    def runTest(self):
        # No-op.  unittest uses ``runTest`` as the default method name, so
        # this presumably exists to let the case be instantiated without
        # naming a test method.
        pass

    def setUp(self):
        utt.seed_rng()
        # NOTE(review): constant folding is presumably excluded so that the
        # ops are still executed when their inputs are graph constants (the
        # gradient tests use constant indices) -- confirm.
        self.mode = theano.compile.get_default_mode().excluding(
            'constant_folding')
        self.gemv_op = cpu_sparse_block_gemv
        self.outer_op = cpu_sparse_block_outer

    @staticmethod
    def _random_gemv_data(nInputBlock, nOutputBlock, inputSize, outputSize,
                          inputWindowSize, outputWindowSize, batchSize):
        """Draw random gemv inputs of the requested sizes.

        Returns ``(weight, input, inputIndice, bias, outputIndice)``:

        * ``weight``: float32,
          (nInputBlock, nOutputBlock, inputSize, outputSize)
        * ``input``: float32, (batchSize, inputWindowSize, inputSize)
        * ``inputIndice``: int32, (batchSize, inputWindowSize); each row
          holds distinct ids from ``range(nInputBlock)``
        * ``bias``: float32, (nOutputBlock, outputSize)
        * ``outputIndice``: int32, (batchSize, outputWindowSize); each row
          holds distinct ids from ``range(nOutputBlock)``
        """
        hidden = randn(batchSize, inputWindowSize,
                       inputSize).astype('float32')
        permutation = numpy.random.permutation
        # numpy.vstack needs a real sequence: generators are rejected by
        # newer NumPy releases, so the rows are collected in lists first.
        inputIndice = numpy.vstack(
            [permutation(nInputBlock)[:inputWindowSize]
             for _ in range(batchSize)]).astype('int32')
        outputIndice = numpy.vstack(
            [permutation(nOutputBlock)[:outputWindowSize]
             for _ in range(batchSize)]).astype('int32')
        weight = randn(nInputBlock, nOutputBlock,
                       inputSize, outputSize).astype('float32')
        bias = randn(nOutputBlock, outputSize).astype('float32')

        return weight, hidden, inputIndice, bias, outputIndice

    @staticmethod
    def gemv_data():
        """Return small random ``(weight, input, iIdx, bias, oIdx)`` arrays.

        Sizes: 8 input blocks, 7 output blocks, block size 6 x 5, windows of
        4 inputs and 3 outputs, batch of 2.
        """
        return BlockSparse_Gemv_and_Outer._random_gemv_data(
            nInputBlock=8, nOutputBlock=7, inputSize=6, outputSize=5,
            inputWindowSize=4, outputWindowSize=3, batchSize=2)

    @staticmethod
    def outer_data():
        """Return random ``(o, x, y, xIdx, yIdx)`` for the outer-product op.

        ``o`` is float32 of shape (8, 7, 6, 5); ``x`` and ``y`` are float32
        of shape (2, 4, 6) and (2, 3, 5); ``xIdx`` and ``yIdx`` are int32 of
        shape (2, 4) and (2, 3).  The indices come from ``randint``, so a
        row may contain the same block id more than once.
        """
        nInputBlock = 8
        nOutputBlock = 7
        xSize = 6
        ySize = 5
        xWindowSize = 4
        yWindowSize = 3
        batchSize = 2

        o = randn(nInputBlock, nOutputBlock, xSize, ySize).astype('float32')
        x = randn(batchSize, xWindowSize, xSize).astype('float32')
        y = randn(batchSize, yWindowSize, ySize).astype('float32')
        randint = numpy.random.randint
        xIdx = numpy.vstack(
            [randint(0, nInputBlock, size=xWindowSize)
             for _ in range(batchSize)]).astype('int32')
        yIdx = numpy.vstack(
            [randint(0, nOutputBlock, size=yWindowSize)
             for _ in range(batchSize)]).astype('int32')

        return o, x, y, xIdx, yIdx

    @staticmethod
    def gemv_numpy(o, W, h, iIdx, oIdx):
        """Reference gemv with explicit loops.

        Adds ``dot(h[b, i], W[iIdx[b, i], oIdx[b, j]])`` to ``o[b, j]`` for
        every batch row ``b``, output slot ``j`` and input slot ``i``.
        ``o`` is updated in place and returned.
        """
        for b in range(o.shape[0]):
            for j in range(o.shape[1]):
                outputIdx = oIdx[b, j]
                for i in range(h.shape[1]):
                    inputIdx = iIdx[b, i]
                    w = W[inputIdx, outputIdx]
                    o[b, j, :] += numpy.dot(h[b, i], w)
        return o

    @staticmethod
    def gemv_numpy2(o, W, h, iIdx, oIdx):
        """Reference gemv using one flattened matrix product per batch row.

        ``o`` is updated in place and returned.
        """
        for b in range(o.shape[0]):
            # Gather the selected blocks, move the two block-size axes next
            # to their window axes and flatten to a single 2-d matrix.
            w = W[numpy.ix_(iIdx[b], oIdx[b])].swapaxes(1, 2)
            w = w.reshape((w.shape[0] * w.shape[1],
                           w.shape[2] * w.shape[3]))
            o[b] += numpy.dot(h[b].ravel(), w).reshape(o.shape[1:])
        return o

    @staticmethod
    def gemv_numpy3(o, W, h, iIdx, oIdx):
        """Reference gemv using ``numpy.einsum`` per batch row.

        ``o`` is updated in place and returned.
        """
        for b in range(o.shape[0]):
            w = W[numpy.ix_(iIdx[b], oIdx[b])]
            # Sums over the input-window axis (i) and the input-size axis
            # (k); the remaining axes (j, l) form the output block rows.
            o[b] += numpy.einsum('ik,ijkl', h[b], w)
        return o

    @staticmethod
    def gemv_data2():
        """Return large random gemv inputs, used by ``compare`` for timing.

        Sizes: 100 input and output blocks, block size 50 x 50, windows of
        30 inputs and 30 outputs, batch of 1.
        """
        return BlockSparse_Gemv_and_Outer._random_gemv_data(
            nInputBlock=100, nOutputBlock=100, inputSize=50, outputSize=50,
            inputWindowSize=30, outputWindowSize=30, batchSize=1)

    @staticmethod
    def compare():
        """Print the run time of the three NumPy gemv references.

        One elapsed time (in seconds) is printed per implementation, in the
        order ``gemv_numpy``, ``gemv_numpy2``, ``gemv_numpy3``.  The results
        themselves are not compared.
        """
        # time.clock() no longer exists in Python >= 3.8; default_timer is
        # available on every Python version.
        from timeit import default_timer

        cls = BlockSparse_Gemv_and_Outer
        W_val, h_val, iIdx_val, b_val, oIdx_val = cls.gemv_data2()

        for reference in (cls.gemv_numpy, cls.gemv_numpy2, cls.gemv_numpy3):
            # Each reference accumulates in place, so give it a fresh copy
            # of the selected bias rows.
            o_val = b_val.take(oIdx_val, axis=0)
            start = default_timer()
            reference(o_val, W_val, h_val, iIdx_val, oIdx_val)
            print(default_timer() - start)

    @staticmethod
    def outer_numpy(o, x, y, xIdx, yIdx):
        """Reference block-sparse outer product.

        Adds ``outer(x[b, i], y[b, j])`` to the block
        ``o[xIdx[b, i], yIdx[b, j]]`` for every batch row ``b`` and every
        pair of window slots.  ``o`` is updated in place and returned.
        """
        for b in range(x.shape[0]):
            for i in range(xIdx.shape[1]):
                for j in range(yIdx.shape[1]):
                    o[xIdx[b, i], yIdx[b, j]] += numpy.outer(x[b, i, :],
                                                             y[b, j, :])
        return o

    @staticmethod
    def _gemv_symbols():
        """Return fresh symbolic ``(b, W, h, iIdx, oIdx)`` gemv inputs."""
        b = tensor.fmatrix()
        W = tensor.ftensor4()
        h = tensor.ftensor3()
        iIdx = tensor.imatrix()
        oIdx = tensor.imatrix()
        return b, W, h, iIdx, oIdx

    def test_sparseblockdot(self):
        """
        Compares the numpy version of sparseblockgemv to sparse_block_dot.
        """
        b, W, h, iIdx, oIdx = self._gemv_symbols()

        o = sparse_block_dot(W, h, iIdx, b, oIdx)
        f = theano.function([W, h, iIdx, b, oIdx], o, mode=self.mode)

        W_val, h_val, iIdx_val, b_val, oIdx_val = self.gemv_data()

        th_out = f(W_val, h_val, iIdx_val, b_val, oIdx_val)
        ref_out = self.gemv_numpy(
            b_val.take(oIdx_val, axis=0), W_val, h_val, iIdx_val, oIdx_val)

        utt.assert_allclose(ref_out, th_out)

    def test_sparseblockgemv(self):
        """
        Compares the numpy and theano versions of sparseblockgemv.
        """
        b, W, h, iIdx, oIdx = self._gemv_symbols()

        o = self.gemv_op(b.take(oIdx, axis=0), W, h, iIdx, oIdx)
        f = theano.function([W, h, iIdx, b, oIdx], o, mode=self.mode)

        W_val, h_val, iIdx_val, b_val, oIdx_val = self.gemv_data()

        th_out = f(W_val, h_val, iIdx_val, b_val, oIdx_val)
        ref_out = self.gemv_numpy(
            b_val.take(oIdx_val, axis=0), W_val, h_val, iIdx_val, oIdx_val)

        utt.assert_allclose(ref_out, th_out)

    def test_sparseblockgemvF(self):
        """
        Test the Fortran order for W (which can happen in the grad for some
        graphs).
        """
        b, W, h, iIdx, oIdx = self._gemv_symbols()

        # The function is fed W with its last two axes swapped; the graph
        # swaps them back, so the op sees a dimshuffled view of W.
        swap_last_two = tensor.DimShuffle((False, False, False, False),
                                          (0, 1, 3, 2))
        W_swapped = swap_last_two(tensor.as_tensor_variable(W))

        o = self.gemv_op(b.take(oIdx, axis=0), W_swapped, h, iIdx, oIdx)
        f = theano.function([W, h, iIdx, b, oIdx], o, mode=self.mode)

        W_val, h_val, iIdx_val, b_val, oIdx_val = self.gemv_data()

        th_out = f(numpy.swapaxes(W_val, 2, 3), h_val, iIdx_val, b_val,
                   oIdx_val)
        ref_out = self.gemv_numpy(
            b_val.take(oIdx_val, axis=0), W_val, h_val, iIdx_val, oIdx_val)

        utt.assert_allclose(ref_out, th_out)

    def test_sparseblockgemv_grad(self):
        """
        Verify the gradients w.r.t. b, h and W on a small problem whose
        dimensions are all larger than 1.
        """
        # These shapes must stay non-degenerate: the all-ones case is
        # covered separately by test_sparseblockgemv_grad_1.
        h_val = randn(1, 2, 3).astype('float32')
        iIdx_val = numpy.random.permutation(3)[:2][None, :]
        oIdx_val = numpy.random.permutation(3)[:2][None, :]
        W_val = randn(3, 3, 3, 4).astype('float32')
        b_val = randn(3, 4).astype('float32')

        iIdx = theano.tensor.constant(iIdx_val)
        oIdx = theano.tensor.constant(oIdx_val)

        def metaop(b, h, W):
            return sparse_block_dot(W, h, iIdx, b, oIdx)

        def op(b, h, W):
            return self.gemv_op(b.take(oIdx, axis=0), W, h, iIdx, oIdx)

        utt.verify_grad(metaop, [b_val, h_val, W_val], mode=self.mode)
        utt.verify_grad(op, [b_val, h_val, W_val], mode=self.mode)

    def test_sparseblockgemv_grad_1(self):
        """
        Test that we correctly handle cases where dimensions are 1.
        """
        h_val = randn(1, 1, 1).astype('float32')
        iIdx_val = numpy.random.permutation(1)[:1][None, :]
        oIdx_val = numpy.random.permutation(1)[:1][None, :]
        W_val = randn(1, 1, 1, 1).astype('float32')
        b_val = randn(1, 1).astype('float32')

        iIdx = theano.tensor.constant(iIdx_val)
        oIdx = theano.tensor.constant(oIdx_val)

        def metaop(b, h, W):
            return sparse_block_dot(W, h, iIdx, b, oIdx)

        def op(b, h, W):
            return self.gemv_op(b.take(oIdx, axis=0), W, h, iIdx, oIdx)

        utt.verify_grad(metaop, [b_val, h_val, W_val], mode=self.mode)
        utt.verify_grad(op, [b_val, h_val, W_val], mode=self.mode)

    def test_sparseblockgemv_grad_shape(self):
        """
        Check that the gradients w.r.t. b, W and h have the shapes of the
        corresponding inputs.
        """
        b, W, h, iIdx, oIdx = self._gemv_symbols()

        o = self.gemv_op(b.take(oIdx, axis=0), W, h, iIdx, oIdx)
        go = theano.grad(o.sum(), [b, W, h])
        f = theano.function([W, h, iIdx, b, oIdx], go, mode=self.mode)

        W_val, h_val, iIdx_val, b_val, oIdx_val = self.gemv_data()

        # Just make sure that it runs correctly and all the shapes are ok.
        b_g, W_g, h_g = f(W_val, h_val, iIdx_val, b_val, oIdx_val)

        assert b_g.shape == b_val.shape
        assert h_g.shape == h_val.shape
        assert W_g.shape == W_val.shape

    def test_sparseblockouter(self):
        """
        Compares the numpy and theano versions of sparseblockouter.
        """
        o = tensor.ftensor4()
        x = tensor.ftensor3()
        y = tensor.ftensor3()
        xIdx = tensor.imatrix()
        yIdx = tensor.imatrix()

        out = self.outer_op(o, x, y, xIdx, yIdx)
        # Compile with self.mode like every other test, so that a subclass
        # overriding the mode in setUp is honoured here as well.
        f = theano.function([o, x, y, xIdx, yIdx], out,
                            on_unused_input="warn", mode=self.mode)

        o_val, x_val, y_val, xIdx_val, yIdx_val = self.outer_data()

        # outer_numpy updates o_val in place, so the compiled function has
        # to run first.
        th_out = f(o_val, x_val, y_val, xIdx_val, yIdx_val)
        ref_out = self.outer_numpy(o_val, x_val, y_val, xIdx_val, yIdx_val)

        utt.assert_allclose(ref_out, th_out)
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论