提交 bc6e4d4e 作者: Dustin Webb

Added test for the GpuSoftmax -> GpuDnnSoftmax optimization.

上级 113afd29
...@@ -546,9 +546,9 @@ err%(name)s = cudnnSoftmaxForward( ...@@ -546,9 +546,9 @@ err%(name)s = cudnnSoftmaxForward(
@local_optimizer([GpuSoftmax]) @local_optimizer([GpuSoftmax])
def local_softmax_dnn(node): def local_softmax_dnn(node):
if isinstance(node.op, GpuSoftmax): if isinstance(node.op, GpuSoftmax):
ins = node.inputs[0].dimshuffle('x', 'x', 0, 1) ins = node.inputs[0].dimshuffle(0, 1, 'x', 'x')
out = GpuDnnSoftmax('bc01', 'accurate', 'channel')(gpu_contiguous(ins)) out = GpuDnnSoftmax('bc01', 'accurate', 'channel')(gpu_contiguous(ins))
out = as_cuda_ndarray_variable(out.dimshuffle(2, 3)) out = as_cuda_ndarray_variable(out.dimshuffle(0, 1))
return [out] return [out]
gpu_optimizer.register("softmax_cudnn", local_softmax_dnn, 'cudnn') gpu_optimizer.register("softmax_cudnn", local_softmax_dnn, 'cudnn')
from nose.plugins.skip import SkipTest from nose.plugins.skip import SkipTest
import numpy import numpy
import unittest
import theano import theano
from theano.gof.python25 import any from theano.gof.python25 import any
...@@ -208,87 +209,140 @@ def test_softmax_with_bias(): ...@@ -208,87 +209,140 @@ def test_softmax_with_bias():
cmp(128, 64 * 1024) cmp(128, 64 * 1024)
def _test_softmax(x, x_gpu, f_z, f_gpu_z, cpu_type, gpu_type, cmp, topo_idx): class test_SoftMax(unittest.TestCase):
""" def _test_softmax(
This is basic test for GpuSoftmax and GpuDnnSoftmax self,
We check that we loop when there are too many blocks
We use slower code when there isn't enough shared memory
"""
f_z_out = f_z(x)
f_gpu_z_out = f_gpu_z(x_gpu)
f = theano.function([x], f_z_out, mode=mode_without_gpu)
f_gpu = theano.function([x_gpu], f_gpu_z_out, mode=mode_with_gpu)
assert isinstance(f.maker.fgraph.toposort()[-1].op, cpu_type)
assert isinstance(f_gpu.maker.fgraph.toposort()[topo_idx].op, gpu_type)
#we need to test n>32*1024 to check that we make the block loop.
cmp(1, 5, f, f_gpu)
cmp(2, 5, f, f_gpu)
cmp(10, 5, f, f_gpu)
cmp(100, 5, f, f_gpu)
cmp(1000, 5, f, f_gpu)
cmp(10000, 5, f, f_gpu)
cmp(4074, 400, f, f_gpu)
cmp(784, 784, f, f_gpu)
cmp(4, 1000, f, f_gpu)
cmp(4, 1024, f, f_gpu)
cmp(4, 2000, f, f_gpu)
cmp(4, 2024, f, f_gpu)
# The GTX285 doesn't have enough shared memory.
cmp(4, 4074, f, f_gpu)
# The GTX580, 680 and kepler don't have enough shared memory.
cmp(2, 10000, f, f_gpu)
cmp(128, 16 * 1024, f, f_gpu)
cmp(128, 64 * 1024, f, f_gpu)
cmp((2 << 15) - 1, 5, f, f_gpu) # cudnn permits no more than 2^15 - 1 rows
cmp(5, 2 << 15, f, f_gpu)
return f, f_gpu
def test_softmax():
def cmp(n, m, f, f_gpu):
#print "test_softmax",n,m
data = numpy.arange(n * m, dtype='float32').reshape(n, m)
out = f(data)
gout = f_gpu(data)
assert numpy.allclose(out, gout), numpy.absolute(out - gout)
x = T.fmatrix('x')
z = T.nnet.softmax
f, f_gpu = _test_softmax(
x, x,
x, x_gpu,
z, f_z,
z, f_gpu_z,
type(z),
cuda.nnet.GpuSoftmax,
cmp, cmp,
-2 gpu_mode,
) check_types
):
# cuDNN cannot handle these test cases but the Theano softmax can so we """
# test them only for the Theano softmax. This is basic test for GpuSoftmax and GpuDnnSoftmax
cmp(2 << 15, 5, f, f_gpu)
cmp(0, 10, f, f_gpu) We check that we loop when there are too many blocks
We use slower code when there isn't enough shared memory
"""
def test_cudnn_softmax(): f_z_out = f_z(x)
def cmp(n, m, f, f_gpu): f_gpu_z_out = f_gpu_z(x_gpu)
f = theano.function([x], f_z_out, mode=mode_without_gpu)
f_gpu = theano.function([x_gpu], f_gpu_z_out, mode=gpu_mode)
check_types(f, f_gpu)
#we need to test n>32*1024 to check that we make the block loop.
cmp(1, 5, f, f_gpu)
cmp(2, 5, f, f_gpu)
cmp(10, 5, f, f_gpu)
cmp(100, 5, f, f_gpu)
cmp(1000, 5, f, f_gpu)
cmp(10000, 5, f, f_gpu)
cmp(4074, 400, f, f_gpu)
cmp(784, 784, f, f_gpu)
cmp(4, 1000, f, f_gpu)
cmp(4, 1024, f, f_gpu)
cmp(4, 2000, f, f_gpu)
cmp(4, 2024, f, f_gpu)
# The GTX285 doesn't have enough shared memory.
cmp(4, 4074, f, f_gpu)
# The GTX580, 680 and kepler don't have enough shared memory.
cmp(2, 10000, f, f_gpu)
cmp(128, 16 * 1024, f, f_gpu)
cmp(128, 64 * 1024, f, f_gpu)
# cudnn permits no more than 2^15 - 1 rows
cmp((2 << 15) - 1, 5, f, f_gpu)
cmp(5, 2 << 15, f, f_gpu)
return f, f_gpu
def _cmp(self, n, m, f, f_gpu):
#print "test_softmax",n,m #print "test_softmax",n,m
data = numpy.arange(n * m, dtype='float32').reshape(n, m) data = numpy.arange(n * m, dtype='float32').reshape(n, m)
gdata = numpy.asarray(data).transpose()[None, :, :, None]
out = f(data) out = f(data)
gout = numpy.asarray(f_gpu(gdata))[0, :, :, 0].transpose() gout = f_gpu(data)
assert numpy.allclose(out, gout), numpy.absolute(out - gout) assert numpy.allclose(out, gout), numpy.absolute(out - gout)
x = T.matrix('x', 'float32') def _check_types(self, graph, graph_gpu, topo_idx, f_type, f_gpu_type):
x_gpu = T.tensor4('x_gpu', 'float32') assert isinstance(graph.maker.fgraph.toposort()[-1].op, f_type)
f_z = T.nnet.softmax assert isinstance(
f_gpu = theano.sandbox.cuda.dnn.GpuDnnSoftmax( graph_gpu.maker.fgraph.toposort()[topo_idx].op,
'bc01', f_gpu_type
'accurate', )
'channel'
) def test_softmax(self):
_test_softmax(x, x_gpu, f_z, f_gpu, type(f_z), type(f_gpu), cmp, -1) x = T.fmatrix('x')
z = T.nnet.softmax
def check_types(graph, graph_gpu):
self._check_types(
graph,
graph_gpu,
-2,
type(z),
cuda.nnet.GpuSoftmax
)
f, f_gpu = self._test_softmax(
x,
x,
z,
z,
self._cmp,
mode_with_gpu,
check_types
)
# cuDNN cannot handle these test cases but the Theano softmax can so we
# test them only for the Theano softmax.
self._cmp(2 << 15, 5, f, f_gpu)
self._cmp(0, 10, f, f_gpu)
def test_cudnn_softmax(self):
    """
    Compare the CPU softmax with GpuDnnSoftmax.

    Two runs of the shared softmax test driver are made:

    1. GpuDnnSoftmax ('bc01', 'accurate', 'channel') is applied
       directly to a 4D input.
    2. The regular softmax is compiled with the "cudnn" optimization
       included, and the compiled GPU graph is required to contain
       exactly one GpuDnnSoftmax node.
    """
    dnn_softmax_cls = theano.sandbox.cuda.dnn.GpuDnnSoftmax

    def compare_4d(n, m, f, f_gpu):
        # The GPU function is fed a 4D view of the (n, m) matrix with
        # two trailing length-1 axes; they are stripped again before
        # comparing against the CPU result.
        data = numpy.arange(n * m, dtype='float32').reshape(n, m)
        gdata = data[:, :, numpy.newaxis, numpy.newaxis]
        out = f(data)
        gout = numpy.asarray(f_gpu(gdata))[:, :, 0, 0]
        assert numpy.allclose(out, gout), numpy.absolute(out - gout)

    x = T.matrix('x', 'float32')
    x_gpu = T.tensor4('x_gpu', 'float32')
    f_z = T.nnet.softmax
    f_gpu = dnn_softmax_cls('bc01', 'accurate', 'channel')
    cpu_softmax_cls = type(f_z)

    def check_types(graph, graph_gpu):
        # Delegate to the shared helper, using -1 as the index into the
        # GPU graph's toposort.
        self._check_types(
            graph, graph_gpu, -1, cpu_softmax_cls, dnn_softmax_cls)

    def check_types_opt(graph, graph_gpu):
        # With the optimization enabled, only the number of
        # GpuDnnSoftmax nodes in the GPU graph is checked, not their
        # position.
        cpu_last = graph.maker.fgraph.toposort()[-1]
        assert isinstance(cpu_last.op, cpu_softmax_cls)
        dnn_nodes = [node for node in graph_gpu.maker.fgraph.toposort()
                     if isinstance(node.op, dnn_softmax_cls)]
        assert len(dnn_nodes) == 1

    # Run 1: the cuDNN op applied explicitly to the 4D variable.
    self._test_softmax(
        x, x_gpu, f_z, f_gpu, compare_4d, mode_with_gpu, check_types)

    # Run 2: plain softmax on both sides, compiled with "cudnn" included.
    mode = mode_with_gpu.including("cudnn")
    self._test_softmax(x, x, f_z, f_z, self._cmp, mode, check_types_opt)
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论