Commit bc6e4d4e authored by Dustin Webb

Added test for the GpuSoftmax -> GpuDnnSoftmax optimization.

Parent 113afd29
...@@ -546,9 +546,9 @@ err%(name)s = cudnnSoftmaxForward( ...@@ -546,9 +546,9 @@ err%(name)s = cudnnSoftmaxForward(
@local_optimizer([GpuSoftmax]) @local_optimizer([GpuSoftmax])
def local_softmax_dnn(node): def local_softmax_dnn(node):
if isinstance(node.op, GpuSoftmax): if isinstance(node.op, GpuSoftmax):
ins = node.inputs[0].dimshuffle('x', 'x', 0, 1) ins = node.inputs[0].dimshuffle(0, 1, 'x', 'x')
out = GpuDnnSoftmax('bc01', 'accurate', 'channel')(gpu_contiguous(ins)) out = GpuDnnSoftmax('bc01', 'accurate', 'channel')(gpu_contiguous(ins))
out = as_cuda_ndarray_variable(out.dimshuffle(2, 3)) out = as_cuda_ndarray_variable(out.dimshuffle(0, 1))
return [out] return [out]
gpu_optimizer.register("softmax_cudnn", local_softmax_dnn, 'cudnn') gpu_optimizer.register("softmax_cudnn", local_softmax_dnn, 'cudnn')
from nose.plugins.skip import SkipTest from nose.plugins.skip import SkipTest
import numpy import numpy
import unittest
import theano import theano
from theano.gof.python25 import any from theano.gof.python25 import any
...@@ -208,7 +209,17 @@ def test_softmax_with_bias(): ...@@ -208,7 +209,17 @@ def test_softmax_with_bias():
cmp(128, 64 * 1024) cmp(128, 64 * 1024)
def _test_softmax(x, x_gpu, f_z, f_gpu_z, cpu_type, gpu_type, cmp, topo_idx): class test_SoftMax(unittest.TestCase):
def _test_softmax(
self,
x,
x_gpu,
f_z,
f_gpu_z,
cmp,
gpu_mode,
check_types
):
""" """
This is basic test for GpuSoftmax and GpuDnnSoftmax This is basic test for GpuSoftmax and GpuDnnSoftmax
...@@ -217,10 +228,10 @@ def _test_softmax(x, x_gpu, f_z, f_gpu_z, cpu_type, gpu_type, cmp, topo_idx): ...@@ -217,10 +228,10 @@ def _test_softmax(x, x_gpu, f_z, f_gpu_z, cpu_type, gpu_type, cmp, topo_idx):
""" """
f_z_out = f_z(x) f_z_out = f_z(x)
f_gpu_z_out = f_gpu_z(x_gpu) f_gpu_z_out = f_gpu_z(x_gpu)
f = theano.function([x], f_z_out, mode=mode_without_gpu) f = theano.function([x], f_z_out, mode=mode_without_gpu)
f_gpu = theano.function([x_gpu], f_gpu_z_out, mode=mode_with_gpu) f_gpu = theano.function([x_gpu], f_gpu_z_out, mode=gpu_mode)
assert isinstance(f.maker.fgraph.toposort()[-1].op, cpu_type) check_types(f, f_gpu)
assert isinstance(f_gpu.maker.fgraph.toposort()[topo_idx].op, gpu_type)
#we need to test n>32*1024 to check that we make the block loop. #we need to test n>32*1024 to check that we make the block loop.
cmp(1, 5, f, f_gpu) cmp(1, 5, f, f_gpu)
...@@ -241,46 +252,60 @@ def _test_softmax(x, x_gpu, f_z, f_gpu_z, cpu_type, gpu_type, cmp, topo_idx): ...@@ -241,46 +252,60 @@ def _test_softmax(x, x_gpu, f_z, f_gpu_z, cpu_type, gpu_type, cmp, topo_idx):
cmp(2, 10000, f, f_gpu) cmp(2, 10000, f, f_gpu)
cmp(128, 16 * 1024, f, f_gpu) cmp(128, 16 * 1024, f, f_gpu)
cmp(128, 64 * 1024, f, f_gpu) cmp(128, 64 * 1024, f, f_gpu)
cmp((2 << 15) - 1, 5, f, f_gpu) # cudnn permits no more than 2^15 - 1 rows # cudnn permits no more than 2^15 - 1 rows
cmp((2 << 15) - 1, 5, f, f_gpu)
cmp(5, 2 << 15, f, f_gpu) cmp(5, 2 << 15, f, f_gpu)
return f, f_gpu return f, f_gpu
def _cmp(self, n, m, f, f_gpu):
def test_softmax():
def cmp(n, m, f, f_gpu):
#print "test_softmax",n,m #print "test_softmax",n,m
data = numpy.arange(n * m, dtype='float32').reshape(n, m) data = numpy.arange(n * m, dtype='float32').reshape(n, m)
out = f(data) out = f(data)
gout = f_gpu(data) gout = f_gpu(data)
assert numpy.allclose(out, gout), numpy.absolute(out - gout) assert numpy.allclose(out, gout), numpy.absolute(out - gout)
def _check_types(self, graph, graph_gpu, topo_idx, f_type, f_gpu_type):
assert isinstance(graph.maker.fgraph.toposort()[-1].op, f_type)
assert isinstance(
graph_gpu.maker.fgraph.toposort()[topo_idx].op,
f_gpu_type
)
def test_softmax(self):
x = T.fmatrix('x') x = T.fmatrix('x')
z = T.nnet.softmax z = T.nnet.softmax
f, f_gpu = _test_softmax(
def check_types(graph, graph_gpu):
self._check_types(
graph,
graph_gpu,
-2,
type(z),
cuda.nnet.GpuSoftmax
)
f, f_gpu = self._test_softmax(
x, x,
x, x,
z, z,
z, z,
type(z), self._cmp,
cuda.nnet.GpuSoftmax, mode_with_gpu,
cmp, check_types
-2
) )
# cuDNN cannot handle these test cases but the Theano softmax can so we # cuDNN cannot handle these test cases but the Theano softmax can so we
# test them only for the Theano softmax. # test them only for the Theano softmax.
cmp(2 << 15, 5, f, f_gpu) self._cmp(2 << 15, 5, f, f_gpu)
cmp(0, 10, f, f_gpu) self._cmp(0, 10, f, f_gpu)
def test_cudnn_softmax(): def test_cudnn_softmax(self):
def cmp(n, m, f, f_gpu): def cmp(n, m, f, f_gpu):
#print "test_softmax",n,m
data = numpy.arange(n * m, dtype='float32').reshape(n, m) data = numpy.arange(n * m, dtype='float32').reshape(n, m)
gdata = numpy.asarray(data).transpose()[None, :, :, None] gdata = numpy.asarray(data)[:, :, None, None]
out = f(data) out = f(data)
gout = numpy.asarray(f_gpu(gdata))[0, :, :, 0].transpose() gout = numpy.asarray(f_gpu(gdata))[:, :, 0, 0]
assert numpy.allclose(out, gout), numpy.absolute(out - gout) assert numpy.allclose(out, gout), numpy.absolute(out - gout)
x = T.matrix('x', 'float32') x = T.matrix('x', 'float32')
...@@ -291,4 +316,33 @@ def test_cudnn_softmax(): ...@@ -291,4 +316,33 @@ def test_cudnn_softmax():
'accurate', 'accurate',
'channel' 'channel'
) )
_test_softmax(x, x_gpu, f_z, f_gpu, type(f_z), type(f_gpu), cmp, -1)
def check_types(graph, graph_gpu):
self._check_types(
graph,
graph_gpu,
-1,
type(f_z),
theano.sandbox.cuda.dnn.GpuDnnSoftmax
)
def check_types_opt(graph, graph_gpu):
assert isinstance(graph.maker.fgraph.toposort()[-1].op, type(f_z))
assert len([n for n in graph_gpu.maker.fgraph.toposort()
if isinstance(
n.op,
theano.sandbox.cuda.dnn.GpuDnnSoftmax
)]) == 1
self._test_softmax(
x,
x_gpu,
f_z,
f_gpu,
cmp,
mode_with_gpu,
check_types
)
mode = mode_with_gpu.including("cudnn")
self._test_softmax(x, x, f_z, f_z, self._cmp, mode, check_types_opt)
Markdown is supported
0%
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment