提交 57fd0ae3 作者: Frédéric Bastien

Merge pull request #3304 from abergeron/test_dnn_softmax

Add some minimal tests for cudnn softmax.
...@@ -15,6 +15,8 @@ import theano.sandbox.cuda.dnn as dnn ...@@ -15,6 +15,8 @@ import theano.sandbox.cuda.dnn as dnn
from theano.sandbox.cuda.basic_ops import GpuAllocEmpty, gpu_alloc_empty from theano.sandbox.cuda.basic_ops import GpuAllocEmpty, gpu_alloc_empty
from theano.sandbox.cuda import float32_shared_constructor as shared from theano.sandbox.cuda import float32_shared_constructor as shared
from . import test_nnet
# Skip test if cuda_ndarray is not available. # Skip test if cuda_ndarray is not available.
import theano.sandbox.cuda as cuda import theano.sandbox.cuda as cuda
if not cuda.cuda_available: if not cuda.cuda_available:
...@@ -467,11 +469,100 @@ def test_pooling_opt(): ...@@ -467,11 +469,100 @@ def test_pooling_opt():
for n in f.maker.fgraph.toposort()]) for n in f.maker.fgraph.toposort()])
def test_log_softmax(): class test_DnnSoftMax(test_nnet.test_SoftMax):
gpu_op = dnn.GpuDnnSoftmax
gpu_grad_op = dnn.GpuDnnSoftmaxGrad
mode = mode_with_gpu
do_0 = False
topo_idx = -3
def setUp(self):
if not cuda.dnn.dnn_available():
raise SkipTest(cuda.dnn.dnn_available.msg)
utt.seed_rng()
def test_dnn_softmax_grad(self):
softmax_op = dnn.GpuDnnSoftmax('bc01', 'accurate', 'channel')
x_val = numpy.random.normal(0, 1, (3, 4, 2, 5)).astype('float32')
x_val2 = numpy.random.normal(0, 1, (3, 4, 1, 1)).astype('float32')
utt.verify_grad(softmax_op, [x_val])
# Gradient is broken for (n, c, 1, 1) in v3 rc1
if cuda.dnn.version() != (3000, 3000):
utt.verify_grad(softmax_op, [x_val2])
def test_cudnn_softmax_grad_opt(self):
# Verify that the SoftmaxGrad -> GpuDnnSoftmaxGrad optimization is
# applied when cudnn is required
y = T.fvector('y')
f = theano.function(
[y],
T.grad(T.nnet.softmax(y).mean(), y),
mode=mode_with_gpu
)
sorted_f = f.maker.fgraph.toposort()
assert(len([i
for i in sorted_f
if isinstance(
i.op,
theano.sandbox.cuda.dnn.GpuDnnSoftmaxGrad
)]) == 1)
assert(len([i
for i in sorted_f
if isinstance(
i.op,
theano.tensor.nnet.SoftmaxGrad
)]) == 0)
# Verify that the SoftmaxGrad -> GpuDnnSoftmaxGrad optimization is not
# applied when cudnn is excluded or not available
mode_wo_cudnn = mode_with_gpu.excluding("cudnn")
y = T.fvector('y')
f = theano.function(
[y],
T.grad(T.nnet.softmax(y).mean(), y),
mode=mode_wo_cudnn
)
sorted_f = f.maker.fgraph.toposort()
assert(len([i
for i in sorted_f
if isinstance(
i.op,
theano.sandbox.cuda.dnn.GpuDnnSoftmaxGrad
)]) == 0)
assert(len([i
for i in sorted_f
if isinstance(
i.op,
theano.tensor.nnet.SoftmaxGrad
)]) == 1)
# Verify that the SoftmaxGrad -> GpuDnnSoftmaxGrad do not
# crash with manual graph
y = T.fvector('y')
o = theano.tensor.nnet.SoftmaxGrad()(y, y * 2)
f = theano.function([y], o, mode=mode_with_gpu)
sorted_f = f.maker.fgraph.toposort()
assert(len([i
for i in sorted_f
if isinstance(
i.op,
theano.sandbox.cuda.dnn.GpuDnnSoftmaxGrad
)]) == 1)
assert(len([i
for i in sorted_f
if isinstance(
i.op,
theano.tensor.nnet.SoftmaxGrad
)]) == 0)
def test_log_softmax(self):
# This is a test for an optimization that depends on CuDNN v3 or # This is a test for an optimization that depends on CuDNN v3 or
# more recent. Don't test if the CuDNN version is too old. # more recent. Don't test if the CuDNN version is too old.
if not cuda.dnn.dnn_available() or cuda.dnn.version() < (3000, 3000): if cuda.dnn.version() < (3000, 3000):
raise SkipTest(cuda.dnn.dnn_available.msg) raise SkipTest("Log-softmax is only in cudnn v3+")
x = T.ftensor4() x = T.ftensor4()
softmax_out = dnn.GpuDnnSoftmax('bc01', 'accurate', 'channel')(x) softmax_out = dnn.GpuDnnSoftmax('bc01', 'accurate', 'channel')(x)
......
...@@ -9,7 +9,7 @@ import theano.tests.unittest_tools as utt ...@@ -9,7 +9,7 @@ import theano.tests.unittest_tools as utt
# Skip test if cuda_ndarray is not available. # Skip test if cuda_ndarray is not available.
import theano.sandbox.cuda as cuda import theano.sandbox.cuda as cuda
if cuda.cuda_available == False: if not cuda.cuda_available:
raise SkipTest('Optional package cuda disabled') raise SkipTest('Optional package cuda disabled')
if theano.config.mode == 'FAST_COMPILE': if theano.config.mode == 'FAST_COMPILE':
...@@ -39,15 +39,13 @@ def test_GpuCrossentropySoftmaxArgmax1HotWithBias(): ...@@ -39,15 +39,13 @@ def test_GpuCrossentropySoftmaxArgmax1HotWithBias():
n_in = 4098 n_in = 4098
n_out = 4099 n_out = 4099
x = T.fmatrix('x')
y = T.lvector('y') y = T.lvector('y')
b = T.fvector('b') b = T.fvector('b')
#W = T.fmatrix('W')
# we precompute the dot with big shape before to allow the test of # we precompute the dot with big shape before to allow the test of
# GpuCrossentropySoftmax1HotWithBiasDx to don't fail with the error # GpuCrossentropySoftmax1HotWithBiasDx to don't fail with the error
#(the launch timed out and was terminated) on GPU card not # (the launch timed out and was terminated) on GPU card not
# powerful enough. We need the big shape to check for corner # powerful enough. We need the big shape to check for corner
# case. # case.
dot_result = T.fmatrix('dot_result') dot_result = T.fmatrix('dot_result')
...@@ -57,7 +55,6 @@ def test_GpuCrossentropySoftmaxArgmax1HotWithBias(): ...@@ -57,7 +55,6 @@ def test_GpuCrossentropySoftmaxArgmax1HotWithBias():
xx = numpy.asarray(numpy.random.rand(batch_size, n_in), xx = numpy.asarray(numpy.random.rand(batch_size, n_in),
dtype=numpy.float32) dtype=numpy.float32)
#?????yy = numpy.ones((batch_size,),dtype='float32')
yy = numpy.ones((batch_size,), dtype='int32') yy = numpy.ones((batch_size,), dtype='int32')
b_values = numpy.zeros((n_out,), dtype='float32') b_values = numpy.zeros((n_out,), dtype='float32')
W_values = numpy.asarray(numpy.random.rand(n_in, n_out), dtype='float32') W_values = numpy.asarray(numpy.random.rand(n_in, n_out), dtype='float32')
...@@ -104,12 +101,10 @@ def test_GpuCrossentropySoftmax1HotWithBiasDx(): ...@@ -104,12 +101,10 @@ def test_GpuCrossentropySoftmax1HotWithBiasDx():
We check that we loop when there are too many threads We check that we loop when there are too many threads
""" """
n_in = 1000
batch_size = 4097 batch_size = 4097
n_out = 1250 n_out = 1250
if not isinstance(mode_with_gpu, theano.compile.DebugMode): if not isinstance(mode_with_gpu, theano.compile.DebugMode):
n_in = 4098
n_out = 4099 n_out = 4099
# Seed numpy.random with config.unittests.rseed # Seed numpy.random with config.unittests.rseed
...@@ -212,6 +207,12 @@ def test_softmax_with_bias(): ...@@ -212,6 +207,12 @@ def test_softmax_with_bias():
class test_SoftMax(unittest.TestCase): class test_SoftMax(unittest.TestCase):
gpu_op = cuda.nnet.GpuSoftmax
mode = mode_with_gpu.excluding("cudnn")
do_big = True
do_0 = True
topo_idx = -2
def _test_softmax( def _test_softmax(
self, self,
x, x,
...@@ -219,7 +220,6 @@ class test_SoftMax(unittest.TestCase): ...@@ -219,7 +220,6 @@ class test_SoftMax(unittest.TestCase):
f_z, f_z,
f_gpu_z, f_gpu_z,
cmp, cmp,
gpu_mode,
check_types check_types
): ):
""" """
...@@ -232,7 +232,7 @@ class test_SoftMax(unittest.TestCase): ...@@ -232,7 +232,7 @@ class test_SoftMax(unittest.TestCase):
f_gpu_z_out = f_gpu_z(x_gpu) f_gpu_z_out = f_gpu_z(x_gpu)
f = theano.function([x], f_z_out, mode=mode_without_gpu) f = theano.function([x], f_z_out, mode=mode_without_gpu)
f_gpu = theano.function([x_gpu], f_gpu_z_out, mode=gpu_mode) f_gpu = theano.function([x_gpu], f_gpu_z_out, mode=self.mode)
check_types(f, f_gpu) check_types(f, f_gpu)
# we need to test n>32*1024 to check that we make the block loop. # we need to test n>32*1024 to check that we make the block loop.
...@@ -261,16 +261,15 @@ class test_SoftMax(unittest.TestCase): ...@@ -261,16 +261,15 @@ class test_SoftMax(unittest.TestCase):
return f, f_gpu return f, f_gpu
def _cmp(self, n, m, f, f_gpu): def _cmp(self, n, m, f, f_gpu):
# print "test_softmax",n,m
data = numpy.arange(n * m, dtype='float32').reshape(n, m) data = numpy.arange(n * m, dtype='float32').reshape(n, m)
out = f(data) out = f(data)
gout = f_gpu(data) gout = f_gpu(data)
assert numpy.allclose(out, gout), numpy.absolute(out - gout) utt.assert_allclose(out, gout)
def _check_types(self, graph, graph_gpu, topo_idx, f_type, f_gpu_type): def _check_types(self, graph, graph_gpu, f_type, f_gpu_type):
assert isinstance(graph.maker.fgraph.toposort()[-1].op, f_type) assert isinstance(graph.maker.fgraph.toposort()[-1].op, f_type)
assert isinstance( assert isinstance(
graph_gpu.maker.fgraph.toposort()[topo_idx].op, graph_gpu.maker.fgraph.toposort()[self.topo_idx].op,
f_gpu_type f_gpu_type
) )
...@@ -278,180 +277,24 @@ class test_SoftMax(unittest.TestCase): ...@@ -278,180 +277,24 @@ class test_SoftMax(unittest.TestCase):
x = T.fmatrix('x') x = T.fmatrix('x')
z = T.nnet.softmax_op z = T.nnet.softmax_op
def check_types_without_cudnn(graph, graph_gpu): def check_types(graph, graph_gpu):
self._check_types( self._check_types(
graph, graph,
graph_gpu, graph_gpu,
-2,
type(z), type(z),
cuda.nnet.GpuSoftmax self.gpu_op
) )
mode_wo_cudnn = mode_with_gpu.excluding("cudnn")
f, f_gpu = self._test_softmax( f, f_gpu = self._test_softmax(
x, x,
x, x,
z, z,
z, z,
self._cmp, self._cmp,
mode_wo_cudnn, check_types
check_types_without_cudnn
) )
# cuDNN R1 cannot handle these test cases but the Theano softmax can so if self.do_big:
# we test them only for the Theano softmax.
self._cmp(2 << 15, 5, f, f_gpu) self._cmp(2 << 15, 5, f, f_gpu)
if self.do_0:
self._cmp(0, 10, f, f_gpu) self._cmp(0, 10, f, f_gpu)
def test_softmax_cudnn(self):
if not cuda.dnn.dnn_available():
raise SkipTest(cuda.dnn.dnn_available.msg)
x = T.fmatrix('x')
z = T.nnet.softmax_op
def check_types_with_cudnn(graph, graph_gpu):
self._check_types(
graph,
graph_gpu,
-3,
type(z),
theano.sandbox.cuda.dnn.GpuDnnSoftmax
)
f, f_gpu = self._test_softmax(
x,
x,
z,
z,
self._cmp,
mode_with_gpu,
check_types_with_cudnn
)
def test_cudnn_softmax_grad(self):
if not cuda.dnn.dnn_available():
raise SkipTest(cuda.dnn.dnn_available.msg)
def cmp(n, m, f, f_gpu):
data = numpy.arange(n * m, dtype='float32').reshape(n, m)
gdata = numpy.asarray(data)[:, :, None, None]
out = f(data)
gout = numpy.asarray(f_gpu(gdata))[:, :, 0, 0]
assert numpy.allclose(out, gout), numpy.absolute(out - gout)
x = T.matrix('x', 'float32')
x_gpu = T.tensor4('x_gpu', 'float32')
f_z = T.nnet.softmax_op
f_gpu = theano.sandbox.cuda.dnn.GpuDnnSoftmax(
'bc01',
'accurate',
'channel'
)
# Verify the grad operation
dims = (2, 3, 4, 5)
gdata = numpy.arange(
numpy.product(dims),
dtype='float32'
).reshape(dims)
T.verify_grad(f_gpu, [gdata], rng=numpy.random,
mode=mode_with_gpu)
def check_types(graph, graph_gpu):
self._check_types(
graph,
graph_gpu,
-1,
type(f_z),
theano.sandbox.cuda.dnn.GpuDnnSoftmax
)
def check_types_opt(graph, graph_gpu):
assert isinstance(graph.maker.fgraph.toposort()[-1].op, type(f_z))
assert len([n for n in graph_gpu.maker.fgraph.toposort()
if isinstance(
n.op,
theano.sandbox.cuda.dnn.GpuDnnSoftmax
)]) == 1
# Verify that the CPU and GPU implementations return the same results
# up to a tolerance.
self._test_softmax(
x,
x_gpu,
f_z,
f_gpu,
cmp,
mode_with_gpu,
check_types
)
mode_w_cudnn = mode_with_gpu.including("cudnn")
self._test_softmax(
x, x, f_z, f_z, self._cmp,
mode_w_cudnn, check_types_opt
)
# Verify that the SoftmaxGrad -> GpuDnnSoftmaxGrad optimization is
# applied when cudnn is required
y = T.fvector('y')
f = theano.function(
[y],
T.grad(T.nnet.softmax(y).mean(), y),
mode=mode_with_gpu
)
sorted_f = f.maker.fgraph.toposort()
assert(len([i
for i in sorted_f
if isinstance(
i.op,
theano.sandbox.cuda.dnn.GpuDnnSoftmaxGrad
)]) == 1)
assert(len([i
for i in sorted_f
if isinstance(
i.op,
theano.tensor.nnet.SoftmaxGrad
)]) == 0)
# Verify that the SoftmaxGrad -> GpuDnnSoftmaxGrad optimization is not
# applied when cudnn is excluded or not available
mode_wo_cudnn = mode_with_gpu.excluding("cudnn")
y = T.fvector('y')
f = theano.function(
[y],
T.grad(T.nnet.softmax(y).mean(), y),
mode=mode_wo_cudnn
)
sorted_f = f.maker.fgraph.toposort()
assert(len([i
for i in sorted_f
if isinstance(
i.op,
theano.sandbox.cuda.dnn.GpuDnnSoftmaxGrad
)]) == 0)
assert(len([i
for i in sorted_f
if isinstance(
i.op,
theano.tensor.nnet.SoftmaxGrad
)]) == 1)
# Verify that the SoftmaxGrad -> GpuDnnSoftmaxGrad do not
# crash with manual graph
y = T.fvector('y')
o = theano.tensor.nnet.SoftmaxGrad()(y, y*2)
f = theano.function([y], o, mode=mode_with_gpu)
sorted_f = f.maker.fgraph.toposort()
assert(len([i
for i in sorted_f
if isinstance(
i.op,
theano.sandbox.cuda.dnn.GpuDnnSoftmaxGrad
)]) == 1)
assert(len([i
for i in sorted_f
if isinstance(
i.op,
theano.tensor.nnet.SoftmaxGrad
)]) == 0)
...@@ -139,7 +139,6 @@ whitelist_flake8 = [ ...@@ -139,7 +139,6 @@ whitelist_flake8 = [
"sandbox/cuda/tests/test_blas.py", "sandbox/cuda/tests/test_blas.py",
"sandbox/cuda/tests/test_driver.py", "sandbox/cuda/tests/test_driver.py",
"sandbox/cuda/tests/test_rng_curand.py", "sandbox/cuda/tests/test_rng_curand.py",
"sandbox/cuda/tests/test_nnet.py",
"sandbox/cuda/tests/test_basic_ops.py", "sandbox/cuda/tests/test_basic_ops.py",
"sandbox/cuda/tests/test_memory.py", "sandbox/cuda/tests/test_memory.py",
"sandbox/cuda/tests/test_mlp.py", "sandbox/cuda/tests/test_mlp.py",
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论