提交 6a02b8a0 authored 作者: Arnaud Bergeron's avatar Arnaud Bergeron

Import tests from cuda.

上级 4d8e60e7
import logging
from nose.plugins.skip import SkipTest
import numpy
from itertools import product
import theano
from six import StringIO
import theano.tensor as T
import theano.tests.unittest_tools as utt
from theano.sandbox.neighbours import images2neibs
from theano.tensor.signal.downsample import max_pool_2d
from theano.tensor.signal.downsample import DownsampleFactorMaxGrad
import theano.sandbox.cuda.dnn as dnn
from theano.sandbox.cuda.basic_ops import GpuAllocEmpty, gpu_alloc_empty
# Skip test if cuda_ndarray is not available.
import theano.sandbox.cuda as cuda
if not cuda.cuda_available:
raise SkipTest('Optional package cuda disabled')
if theano.config.mode == 'FAST_COMPILE':
mode_with_gpu = theano.compile.mode.get_mode('FAST_RUN').including('gpu')
mode_without_gpu = theano.compile.mode.get_mode(
'FAST_RUN').excluding('gpu')
else:
mode_with_gpu = theano.compile.mode.get_default_mode().including('gpu')
mode_without_gpu = theano.compile.mode.get_default_mode().excluding('gpu')
def test_dnn_conv_desc_merge():
if not cuda.dnn.dnn_available():
raise SkipTest(cuda.dnn.dnn_available.msg)
img_shp = T.as_tensor_variable(
numpy.asarray([2, 1, 8, 8]).astype('int64'))
kern_shp = T.as_tensor_variable(
numpy.asarray([3, 1, 2, 2]).astype('int64'))
desc1 = dnn.GpuDnnConvDesc(border_mode='valid', subsample=(2, 2),
conv_mode='conv')(img_shp, kern_shp)
desc2 = dnn.GpuDnnConvDesc(border_mode='full', subsample=(1, 1),
conv_mode='cross')(img_shp, kern_shp)
# CDataType is not DeepCopyable so this will crash if we don't use
# borrow=True
f = theano.function([], [theano.Out(desc1, borrow=True),
theano.Out(desc2, borrow=True)])
d1, d2 = f()
# This will be the case if they are merged, which would be bad.
assert d1 != d2
def test_dnn_conv_merge():
"""This test that we merge correctly multiple dnn_conv.
This can is more difficult due to GpuEmptyAlloc that aren't
merged.
"""
if not cuda.dnn.dnn_available():
raise SkipTest(cuda.dnn.dnn_available.msg)
img_shp = [2, 5, 6, 8]
kern_shp = [3, 5, 5, 6]
img = T.ftensor4('img')
kern = T.ftensor4('kern')
out = T.ftensor4('out')
desc = dnn.GpuDnnConvDesc(
border_mode='valid')(img.shape, kern.shape)
# Test forward op
o1 = dnn.dnn_conv(img, kern)
o2 = dnn.dnn_conv(img, kern)
f = theano.function([img, kern], [o1, o2], mode=mode_with_gpu)
d1, d2 = f(numpy.random.rand(*img_shp).astype('float32'),
numpy.random.rand(*kern_shp).astype('float32'))
topo = f.maker.fgraph.toposort()
assert len([n for n in topo if isinstance(n.op, dnn.GpuDnnConv)]) == 1
# Test grad w op
o1 = dnn.GpuDnnConvGradW()(img, kern, out, desc)
o2 = dnn.GpuDnnConvGradW()(img, kern, out, desc)
f = theano.function([img, kern, out], [o1, o2], mode=mode_with_gpu)
topo = f.maker.fgraph.toposort()
assert len([n for n in topo if isinstance(n.op, dnn.GpuDnnConvGradW)]) == 1
# Test grad i op
o1 = dnn.GpuDnnConvGradI()(img, kern, out, desc)
o2 = dnn.GpuDnnConvGradI()(img, kern, out, desc)
f = theano.function([img, kern, out], [o1, o2], mode=mode_with_gpu)
topo = f.maker.fgraph.toposort()
assert len([n for n in topo if isinstance(n.op, dnn.GpuDnnConvGradI)]) == 1
def test_dnn_conv_inplace():
"""This test that we have inplace work correctly even when
GpuAllocEmpty get merged together.
"""
if not cuda.dnn.dnn_available():
raise SkipTest(cuda.dnn.dnn_available.msg)
img_shp = [2, 5, 6, 8]
kern_shp = [3, 5, 5, 6]
img = T.ftensor4('img')
kern = T.ftensor4('kern')
out = T.ftensor4('out')
desc1 = dnn.GpuDnnConvDesc(border_mode='valid', conv_mode='conv')(
img.shape, kern.shape)
desc2 = dnn.GpuDnnConvDesc(
border_mode='valid', conv_mode='cross')(img.shape, kern.shape)
# Test forward op
o1 = dnn.dnn_conv(img, kern, conv_mode='conv')
o2 = dnn.dnn_conv(img, kern, conv_mode='cross')
f = theano.function([img, kern], [o1, o2], mode=mode_with_gpu)
d1, d2 = f(numpy.random.rand(*img_shp).astype('float32'),
numpy.random.rand(*kern_shp).astype('float32'))
topo = f.maker.fgraph.toposort()
convs = [n for n in topo if isinstance(n.op, dnn.GpuDnnConv)]
assert len(convs) == 2
assert all([node.op.inplace for node in convs])
assert len([n for n in topo if isinstance(n.op, GpuAllocEmpty)]) == 2
# Test grad w op
out = gpu_alloc_empty(*kern.shape)
o1 = dnn.GpuDnnConvGradW()(img, kern, out, desc1)
o2 = dnn.GpuDnnConvGradW()(img, kern, out, desc2)
f = theano.function([img, kern], [o1, o2], mode=mode_with_gpu)
topo = f.maker.fgraph.toposort()
convs = [n for n in topo if isinstance(n.op, dnn.GpuDnnConvGradW)]
assert len(convs) == 2
assert all([node.op.inplace for node in convs])
assert len([n for n in topo if isinstance(n.op, GpuAllocEmpty)]) == 2
# Test grad i op
out = gpu_alloc_empty(*img.shape)
o1 = dnn.GpuDnnConvGradI()(img, kern, out, desc1)
o2 = dnn.GpuDnnConvGradI()(img, kern, out, desc2)
f = theano.function([img, kern], [o1, o2], mode=mode_with_gpu)
topo = f.maker.fgraph.toposort()
convs = [n for n in topo if isinstance(n.op, dnn.GpuDnnConvGradI)]
assert len(convs) == 2
assert all([node.op.inplace for node in convs])
assert len([n for n in topo if isinstance(n.op, GpuAllocEmpty)]) == 2
def pool_2d_i2n(input, ds=(2, 2), strides=None,
pad=(0, 0),
pool_function=T.max, mode='ignore_borders'):
if strides is None:
strides = ds
if strides[0] > ds[0] or strides[1] > ds[1]:
raise RuntimeError(
"strides should be smaller than or equal to ds,"
" strides=(%d, %d) and ds=(%d, %d)" %
(strides + ds))
shape = input.shape
if pad != (0, 0):
assert pool_function is T.max
pad_x = pad[0]
pad_y = pad[1]
a = T.alloc(-numpy.inf, shape[0], shape[1], shape[2] + pad_x * 2,
shape[3] + pad_y * 2)
input = T.set_subtensor(a[:, :,
pad_x:pad_x + shape[2],
pad_y:pad_y + shape[3]],
input)
shape = input.shape
neibs = images2neibs(input, ds, strides, mode=mode)
pooled_neibs = pool_function(neibs, axis=1)
output_width = (shape[2] - ds[0]) // strides[0] + 1
output_height = (shape[3] - ds[1]) // strides[1] + 1
pooled_output = pooled_neibs.reshape((shape[0], shape[1],
output_width, output_height))
return pooled_output
def test_pooling():
if not cuda.dnn.dnn_available():
raise SkipTest(cuda.dnn.dnn_available.msg)
x = T.ftensor4()
for mode, pad in product(('max', 'average_inc_pad', 'average_exc_pad'),
((0, 0), (1, 0), (1, 0), (2, 3), (3, 2))):
if mode == 'max':
func = T.max
else:
func = T.mean
if pad != (0, 0) and cuda.dnn.version() == -1:
continue
if pad != (0, 0) and func is T.mean:
continue
for ws in (4, 2, 5):
for stride in (2, 3):
if stride > ws:
continue
if pad[0] > stride or pad[1] > stride:
# Not implemented
continue
# We will check that the opt introduced it.
out1 = max_pool_2d(x, (ws, ws),
st=(stride, stride),
ignore_border=True,
padding=pad, mode=mode)
out2 = pool_2d_i2n(x, ds=(ws, ws), strides=(stride, stride),
pad=pad,
pool_function=func)
mode_without_gpu2 = mode_without_gpu.including()
mode_without_gpu2.check_isfinite = False
f1 = theano.function([x], out1, mode=mode_with_gpu)
assert any([isinstance(node.op, cuda.dnn.GpuDnnPool)
for node in f1.maker.fgraph.apply_nodes])
f2 = theano.function([x], out2, mode=mode_without_gpu2)
assert not any([isinstance(node.op, cuda.dnn.GpuDnnPool)
for node in f2.maker.fgraph.apply_nodes])
for shp in [(1, 10, 100, 100),
(1, 3, 99, 99),
(32, 1, 147, 197),
]:
data = numpy.random.normal(0, 1, shp).astype("float32")
a = f1(data).__array__()
b = f2(data).__array__()
assert numpy.allclose(a, b,
atol=numpy.finfo(numpy.float32).eps)
# Test the grad
for shp in [(1, 1, 2, 2),
(1, 1, 3, 3)]:
data = numpy.random.normal(0, 1, shp).astype("float32") * 10
ws = 2
stride = 2
if pad[0] > stride or pad[1] > stride:
# Not implemented
continue
# This test the CPU grad + opt + GPU implemtentation
def fn(x):
return max_pool_2d(x, (ws, ws), ignore_border=True,
padding=pad, mode=mode)
theano.tests.unittest_tools.verify_grad(fn, [data],
cast_to_output_type=False,
mode=mode_with_gpu)
# Confirm that the opt would have inserted it.
fg = theano.function([x], theano.grad(fn(x).sum(), x),
mode=mode_with_gpu)
assert any([isinstance(node.op, cuda.dnn.GpuDnnPoolGrad)
for node in fg.maker.fgraph.toposort()])
# Test the GPU grad + GPU implementation
def fn(x):
dnn_op = cuda.dnn.dnn_pool(
x, ws=(ws, ws),
stride=(stride, stride),
pad=pad,
mode=mode)
return dnn_op
theano.tests.unittest_tools.verify_grad(
fn, [data],
cast_to_output_type=False,
mode=mode_with_gpu)
# Confirm that we get the good op.
fg = theano.function([x], theano.grad(fn(x).sum(), x),
mode=mode_with_gpu)
assert any([isinstance(node.op, cuda.dnn.GpuDnnPoolGrad)
for node in fg.maker.fgraph.toposort()])
g_out = fg(data)
# Compare again the CPU result
out = max_pool_2d(x, (ws, ws),
padding=pad,
ignore_border=True, mode=mode)
fc = theano.function([x], theano.grad(out.sum(), x),
mode=mode_without_gpu)
assert any([isinstance(node.op, DownsampleFactorMaxGrad)
for node in fc.maker.fgraph.toposort()])
c_out = fc(data)
assert numpy.allclose(c_out, g_out)
def test_pooling_opt():
if not cuda.dnn.dnn_available():
raise SkipTest(cuda.dnn.dnn_available.msg)
x = T.ftensor4()
f = theano.function(
[x],
max_pool_2d(x, ds=(2, 2), ignore_border=True),
mode=mode_with_gpu)
assert any([isinstance(n.op, cuda.dnn.GpuDnnPool)
for n in f.maker.fgraph.toposort()])
f = theano.function(
[x],
T.grad(max_pool_2d(x, ds=(2, 2), ignore_border=True).sum(), x),
mode=mode_with_gpu.including("cudnn"))
assert any([isinstance(n.op, cuda.dnn.GpuDnnPoolGrad)
for n in f.maker.fgraph.toposort()])
def test_dnn_tag():
"""
Test that if cudnn isn't avail we crash and that if it is avail, we use it.
"""
x = T.ftensor4()
old = theano.config.on_opt_error
theano.config.on_opt_error = "raise"
sio = StringIO()
handler = logging.StreamHandler(sio)
logging.getLogger('theano.compile.tests.test_dnn').addHandler(handler)
# Silence original handler when intentionnally generating warning messages
logging.getLogger('theano').removeHandler(theano.logging_default_handler)
raised = False
try:
f = theano.function(
[x],
max_pool_2d(x, ds=(2, 2), ignore_border=True),
mode=mode_with_gpu.including("cudnn"))
except (AssertionError, RuntimeError):
assert not cuda.dnn.dnn_available()
raised = True
finally:
theano.config.on_opt_error = old
logging.getLogger(
'theano.compile.tests.test_dnn').removeHandler(handler)
logging.getLogger('theano').addHandler(theano.logging_default_handler)
if not raised:
assert cuda.dnn.dnn_available()
assert any([isinstance(n.op, cuda.dnn.GpuDnnPool)
for n in f.maker.fgraph.toposort()])
class TestDnnInferShapes(utt.InferShapeTester):
def setUp(self):
super(TestDnnInferShapes, self).setUp()
self.mode = mode_with_gpu
def test_softmax(self):
if not dnn.dnn_available():
raise SkipTest(dnn.dnn_available.msg)
t = T.ftensor4('t')
rand_tensor = numpy.asarray(
numpy.random.rand(5, 4, 3, 2),
dtype='float32'
)
self._compile_and_check(
[t],
[dnn.GpuDnnSoftmax('bc01', 'accurate', 'channel')(t)],
[rand_tensor],
dnn.GpuDnnSoftmax
)
self._compile_and_check(
[t],
[
T.grad(
dnn.GpuDnnSoftmax(
'bc01',
'accurate',
'channel'
)(t).mean(),
t
)
],
[rand_tensor],
dnn.GpuDnnSoftmaxGrad
)
def test_conv(self):
if not dnn.dnn_available():
raise SkipTest(dnn.dnn_available.msg)
img = T.ftensor4('img')
kerns = T.ftensor4('kerns')
out = T.ftensor4('out')
img_val = numpy.asarray(
numpy.random.rand(7, 2, 6, 4),
dtype='float32'
)
kern_vals = numpy.asarray(
numpy.random.rand(8, 2, 4, 3),
dtype='float32'
)
for params in product(
['valid', 'full'],
[(1, 1), (2, 2)],
['conv', 'cross']
):
out_vals = numpy.zeros(
dnn.GpuDnnConv.get_out_shape(img_val.shape, kern_vals.shape,
border_mode=params[0],
subsample=params[1]),
dtype='float32')
desc = dnn.GpuDnnConvDesc(
border_mode=params[0],
subsample=params[1],
conv_mode=params[2]
)(img.shape, kerns.shape)
conv = dnn.GpuDnnConv()(img, kerns, out, desc)
self._compile_and_check(
[img, kerns, out],
[conv],
[img_val, kern_vals, out_vals],
dnn.GpuDnnConv
)
def test_conv_gradw(self):
if not dnn.dnn_available():
raise SkipTest(dnn.dnn_available.msg)
img = T.ftensor4('img')
kerns = T.ftensor4('kerns')
out = T.ftensor4('out')
img_val = numpy.asarray(
numpy.random.rand(2, 5, 6, 8),
dtype='float32'
)
kern_vals = numpy.asarray(
numpy.random.rand(2, 1, 5, 6),
dtype='float32'
)
for params in product(
['valid', 'full'],
[(1, 1)], # strides besides (1, 1)
['conv', 'cross']
):
temp_img = img.dimshuffle(1, 0, 2, 3)
temp_kerns = kerns
if params[2] == 'conv':
temp_kerns = temp_kerns[:, :, ::-1, ::-1]
temp_kerns = temp_kerns.dimshuffle(1, 0, 2, 3)
shape = (
kern_vals.shape[1], img_val.shape[1],
img_val.shape[2] - kern_vals.shape[2] + 1,
img_val.shape[3] - kern_vals.shape[3] + 1
)
out_vals = numpy.zeros(shape, dtype='float32')
desc = dnn.GpuDnnConvDesc(
border_mode=params[0],
subsample=params[1],
conv_mode=params[2]
)(temp_img.shape, out.shape)
conv_grad_w = dnn.GpuDnnConvGradW()(
temp_img,
temp_kerns,
out,
desc,
)
self._compile_and_check(
[temp_img, temp_kerns, out],
[conv_grad_w],
[img_val, kern_vals, out_vals],
dnn.GpuDnnConvGradW
)
def test_conv_gradi(self):
if not dnn.dnn_available():
raise SkipTest(dnn.dnn_available.msg)
img = T.ftensor4('img')
kerns = T.ftensor4('kerns')
out = T.ftensor4('out')
img_val = numpy.asarray(
numpy.random.rand(3, 4, 5, 6),
dtype='float32'
)
kern_vals = numpy.asarray(
numpy.random.rand(13, 14, 15, 16),
dtype='float32'
)
for params in product(
['valid'], # Should this work for 'full'?
[(1, 1)],
['conv', 'cross']
):
temp_kerns = kerns.dimshuffle(1, 0, 2, 3)
shape = (
img_val.shape[0], kern_vals.shape[1],
img_val.shape[2] + kern_vals.shape[2] - 1,
img_val.shape[3] + kern_vals.shape[3] - 1
)
out_vals = numpy.zeros(shape, dtype='float32')
desc = dnn.GpuDnnConvDesc(
border_mode=params[0],
subsample=params[1],
conv_mode=params[2]
)(out.shape, temp_kerns.shape)
conv_grad_i = dnn.GpuDnnConvGradI()(
temp_kerns,
img,
out,
desc,
)
self._compile_and_check(
[temp_kerns, img, out],
[conv_grad_i],
[kern_vals, img_val, out_vals],
dnn.GpuDnnConvGradI
)
def test_pool(self):
if not dnn.dnn_available():
raise SkipTest(dnn.dnn_available.msg)
img = T.ftensor4('img')
img_val = numpy.asarray(
numpy.random.rand(2, 3, 4, 5),
dtype='float32'
)
for params in product(
[(1, 1), (2, 2), (3, 3)],
[(1, 1), (2, 2), (3, 3)],
['max', 'average_inc_pad', 'average_exc_pad']
):
desc = dnn.GpuDnnPoolDesc(
ws=params[0],
stride=params[1],
mode=params[2]
)()
self._compile_and_check(
[img],
[dnn.GpuDnnPool()(img, desc)],
[img_val],
dnn.GpuDnnPool
)
def test_pool_grad(self):
if not dnn.dnn_available():
raise SkipTest(dnn.dnn_available.msg)
img = T.ftensor4('img')
img_grad = T.ftensor4('img_grad')
out = T.ftensor4('out')
img_val = numpy.asarray(
numpy.random.rand(2, 3, 4, 5),
dtype='float32'
)
img_grad_val = numpy.asarray(
numpy.random.rand(2, 3, 4, 5),
dtype='float32'
)
out_val = numpy.asarray(
numpy.random.rand(2, 3, 4, 5),
dtype='float32'
)
for params in product(
[(1, 1), (2, 2), (3, 3)],
[(1, 1), (2, 2), (3, 3)],
['max', 'average_inc_pad']
):
desc = dnn.GpuDnnPoolDesc(
ws=params[0],
stride=params[1],
mode=params[2]
)()
pool_grad = dnn.GpuDnnPoolGrad()(
img,
out,
img_grad,
desc
)
self._compile_and_check(
[img, img_grad, out],
[pool_grad],
[img_val, img_grad_val, out_val],
dnn.GpuDnnPoolGrad
)
# this has been a problem in the past
def test_dnn_conv_border_mode():
if not cuda.dnn.dnn_available():
raise SkipTest(cuda.dnn.dnn_available.msg)
img = T.ftensor4()
kern = T.ftensor4()
dnn.dnn_conv(img, kern, border_mode=1)
dnn.dnn_conv(img, kern, border_mode=(2, 3))
dnn.dnn_conv(img, kern, border_mode='full')
dnn.dnn_conv(img, kern, border_mode='valid')
def test_dnn_conv_alpha_output_merge():
if not cuda.dnn.dnn_available():
raise SkipTest(cuda.dnn.dnn_available.msg)
img = T.ftensor4()
kern = T.ftensor4()
out = T.ftensor4()
b = 1
c = 4
f = 3
ih = 5
iw = 8
kh = 2
kw = 6
img_val = numpy.random.random((b, c, ih, iw)).astype('float32')
kern_val = numpy.random.random((f, c, kh, kw)).astype('float32')
out_val = numpy.random.random((b, f, ih - kh + 1,
iw - kw + 1)).astype('float32')
conv = dnn.dnn_conv(img, kern)
gw = theano.grad(conv.sum(), kern)
gi = theano.grad(conv.sum(), img)
lr = numpy.asarray(0.05, dtype='float32')
if cuda.dnn.version() == -1:
# Can't merge alpha with cudnn v1
fr = conv + out
wr = kern + gw
ir = img + gi
else:
fr = lr * (conv + out)
wr = kern + lr * gw
ir = img + lr * gi
f1 = theano.function([img, kern, out], [fr, wr, ir], mode=mode_with_gpu)
assert isinstance(f1.maker.fgraph.outputs[0].owner.inputs[0].owner.op,
dnn.GpuDnnConv)
assert isinstance(f1.maker.fgraph.outputs[1].owner.inputs[0].owner.op,
dnn.GpuDnnConvGradW)
assert isinstance(f1.maker.fgraph.outputs[2].owner.inputs[0].owner.op,
dnn.GpuDnnConvGradI)
mode = mode_with_gpu
mode = mode.excluding('local_dnn_conv_alpha_merge')
mode = mode.excluding('local_dnn_convw_alpha_merge')
mode = mode.excluding('local_dnn_convi_alpha_merge')
mode = mode.excluding('local_dnn_conv_output_merge')
mode = mode.excluding('local_dnn_convw_output_merge')
mode = mode.excluding('local_dnn_convi_output_merge')
f2 = theano.function([img, kern, out], [fr, wr, ir], mode=mode)
assert not isinstance(f2.maker.fgraph.outputs[0].owner.inputs[0].owner.op,
dnn.GpuDnnConv)
assert not isinstance(f2.maker.fgraph.outputs[1].owner.inputs[0].owner.op,
dnn.GpuDnnConvGradW)
assert not isinstance(f2.maker.fgraph.outputs[2].owner.inputs[0].owner.op,
dnn.GpuDnnConvGradI)
out_f1 = f1(img_val, kern_val, out_val)
out_f2 = f2(img_val, kern_val, out_val)
assert len(out_f1) == len(out_f2)
for v1, v2 in zip(out_f1, out_f2):
utt.assert_allclose(v1, v2)
def test_dnn_conv_grad():
if not cuda.dnn.dnn_available() or dnn.version() == -1:
raise SkipTest('alpha != 1.0 not supported in cudnn v1')
b = 1
c = 4
f = 3
ih = 2
iw = 8
kh = 2
kw = 2
img_val = numpy.random.random((b, c, ih, iw)).astype('float32')
kern_val = numpy.random.random((f, c, kh, kw)).astype('float32')
out_val = numpy.random.random((b, f, ih - kw + 1,
iw - kw + 1)).astype('float32')
def dconv(img, kern, out):
desc = dnn.GpuDnnConvDesc(border_mode='valid', subsample=(1, 1),
conv_mode='conv')(img.shape, kern.shape)
return dnn.GpuDnnConv()(img, kern, out, desc, alpha=0.5, beta=0.75)
def dconvi(img, kern, out):
desc = dnn.GpuDnnConvDesc(border_mode='valid', subsample=(1, 1),
conv_mode='conv')(img.shape, kern.shape)
return dnn.GpuDnnConvGradI()(kern, out, img, desc, alpha=-1.0,
beta=0.0)
def dconvw(img, kern, out):
desc = dnn.GpuDnnConvDesc(border_mode='valid', subsample=(1, 1),
conv_mode='conv')(img.shape, kern.shape)
return dnn.GpuDnnConvGradW()(img, out, kern, desc, alpha=0.75,
beta=-1.0)
utt.verify_grad(dconv, [img_val, kern_val, out_val])
utt.verify_grad(dconvi, [img_val, kern_val, out_val])
utt.verify_grad(dconvw, [img_val, kern_val, out_val])
def test_version():
if not cuda.dnn.dnn_available():
raise SkipTest(cuda.dnn.dnn_available.msg)
assert isinstance(cuda.dnn.version(), (int, tuple))
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论