Commit 0e40d582 authored by Gijs van Tulder

DnnConv grad ops in old gpu backend check shape consistency.

Parent 2f7e7d92
...@@ -33,6 +33,45 @@ APPLY_SPECIFIC(conv_gi)(CudaNdarray *kerns, CudaNdarray *output, ...@@ -33,6 +33,45 @@ APPLY_SPECIFIC(conv_gi)(CudaNdarray *kerns, CudaNdarray *output,
if (c_set_tensorNd(*input, APPLY_SPECIFIC(input)) == -1) if (c_set_tensorNd(*input, APPLY_SPECIFIC(input)) == -1)
return 1; return 1;
/* Shape-consistency check for the gradient-wrt-input op: ask cuDNN what
 * output shape the forward convolution would produce for these
 * input/kernel descriptors, and reject the call if the supplied output
 * gradient does not match it.  Without this check a mismatched gradient
 * shape would make cuDNN read/write out of bounds. */
int expected_output_dims[5] = {0};
err = cudnnGetConvolutionNdForwardOutputDim(desc, APPLY_SPECIFIC(input), APPLY_SPECIFIC(kerns),
nb_dim, expected_output_dims);
if (err != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError, "error computing convolution output dim: %s",
cudnnGetErrorString(err));
return 1;
}
/* nb_dim counts the full tensor rank: 4 for 2-D convolution
 * (batch, channels, h, w), 5 for 3-D convolution. */
if (nb_dim == 4) {
if ((CudaNdarray_HOST_DIMS(output)[0] != expected_output_dims[0]) ||
(CudaNdarray_HOST_DIMS(output)[1] != expected_output_dims[1]) ||
(CudaNdarray_HOST_DIMS(output)[2] != expected_output_dims[2]) ||
(CudaNdarray_HOST_DIMS(output)[3] != expected_output_dims[3])) {
/* All dimension values are cast to long int to match the %ld
 * conversions in the format string. */
PyErr_Format(PyExc_ValueError, "impossible convolution output dim: expected %ldx%ldx%ldx%ld"
" but received gradient with shape %ldx%ldx%ldx%ld",
(long int)expected_output_dims[0], (long int)expected_output_dims[1],
(long int)expected_output_dims[2], (long int)expected_output_dims[3],
(long int)CudaNdarray_HOST_DIMS(output)[0], (long int)CudaNdarray_HOST_DIMS(output)[1],
(long int)CudaNdarray_HOST_DIMS(output)[2], (long int)CudaNdarray_HOST_DIMS(output)[3]);
return 1;
}
} else if (nb_dim == 5) {
/* Same check for the 3-D (5-dimensional tensor) case. */
if ((CudaNdarray_HOST_DIMS(output)[0] != expected_output_dims[0]) ||
(CudaNdarray_HOST_DIMS(output)[1] != expected_output_dims[1]) ||
(CudaNdarray_HOST_DIMS(output)[2] != expected_output_dims[2]) ||
(CudaNdarray_HOST_DIMS(output)[3] != expected_output_dims[3]) ||
(CudaNdarray_HOST_DIMS(output)[4] != expected_output_dims[4])) {
PyErr_Format(PyExc_ValueError, "impossible convolution output dim: expected %ldx%ldx%ldx%ldx%ld"
" but received gradient with shape %ldx%ldx%ldx%ldx%ld",
(long int)expected_output_dims[0], (long int)expected_output_dims[1],
(long int)expected_output_dims[2], (long int)expected_output_dims[3],
(long int)expected_output_dims[4],
(long int)CudaNdarray_HOST_DIMS(output)[0], (long int)CudaNdarray_HOST_DIMS(output)[1],
(long int)CudaNdarray_HOST_DIMS(output)[2], (long int)CudaNdarray_HOST_DIMS(output)[3],
(long int)CudaNdarray_HOST_DIMS(output)[4]);
return 1;
}
}
{ {
size_t worksize; size_t worksize;
void *workspace; void *workspace;
......
...@@ -33,6 +33,45 @@ APPLY_SPECIFIC(conv_gw)(CudaNdarray *input, CudaNdarray *output, ...@@ -33,6 +33,45 @@ APPLY_SPECIFIC(conv_gw)(CudaNdarray *input, CudaNdarray *output,
if (c_set_filterNd(*kerns, APPLY_SPECIFIC(kerns)) == -1) if (c_set_filterNd(*kerns, APPLY_SPECIFIC(kerns)) == -1)
return 1; return 1;
/* Shape-consistency check for the gradient-wrt-weights op: ask cuDNN what
 * output shape the forward convolution would produce for these
 * input/kernel descriptors, and reject the call if the supplied output
 * gradient does not match it.  Without this check a mismatched gradient
 * shape would make cuDNN read/write out of bounds. */
int expected_output_dims[5] = {0};
err = cudnnGetConvolutionNdForwardOutputDim(desc, APPLY_SPECIFIC(input), APPLY_SPECIFIC(kerns),
                                            nb_dim, expected_output_dims);
if (err != CUDNN_STATUS_SUCCESS) {
  PyErr_Format(PyExc_RuntimeError, "error computing convolution output dim: %s",
               cudnnGetErrorString(err));
  return 1;
}
/* nb_dim counts the full tensor rank: 4 for 2-D convolution
 * (batch, channels, h, w), 5 for 3-D convolution. */
if (nb_dim == 4) {
  if ((CudaNdarray_HOST_DIMS(output)[0] != expected_output_dims[0]) ||
      (CudaNdarray_HOST_DIMS(output)[1] != expected_output_dims[1]) ||
      (CudaNdarray_HOST_DIMS(output)[2] != expected_output_dims[2]) ||
      (CudaNdarray_HOST_DIMS(output)[3] != expected_output_dims[3])) {
    /* BUG FIX: the format strings previously used "%ldx%ldx%dx%ld" — a %d
     * conversion consuming an argument cast to (long int), which is
     * undefined behavior where int and long differ (LP64).  Use %ld for
     * every dimension, matching the conv_gi variant of this check. */
    PyErr_Format(PyExc_ValueError, "impossible convolution output dim: expected %ldx%ldx%ldx%ld"
                 " but received gradient with shape %ldx%ldx%ldx%ld",
                 (long int)expected_output_dims[0], (long int)expected_output_dims[1],
                 (long int)expected_output_dims[2], (long int)expected_output_dims[3],
                 (long int)CudaNdarray_HOST_DIMS(output)[0], (long int)CudaNdarray_HOST_DIMS(output)[1],
                 (long int)CudaNdarray_HOST_DIMS(output)[2], (long int)CudaNdarray_HOST_DIMS(output)[3]);
    return 1;
  }
} else if (nb_dim == 5) {
  /* Same check for the 3-D (5-dimensional tensor) case. */
  if ((CudaNdarray_HOST_DIMS(output)[0] != expected_output_dims[0]) ||
      (CudaNdarray_HOST_DIMS(output)[1] != expected_output_dims[1]) ||
      (CudaNdarray_HOST_DIMS(output)[2] != expected_output_dims[2]) ||
      (CudaNdarray_HOST_DIMS(output)[3] != expected_output_dims[3]) ||
      (CudaNdarray_HOST_DIMS(output)[4] != expected_output_dims[4])) {
    PyErr_Format(PyExc_ValueError, "impossible convolution output dim: expected %ldx%ldx%ldx%ldx%ld"
                 " but received gradient with shape %ldx%ldx%ldx%ldx%ld",
                 (long int)expected_output_dims[0], (long int)expected_output_dims[1],
                 (long int)expected_output_dims[2], (long int)expected_output_dims[3],
                 (long int)expected_output_dims[4],
                 (long int)CudaNdarray_HOST_DIMS(output)[0], (long int)CudaNdarray_HOST_DIMS(output)[1],
                 (long int)CudaNdarray_HOST_DIMS(output)[2], (long int)CudaNdarray_HOST_DIMS(output)[3],
                 (long int)CudaNdarray_HOST_DIMS(output)[4]);
    return 1;
  }
}
{ {
size_t worksize; size_t worksize;
void *workspace; void *workspace;
......
...@@ -4,6 +4,7 @@ import os ...@@ -4,6 +4,7 @@ import os
import sys import sys
from nose.plugins.skip import SkipTest from nose.plugins.skip import SkipTest
from nose_parameterized import parameterized
from itertools import chain, product from itertools import chain, product
import six.moves.cPickle as pickle import six.moves.cPickle as pickle
from six import StringIO from six import StringIO
...@@ -16,6 +17,7 @@ import theano.tensor as T ...@@ -16,6 +17,7 @@ import theano.tensor as T
import theano.tests.unittest_tools as utt import theano.tests.unittest_tools as utt
from theano.tensor.signal.pool import pool_2d, pool_3d from theano.tensor.signal.pool import pool_2d, pool_3d
from theano.tensor.signal.pool import Pool, MaxPoolGrad, AveragePoolGrad from theano.tensor.signal.pool import Pool, MaxPoolGrad, AveragePoolGrad
from theano.tensor.nnet.abstract_conv import get_conv_output_shape
import theano.sandbox.cuda.dnn as dnn import theano.sandbox.cuda.dnn as dnn
from theano.sandbox.cuda.basic_ops import GpuAllocEmpty, gpu_alloc_empty from theano.sandbox.cuda.basic_ops import GpuAllocEmpty, gpu_alloc_empty
from theano.sandbox.cuda import float32_shared_constructor as shared from theano.sandbox.cuda import float32_shared_constructor as shared
...@@ -979,99 +981,105 @@ class TestDnnInferShapes(utt.InferShapeTester): ...@@ -979,99 +981,105 @@ class TestDnnInferShapes(utt.InferShapeTester):
dnn.GpuDnnConv3d dnn.GpuDnnConv3d
) )
def test_conv_gradw(self): def _test_conv_gradw(self, img, topgrad, kerns, img_shape, kerns_shape, border_mode, conv_mode, subsample):
if not dnn.dnn_available(): if not dnn.dnn_available():
raise SkipTest(dnn.dnn_available.msg) raise SkipTest(dnn.dnn_available.msg)
img = T.ftensor4('img')
kerns = T.ftensor4('kerns') topgrad_shape = get_conv_output_shape(img_shape, kerns_shape,
out = T.ftensor4('out') border_mode, subsample)
img_val = numpy.asarray( img_val = numpy.asarray(
numpy.random.rand(2, 5, 6, 8), numpy.random.rand(*img_shape),
dtype='float32' dtype=theano.config.floatX
) )
kern_vals = numpy.asarray( topgrad_vals = numpy.asarray(
numpy.random.rand(2, 1, 5, 6), numpy.random.rand(*topgrad_shape),
dtype='float32' dtype=theano.config.floatX
) )
for params in product( kerns_vals = numpy.zeros(kerns_shape, dtype=theano.config.floatX)
['valid', 'full', 'half'], kerns_shape = theano.shared(numpy.asarray(kerns_shape))
[(1, 1)], # strides besides (1, 1) topgrad_shape = theano.shared(numpy.asarray(topgrad_shape))
['conv', 'cross']
):
temp_img = img.dimshuffle(1, 0, 2, 3)
temp_kerns = kerns
if params[2] == 'conv':
temp_kerns = temp_kerns[:, :, ::-1, ::-1]
temp_kerns = temp_kerns.dimshuffle(1, 0, 2, 3)
shape = (
kern_vals.shape[1], img_val.shape[1],
img_val.shape[2] - kern_vals.shape[2] + 1,
img_val.shape[3] - kern_vals.shape[3] + 1
)
out_vals = numpy.zeros(shape, dtype='float32')
desc = dnn.GpuDnnConvDesc( desc = dnn.GpuDnnConvDesc(
border_mode=params[0], border_mode=border_mode,
subsample=params[1], subsample=subsample,
conv_mode=params[2] conv_mode=conv_mode
)(temp_img.shape, out.shape) )(topgrad_shape, kerns_shape)
conv_grad_w = dnn.GpuDnnConvGradW()( conv_grad_w = dnn.GpuDnnConvGradW()(
temp_img, img,
temp_kerns, topgrad,
out, kerns,
desc, desc,
) )
self._compile_and_check( self._compile_and_check(
[temp_img, temp_kerns, out], [img, topgrad, kerns],
[conv_grad_w], [conv_grad_w],
[img_val, kern_vals, out_vals], [img_val, topgrad_vals, kerns_vals],
dnn.GpuDnnConvGradW dnn.GpuDnnConvGradW
) )
def test_conv3d_gradw(self): border_modes = ['valid', 'full', 'half']
conv_modes = ['conv', 'cross']
@parameterized.expand(product(border_modes, conv_modes), utt.custom_name_func)
def test_conv_gradw(self, border_mode, conv_mode):
self._test_conv_gradw(T.tensor4('img'),
T.tensor4('topgrad'),
T.tensor4('kerns'),
(5, 2, 6, 13),
(1, 2, 3, 7),
border_mode,
conv_mode,
(1, 1))
def _test_conv3d_gradw(self, img, topgrad, kerns, img_shape, kerns_shape, border_mode, conv_mode, subsample):
if not (cuda.dnn.dnn_available() and dnn.version() >= (2000, 2000)): if not (cuda.dnn.dnn_available() and dnn.version() >= (2000, 2000)):
raise SkipTest('"cuDNN 3D convolution requires cuDNN v2') raise SkipTest('"cuDNN 3D convolution requires cuDNN v2')
img = T.ftensor5('img')
kerns = T.ftensor5('kerns') topgrad_shape = get_conv_output_shape(img_shape, kerns_shape,
out = T.ftensor5('out') border_mode, subsample)
img_val = numpy.asarray( img_val = numpy.asarray(
numpy.random.rand(9, 2, 4, 8, 13), numpy.random.rand(*img_shape),
dtype='float32' dtype=theano.config.floatX
) )
kern_vals = numpy.asarray( topgrad_vals = numpy.asarray(
numpy.random.rand(11, 2, 3, 1, 4), numpy.random.rand(*topgrad_shape),
dtype='float32' dtype=theano.config.floatX
) )
for params in product( kerns_vals = numpy.zeros(kerns_shape, dtype=theano.config.floatX)
['valid', 'full', 'half'], kerns_shape = theano.shared(numpy.asarray(kerns_shape))
[(1, 1, 1), (2, 2, 2)], topgrad_shape = theano.shared(numpy.asarray(topgrad_shape))
['conv', 'cross']
):
out_vals = numpy.zeros(
dnn.GpuDnnConv3d.get_out_shape(img_val.shape, kern_vals.shape,
border_mode=params[0],
subsample=params[1]),
dtype='float32')
desc = dnn.GpuDnnConvDesc( desc = dnn.GpuDnnConvDesc(
border_mode=params[0], border_mode=border_mode,
subsample=params[1], subsample=subsample,
conv_mode=params[2] conv_mode=conv_mode
)(img.shape, out.shape) )(topgrad_shape, kerns_shape)
conv_grad_w = dnn.GpuDnnConv3dGradW()( conv_grad_w = dnn.GpuDnnConv3dGradW()(
img, img,
out, topgrad,
kerns, kerns,
desc, desc,
) )
self._compile_and_check( self._compile_and_check(
[img, out, kerns], [img, topgrad, kerns],
[conv_grad_w], [conv_grad_w],
[img_val, out_vals, kern_vals], [img_val, topgrad_vals, kerns_vals],
dnn.GpuDnnConv3dGradW dnn.GpuDnnConv3dGradW
) )
@parameterized.expand(product(border_modes, conv_modes), utt.custom_name_func)
def test_conv3d_gradw(self, border_mode, conv_mode):
self._test_conv3d_gradw(T.tensor5('img'),
T.tensor5('topgrad'),
T.tensor5('kerns'),
(5, 2, 6, 13, 21),
(1, 2, 3, 7, 9),
border_mode,
conv_mode,
(1, 1, 1))
def test_conv_gradi(self): def test_conv_gradi(self):
if not dnn.dnn_available(): if not dnn.dnn_available():
raise SkipTest(dnn.dnn_available.msg) raise SkipTest(dnn.dnn_available.msg)
......
Markdown formatting supported
0%
You are adding 0 people to this discussion. Please proceed with caution.
Please finish editing this comment first!
Register or sign in to comment