提交 ef0fc56e authored 作者: Frédéric Bastien's avatar Frédéric Bastien 提交者: Frederic Bastien

Merge pull request #4344 from mgermain/cudnn5

Added cuDNN v5 support & Added optional dependencies to setup.py Conflicts: .travis.yml doc/install.txt
上级 02453383
......@@ -37,6 +37,7 @@ install:
- source activate pyenv
- if [[ $TRAVIS_PYTHON_VERSION == '2.6' ]]; then pip install pydot; fi
- pip install . --no-deps
- pip install nose-parameterized==0.5.0
# command to run tests
env:
......
......@@ -49,7 +49,7 @@ instructions below for detailed installation steps):
The following libraries and software are optional:
`nose <http://somethingaboutorange.com/mrl/projects/nose/>`_ >= 1.3.0
`nose <http://nose.readthedocs.org/en/latest/>`_ >= 1.3.0 and `nose-parameterized <https://pypi.python.org/pypi/nose-parameterized/>`_ >= 0.5.0
Recommended, to run Theano's test-suite.
`Sphinx <http://sphinx.pocoo.org/>`_ >= 0.5.1, `pygments <http://pygments.org/>`_
......
sphinx>=1.3.0
pygments
nose>=1.3.0
nose-parameterized>=0.5.0
......@@ -163,6 +163,11 @@ def do_setup():
packages=find_packages(),
# 1.7.0 give too much warning related to numpy.diagonal.
install_requires=['numpy>=1.7.1', 'scipy>=0.11', 'six>=1.9.0'],
# pygments is a dependency for Sphinx code highlight
extras_require={
'test': ['nose>=1.3.0', 'nose-parameterized>=0.5.0'],
'doc': ['Sphinx>=0.5.1', 'pygments']
},
package_data={
'': ['*.txt', '*.rst', '*.cu', '*.cuh', '*.c', '*.sh', '*.pkl',
'*.h', '*.cpp', 'ChangeLog'],
......
......@@ -286,6 +286,20 @@ def safe_no_dnn_algo_bwd(algo):
'`dnn.conv.algo_bwd_filter` and `dnn.conv.algo_bwd_data` instead.')
return True
# These are the convolution algorithms supported by Theano's cuDNN
# wrappers.  The test-suite references these lists, and the config
# flags below are built from them via EnumStr(*...).
SUPPORTED_DNN_CONV_ALGO_FWD = (
    'small', 'none', 'large', 'fft', 'fft_tiling', 'winograd',
    'guess_once', 'guess_on_shape_change',
    'time_once', 'time_on_shape_change',
)
SUPPORTED_DNN_CONV_ALGO_BWD_DATA = (
    'none', 'deterministic', 'fft', 'fft_tiling', 'winograd',
    'guess_once', 'guess_on_shape_change',
    'time_once', 'time_on_shape_change',
)
SUPPORTED_DNN_CONV_ALGO_BWD_FILTER = (
    'none', 'deterministic', 'fft', 'small',
    'guess_once', 'guess_on_shape_change',
    'time_once', 'time_on_shape_change',
)
AddConfigVar('dnn.conv.algo_bwd',
"This flag is deprecated; use dnn.conv.algo_bwd_data and "
"dnn.conv.algo_bwd_filter.",
......@@ -295,26 +309,20 @@ AddConfigVar('dnn.conv.algo_bwd',
AddConfigVar('dnn.conv.algo_fwd',
"Default implementation to use for CuDNN forward convolution.",
EnumStr('small', 'none', 'large', 'fft', 'fft_tiling',
'guess_once', 'guess_on_shape_change',
'time_once', 'time_on_shape_change'),
EnumStr(*SUPPORTED_DNN_CONV_ALGO_FWD),
in_c_key=False)
AddConfigVar('dnn.conv.algo_bwd_data',
"Default implementation to use for CuDNN backward convolution to "
"get the gradients of the convolution with regard to the inputs.",
EnumStr('none', 'deterministic', 'fft', 'fft_tiling',
'guess_once', 'guess_on_shape_change', 'time_once',
'time_on_shape_change'),
EnumStr(*SUPPORTED_DNN_CONV_ALGO_BWD_DATA),
in_c_key=False)
AddConfigVar('dnn.conv.algo_bwd_filter',
"Default implementation to use for CuDNN backward convolution to "
"get the gradients of the convolution with regard to the "
"filters.",
EnumStr('none', 'deterministic', 'fft', 'small', 'guess_once',
'guess_on_shape_change', 'time_once',
'time_on_shape_change'),
EnumStr(*SUPPORTED_DNN_CONV_ALGO_BWD_FILTER),
in_c_key=False)
AddConfigVar('dnn.conv.precision',
......
差异被折叠。
......@@ -54,8 +54,11 @@ c_set_filterNd(CudaNdarray *var, cudnnFilterDescriptor_t desc) {
return -1;
}
int dim = CudaNdarray_NDIM(var);
cudnnStatus_t err = cudnnSetFilterNdDescriptor(desc, CUDNN_DATA_FLOAT, dim,
CudaNdarray_HOST_DIMS(var));
cudnnStatus_t err = cudnnSetFilterNdDescriptor_v4(desc,
CUDNN_DATA_FLOAT,
CUDNN_TENSOR_NCHW,
dim,
CudaNdarray_HOST_DIMS(var));
if (err != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError,
"Could not set filter descriptor: %s."
......
......@@ -179,8 +179,8 @@ APPLY_SPECIFIC(conv_fwd)(CudaNdarray *input, CudaNdarray *kerns,
int upscale[2];
cudnnConvolutionMode_t mode;
cudnnDataType_t data_type;
err = cudnnGetConvolutionNdDescriptor_v3(desc, 2, &nd, pad, stride,
upscale, &mode, &data_type);
err = cudnnGetConvolutionNdDescriptor(desc, 2, &nd, pad, stride,
upscale, &mode, &data_type);
if (err != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError,
......@@ -224,6 +224,19 @@ APPLY_SPECIFIC(conv_fwd)(CudaNdarray *input, CudaNdarray *kerns,
APPLY_SPECIFIC(output),
chosen_algo,
&worksize);
if (err == CUDNN_STATUS_NOT_SUPPORTED) {
// Fallback to none algo if not supported
// TODO: Print a warning
chosen_algo = CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM;
err = cudnnGetConvolutionForwardWorkspaceSize(_handle,
APPLY_SPECIFIC(input),
APPLY_SPECIFIC(kerns),
desc,
APPLY_SPECIFIC(output),
chosen_algo,
&worksize);
}
if (err != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError,
"GpuDnnConv: error getting worksize: %s",
......
......@@ -178,8 +178,8 @@ APPLY_SPECIFIC(conv_gi)(CudaNdarray *kerns, CudaNdarray *output,
int upscale[2];
cudnnConvolutionMode_t mode;
cudnnDataType_t data_type;
err = cudnnGetConvolutionNdDescriptor_v3(desc, 2, &nd, pad, stride,
upscale, &mode, &data_type);
err = cudnnGetConvolutionNdDescriptor(desc, 2, &nd, pad, stride,
upscale, &mode, &data_type);
if (err != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError,
......@@ -237,7 +237,7 @@ APPLY_SPECIFIC(conv_gi)(CudaNdarray *kerns, CudaNdarray *output,
return 1;
// Perform the convolution
err = cudnnConvolutionBackwardData_v3(
err = cudnnConvolutionBackwardData(
_handle,
(void *)&alpha,
APPLY_SPECIFIC(kerns), CudaNdarray_DEV_DATA(kerns),
......
......@@ -173,8 +173,8 @@ APPLY_SPECIFIC(conv_gw)(CudaNdarray *input, CudaNdarray *output,
int upscale[2];
cudnnConvolutionMode_t mode;
cudnnDataType_t data_type;
err = cudnnGetConvolutionNdDescriptor_v3(desc, 2, &nd, pad, stride,
upscale, &mode, &data_type);
err = cudnnGetConvolutionNdDescriptor(desc, 2, &nd, pad, stride,
upscale, &mode, &data_type);
if (err != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError,
......@@ -221,7 +221,7 @@ APPLY_SPECIFIC(conv_gw)(CudaNdarray *input, CudaNdarray *output,
return 1;
// Perform the convolution
err = cudnnConvolutionBackwardFilter_v3(
err = cudnnConvolutionBackwardFilter(
_handle,
(void *)&alpha,
APPLY_SPECIFIC(input), CudaNdarray_DEV_DATA(input),
......
......@@ -392,8 +392,9 @@ def test_pooling_with_tensor_vars():
def test_old_pool_interface():
if not cuda.dnn.dnn_available():
if not cuda.dnn.dnn_available() or cuda.dnn.version() > (5000, 5000):
raise SkipTest(cuda.dnn.dnn_available.msg)
testfile_dir = os.path.dirname(os.path.realpath(__file__))
fname = 'old_pool_interface.pkl'
with open(os.path.join(testfile_dir, fname), 'rb') as fp:
......
......@@ -35,7 +35,7 @@ int APPLY_SPECIFIC(conv_desc)(PyArrayObject *filt_shp,
return -1;
}
err = cudnnSetConvolutionNdDescriptor_v3(*desc, NB_DIMS, pad, strides,
upscale, CONV_MODE, PRECISION);
err = cudnnSetConvolutionNdDescriptor(*desc, NB_DIMS, pad, strides,
upscale, CONV_MODE, PRECISION);
return 0;
}
......@@ -33,6 +33,8 @@ from .nnet import GpuSoftmax
from .opt import gpu_seqopt, register_opt, conv_groupopt, op_lifter
from .opt_util import alpha_merge, output_merge, inplace_allocempty
from theano.configdefaults import SUPPORTED_DNN_CONV_ALGO_BWD_FILTER
def raise_no_cudnn(msg="CuDNN is required for convolution and pooling"):
    """Signal an unusable cuDNN installation.

    Raises a ``RuntimeError`` carrying *msg*; used as a stand-in callback
    when cuDNN-backed ops are requested but cuDNN is unavailable.
    """
    error = RuntimeError(msg)
    raise error
......@@ -232,6 +234,7 @@ def version(raises=True):
:raises: If True, raise an exception if CuDNN is not present or badly installed.
Otherwise, return -1.
"""
if not dnn_present():
if raises:
......@@ -397,9 +400,9 @@ class GpuDnnConv(DnnBase):
----------
image
kernel
descr
descr :
The convolution descriptor.
algo : {'small', 'none', 'large', 'fft', 'fft_tiling', 'guess_once',
algo : {'small', 'none', 'large', 'fft', 'fft_tiling', 'winograd', 'guess_once',
'guess_on_shape_change', 'time_once', 'time_on_shape_change'}
Default is the value of :attr:`config.dnn.conv.algo_fwd`.
......@@ -435,8 +438,12 @@ class GpuDnnConv(DnnBase):
raise RuntimeError("CuDNN tiled-FFT convolution requires "
"CuDNN v4 or more recent")
if version() < 5000 and self.algo == 'winograd':
raise RuntimeError("CuDNN winograd convolution requires "
"CuDNN v5 or more recent")
assert self.algo in ['none', 'small', 'large', 'fft', 'fft_tiling',
'guess_once', 'guess_on_shape_change',
'winograd', 'guess_once', 'guess_on_shape_change',
'time_once', 'time_on_shape_change']
def __setstate__(self, d):
......@@ -468,6 +475,9 @@ class GpuDnnConv(DnnBase):
elif self.algo == 'fft_tiling':
# need v4
alg = 'CUDNN_CONVOLUTION_FWD_ALGO_FFT_TILING'
elif self.algo == 'winograd':
# need v5
alg = 'CUDNN_CONVOLUTION_FWD_ALGO_WINOGRAD'
defs.append(('CONV_ALGO', alg))
if self.algo in ['guess_once', 'guess_on_shape_change',
......@@ -565,8 +575,11 @@ class GpuDnnConvGradW(DnnBase):
----------
image
kernel
descr
descr :
The convolution descriptor.
algo : {'none', 'deterministic', 'fft', 'small', 'guess_once',
'guess_on_shape_change', 'time_once', 'time_on_shape_change'}
Default is the value of :attr:`config.dnn.conv.algo_bwd_filter`.
"""
......@@ -582,9 +595,7 @@ class GpuDnnConvGradW(DnnBase):
algo = config.dnn.conv.algo_bwd_filter
self.algo = algo
assert self.algo in ['none', 'deterministic', 'fft', 'small',
'guess_once', 'guess_on_shape_change',
'time_once', 'time_on_shape_change']
assert self.algo in SUPPORTED_DNN_CONV_ALGO_BWD_FILTER
def __setstate__(self, d):
self.__dict__.update(d)
......@@ -688,6 +699,9 @@ class GpuDnnConvGradI(DnnBase):
kernel
descr
The convolution descriptor.
algo : {'none', 'deterministic', 'fft', 'fft_tiling', 'winograd', 'guess_once',
'guess_on_shape_change', 'time_once', 'time_on_shape_change'}
Default is the value of :attr:`config.dnn.conv.algo_bwd_data`.
"""
......@@ -708,9 +722,12 @@ class GpuDnnConvGradI(DnnBase):
if version() < 4000 and self.algo == 'fft_tiling':
raise RuntimeError("CuDNN's tiled-FFT convolution requires CuDNN "
"v4 or more recent")
if version() < 5000 and self.algo == 'winograd':
raise RuntimeError("CuDNN's winograd convolution requires CuDNN "
"v5 or more recent")
assert self.algo in ['none', 'deterministic', 'fft', 'fft_tiling',
'guess_once', 'guess_on_shape_change',
'winograd', 'guess_once', 'guess_on_shape_change',
'time_once', 'time_on_shape_change']
def __setstate__(self, d):
......@@ -749,13 +766,16 @@ class GpuDnnConvGradI(DnnBase):
alg = 'CUDNN_CONVOLUTION_BWD_DATA_ALGO_0'
if self.algo == 'none':
alg = 'CUDNN_CONVOLUTION_BWD_DATA_ALGO_0'
if self.algo == 'deterministic':
elif self.algo == 'deterministic':
alg = 'CUDNN_CONVOLUTION_BWD_DATA_ALGO_1'
if self.algo == 'fft':
elif self.algo == 'fft':
alg = 'CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT'
if self.algo == 'fft_tiling':
elif self.algo == 'fft_tiling':
# big workspace but less than fft
alg = 'CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT_TILING'
elif self.algo == 'winograd':
# need v5
alg = 'CUDNN_CONVOLUTION_BWD_DATA_ALGO_WINOGRAD'
if self.algo in ['guess_once', 'guess_on_shape_change',
'time_once', 'time_on_shape_change']:
......@@ -1047,9 +1067,13 @@ class GpuDnnPoolDesc(Op):
static const int win[%(nd)d] = {%(win)s};
static const int pad[%(nd)d] = {%(pad)s};
static const int str[%(nd)d] = {%(str)s};
err = cudnnSetPoolingNdDescriptor(
%(desc)s, %(mode_flag)s, %(nd)d,
win, pad, str);
#if CUDNN_VERSION >= 5000
err = cudnnSetPoolingNdDescriptor(%(desc)s, %(mode_flag)s, CUDNN_PROPAGATE_NAN, %(nd)d, win, pad, str);
#else
err = cudnnSetPoolingNdDescriptor(%(desc)s, %(mode_flag)s, %(nd)d, win, pad, str);
#endif
if (err != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError, "could not set op descriptor: %%s",
cudnnGetErrorString(err));
......@@ -1062,7 +1086,7 @@ class GpuDnnPoolDesc(Op):
str=', '.join(map(str, self.stride)))
def c_code_cache_version(self):
return (3, version())
return (4, version())
class GpuDnnPool(DnnBase):
......@@ -1070,18 +1094,17 @@ class GpuDnnPool(DnnBase):
"""
Parameters
----------
img
img : tensor
The image 4d or 5d tensor.
Parameters
----------
ws : tensor variable
ws : tensor
Window size.
stride : tensor variable
stride : tensor
(dx, dy) or (dx, dy, dz).
mode : {'max', 'average_inc_pad', 'average_exc_pad'}
The old deprecated name 'average' corresponds to 'average_inc_pad'.
pad : tensor
(padX, padY) or (padX, padY, padZ)
"""
__props__ = ('mode',)
......@@ -1255,14 +1278,12 @@ class GpuDnnSoftmaxBase(DnnBase):
Parameters
----------
algo
'fast', 'accurate' or 'log' indicating whether, respectively,
computations should be optimized for speed, for accuracy, or if CuDNN
should rather compute the log-softmax instead.
mode
'instance' or 'channel' indicating whether the softmax should be
computed per image across 'c01' or per spatial location '01' per
image across 'c'.
algo : {'fast', 'accurate', 'log'}
Indicating whether, respectively, computations should be optimized for
speed, for accuracy, or if CuDNN should rather compute the log-softmax instead.
mode : {'instance', 'channel'}
Indicating whether the softmax should be computed per image across 'c01'
or per spatial location '01' per image across 'c'.
"""
......@@ -1306,14 +1327,12 @@ class GpuDnnSoftmax(GpuDnnSoftmaxBase):
"""
Op for the cuDNN Softmax.
algo
'fast', 'accurate' or 'log' indicating whether, respectively,
computations should be optimized for speed, for accuracy, or if CuDNN
should rather compute the log-softmax instead.
mode
'instance' or 'channel' indicating whether the softmax should be
computed per image across 'c01' or per spatial location '01' per
image across 'c'.
algo : {'fast', 'accurate', 'log'}
Indicating whether, respectively, computations should be optimized for
speed, for accuracy, or if CuDNN should rather compute the log-softmax instead.
mode : {'instance', 'channel'}
Indicating whether the softmax should be computed per image across 'c01'
or per spatial location '01' per image across 'c'.
"""
direction = "forward"
......
......@@ -51,6 +51,8 @@ c_set_tensorNd(PyGpuArrayObject *var, cudnnTensorDescriptor_t desc) {
static int
c_set_filter(PyGpuArrayObject *var, cudnnFilterDescriptor_t desc) {
cudnnDataType_t dt;
cudnnStatus_t err;
if (!GpuArray_IS_C_CONTIGUOUS(&var->ga)) {
PyErr_SetString(PyExc_ValueError,
"Only contiguous filters (kernels) are supported.");
......@@ -86,7 +88,12 @@ c_set_filter(PyGpuArrayObject *var, cudnnFilterDescriptor_t desc) {
dims[i] = PyGpuArray_DIM(var, i);
}
cudnnStatus_t err = cudnnSetFilterNdDescriptor(desc, dt, nd, dims);
#if CUDNN_VERSION >= 5000
err = cudnnSetFilterNdDescriptor(desc, dt, CUDNN_TENSOR_NCHW, nd, dims);
#else
err = cudnnSetFilterNdDescriptor(desc, dt, nd, dims);
#endif
if (err != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError,
"Could not set filter descriptor: %s.",
......
......@@ -92,12 +92,12 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns,
}
algo = choice.algo;
#else
size_t free = 0, total = 0;
cudaError_t err2 = cudaMemGetInfo(&free, &total);
if (err2 != cudaSuccess) {
size_t free;
int err2 = c->ops->property(c->ctx, NULL, NULL, GA_CTX_PROP_FREE_GMEM, &free);
if (err2 != GA_NO_ERROR) {
PyErr_Format(PyExc_RuntimeError, "Error when trying to find the "
"memory information on the GPU: %s\n",
cudaGetErrorString(err2));
"memory information on the GPU");
cuda_exit(c->ctx);
return 1;
}
......@@ -154,7 +154,7 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns,
int upscale[2];
cudnnConvolutionMode_t mode;
cudnnDataType_t data_type;
err = cudnnGetConvolutionNdDescriptor_v3(desc, 2, &nd, pad, stride,
err = cudnnGetConvolutionNdDescriptor(desc, 2, &nd, pad, stride,
upscale, &mode, &data_type);
if (err != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError,
......@@ -193,6 +193,21 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns,
APPLY_SPECIFIC(output),
algo,
&worksize);
if (err == CUDNN_STATUS_NOT_SUPPORTED) {
// Fallback to none algo if not supported
// TODO: Print a warning
algo = CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM;
err = cudnnGetConvolutionForwardWorkspaceSize(APPLY_SPECIFIC(_handle),
APPLY_SPECIFIC(input),
APPLY_SPECIFIC(kerns),
desc,
APPLY_SPECIFIC(output),
algo,
&worksize);
}
if (err != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError,
"error getting worksize: %s",
......
......@@ -91,12 +91,12 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output,
algo = choice.algo;
#else
size_t free = 0, total = 0;
cudaError_t err2 = cudaMemGetInfo(&free, &total);
if (err2 != cudaSuccess){
cudaGetLastError();
PyErr_Format(PyExc_RuntimeError, "Error when trying to find the memory "
"information on the GPU: %s\n", cudaGetErrorString(err2));
size_t free;
int err2 = c->ops->property(c->ctx, NULL, NULL, GA_CTX_PROP_FREE_GMEM, &free);
if (err2 != GA_NO_ERROR) {
PyErr_Format(PyExc_RuntimeError, "Error when trying to find the "
"memory information on the GPU");
cuda_exit(c->ctx);
return 1;
}
......@@ -146,7 +146,7 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output,
int upscale[2];
cudnnConvolutionMode_t mode;
cudnnDataType_t data_type;
err = cudnnGetConvolutionNdDescriptor_v3(desc, 2, &nd, pad, stride,
err = cudnnGetConvolutionNdDescriptor(desc, 2, &nd, pad, stride,
upscale, &mode, &data_type);
if (err != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError,
......@@ -203,7 +203,7 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output,
cuda_wait(output->ga.data, GPUARRAY_CUDA_WAIT_READ);
cuda_wait((*input)->ga.data, GPUARRAY_CUDA_WAIT_WRITE);
err = cudnnConvolutionBackwardData_v3(
err = cudnnConvolutionBackwardData(
APPLY_SPECIFIC(_handle),
alpha_p,
APPLY_SPECIFIC(kerns), PyGpuArray_DEV_DATA(kerns),
......
......@@ -92,12 +92,12 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output,
algo = choice.algo;
#else
size_t free = 0, total = 0;
cudaError_t err2 = cudaMemGetInfo(&free, &total);
if (err2 != cudaSuccess){
cudaGetLastError();
PyErr_Format(PyExc_RuntimeError, "Error when trying to find the memory "
"information on the GPU: %s\n", cudaGetErrorString(err2));
size_t free;
int err2 = c->ops->property(c->ctx, NULL, NULL, GA_CTX_PROP_FREE_GMEM, &free);
if (err2 != GA_NO_ERROR) {
PyErr_Format(PyExc_RuntimeError, "Error when trying to find the "
"memory information on the GPU");
cuda_exit(c->ctx);
return 1;
}
......@@ -146,7 +146,7 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output,
int upscale[2];
cudnnConvolutionMode_t mode;
cudnnDataType_t data_type;
err = cudnnGetConvolutionNdDescriptor_v3(desc, 2, &nd, pad, stride,
err = cudnnGetConvolutionNdDescriptor(desc, 2, &nd, pad, stride,
upscale, &mode, &data_type);
if (err != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError,
......@@ -190,7 +190,7 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output,
cuda_wait(output->ga.data, GPUARRAY_CUDA_WAIT_READ);
cuda_wait((*kerns)->ga.data, GPUARRAY_CUDA_WAIT_WRITE);
err = cudnnConvolutionBackwardFilter_v3(
err = cudnnConvolutionBackwardFilter(
APPLY_SPECIFIC(_handle),
alpha_p,
APPLY_SPECIFIC(input), PyGpuArray_DEV_DATA(input),
......
......@@ -24,7 +24,7 @@ if ((APPLY_SPECIFIC(err) = cudnnCreateTensorDescriptor(&APPLY_SPECIFIC(output)))
}
if ((APPLY_SPECIFIC(err) = cudnnCreatePoolingDescriptor(&APPLY_SPECIFIC(pool))) != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_MemoryError, "could not allocate pooling descriptor"
"(pool): %s", cudnnGetErrorString(APPLY_SPECIFIC(err)));
"(pool): %s", cudnnGetErrorString(APPLY_SPECIFIC(err)));
FAIL;
}
......@@ -38,7 +38,7 @@ if (APPLY_SPECIFIC(pool) != NULL) { cudnnDestroyPoolingDescriptor(APPLY_SPECIFIC
#section support_code_struct
int APPLY_SPECIFIC(dnn_pool)(PyGpuArrayObject *img,
PyArrayObject *ws,
PyArrayObject *ws,
PyArrayObject *stride,
PyArrayObject *pad,
PyGpuArrayObject **out,
......@@ -69,7 +69,12 @@ int APPLY_SPECIFIC(dnn_pool)(PyGpuArrayObject *img,
for(int i = 0; i < ndims; i++) {
s[i] = *((npy_intp*)PyArray_GETPTR1(stride, i));
}
#if CUDNN_VERSION >= 5000
err = cudnnSetPoolingNdDescriptor(APPLY_SPECIFIC(pool), MODE_FLAG, CUDNN_PROPAGATE_NAN, ndims, w, p, s);
#else
err = cudnnSetPoolingNdDescriptor(APPLY_SPECIFIC(pool), MODE_FLAG, ndims, w, p, s);
#endif
if (err != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError, "could not set op descriptor %s", cudnnGetErrorString(err));
......
......@@ -42,7 +42,7 @@ APPLY_SPECIFIC(pool) = NULL;
}
if ((err = cudnnCreatePoolingDescriptor(&APPLY_SPECIFIC(pool))) != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_MemoryError, "could not allocate pooling descriptor"
"(pool): %s", cudnnGetErrorString(err));
"(pool): %s", cudnnGetErrorString(err));
FAIL;
}
}
......@@ -60,7 +60,7 @@ if (APPLY_SPECIFIC(pool) != NULL) { cudnnDestroyPoolingDescriptor(APPLY_SPECIFIC
int APPLY_SPECIFIC(dnn_pool_grad)(PyGpuArrayObject *inp,
PyGpuArrayObject *out,
PyGpuArrayObject *out_grad,
PyArrayObject *ws,
PyArrayObject *ws,
PyArrayObject *stride,
PyArrayObject *pad,
PyGpuArrayObject **inp_grad,
......@@ -109,7 +109,12 @@ int APPLY_SPECIFIC(dnn_pool_grad)(PyGpuArrayObject *inp,
for(int i = 0; i < ndims; i++) {
s[i] = *((npy_intp*)PyArray_GETPTR1(stride, i));
}
#if CUDNN_VERSION >= 5000
err = cudnnSetPoolingNdDescriptor(APPLY_SPECIFIC(pool), MODE_FLAG, CUDNN_PROPAGATE_NAN, ndims, w, p, s);
#else
err = cudnnSetPoolingNdDescriptor(APPLY_SPECIFIC(pool), MODE_FLAG, ndims, w, p, s);
#endif
if (err != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError, "could not set op descriptor %s", cudnnGetErrorString(err));
......
import logging
from nose.plugins.skip import SkipTest
from nose_parameterized import parameterized
import numpy
from itertools import product
from itertools import product, chain
import theano
from six import StringIO
......@@ -18,6 +19,8 @@ from ..basic_ops import GpuAllocEmpty
from .config import mode_with_gpu, mode_without_gpu, test_ctx_name
from . import test_nnet
from theano.configdefaults import SUPPORTED_DNN_CONV_ALGO_FWD
def test_dnn_conv_desc_merge():
if not dnn.dnn_available(test_ctx_name):
......@@ -393,6 +396,9 @@ def test_dnn_tag():
class TestDnnInferShapes(utt.InferShapeTester):
border_modes = ['valid', 'full', 'half']
conv_modes = ['conv', 'cross']
def setUp(self):
    # Run the parent InferShapeTester setup first (seeds the RNG and
    # records any mode defined by a child class), then force the GPU
    # mode so the cuDNN ops under test are actually selected.
    super(TestDnnInferShapes, self).setUp()
    self.mode = mode_with_gpu
......@@ -427,37 +433,25 @@ class TestDnnInferShapes(utt.InferShapeTester):
dnn.GpuDnnSoftmaxGrad
)
def test_conv(self):
def _test_conv(self, img, kerns, out, img_val, kern_vals, border_mode, conv_mode, subsamples, algo):
if not dnn.dnn_available(test_ctx_name):
raise SkipTest(dnn.dnn_available.msg)
img = T.ftensor4('img')
kerns = T.ftensor4('kerns')
out = T.ftensor4('out')
img_val = numpy.asarray(
numpy.random.rand(7, 2, 6, 4),
dtype='float32'
)
kern_vals = numpy.asarray(
numpy.random.rand(8, 2, 4, 3),
dtype='float32'
)
for params in product(
['valid', 'full', 'half'],
[(1, 1), (2, 2)],
['conv', 'cross']
):
img_val = numpy.asarray(img_val, dtype='float32')
kern_vals = numpy.asarray(kern_vals, dtype='float32')
for subsample in subsamples:
out_vals = numpy.zeros(
dnn.GpuDnnConv.get_out_shape(img_val.shape, kern_vals.shape,
border_mode=params[0],
subsample=params[1]),
border_mode=border_mode,
subsample=subsample),
dtype='float32')
desc = dnn.GpuDnnConvDesc(
border_mode=params[0],
subsample=params[1],
conv_mode=params[2]
border_mode=border_mode,
subsample=subsample,
conv_mode=conv_mode
)(kerns.shape)
conv = dnn.GpuDnnConv()(img, kerns, out, desc)
conv = dnn.GpuDnnConv(algo=algo)(img, kerns, out, desc)
self._compile_and_check(
[img, kerns, out],
[conv],
......@@ -465,54 +459,92 @@ class TestDnnInferShapes(utt.InferShapeTester):
dnn.GpuDnnConv
)
def test_conv_gradw(self):
@parameterized.expand(chain(product([SUPPORTED_DNN_CONV_ALGO_FWD[0]],
border_modes,
conv_modes),
product(SUPPORTED_DNN_CONV_ALGO_FWD[1:],
[border_modes[0]],
[conv_modes[0]])),
testcase_func_name=utt.custom_name_func)
def test_conv(self, algo, border_mode, conv_mode):
if algo == 'winograd' and dnn.version() < 5000:
raise SkipTest(dnn.dnn_available.msg)
self._test_conv(T.ftensor4('img'),
T.ftensor4('kerns'),
T.ftensor4('out'),
numpy.random.rand(7, 2, 8, 4),
numpy.random.rand(8, 2, 4, 3),
border_mode,
conv_mode,
[(1, 1), (2, 2)],
algo)
@parameterized.expand(product(border_modes, conv_modes), utt.custom_name_func)
def test_conv3d_none(self, border_mode, conv_mode):
    """Shape-inference check for 3d cuDNN convolution with the 'none' algo.

    Parameterized over every (border_mode, conv_mode) pair; one generated
    test case per combination.
    """
    # 5d float32 tensor type: (batch, channels, dim0, dim1, dim2).
    ftensor5 = T.TensorType(dtype="float32", broadcastable=(False,) * 5)
    self._test_conv(ftensor5('img'),
                    ftensor5('kerns'),
                    ftensor5('out'),
                    numpy.random.rand(10, 2, 6, 4, 11),
                    numpy.random.rand(8, 2, 4, 3, 1),
                    border_mode,
                    conv_mode,
                    [(1, 1, 1), (2, 2, 2)],  # subsample settings exercised
                    'none')
def _test_conv_gradw(self, img, kerns, out, img_val, kern_vals, border_mode, conv_mode, subsample):
if not dnn.dnn_available(test_ctx_name):
raise SkipTest(dnn.dnn_available.msg)
img = T.ftensor4('img')
kerns = T.ftensor4('kerns')
out = T.ftensor4('out')
img_val = numpy.asarray(
numpy.random.rand(2, 5, 6, 8),
img_val,
dtype='float32'
)
kern_vals = numpy.asarray(
numpy.random.rand(2, 1, 5, 6),
kern_vals,
dtype='float32'
)
for params in product(
['valid', 'full', 'half'],
[(1, 1)], # strides besides (1, 1)
['conv', 'cross']
):
temp_img = img.dimshuffle(1, 0, 2, 3)
temp_kerns = kerns
if params[2] == 'conv':
temp_kerns = temp_kerns[:, :, ::-1, ::-1]
temp_kerns = temp_kerns.dimshuffle(1, 0, 2, 3)
shape = (
kern_vals.shape[1], img_val.shape[1],
img_val.shape[2] - kern_vals.shape[2] + 1,
img_val.shape[3] - kern_vals.shape[3] + 1
)
out_vals = numpy.zeros(shape, dtype='float32')
desc = dnn.GpuDnnConvDesc(
border_mode=params[0],
subsample=params[1],
conv_mode=params[2]
)(out.shape)
conv_grad_w = dnn.GpuDnnConvGradW()(
temp_img,
temp_kerns,
out,
desc,
)
self._compile_and_check(
[temp_img, temp_kerns, out],
[conv_grad_w],
[img_val, kern_vals, out_vals],
dnn.GpuDnnConvGradW
)
temp_img = img.dimshuffle(1, 0, 2, 3)
temp_kerns = kerns
if conv_mode == 'conv':
temp_kerns = temp_kerns[:, :, ::-1, ::-1]
temp_kerns = temp_kerns.dimshuffle(1, 0, 2, 3)
shape = (
kern_vals.shape[1], img_val.shape[1],
img_val.shape[2] - kern_vals.shape[2] + 1,
img_val.shape[3] - kern_vals.shape[3] + 1
)
out_vals = numpy.zeros(shape, dtype='float32')
desc = dnn.GpuDnnConvDesc(
border_mode=border_mode,
subsample=subsample,
conv_mode=conv_mode
)(out.shape)
conv_grad_w = dnn.GpuDnnConvGradW()(
temp_img,
temp_kerns,
out,
desc,
)
self._compile_and_check(
[temp_img, temp_kerns, out],
[conv_grad_w],
[img_val, kern_vals, out_vals],
dnn.GpuDnnConvGradW
)
@parameterized.expand(product(border_modes, conv_modes), utt.custom_name_func)
def test_conv_gradw(self, border_mode, conv_mode):
    """Shape-inference check for the gradient wrt filters (GpuDnnConvGradW).

    Parameterized over every (border_mode, conv_mode) pair; delegates the
    actual graph construction and check to ``_test_conv_gradw``.
    """
    self._test_conv_gradw(T.ftensor4('img'),
                          T.ftensor4('kerns'),
                          T.ftensor4('out'),
                          numpy.random.rand(2, 5, 6, 8),
                          numpy.random.rand(2, 1, 5, 6),
                          border_mode,
                          conv_mode,
                          (1, 1))  # only unit strides are tested here
def test_conv_gradi(self):
if not dnn.dnn_available(test_ctx_name):
......
......@@ -4,6 +4,7 @@ from functools import wraps
import logging
import sys
import unittest
from nose_parameterized import parameterized
from six import integer_types
from six.moves import StringIO
......@@ -31,6 +32,13 @@ except ImportError:
_logger = logging.getLogger("theano.tests.unittest_tools")
def custom_name_func(testcase_func, param_num, param):
    """Build a readable generated-test name for nose-parameterized.

    The name is ``<test function name>_<sanitized args>``, where the
    parameter values are stringified, joined with underscores, and
    passed through ``parameterized.to_safe_name``.
    """
    joined_args = "_".join(str(arg) for arg in param.args)
    safe_suffix = parameterized.to_safe_name(joined_args)
    return "%s_%s" % (testcase_func.__name__, safe_suffix)
def fetch_seed(pseed=None):
"""
Returns the seed to use for running the unit tests.
......@@ -96,6 +104,7 @@ verify_grad.E_grad = T.verify_grad.E_grad
class TestOptimizationMixin(object):
def assertFunctionContains(self, f, op, min=1, max=sys.maxsize):
toposort = f.maker.fgraph.toposort()
matches = [node for node in toposort if node.op == op]
......@@ -172,6 +181,7 @@ class T_OpContractMixin(object):
class InferShapeTester(unittest.TestCase):
def setUp(self):
seed_rng()
# Take into account any mode that may be defined in a child class
......@@ -311,6 +321,7 @@ def str_diagnostic(expected, value, rtol, atol):
class WrongValue(Exception):
def __init__(self, expected_val, val, rtol, atol):
Exception.__init__(self) # to be compatible with python2.4
self.val1 = expected_val
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论