提交 ef0fc56e authored 作者: Frédéric Bastien's avatar Frédéric Bastien 提交者: Frederic Bastien

Merge pull request #4344 from mgermain/cudnn5

Added cuDNN v5 support & Added optional dependencies to setup.py Conflicts: .travis.yml doc/install.txt
上级 02453383
......@@ -37,6 +37,7 @@ install:
- source activate pyenv
- if [[ $TRAVIS_PYTHON_VERSION == '2.6' ]]; then pip install pydot; fi
- pip install . --no-deps
- pip install nose-parameterized==0.5.0
# command to run tests
env:
......
......@@ -49,7 +49,7 @@ instructions below for detailed installation steps):
The following libraries and software are optional:
`nose <http://somethingaboutorange.com/mrl/projects/nose/>`_ >= 1.3.0
`nose <http://nose.readthedocs.org/en/latest/>`_ >= 1.3.0 and `nose-parameterized <https://pypi.python.org/pypi/nose-parameterized/>`_ >= 0.5.0
Recommended, to run Theano's test-suite.
`Sphinx <http://sphinx.pocoo.org/>`_ >= 0.5.1, `pygments <http://pygments.org/>`_
......
sphinx>=1.3.0
pygments
nose>=1.3.0
nose-parameterized>=0.5.0
......@@ -163,6 +163,11 @@ def do_setup():
packages=find_packages(),
# 1.7.0 give too much warning related to numpy.diagonal.
install_requires=['numpy>=1.7.1', 'scipy>=0.11', 'six>=1.9.0'],
# pygments is a dependency for Sphinx code highlight
extras_require={
'test': ['nose>=1.3.0', 'nose-parameterized>=0.5.0'],
'doc': ['Sphinx>=0.5.1', 'pygments']
},
package_data={
'': ['*.txt', '*.rst', '*.cu', '*.cuh', '*.c', '*.sh', '*.pkl',
'*.h', '*.cpp', 'ChangeLog'],
......
......@@ -286,6 +286,20 @@ def safe_no_dnn_algo_bwd(algo):
'`dnn.conv.algo_bwd_filter` and `dnn.conv.algo_bwd_data` instead.')
return True
# Those are the supported algorithm by Theano,
# The tests will reference those lists.
SUPPORTED_DNN_CONV_ALGO_FWD = ('small', 'none', 'large', 'fft', 'fft_tiling',
'winograd', 'guess_once', 'guess_on_shape_change',
'time_once', 'time_on_shape_change')
SUPPORTED_DNN_CONV_ALGO_BWD_DATA = ('none', 'deterministic', 'fft', 'fft_tiling',
'winograd', 'guess_once', 'guess_on_shape_change',
'time_once', 'time_on_shape_change')
SUPPORTED_DNN_CONV_ALGO_BWD_FILTER = ('none', 'deterministic', 'fft', 'small',
'guess_once', 'guess_on_shape_change',
'time_once', 'time_on_shape_change')
AddConfigVar('dnn.conv.algo_bwd',
"This flag is deprecated; use dnn.conv.algo_bwd_data and "
"dnn.conv.algo_bwd_filter.",
......@@ -295,26 +309,20 @@ AddConfigVar('dnn.conv.algo_bwd',
AddConfigVar('dnn.conv.algo_fwd',
"Default implementation to use for CuDNN forward convolution.",
EnumStr('small', 'none', 'large', 'fft', 'fft_tiling',
'guess_once', 'guess_on_shape_change',
'time_once', 'time_on_shape_change'),
EnumStr(*SUPPORTED_DNN_CONV_ALGO_FWD),
in_c_key=False)
AddConfigVar('dnn.conv.algo_bwd_data',
"Default implementation to use for CuDNN backward convolution to "
"get the gradients of the convolution with regard to the inputs.",
EnumStr('none', 'deterministic', 'fft', 'fft_tiling',
'guess_once', 'guess_on_shape_change', 'time_once',
'time_on_shape_change'),
EnumStr(*SUPPORTED_DNN_CONV_ALGO_BWD_DATA),
in_c_key=False)
AddConfigVar('dnn.conv.algo_bwd_filter',
"Default implementation to use for CuDNN backward convolution to "
"get the gradients of the convolution with regard to the "
"filters.",
EnumStr('none', 'deterministic', 'fft', 'small', 'guess_once',
'guess_on_shape_change', 'time_once',
'time_on_shape_change'),
EnumStr(*SUPPORTED_DNN_CONV_ALGO_BWD_FILTER),
in_c_key=False)
AddConfigVar('dnn.conv.precision',
......
......@@ -240,7 +240,7 @@ class GpuDnnConvDesc(GpuOp):
}
}
err = cudnnSetConvolutionNdDescriptor_v3(
err = cudnnSetConvolutionNdDescriptor(
%(desc)s,
%(nb_dim)d,
pad, subsample, upscale,
......@@ -294,10 +294,8 @@ class GpuDnnConv(DnnBase, COp):
The convolution descriptor.
workmem
*deprecated*, use parameter algo instead.
algo
['none', 'small', 'large', 'fft', 'fft_tiling', 'guess_once',
'guess_on_shape_change', 'time_once', 'time_on_shape_change']
algo : {'none', 'small', 'large', 'fft', 'fft_tiling', 'guess_once', 'winograd',
'guess_on_shape_change', 'time_once', 'time_on_shape_change'}
Default is the value of :attr:`config.dnn.conv.algo_fwd`.
"""
......@@ -344,8 +342,13 @@ class GpuDnnConv(DnnBase, COp):
raise RuntimeError("CuDNN tiled-FFT convolution requires "
"CuDNN v4 or more recent")
if version() < (5000, 5000):
if self.algo == 'winograd':
raise RuntimeError("CuDNN winograd convolution requires "
"CuDNN v5 or more recent")
assert self.algo in ['none', 'small', 'large', 'fft', 'fft_tiling',
'guess_once', 'guess_on_shape_change',
'winograd', 'guess_once', 'guess_on_shape_change',
'time_once', 'time_on_shape_change']
def __setstate__(self, d):
......@@ -383,8 +386,11 @@ class GpuDnnConv(DnnBase, COp):
# need v3
alg = 'CUDNN_CONVOLUTION_FWD_ALGO_FFT'
elif self.algo == 'fft_tiling':
# need v4
# need v4 for conv2d, need v5 for conv3d
alg = 'CUDNN_CONVOLUTION_FWD_ALGO_FFT_TILING'
elif self.algo == 'winograd':
# need v5
alg = 'CUDNN_CONVOLUTION_FWD_ALGO_WINOGRAD'
elif self.algo in ['guess_once', 'guess_on_shape_change']:
# The convolution implementation should be choosen according
# to a heuristic
......@@ -472,15 +478,20 @@ class GpuDnnConv3d(GpuDnnConv):
"""
The forward convolution.
:param image:
:param kernel:
:param descr: the convolution descriptor
:param workmem:
Parameters
----------
image
kernel
descr
The convolution descriptor
workmem
*deprecated*, use parameter algo instead.
:param algo: ['none', 'guess_once', 'guess_on_shape_change', 'time_once', 'time_on_shape_change']
algo : {'none', 'small', 'fft_tiling', 'winograd', 'guess_once',
'guess_on_shape_change', 'time_once', 'time_on_shape_change'}
Default is the value of :attr:`config.dnn.conv.algo_fwd`.
"""
__props__ = ('algo', 'inplace')
__input_name__ = ('image', 'kernel', 'output',
'descriptor', 'alpha', 'beta')
......@@ -492,9 +503,24 @@ class GpuDnnConv3d(GpuDnnConv):
assert algo is None
algo = workmem
super(GpuDnnConv3d, self).__init__(inplace=inplace, algo='none')
assert self.algo in ['none', 'guess_once', 'guess_on_shape_change',
'time_once', 'time_on_shape_change']
good_algo = ['none', 'small', 'fft_tiling', 'winograd',
'guess_once', 'guess_on_shape_change',
'time_once', 'time_on_shape_change']
if algo is None and config.dnn.conv.algo_fwd not in good_algo:
algo = 'guess_once'
elif algo is not None and algo not in good_algo:
algo = 'guess_once'
super(GpuDnnConv3d, self).__init__(inplace=inplace, algo=algo)
assert self.algo in good_algo
if version() < (5000, 5000):
if self.algo == 'fft_tiling':
raise RuntimeError("CuDNN 3d tiled-FFT convolution requires "
"CuDNN v5 or more recent")
elif self.algo == 'winograd':
raise RuntimeError("CuDNN 3d winograd convolution requires "
"CuDNN v5 or more recent")
def make_node(self, img, kern, output, desc, alpha=None, beta=None):
......@@ -558,7 +584,8 @@ class GpuDnnConvGradW(DnnBase, COp):
The convolution descriptor.
workmem
*deprecated*, use parameter algo instead.
algo : {'none', 'deterministic', 'fft', 'small', 'guess_once', 'guess_on_shape_change', 'time_once', 'time_on_shape_change'}
algo : {'none', 'deterministic', 'fft', 'small', 'guess_once',
'guess_on_shape_change', 'time_once', 'time_on_shape_change'}
Default is the value of :attr:`config.dnn.conv.algo_bwd_filter`.
"""
......@@ -693,15 +720,20 @@ class GpuDnnConv3dGradW(GpuDnnConvGradW):
"""
The convolution gradient with respect to the weights.
:param image:
:param kernel:
:param descr: the convolution descriptor
:param workmem:
Parameters
----------
image
kernel
descr
The convolution descriptor
workmem
*deprecated*, use parameter algo instead.
:param algo: ['none', 'guess_once', 'guess_on_shape_change', 'time_once', 'time_on_shape_change']
algo : {'none', 'small', 'guess_once', 'guess_on_shape_change',
'time_once', 'time_on_shape_change'}
Default is the value of :attr:`config.dnn.conv.algo_bwd_filter`.
"""
__props__ = ('algo', 'inplace',)
__input_name__ = ('image', 'grad', 'output', 'descriptor', 'alpha', 'beta')
......@@ -711,11 +743,18 @@ class GpuDnnConv3dGradW(GpuDnnConvGradW):
"deprecated. Use 'algo' instead."), stacklevel=3)
assert algo is None
algo = workmem
good_algo = ['none', 'small',
'guess_once', 'guess_on_shape_change',
'time_once', 'time_on_shape_change']
if version() < (5000, 5000) and algo == 'small':
algo = 'guess_once'
elif algo is None and config.dnn.conv.algo_bwd_filter not in good_algo:
algo = 'guess_once'
elif algo is not None and algo not in good_algo:
algo = 'guess_once'
super(GpuDnnConv3dGradW, self).__init__(inplace=inplace,
algo='none')
assert self.algo in ['none', 'guess_once', 'guess_on_shape_change',
'time_once', 'time_on_shape_change']
algo=algo)
assert self.algo in good_algo
def grad(self, inp, grads):
img, top, output, desc, alpha, beta = inp
......@@ -766,11 +805,8 @@ class GpuDnnConvGradI(DnnBase, COp):
The convolution descriptor.
workmem
*deprecated*, use parameter algo instead.
algo
['none', 'deterministic', 'fft', 'fft_tiling', 'guess_once',
'guess_on_shape_change', 'time_once',
'time_on_shape_change']
algo : {'none', 'deterministic', 'fft', 'fft_tiling', 'winograd', 'guess_once',
'guess_on_shape_change', 'time_once', 'time_on_shape_change'}
Default is the value of :attr:`config.dnn.conv.algo_bwd_data`.
"""
......@@ -804,8 +840,13 @@ class GpuDnnConvGradI(DnnBase, COp):
raise RuntimeError("CuDNN's tiled-FFT convolution requires "
"CuDNN v4 or more recent")
if version() < (5000, 5000):
if self.algo == 'winograd':
raise RuntimeError("CuDNN's winograd convolution requires "
"CuDNN v5 or more recent")
assert self.algo in ['none', 'deterministic', 'fft', 'fft_tiling',
'guess_once', 'guess_on_shape_change',
'winograd', 'guess_once', 'guess_on_shape_change',
'time_once', 'time_on_shape_change']
def __setstate__(self, d):
......@@ -859,7 +900,11 @@ class GpuDnnConvGradI(DnnBase, COp):
alg = 'CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT'
elif self.algo == 'fft_tiling':
# need v4, big workspace, but less then fft
# need v5, for conv3d.
alg = 'CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT_TILING'
elif self.algo == 'winograd':
# need v5
alg = 'CUDNN_CONVOLUTION_BWD_DATA_ALGO_WINOGRAD'
elif self.algo in ['guess_once', 'guess_on_shape_change']:
# The convolution implementation should be chosen according
# to a heuristic
......@@ -913,17 +958,20 @@ class GpuDnnConv3dGradI(GpuDnnConvGradI):
"""
The convolution gradient with respect to the inputs.
:param image:
:param kernel:
:param descr: the convolution descriptor
:param workmem:
Parameters
----------
image
kernel
descr
The convolution descriptor
workmem
*deprecated*, use parameter algo instead.
:param algo: ['none', 'guess_once', 'guess_on_shape_change',
'time_once', 'time_on_shape_change']
algo : {'none', 'deterministic, 'fft_tiling', 'winograd', 'guess_once',
'guess_on_shape_change', 'time_once', 'time_on_shape_change'}
Default is the value of :attr:`config.dnn.conv.algo_bwd_data`.
"""
__props__ = ('algo', 'inplace',)
__input_name__ = ('kernel', 'grad', 'output', 'descriptor', 'alpha',
'beta')
......@@ -935,10 +983,24 @@ class GpuDnnConv3dGradI(GpuDnnConvGradI):
assert algo is None
algo = workmem
good_algo = ['none', 'deterministic', 'fft_tiling', 'winograd',
'guess_once', 'guess_on_shape_change', 'time_once',
'time_on_shape_change']
if algo is None and config.dnn.conv.algo_bwd_data not in good_algo:
algo = 'guess_once'
elif algo is not None and algo not in good_algo:
algo = 'guess_once'
super(GpuDnnConv3dGradI, self).__init__(inplace=inplace,
algo="none")
assert self.algo in ['none', 'guess_once', 'guess_on_shape_change',
'time_once', 'time_on_shape_change']
algo=algo)
assert self.algo in good_algo
if version() < (5000, 5000):
if self.algo == 'fft_tiling':
raise RuntimeError("CuDNN 3d tiled-FFT convolution requires "
"CuDNN v5 or more recent")
elif self.algo == 'winograd':
raise RuntimeError("CuDNN 3d winograd convolution requires "
"CuDNN v5 or more recent")
def grad(self, inp, grads):
kerns, top, output, desc, alpha, beta = inp
......@@ -1100,7 +1162,7 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1),
def dnn_conv3d(img, kerns, border_mode='valid', subsample=(1, 1, 1),
conv_mode='conv', direction_hint=None, workmem=None,
algo='none', precision=None):
algo=None, precision=None):
"""
GPU convolution using cuDNN from NVIDIA.
......@@ -1360,8 +1422,9 @@ class GpuDnnPoolDesc(GpuOp):
int win[%(nd)d] = {%(win)s};
int pad[%(nd)d] = {%(pad)s};
int str[%(nd)d] = {%(str)s};
err = cudnnSetPoolingNdDescriptor(
%(desc)s, %(mode_flag)s, %(nd)d,
err = cudnnSetPoolingNdDescriptor_v4(
%(desc)s, %(mode_flag)s,
CUDNN_PROPAGATE_NAN, %(nd)d,
win, pad, str);
}
if (err != CUDNN_STATUS_SUCCESS) {
......@@ -1397,6 +1460,7 @@ class GpuDnnPool(DnnBase):
(padX, padY) padding information.
padX is the size of the left and right borders,
padY is the size of the top and bottom borders.
"""
__props__ = ("mode",)
......@@ -1542,8 +1606,9 @@ for(int i = 0; i < %(nd)d; i++) {
for(int i = 0; i < %(nd)d; i++) {
str[i] = *((npy_intp*)PyArray_GETPTR1(%(str)s, i));
}
err = cudnnSetPoolingNdDescriptor(
pool%(name)s, %(mode_flag)s, %(nd)d,
err = cudnnSetPoolingNdDescriptor_v4(
pool%(name)s, %(mode_flag)s,
CUDNN_PROPAGATE_NAN, %(nd)d,
win, pad, str);
if (err != CUDNN_STATUS_SUCCESS) {
......@@ -1820,8 +1885,9 @@ for(int i = 0; i < %(nd)d; i++) {
for(int i = 0; i < %(nd)d; i++) {
str[i] = *((npy_intp*)PyArray_GETPTR1(%(str)s, i));
}
err%(name)s = cudnnSetPoolingNdDescriptor(
pool%(name)s, %(mode_flag)s, %(nd)d,
err%(name)s = cudnnSetPoolingNdDescriptor_v4(
pool%(name)s, %(mode_flag)s,
CUDNN_PROPAGATE_NAN, %(nd)d,
win, pad, str);
if (err%(name)s != CUDNN_STATUS_SUCCESS) {
......@@ -1914,14 +1980,12 @@ class GpuDnnSoftmaxBase(DnnBase):
----------
tensor_format
Always set this to 'bc01'.
algo
'fast', 'accurate' or 'log' indicating whether, respectively, computations
should be optimized for speed, for accuracy, or if CuDNN should rather
compute the log-softmax instead.
mode
'instance' or 'channel' indicating whether the softmax should
be computed per image across 'c01' or per spatial location '01' per
image across 'c'.
algo : {'fast', 'accurate', 'log'}
Indicating whether, respectively, computations should be optimized for
speed, for accuracy, or if CuDNN should rather compute the log-softmax instead.
mode : {'instance', 'channel'}
Indicating whether the softmax should be computed per image across 'c01'
or per spatial location '01' per image across 'c'.
"""
......@@ -2077,13 +2141,12 @@ class GpuDnnSoftmax(GpuDnnSoftmaxBase):
----------
tensor_format
Always set to 'bc01'.
algo
'fast' or 'accurate' indicating whether computations should be
algo : {'fast', 'accurate'}
Indicating whether computations should be
optimized for speed or accuracy respectively.
mode
'instance' or 'channel' indicating whether the softmax should
be computed per image across 'c01' or per spatial location '01' per
image across 'c'.
mode : {'instance', 'channel'}
Indicating whether the softmax should be computed per image across 'c01'
or per spatial location '01' per image across 'c'.
"""
......@@ -2145,13 +2208,12 @@ class GpuDnnSoftmaxGrad(GpuDnnSoftmaxBase):
----------
tensor_format
Always set to 'bc01'.
algo
'fast' or 'accurate' indicating whether computations should be
algo : {'fast', 'accurate'}
Indicating whether computations should be
optimized for speed or accuracy respectively.
mode
'instance' or 'channel' indicating whether the softmax should
be computed per image across 'c01' or per spatial location '01' per
image across 'c'.
mode : {'instance', 'channel'}
Indicating whether the softmax should be computed per image across 'c01'
or per spatial location '01' per image across 'c'.
"""
......
......@@ -54,8 +54,11 @@ c_set_filterNd(CudaNdarray *var, cudnnFilterDescriptor_t desc) {
return -1;
}
int dim = CudaNdarray_NDIM(var);
cudnnStatus_t err = cudnnSetFilterNdDescriptor(desc, CUDNN_DATA_FLOAT, dim,
CudaNdarray_HOST_DIMS(var));
cudnnStatus_t err = cudnnSetFilterNdDescriptor_v4(desc,
CUDNN_DATA_FLOAT,
CUDNN_TENSOR_NCHW,
dim,
CudaNdarray_HOST_DIMS(var));
if (err != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError,
"Could not set filter descriptor: %s."
......
......@@ -179,8 +179,8 @@ APPLY_SPECIFIC(conv_fwd)(CudaNdarray *input, CudaNdarray *kerns,
int upscale[2];
cudnnConvolutionMode_t mode;
cudnnDataType_t data_type;
err = cudnnGetConvolutionNdDescriptor_v3(desc, 2, &nd, pad, stride,
upscale, &mode, &data_type);
err = cudnnGetConvolutionNdDescriptor(desc, 2, &nd, pad, stride,
upscale, &mode, &data_type);
if (err != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError,
......@@ -224,6 +224,19 @@ APPLY_SPECIFIC(conv_fwd)(CudaNdarray *input, CudaNdarray *kerns,
APPLY_SPECIFIC(output),
chosen_algo,
&worksize);
if (err == CUDNN_STATUS_NOT_SUPPORTED) {
// Fallback to none algo if not supported
// TODO: Print a warning
chosen_algo = CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM;
err = cudnnGetConvolutionForwardWorkspaceSize(_handle,
APPLY_SPECIFIC(input),
APPLY_SPECIFIC(kerns),
desc,
APPLY_SPECIFIC(output),
chosen_algo,
&worksize);
}
if (err != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError,
"GpuDnnConv: error getting worksize: %s",
......
......@@ -178,8 +178,8 @@ APPLY_SPECIFIC(conv_gi)(CudaNdarray *kerns, CudaNdarray *output,
int upscale[2];
cudnnConvolutionMode_t mode;
cudnnDataType_t data_type;
err = cudnnGetConvolutionNdDescriptor_v3(desc, 2, &nd, pad, stride,
upscale, &mode, &data_type);
err = cudnnGetConvolutionNdDescriptor(desc, 2, &nd, pad, stride,
upscale, &mode, &data_type);
if (err != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError,
......@@ -237,7 +237,7 @@ APPLY_SPECIFIC(conv_gi)(CudaNdarray *kerns, CudaNdarray *output,
return 1;
// Perform the convolution
err = cudnnConvolutionBackwardData_v3(
err = cudnnConvolutionBackwardData(
_handle,
(void *)&alpha,
APPLY_SPECIFIC(kerns), CudaNdarray_DEV_DATA(kerns),
......
......@@ -173,8 +173,8 @@ APPLY_SPECIFIC(conv_gw)(CudaNdarray *input, CudaNdarray *output,
int upscale[2];
cudnnConvolutionMode_t mode;
cudnnDataType_t data_type;
err = cudnnGetConvolutionNdDescriptor_v3(desc, 2, &nd, pad, stride,
upscale, &mode, &data_type);
err = cudnnGetConvolutionNdDescriptor(desc, 2, &nd, pad, stride,
upscale, &mode, &data_type);
if (err != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError,
......@@ -221,7 +221,7 @@ APPLY_SPECIFIC(conv_gw)(CudaNdarray *input, CudaNdarray *output,
return 1;
// Perform the convolution
err = cudnnConvolutionBackwardFilter_v3(
err = cudnnConvolutionBackwardFilter(
_handle,
(void *)&alpha,
APPLY_SPECIFIC(input), CudaNdarray_DEV_DATA(input),
......
......@@ -392,8 +392,9 @@ def test_pooling_with_tensor_vars():
def test_old_pool_interface():
if not cuda.dnn.dnn_available():
if not cuda.dnn.dnn_available() or cuda.dnn.version() > (5000, 5000):
raise SkipTest(cuda.dnn.dnn_available.msg)
testfile_dir = os.path.dirname(os.path.realpath(__file__))
fname = 'old_pool_interface.pkl'
with open(os.path.join(testfile_dir, fname), 'rb') as fp:
......
......@@ -35,7 +35,7 @@ int APPLY_SPECIFIC(conv_desc)(PyArrayObject *filt_shp,
return -1;
}
err = cudnnSetConvolutionNdDescriptor_v3(*desc, NB_DIMS, pad, strides,
upscale, CONV_MODE, PRECISION);
err = cudnnSetConvolutionNdDescriptor(*desc, NB_DIMS, pad, strides,
upscale, CONV_MODE, PRECISION);
return 0;
}
......@@ -33,6 +33,8 @@ from .nnet import GpuSoftmax
from .opt import gpu_seqopt, register_opt, conv_groupopt, op_lifter
from .opt_util import alpha_merge, output_merge, inplace_allocempty
from theano.configdefaults import SUPPORTED_DNN_CONV_ALGO_BWD_FILTER
def raise_no_cudnn(msg="CuDNN is required for convolution and pooling"):
raise RuntimeError(msg)
......@@ -232,6 +234,7 @@ def version(raises=True):
:raises: If True, raise an exception if CuDNN is not present or badly installed.
Otherwise, return -1.
"""
if not dnn_present():
if raises:
......@@ -397,9 +400,9 @@ class GpuDnnConv(DnnBase):
----------
image
kernel
descr
descr :
The convolution descriptor.
algo : {'small', 'none', 'large', 'fft', 'fft_tiling', 'guess_once',
algo : {'small', 'none', 'large', 'fft', 'fft_tiling', 'winograd', 'guess_once',
'guess_on_shape_change', 'time_once', 'time_on_shape_change'}
Default is the value of :attr:`config.dnn.conv.algo_fwd`.
......@@ -435,8 +438,12 @@ class GpuDnnConv(DnnBase):
raise RuntimeError("CuDNN tiled-FFT convolution requires "
"CuDNN v4 or more recent")
if version() < 5000 and self.algo == 'winograd':
raise RuntimeError("CuDNN winograd convolution requires "
"CuDNN v5 or more recent")
assert self.algo in ['none', 'small', 'large', 'fft', 'fft_tiling',
'guess_once', 'guess_on_shape_change',
'winograd', 'guess_once', 'guess_on_shape_change',
'time_once', 'time_on_shape_change']
def __setstate__(self, d):
......@@ -468,6 +475,9 @@ class GpuDnnConv(DnnBase):
elif self.algo == 'fft_tiling':
# need v4
alg = 'CUDNN_CONVOLUTION_FWD_ALGO_FFT_TILING'
elif self.algo == 'winograd':
# need v5
alg = 'CUDNN_CONVOLUTION_FWD_ALGO_WINOGRAD'
defs.append(('CONV_ALGO', alg))
if self.algo in ['guess_once', 'guess_on_shape_change',
......@@ -565,8 +575,11 @@ class GpuDnnConvGradW(DnnBase):
----------
image
kernel
descr
descr :
The convolution descriptor.
algo : {'none', 'deterministic', 'fft', 'small', 'guess_once',
'guess_on_shape_change', 'time_once', 'time_on_shape_change'}
Default is the value of :attr:`config.dnn.conv.algo_bwd_filter`.
"""
......@@ -582,9 +595,7 @@ class GpuDnnConvGradW(DnnBase):
algo = config.dnn.conv.algo_bwd_filter
self.algo = algo
assert self.algo in ['none', 'deterministic', 'fft', 'small',
'guess_once', 'guess_on_shape_change',
'time_once', 'time_on_shape_change']
assert self.algo in SUPPORTED_DNN_CONV_ALGO_BWD_FILTER
def __setstate__(self, d):
self.__dict__.update(d)
......@@ -688,6 +699,9 @@ class GpuDnnConvGradI(DnnBase):
kernel
descr
The convolution descriptor.
algo : {'none', 'deterministic', 'fft', 'fft_tiling', 'winograd', 'guess_once',
'guess_on_shape_change', 'time_once', 'time_on_shape_change'}
Default is the value of :attr:`config.dnn.conv.algo_bwd_data`.
"""
......@@ -708,9 +722,12 @@ class GpuDnnConvGradI(DnnBase):
if version() < 4000 and self.algo == 'fft_tiling':
raise RuntimeError("CuDNN's tiled-FFT convolution requires CuDNN "
"v4 or more recent")
if version() < 5000 and self.algo == 'winograd':
raise RuntimeError("CuDNN's winograd convolution requires CuDNN "
"v5 or more recent")
assert self.algo in ['none', 'deterministic', 'fft', 'fft_tiling',
'guess_once', 'guess_on_shape_change',
'winograd', 'guess_once', 'guess_on_shape_change',
'time_once', 'time_on_shape_change']
def __setstate__(self, d):
......@@ -749,13 +766,16 @@ class GpuDnnConvGradI(DnnBase):
alg = 'CUDNN_CONVOLUTION_BWD_DATA_ALGO_0'
if self.algo == 'none':
alg = 'CUDNN_CONVOLUTION_BWD_DATA_ALGO_0'
if self.algo == 'deterministic':
elif self.algo == 'deterministic':
alg = 'CUDNN_CONVOLUTION_BWD_DATA_ALGO_1'
if self.algo == 'fft':
elif self.algo == 'fft':
alg = 'CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT'
if self.algo == 'fft_tiling':
elif self.algo == 'fft_tiling':
# big workspace but less than fft
alg = 'CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT_TILING'
elif self.algo == 'winograd':
# need v5
alg = 'CUDNN_CONVOLUTION_BWD_DATA_ALGO_WINOGRAD'
if self.algo in ['guess_once', 'guess_on_shape_change',
'time_once', 'time_on_shape_change']:
......@@ -1047,9 +1067,13 @@ class GpuDnnPoolDesc(Op):
static const int win[%(nd)d] = {%(win)s};
static const int pad[%(nd)d] = {%(pad)s};
static const int str[%(nd)d] = {%(str)s};
err = cudnnSetPoolingNdDescriptor(
%(desc)s, %(mode_flag)s, %(nd)d,
win, pad, str);
#if CUDNN_VERSION >= 5000
err = cudnnSetPoolingNdDescriptor(%(desc)s, %(mode_flag)s, CUDNN_PROPAGATE_NAN, %(nd)d, win, pad, str);
#else
err = cudnnSetPoolingNdDescriptor(%(desc)s, %(mode_flag)s, %(nd)d, win, pad, str);
#endif
if (err != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError, "could not set op descriptor: %%s",
cudnnGetErrorString(err));
......@@ -1062,7 +1086,7 @@ class GpuDnnPoolDesc(Op):
str=', '.join(map(str, self.stride)))
def c_code_cache_version(self):
return (3, version())
return (4, version())
class GpuDnnPool(DnnBase):
......@@ -1070,18 +1094,17 @@ class GpuDnnPool(DnnBase):
"""
Parameters
----------
img
img : tensor
The image 4d or 5d tensor.
Parameters
----------
ws : tensor variable
ws : tensor
Window size.
stride : tensor variable
stride : tensor
(dx, dy) or (dx, dy, dz).
mode : {'max', 'average_inc_pad', 'average_exc_pad'}
The old deprecated name 'average' corresponds to 'average_inc_pad'.
pad : tensor
(padX, padY) or (padX, padY, padZ)
"""
__props__ = ('mode',)
......@@ -1255,14 +1278,12 @@ class GpuDnnSoftmaxBase(DnnBase):
Parameters
----------
algo
'fast', 'accurate' or 'log' indicating whether, respectively,
computations should be optimized for speed, for accuracy, or if CuDNN
should rather compute the log-softmax instead.
mode
'instance' or 'channel' indicating whether the softmax should be
computed per image across 'c01' or per spatial location '01' per
image across 'c'.
algo : {'fast', 'accurate', 'log'}
Indicating whether, respectively, computations should be optimized for
speed, for accuracy, or if CuDNN should rather compute the log-softmax instead.
mode : {'instance', 'channel'}
Indicating whether the softmax should be computed per image across 'c01'
or per spatial location '01' per image across 'c'.
"""
......@@ -1306,14 +1327,12 @@ class GpuDnnSoftmax(GpuDnnSoftmaxBase):
"""
Op for the cuDNN Softmax.
algo
'fast', 'accurate' or 'log' indicating whether, respectively,
computations should be optimized for speed, for accuracy, or if CuDNN
should rather compute the log-softmax instead.
mode
'instance' or 'channel' indicating whether the softmax should be
computed per image across 'c01' or per spatial location '01' per
image across 'c'.
algo : {'fast', 'accurate', 'log'}
Indicating whether, respectively, computations should be optimized for
speed, for accuracy, or if CuDNN should rather compute the log-softmax instead.
mode : {'instance', 'channel'}
Indicating whether the softmax should be computed per image across 'c01'
or per spatial location '01' per image across 'c'.
"""
direction = "forward"
......
......@@ -51,6 +51,8 @@ c_set_tensorNd(PyGpuArrayObject *var, cudnnTensorDescriptor_t desc) {
static int
c_set_filter(PyGpuArrayObject *var, cudnnFilterDescriptor_t desc) {
cudnnDataType_t dt;
cudnnStatus_t err;
if (!GpuArray_IS_C_CONTIGUOUS(&var->ga)) {
PyErr_SetString(PyExc_ValueError,
"Only contiguous filters (kernels) are supported.");
......@@ -86,7 +88,12 @@ c_set_filter(PyGpuArrayObject *var, cudnnFilterDescriptor_t desc) {
dims[i] = PyGpuArray_DIM(var, i);
}
cudnnStatus_t err = cudnnSetFilterNdDescriptor(desc, dt, nd, dims);
#if CUDNN_VERSION >= 5000
err = cudnnSetFilterNdDescriptor(desc, dt, CUDNN_TENSOR_NCHW, nd, dims);
#else
err = cudnnSetFilterNdDescriptor(desc, dt, nd, dims);
#endif
if (err != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError,
"Could not set filter descriptor: %s.",
......
......@@ -92,12 +92,12 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns,
}
algo = choice.algo;
#else
size_t free = 0, total = 0;
cudaError_t err2 = cudaMemGetInfo(&free, &total);
if (err2 != cudaSuccess) {
size_t free;
int err2 = c->ops->property(c->ctx, NULL, NULL, GA_CTX_PROP_FREE_GMEM, &free);
if (err2 != GA_NO_ERROR) {
PyErr_Format(PyExc_RuntimeError, "Error when trying to find the "
"memory information on the GPU: %s\n",
cudaGetErrorString(err2));
"memory information on the GPU");
cuda_exit(c->ctx);
return 1;
}
......@@ -154,7 +154,7 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns,
int upscale[2];
cudnnConvolutionMode_t mode;
cudnnDataType_t data_type;
err = cudnnGetConvolutionNdDescriptor_v3(desc, 2, &nd, pad, stride,
err = cudnnGetConvolutionNdDescriptor(desc, 2, &nd, pad, stride,
upscale, &mode, &data_type);
if (err != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError,
......@@ -193,6 +193,21 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns,
APPLY_SPECIFIC(output),
algo,
&worksize);
if (err == CUDNN_STATUS_NOT_SUPPORTED) {
// Fallback to none algo if not supported
// TODO: Print a warning
algo = CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM;
err = cudnnGetConvolutionForwardWorkspaceSize(APPLY_SPECIFIC(_handle),
APPLY_SPECIFIC(input),
APPLY_SPECIFIC(kerns),
desc,
APPLY_SPECIFIC(output),
algo,
&worksize);
}
if (err != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError,
"error getting worksize: %s",
......
......@@ -91,12 +91,12 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output,
algo = choice.algo;
#else
size_t free = 0, total = 0;
cudaError_t err2 = cudaMemGetInfo(&free, &total);
if (err2 != cudaSuccess){
cudaGetLastError();
PyErr_Format(PyExc_RuntimeError, "Error when trying to find the memory "
"information on the GPU: %s\n", cudaGetErrorString(err2));
size_t free;
int err2 = c->ops->property(c->ctx, NULL, NULL, GA_CTX_PROP_FREE_GMEM, &free);
if (err2 != GA_NO_ERROR) {
PyErr_Format(PyExc_RuntimeError, "Error when trying to find the "
"memory information on the GPU");
cuda_exit(c->ctx);
return 1;
}
......@@ -146,7 +146,7 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output,
int upscale[2];
cudnnConvolutionMode_t mode;
cudnnDataType_t data_type;
err = cudnnGetConvolutionNdDescriptor_v3(desc, 2, &nd, pad, stride,
err = cudnnGetConvolutionNdDescriptor(desc, 2, &nd, pad, stride,
upscale, &mode, &data_type);
if (err != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError,
......@@ -203,7 +203,7 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output,
cuda_wait(output->ga.data, GPUARRAY_CUDA_WAIT_READ);
cuda_wait((*input)->ga.data, GPUARRAY_CUDA_WAIT_WRITE);
err = cudnnConvolutionBackwardData_v3(
err = cudnnConvolutionBackwardData(
APPLY_SPECIFIC(_handle),
alpha_p,
APPLY_SPECIFIC(kerns), PyGpuArray_DEV_DATA(kerns),
......
......@@ -92,12 +92,12 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output,
algo = choice.algo;
#else
size_t free = 0, total = 0;
cudaError_t err2 = cudaMemGetInfo(&free, &total);
if (err2 != cudaSuccess){
cudaGetLastError();
PyErr_Format(PyExc_RuntimeError, "Error when trying to find the memory "
"information on the GPU: %s\n", cudaGetErrorString(err2));
size_t free;
int err2 = c->ops->property(c->ctx, NULL, NULL, GA_CTX_PROP_FREE_GMEM, &free);
if (err2 != GA_NO_ERROR) {
PyErr_Format(PyExc_RuntimeError, "Error when trying to find the "
"memory information on the GPU");
cuda_exit(c->ctx);
return 1;
}
......@@ -146,7 +146,7 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output,
int upscale[2];
cudnnConvolutionMode_t mode;
cudnnDataType_t data_type;
err = cudnnGetConvolutionNdDescriptor_v3(desc, 2, &nd, pad, stride,
err = cudnnGetConvolutionNdDescriptor(desc, 2, &nd, pad, stride,
upscale, &mode, &data_type);
if (err != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError,
......@@ -190,7 +190,7 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output,
cuda_wait(output->ga.data, GPUARRAY_CUDA_WAIT_READ);
cuda_wait((*kerns)->ga.data, GPUARRAY_CUDA_WAIT_WRITE);
err = cudnnConvolutionBackwardFilter_v3(
err = cudnnConvolutionBackwardFilter(
APPLY_SPECIFIC(_handle),
alpha_p,
APPLY_SPECIFIC(input), PyGpuArray_DEV_DATA(input),
......
......@@ -24,7 +24,7 @@ if ((APPLY_SPECIFIC(err) = cudnnCreateTensorDescriptor(&APPLY_SPECIFIC(output)))
}
if ((APPLY_SPECIFIC(err) = cudnnCreatePoolingDescriptor(&APPLY_SPECIFIC(pool))) != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_MemoryError, "could not allocate pooling descriptor"
"(pool): %s", cudnnGetErrorString(APPLY_SPECIFIC(err)));
"(pool): %s", cudnnGetErrorString(APPLY_SPECIFIC(err)));
FAIL;
}
......@@ -38,7 +38,7 @@ if (APPLY_SPECIFIC(pool) != NULL) { cudnnDestroyPoolingDescriptor(APPLY_SPECIFIC
#section support_code_struct
int APPLY_SPECIFIC(dnn_pool)(PyGpuArrayObject *img,
PyArrayObject *ws,
PyArrayObject *ws,
PyArrayObject *stride,
PyArrayObject *pad,
PyGpuArrayObject **out,
......@@ -69,7 +69,12 @@ int APPLY_SPECIFIC(dnn_pool)(PyGpuArrayObject *img,
for(int i = 0; i < ndims; i++) {
s[i] = *((npy_intp*)PyArray_GETPTR1(stride, i));
}
#if CUDNN_VERSION >= 5000
err = cudnnSetPoolingNdDescriptor(APPLY_SPECIFIC(pool), MODE_FLAG, CUDNN_PROPAGATE_NAN, ndims, w, p, s);
#else
err = cudnnSetPoolingNdDescriptor(APPLY_SPECIFIC(pool), MODE_FLAG, ndims, w, p, s);
#endif
if (err != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError, "could not set op descriptor %s", cudnnGetErrorString(err));
......
......@@ -42,7 +42,7 @@ APPLY_SPECIFIC(pool) = NULL;
}
if ((err = cudnnCreatePoolingDescriptor(&APPLY_SPECIFIC(pool))) != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_MemoryError, "could not allocate pooling descriptor"
"(pool): %s", cudnnGetErrorString(err));
"(pool): %s", cudnnGetErrorString(err));
FAIL;
}
}
......@@ -60,7 +60,7 @@ if (APPLY_SPECIFIC(pool) != NULL) { cudnnDestroyPoolingDescriptor(APPLY_SPECIFIC
int APPLY_SPECIFIC(dnn_pool_grad)(PyGpuArrayObject *inp,
PyGpuArrayObject *out,
PyGpuArrayObject *out_grad,
PyArrayObject *ws,
PyArrayObject *ws,
PyArrayObject *stride,
PyArrayObject *pad,
PyGpuArrayObject **inp_grad,
......@@ -109,7 +109,12 @@ int APPLY_SPECIFIC(dnn_pool_grad)(PyGpuArrayObject *inp,
for(int i = 0; i < ndims; i++) {
s[i] = *((npy_intp*)PyArray_GETPTR1(stride, i));
}
#if CUDNN_VERSION >= 5000
err = cudnnSetPoolingNdDescriptor(APPLY_SPECIFIC(pool), MODE_FLAG, CUDNN_PROPAGATE_NAN, ndims, w, p, s);
#else
err = cudnnSetPoolingNdDescriptor(APPLY_SPECIFIC(pool), MODE_FLAG, ndims, w, p, s);
#endif
if (err != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError, "could not set op descriptor %s", cudnnGetErrorString(err));
......
import logging
from nose.plugins.skip import SkipTest
from nose_parameterized import parameterized
import numpy
from itertools import product
from itertools import product, chain
import theano
from six import StringIO
......@@ -18,6 +19,8 @@ from ..basic_ops import GpuAllocEmpty
from .config import mode_with_gpu, mode_without_gpu, test_ctx_name
from . import test_nnet
from theano.configdefaults import SUPPORTED_DNN_CONV_ALGO_FWD
def test_dnn_conv_desc_merge():
if not dnn.dnn_available(test_ctx_name):
......@@ -393,6 +396,9 @@ def test_dnn_tag():
class TestDnnInferShapes(utt.InferShapeTester):
border_modes = ['valid', 'full', 'half']
conv_modes = ['conv', 'cross']
def setUp(self):
super(TestDnnInferShapes, self).setUp()
self.mode = mode_with_gpu
......@@ -427,37 +433,25 @@ class TestDnnInferShapes(utt.InferShapeTester):
dnn.GpuDnnSoftmaxGrad
)
def test_conv(self):
def _test_conv(self, img, kerns, out, img_val, kern_vals, border_mode, conv_mode, subsamples, algo):
if not dnn.dnn_available(test_ctx_name):
raise SkipTest(dnn.dnn_available.msg)
img = T.ftensor4('img')
kerns = T.ftensor4('kerns')
out = T.ftensor4('out')
img_val = numpy.asarray(
numpy.random.rand(7, 2, 6, 4),
dtype='float32'
)
kern_vals = numpy.asarray(
numpy.random.rand(8, 2, 4, 3),
dtype='float32'
)
for params in product(
['valid', 'full', 'half'],
[(1, 1), (2, 2)],
['conv', 'cross']
):
img_val = numpy.asarray(img_val, dtype='float32')
kern_vals = numpy.asarray(kern_vals, dtype='float32')
for subsample in subsamples:
out_vals = numpy.zeros(
dnn.GpuDnnConv.get_out_shape(img_val.shape, kern_vals.shape,
border_mode=params[0],
subsample=params[1]),
border_mode=border_mode,
subsample=subsample),
dtype='float32')
desc = dnn.GpuDnnConvDesc(
border_mode=params[0],
subsample=params[1],
conv_mode=params[2]
border_mode=border_mode,
subsample=subsample,
conv_mode=conv_mode
)(kerns.shape)
conv = dnn.GpuDnnConv()(img, kerns, out, desc)
conv = dnn.GpuDnnConv(algo=algo)(img, kerns, out, desc)
self._compile_and_check(
[img, kerns, out],
[conv],
......@@ -465,54 +459,92 @@ class TestDnnInferShapes(utt.InferShapeTester):
dnn.GpuDnnConv
)
def test_conv_gradw(self):
@parameterized.expand(chain(product([SUPPORTED_DNN_CONV_ALGO_FWD[0]],
border_modes,
conv_modes),
product(SUPPORTED_DNN_CONV_ALGO_FWD[1:],
[border_modes[0]],
[conv_modes[0]])),
testcase_func_name=utt.custom_name_func)
def test_conv(self, algo, border_mode, conv_mode):
if algo == 'winograd' and dnn.version() < 5000:
raise SkipTest(dnn.dnn_available.msg)
self._test_conv(T.ftensor4('img'),
T.ftensor4('kerns'),
T.ftensor4('out'),
numpy.random.rand(7, 2, 8, 4),
numpy.random.rand(8, 2, 4, 3),
border_mode,
conv_mode,
[(1, 1), (2, 2)],
algo)
@parameterized.expand(product(border_modes, conv_modes), utt.custom_name_func)
def test_conv3d_none(self, border_mode, conv_mode):
ftensor5 = T.TensorType(dtype="float32", broadcastable=(False,) * 5)
self._test_conv(ftensor5('img'),
ftensor5('kerns'),
ftensor5('out'),
numpy.random.rand(10, 2, 6, 4, 11),
numpy.random.rand(8, 2, 4, 3, 1),
border_mode,
conv_mode,
[(1, 1, 1), (2, 2, 2)],
'none')
def _test_conv_gradw(self, img, kerns, out, img_val, kern_vals, border_mode, conv_mode, subsample):
if not dnn.dnn_available(test_ctx_name):
raise SkipTest(dnn.dnn_available.msg)
img = T.ftensor4('img')
kerns = T.ftensor4('kerns')
out = T.ftensor4('out')
img_val = numpy.asarray(
numpy.random.rand(2, 5, 6, 8),
img_val,
dtype='float32'
)
kern_vals = numpy.asarray(
numpy.random.rand(2, 1, 5, 6),
kern_vals,
dtype='float32'
)
for params in product(
['valid', 'full', 'half'],
[(1, 1)], # strides besides (1, 1)
['conv', 'cross']
):
temp_img = img.dimshuffle(1, 0, 2, 3)
temp_kerns = kerns
if params[2] == 'conv':
temp_kerns = temp_kerns[:, :, ::-1, ::-1]
temp_kerns = temp_kerns.dimshuffle(1, 0, 2, 3)
shape = (
kern_vals.shape[1], img_val.shape[1],
img_val.shape[2] - kern_vals.shape[2] + 1,
img_val.shape[3] - kern_vals.shape[3] + 1
)
out_vals = numpy.zeros(shape, dtype='float32')
desc = dnn.GpuDnnConvDesc(
border_mode=params[0],
subsample=params[1],
conv_mode=params[2]
)(out.shape)
conv_grad_w = dnn.GpuDnnConvGradW()(
temp_img,
temp_kerns,
out,
desc,
)
self._compile_and_check(
[temp_img, temp_kerns, out],
[conv_grad_w],
[img_val, kern_vals, out_vals],
dnn.GpuDnnConvGradW
)
temp_img = img.dimshuffle(1, 0, 2, 3)
temp_kerns = kerns
if conv_mode == 'conv':
temp_kerns = temp_kerns[:, :, ::-1, ::-1]
temp_kerns = temp_kerns.dimshuffle(1, 0, 2, 3)
shape = (
kern_vals.shape[1], img_val.shape[1],
img_val.shape[2] - kern_vals.shape[2] + 1,
img_val.shape[3] - kern_vals.shape[3] + 1
)
out_vals = numpy.zeros(shape, dtype='float32')
desc = dnn.GpuDnnConvDesc(
border_mode=border_mode,
subsample=subsample,
conv_mode=conv_mode
)(out.shape)
conv_grad_w = dnn.GpuDnnConvGradW()(
temp_img,
temp_kerns,
out,
desc,
)
self._compile_and_check(
[temp_img, temp_kerns, out],
[conv_grad_w],
[img_val, kern_vals, out_vals],
dnn.GpuDnnConvGradW
)
@parameterized.expand(product(border_modes, conv_modes), utt.custom_name_func)
def test_conv_gradw(self, border_mode, conv_mode):
self._test_conv_gradw(T.ftensor4('img'),
T.ftensor4('kerns'),
T.ftensor4('out'),
numpy.random.rand(2, 5, 6, 8),
numpy.random.rand(2, 1, 5, 6),
border_mode,
conv_mode,
(1, 1))
def test_conv_gradi(self):
if not dnn.dnn_available(test_ctx_name):
......
......@@ -4,6 +4,7 @@ from functools import wraps
import logging
import sys
import unittest
from nose_parameterized import parameterized
from six import integer_types
from six.moves import StringIO
......@@ -31,6 +32,13 @@ except ImportError:
_logger = logging.getLogger("theano.tests.unittest_tools")
def custom_name_func(testcase_func, param_num, param):
return "%s_%s" % (
testcase_func.__name__,
parameterized.to_safe_name("_".join(str(x) for x in param.args)),
)
def fetch_seed(pseed=None):
"""
Returns the seed to use for running the unit tests.
......@@ -96,6 +104,7 @@ verify_grad.E_grad = T.verify_grad.E_grad
class TestOptimizationMixin(object):
def assertFunctionContains(self, f, op, min=1, max=sys.maxsize):
toposort = f.maker.fgraph.toposort()
matches = [node for node in toposort if node.op == op]
......@@ -172,6 +181,7 @@ class T_OpContractMixin(object):
class InferShapeTester(unittest.TestCase):
def setUp(self):
seed_rng()
# Take into account any mode that may be defined in a child class
......@@ -311,6 +321,7 @@ def str_diagnostic(expected, value, rtol, atol):
class WrongValue(Exception):
def __init__(self, expected_val, val, rtol, atol):
Exception.__init__(self) # to be compatible with python2.4
self.val1 = expected_val
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论