提交 ef0fc56e authored 作者: Frédéric Bastien's avatar Frédéric Bastien 提交者: Frederic Bastien

Merge pull request #4344 from mgermain/cudnn5

Added cuDNN v5 support & Added optional dependencies to setup.py Conflicts: .travis.yml doc/install.txt
上级 02453383
...@@ -37,6 +37,7 @@ install: ...@@ -37,6 +37,7 @@ install:
- source activate pyenv - source activate pyenv
- if [[ $TRAVIS_PYTHON_VERSION == '2.6' ]]; then pip install pydot; fi - if [[ $TRAVIS_PYTHON_VERSION == '2.6' ]]; then pip install pydot; fi
- pip install . --no-deps - pip install . --no-deps
- pip install nose-parameterized==0.5.0
# command to run tests # command to run tests
env: env:
......
...@@ -49,7 +49,7 @@ instructions below for detailed installation steps): ...@@ -49,7 +49,7 @@ instructions below for detailed installation steps):
The following libraries and software are optional: The following libraries and software are optional:
`nose <http://somethingaboutorange.com/mrl/projects/nose/>`_ >= 1.3.0 `nose <http://nose.readthedocs.org/en/latest/>`_ >= 1.3.0 and `nose-parameterized <https://pypi.python.org/pypi/nose-parameterized/>`_ >= 0.5.0
Recommended, to run Theano's test-suite. Recommended, to run Theano's test-suite.
`Sphinx <http://sphinx.pocoo.org/>`_ >= 0.5.1, `pygments <http://pygments.org/>`_ `Sphinx <http://sphinx.pocoo.org/>`_ >= 0.5.1, `pygments <http://pygments.org/>`_
......
sphinx>=1.3.0 sphinx>=1.3.0
pygments
nose>=1.3.0 nose>=1.3.0
nose-parameterized>=0.5.0
...@@ -163,6 +163,11 @@ def do_setup(): ...@@ -163,6 +163,11 @@ def do_setup():
packages=find_packages(), packages=find_packages(),
# 1.7.0 give too much warning related to numpy.diagonal. # 1.7.0 give too much warning related to numpy.diagonal.
install_requires=['numpy>=1.7.1', 'scipy>=0.11', 'six>=1.9.0'], install_requires=['numpy>=1.7.1', 'scipy>=0.11', 'six>=1.9.0'],
# pygments is a dependency for Sphinx code highlight
extras_require={
'test': ['nose>=1.3.0', 'nose-parameterized>=0.5.0'],
'doc': ['Sphinx>=0.5.1', 'pygments']
},
package_data={ package_data={
'': ['*.txt', '*.rst', '*.cu', '*.cuh', '*.c', '*.sh', '*.pkl', '': ['*.txt', '*.rst', '*.cu', '*.cuh', '*.c', '*.sh', '*.pkl',
'*.h', '*.cpp', 'ChangeLog'], '*.h', '*.cpp', 'ChangeLog'],
......
...@@ -286,6 +286,20 @@ def safe_no_dnn_algo_bwd(algo): ...@@ -286,6 +286,20 @@ def safe_no_dnn_algo_bwd(algo):
'`dnn.conv.algo_bwd_filter` and `dnn.conv.algo_bwd_data` instead.') '`dnn.conv.algo_bwd_filter` and `dnn.conv.algo_bwd_data` instead.')
return True return True
# Those are the supported algorithm by Theano,
# The tests will reference those lists.
SUPPORTED_DNN_CONV_ALGO_FWD = ('small', 'none', 'large', 'fft', 'fft_tiling',
'winograd', 'guess_once', 'guess_on_shape_change',
'time_once', 'time_on_shape_change')
SUPPORTED_DNN_CONV_ALGO_BWD_DATA = ('none', 'deterministic', 'fft', 'fft_tiling',
'winograd', 'guess_once', 'guess_on_shape_change',
'time_once', 'time_on_shape_change')
SUPPORTED_DNN_CONV_ALGO_BWD_FILTER = ('none', 'deterministic', 'fft', 'small',
'guess_once', 'guess_on_shape_change',
'time_once', 'time_on_shape_change')
AddConfigVar('dnn.conv.algo_bwd', AddConfigVar('dnn.conv.algo_bwd',
"This flag is deprecated; use dnn.conv.algo_bwd_data and " "This flag is deprecated; use dnn.conv.algo_bwd_data and "
"dnn.conv.algo_bwd_filter.", "dnn.conv.algo_bwd_filter.",
...@@ -295,26 +309,20 @@ AddConfigVar('dnn.conv.algo_bwd', ...@@ -295,26 +309,20 @@ AddConfigVar('dnn.conv.algo_bwd',
AddConfigVar('dnn.conv.algo_fwd', AddConfigVar('dnn.conv.algo_fwd',
"Default implementation to use for CuDNN forward convolution.", "Default implementation to use for CuDNN forward convolution.",
EnumStr('small', 'none', 'large', 'fft', 'fft_tiling', EnumStr(*SUPPORTED_DNN_CONV_ALGO_FWD),
'guess_once', 'guess_on_shape_change',
'time_once', 'time_on_shape_change'),
in_c_key=False) in_c_key=False)
AddConfigVar('dnn.conv.algo_bwd_data', AddConfigVar('dnn.conv.algo_bwd_data',
"Default implementation to use for CuDNN backward convolution to " "Default implementation to use for CuDNN backward convolution to "
"get the gradients of the convolution with regard to the inputs.", "get the gradients of the convolution with regard to the inputs.",
EnumStr('none', 'deterministic', 'fft', 'fft_tiling', EnumStr(*SUPPORTED_DNN_CONV_ALGO_BWD_DATA),
'guess_once', 'guess_on_shape_change', 'time_once',
'time_on_shape_change'),
in_c_key=False) in_c_key=False)
AddConfigVar('dnn.conv.algo_bwd_filter', AddConfigVar('dnn.conv.algo_bwd_filter',
"Default implementation to use for CuDNN backward convolution to " "Default implementation to use for CuDNN backward convolution to "
"get the gradients of the convolution with regard to the " "get the gradients of the convolution with regard to the "
"filters.", "filters.",
EnumStr('none', 'deterministic', 'fft', 'small', 'guess_once', EnumStr(*SUPPORTED_DNN_CONV_ALGO_BWD_FILTER),
'guess_on_shape_change', 'time_once',
'time_on_shape_change'),
in_c_key=False) in_c_key=False)
AddConfigVar('dnn.conv.precision', AddConfigVar('dnn.conv.precision',
......
差异被折叠。
...@@ -54,8 +54,11 @@ c_set_filterNd(CudaNdarray *var, cudnnFilterDescriptor_t desc) { ...@@ -54,8 +54,11 @@ c_set_filterNd(CudaNdarray *var, cudnnFilterDescriptor_t desc) {
return -1; return -1;
} }
int dim = CudaNdarray_NDIM(var); int dim = CudaNdarray_NDIM(var);
cudnnStatus_t err = cudnnSetFilterNdDescriptor(desc, CUDNN_DATA_FLOAT, dim, cudnnStatus_t err = cudnnSetFilterNdDescriptor_v4(desc,
CudaNdarray_HOST_DIMS(var)); CUDNN_DATA_FLOAT,
CUDNN_TENSOR_NCHW,
dim,
CudaNdarray_HOST_DIMS(var));
if (err != CUDNN_STATUS_SUCCESS) { if (err != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError, PyErr_Format(PyExc_RuntimeError,
"Could not set filter descriptor: %s." "Could not set filter descriptor: %s."
......
...@@ -179,8 +179,8 @@ APPLY_SPECIFIC(conv_fwd)(CudaNdarray *input, CudaNdarray *kerns, ...@@ -179,8 +179,8 @@ APPLY_SPECIFIC(conv_fwd)(CudaNdarray *input, CudaNdarray *kerns,
int upscale[2]; int upscale[2];
cudnnConvolutionMode_t mode; cudnnConvolutionMode_t mode;
cudnnDataType_t data_type; cudnnDataType_t data_type;
err = cudnnGetConvolutionNdDescriptor_v3(desc, 2, &nd, pad, stride, err = cudnnGetConvolutionNdDescriptor(desc, 2, &nd, pad, stride,
upscale, &mode, &data_type); upscale, &mode, &data_type);
if (err != CUDNN_STATUS_SUCCESS) { if (err != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError, PyErr_Format(PyExc_RuntimeError,
...@@ -224,6 +224,19 @@ APPLY_SPECIFIC(conv_fwd)(CudaNdarray *input, CudaNdarray *kerns, ...@@ -224,6 +224,19 @@ APPLY_SPECIFIC(conv_fwd)(CudaNdarray *input, CudaNdarray *kerns,
APPLY_SPECIFIC(output), APPLY_SPECIFIC(output),
chosen_algo, chosen_algo,
&worksize); &worksize);
if (err == CUDNN_STATUS_NOT_SUPPORTED) {
// Fallback to none algo if not supported
// TODO: Print a warning
chosen_algo = CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM;
err = cudnnGetConvolutionForwardWorkspaceSize(_handle,
APPLY_SPECIFIC(input),
APPLY_SPECIFIC(kerns),
desc,
APPLY_SPECIFIC(output),
chosen_algo,
&worksize);
}
if (err != CUDNN_STATUS_SUCCESS) { if (err != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError, PyErr_Format(PyExc_RuntimeError,
"GpuDnnConv: error getting worksize: %s", "GpuDnnConv: error getting worksize: %s",
......
...@@ -178,8 +178,8 @@ APPLY_SPECIFIC(conv_gi)(CudaNdarray *kerns, CudaNdarray *output, ...@@ -178,8 +178,8 @@ APPLY_SPECIFIC(conv_gi)(CudaNdarray *kerns, CudaNdarray *output,
int upscale[2]; int upscale[2];
cudnnConvolutionMode_t mode; cudnnConvolutionMode_t mode;
cudnnDataType_t data_type; cudnnDataType_t data_type;
err = cudnnGetConvolutionNdDescriptor_v3(desc, 2, &nd, pad, stride, err = cudnnGetConvolutionNdDescriptor(desc, 2, &nd, pad, stride,
upscale, &mode, &data_type); upscale, &mode, &data_type);
if (err != CUDNN_STATUS_SUCCESS) { if (err != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError, PyErr_Format(PyExc_RuntimeError,
...@@ -237,7 +237,7 @@ APPLY_SPECIFIC(conv_gi)(CudaNdarray *kerns, CudaNdarray *output, ...@@ -237,7 +237,7 @@ APPLY_SPECIFIC(conv_gi)(CudaNdarray *kerns, CudaNdarray *output,
return 1; return 1;
// Perform the convolution // Perform the convolution
err = cudnnConvolutionBackwardData_v3( err = cudnnConvolutionBackwardData(
_handle, _handle,
(void *)&alpha, (void *)&alpha,
APPLY_SPECIFIC(kerns), CudaNdarray_DEV_DATA(kerns), APPLY_SPECIFIC(kerns), CudaNdarray_DEV_DATA(kerns),
......
...@@ -173,8 +173,8 @@ APPLY_SPECIFIC(conv_gw)(CudaNdarray *input, CudaNdarray *output, ...@@ -173,8 +173,8 @@ APPLY_SPECIFIC(conv_gw)(CudaNdarray *input, CudaNdarray *output,
int upscale[2]; int upscale[2];
cudnnConvolutionMode_t mode; cudnnConvolutionMode_t mode;
cudnnDataType_t data_type; cudnnDataType_t data_type;
err = cudnnGetConvolutionNdDescriptor_v3(desc, 2, &nd, pad, stride, err = cudnnGetConvolutionNdDescriptor(desc, 2, &nd, pad, stride,
upscale, &mode, &data_type); upscale, &mode, &data_type);
if (err != CUDNN_STATUS_SUCCESS) { if (err != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError, PyErr_Format(PyExc_RuntimeError,
...@@ -221,7 +221,7 @@ APPLY_SPECIFIC(conv_gw)(CudaNdarray *input, CudaNdarray *output, ...@@ -221,7 +221,7 @@ APPLY_SPECIFIC(conv_gw)(CudaNdarray *input, CudaNdarray *output,
return 1; return 1;
// Perform the convolution // Perform the convolution
err = cudnnConvolutionBackwardFilter_v3( err = cudnnConvolutionBackwardFilter(
_handle, _handle,
(void *)&alpha, (void *)&alpha,
APPLY_SPECIFIC(input), CudaNdarray_DEV_DATA(input), APPLY_SPECIFIC(input), CudaNdarray_DEV_DATA(input),
......
...@@ -392,8 +392,9 @@ def test_pooling_with_tensor_vars(): ...@@ -392,8 +392,9 @@ def test_pooling_with_tensor_vars():
def test_old_pool_interface(): def test_old_pool_interface():
if not cuda.dnn.dnn_available(): if not cuda.dnn.dnn_available() or cuda.dnn.version() > (5000, 5000):
raise SkipTest(cuda.dnn.dnn_available.msg) raise SkipTest(cuda.dnn.dnn_available.msg)
testfile_dir = os.path.dirname(os.path.realpath(__file__)) testfile_dir = os.path.dirname(os.path.realpath(__file__))
fname = 'old_pool_interface.pkl' fname = 'old_pool_interface.pkl'
with open(os.path.join(testfile_dir, fname), 'rb') as fp: with open(os.path.join(testfile_dir, fname), 'rb') as fp:
......
...@@ -35,7 +35,7 @@ int APPLY_SPECIFIC(conv_desc)(PyArrayObject *filt_shp, ...@@ -35,7 +35,7 @@ int APPLY_SPECIFIC(conv_desc)(PyArrayObject *filt_shp,
return -1; return -1;
} }
err = cudnnSetConvolutionNdDescriptor_v3(*desc, NB_DIMS, pad, strides, err = cudnnSetConvolutionNdDescriptor(*desc, NB_DIMS, pad, strides,
upscale, CONV_MODE, PRECISION); upscale, CONV_MODE, PRECISION);
return 0; return 0;
} }
...@@ -33,6 +33,8 @@ from .nnet import GpuSoftmax ...@@ -33,6 +33,8 @@ from .nnet import GpuSoftmax
from .opt import gpu_seqopt, register_opt, conv_groupopt, op_lifter from .opt import gpu_seqopt, register_opt, conv_groupopt, op_lifter
from .opt_util import alpha_merge, output_merge, inplace_allocempty from .opt_util import alpha_merge, output_merge, inplace_allocempty
from theano.configdefaults import SUPPORTED_DNN_CONV_ALGO_BWD_FILTER
def raise_no_cudnn(msg="CuDNN is required for convolution and pooling"): def raise_no_cudnn(msg="CuDNN is required for convolution and pooling"):
raise RuntimeError(msg) raise RuntimeError(msg)
...@@ -232,6 +234,7 @@ def version(raises=True): ...@@ -232,6 +234,7 @@ def version(raises=True):
:raises: If True, raise an exception if CuDNN is not present or badly installed. :raises: If True, raise an exception if CuDNN is not present or badly installed.
Otherwise, return -1. Otherwise, return -1.
""" """
if not dnn_present(): if not dnn_present():
if raises: if raises:
...@@ -397,9 +400,9 @@ class GpuDnnConv(DnnBase): ...@@ -397,9 +400,9 @@ class GpuDnnConv(DnnBase):
---------- ----------
image image
kernel kernel
descr descr :
The convolution descriptor. The convolution descriptor.
algo : {'small', 'none', 'large', 'fft', 'fft_tiling', 'guess_once', algo : {'small', 'none', 'large', 'fft', 'fft_tiling', 'winograd', 'guess_once',
'guess_on_shape_change', 'time_once', 'time_on_shape_change'} 'guess_on_shape_change', 'time_once', 'time_on_shape_change'}
Default is the value of :attr:`config.dnn.conv.algo_fwd`. Default is the value of :attr:`config.dnn.conv.algo_fwd`.
...@@ -435,8 +438,12 @@ class GpuDnnConv(DnnBase): ...@@ -435,8 +438,12 @@ class GpuDnnConv(DnnBase):
raise RuntimeError("CuDNN tiled-FFT convolution requires " raise RuntimeError("CuDNN tiled-FFT convolution requires "
"CuDNN v4 or more recent") "CuDNN v4 or more recent")
if version() < 5000 and self.algo == 'winograd':
raise RuntimeError("CuDNN winograd convolution requires "
"CuDNN v5 or more recent")
assert self.algo in ['none', 'small', 'large', 'fft', 'fft_tiling', assert self.algo in ['none', 'small', 'large', 'fft', 'fft_tiling',
'guess_once', 'guess_on_shape_change', 'winograd', 'guess_once', 'guess_on_shape_change',
'time_once', 'time_on_shape_change'] 'time_once', 'time_on_shape_change']
def __setstate__(self, d): def __setstate__(self, d):
...@@ -468,6 +475,9 @@ class GpuDnnConv(DnnBase): ...@@ -468,6 +475,9 @@ class GpuDnnConv(DnnBase):
elif self.algo == 'fft_tiling': elif self.algo == 'fft_tiling':
# need v4 # need v4
alg = 'CUDNN_CONVOLUTION_FWD_ALGO_FFT_TILING' alg = 'CUDNN_CONVOLUTION_FWD_ALGO_FFT_TILING'
elif self.algo == 'winograd':
# need v5
alg = 'CUDNN_CONVOLUTION_FWD_ALGO_WINOGRAD'
defs.append(('CONV_ALGO', alg)) defs.append(('CONV_ALGO', alg))
if self.algo in ['guess_once', 'guess_on_shape_change', if self.algo in ['guess_once', 'guess_on_shape_change',
...@@ -565,8 +575,11 @@ class GpuDnnConvGradW(DnnBase): ...@@ -565,8 +575,11 @@ class GpuDnnConvGradW(DnnBase):
---------- ----------
image image
kernel kernel
descr descr :
The convolution descriptor. The convolution descriptor.
algo : {'none', 'deterministic', 'fft', 'small', 'guess_once',
'guess_on_shape_change', 'time_once', 'time_on_shape_change'}
Default is the value of :attr:`config.dnn.conv.algo_bwd_filter`.
""" """
...@@ -582,9 +595,7 @@ class GpuDnnConvGradW(DnnBase): ...@@ -582,9 +595,7 @@ class GpuDnnConvGradW(DnnBase):
algo = config.dnn.conv.algo_bwd_filter algo = config.dnn.conv.algo_bwd_filter
self.algo = algo self.algo = algo
assert self.algo in ['none', 'deterministic', 'fft', 'small', assert self.algo in SUPPORTED_DNN_CONV_ALGO_BWD_FILTER
'guess_once', 'guess_on_shape_change',
'time_once', 'time_on_shape_change']
def __setstate__(self, d): def __setstate__(self, d):
self.__dict__.update(d) self.__dict__.update(d)
...@@ -688,6 +699,9 @@ class GpuDnnConvGradI(DnnBase): ...@@ -688,6 +699,9 @@ class GpuDnnConvGradI(DnnBase):
kernel kernel
descr descr
The convolution descriptor. The convolution descriptor.
algo : {'none', 'deterministic', 'fft', 'fft_tiling', 'winograd', 'guess_once',
'guess_on_shape_change', 'time_once', 'time_on_shape_change'}
Default is the value of :attr:`config.dnn.conv.algo_bwd_data`.
""" """
...@@ -708,9 +722,12 @@ class GpuDnnConvGradI(DnnBase): ...@@ -708,9 +722,12 @@ class GpuDnnConvGradI(DnnBase):
if version() < 4000 and self.algo == 'fft_tiling': if version() < 4000 and self.algo == 'fft_tiling':
raise RuntimeError("CuDNN's tiled-FFT convolution requires CuDNN " raise RuntimeError("CuDNN's tiled-FFT convolution requires CuDNN "
"v4 or more recent") "v4 or more recent")
if version() < 5000 and self.algo == 'winograd':
raise RuntimeError("CuDNN's winograd convolution requires CuDNN "
"v5 or more recent")
assert self.algo in ['none', 'deterministic', 'fft', 'fft_tiling', assert self.algo in ['none', 'deterministic', 'fft', 'fft_tiling',
'guess_once', 'guess_on_shape_change', 'winograd', 'guess_once', 'guess_on_shape_change',
'time_once', 'time_on_shape_change'] 'time_once', 'time_on_shape_change']
def __setstate__(self, d): def __setstate__(self, d):
...@@ -749,13 +766,16 @@ class GpuDnnConvGradI(DnnBase): ...@@ -749,13 +766,16 @@ class GpuDnnConvGradI(DnnBase):
alg = 'CUDNN_CONVOLUTION_BWD_DATA_ALGO_0' alg = 'CUDNN_CONVOLUTION_BWD_DATA_ALGO_0'
if self.algo == 'none': if self.algo == 'none':
alg = 'CUDNN_CONVOLUTION_BWD_DATA_ALGO_0' alg = 'CUDNN_CONVOLUTION_BWD_DATA_ALGO_0'
if self.algo == 'deterministic': elif self.algo == 'deterministic':
alg = 'CUDNN_CONVOLUTION_BWD_DATA_ALGO_1' alg = 'CUDNN_CONVOLUTION_BWD_DATA_ALGO_1'
if self.algo == 'fft': elif self.algo == 'fft':
alg = 'CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT' alg = 'CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT'
if self.algo == 'fft_tiling': elif self.algo == 'fft_tiling':
# big workspace but less than fft # big workspace but less than fft
alg = 'CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT_TILING' alg = 'CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT_TILING'
elif self.algo == 'winograd':
# need v5
alg = 'CUDNN_CONVOLUTION_BWD_DATA_ALGO_WINOGRAD'
if self.algo in ['guess_once', 'guess_on_shape_change', if self.algo in ['guess_once', 'guess_on_shape_change',
'time_once', 'time_on_shape_change']: 'time_once', 'time_on_shape_change']:
...@@ -1047,9 +1067,13 @@ class GpuDnnPoolDesc(Op): ...@@ -1047,9 +1067,13 @@ class GpuDnnPoolDesc(Op):
static const int win[%(nd)d] = {%(win)s}; static const int win[%(nd)d] = {%(win)s};
static const int pad[%(nd)d] = {%(pad)s}; static const int pad[%(nd)d] = {%(pad)s};
static const int str[%(nd)d] = {%(str)s}; static const int str[%(nd)d] = {%(str)s};
err = cudnnSetPoolingNdDescriptor(
%(desc)s, %(mode_flag)s, %(nd)d, #if CUDNN_VERSION >= 5000
win, pad, str); err = cudnnSetPoolingNdDescriptor(%(desc)s, %(mode_flag)s, CUDNN_PROPAGATE_NAN, %(nd)d, win, pad, str);
#else
err = cudnnSetPoolingNdDescriptor(%(desc)s, %(mode_flag)s, %(nd)d, win, pad, str);
#endif
if (err != CUDNN_STATUS_SUCCESS) { if (err != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError, "could not set op descriptor: %%s", PyErr_Format(PyExc_RuntimeError, "could not set op descriptor: %%s",
cudnnGetErrorString(err)); cudnnGetErrorString(err));
...@@ -1062,7 +1086,7 @@ class GpuDnnPoolDesc(Op): ...@@ -1062,7 +1086,7 @@ class GpuDnnPoolDesc(Op):
str=', '.join(map(str, self.stride))) str=', '.join(map(str, self.stride)))
def c_code_cache_version(self): def c_code_cache_version(self):
return (3, version()) return (4, version())
class GpuDnnPool(DnnBase): class GpuDnnPool(DnnBase):
...@@ -1070,18 +1094,17 @@ class GpuDnnPool(DnnBase): ...@@ -1070,18 +1094,17 @@ class GpuDnnPool(DnnBase):
""" """
Parameters Parameters
---------- ----------
img img : tensor
The image 4d or 5d tensor. The image 4d or 5d tensor.
Parameters ws : tensor
----------
ws : tensor variable
Window size. Window size.
stride : tensor variable stride : tensor
(dx, dy) or (dx, dy, dz). (dx, dy) or (dx, dy, dz).
mode : {'max', 'average_inc_pad', 'average_exc_pad'} mode : {'max', 'average_inc_pad', 'average_exc_pad'}
The old deprecated name 'average' corresponds to 'average_inc_pad'. The old deprecated name 'average' corresponds to 'average_inc_pad'.
pad : tensor pad : tensor
(padX, padY) or (padX, padY, padZ) (padX, padY) or (padX, padY, padZ)
""" """
__props__ = ('mode',) __props__ = ('mode',)
...@@ -1255,14 +1278,12 @@ class GpuDnnSoftmaxBase(DnnBase): ...@@ -1255,14 +1278,12 @@ class GpuDnnSoftmaxBase(DnnBase):
Parameters Parameters
---------- ----------
algo algo : {'fast', 'accurate', 'log'}
'fast', 'accurate' or 'log' indicating whether, respectively, Indicating whether, respectively, computations should be optimized for
computations should be optimized for speed, for accuracy, or if CuDNN speed, for accuracy, or if CuDNN should rather compute the log-softmax instead.
should rather compute the log-softmax instead. mode : {'instance', 'channel'}
mode Indicating whether the softmax should be computed per image across 'c01'
'instance' or 'channel' indicating whether the softmax should be or per spatial location '01' per image across 'c'.
computed per image across 'c01' or per spatial location '01' per
image across 'c'.
""" """
...@@ -1306,14 +1327,12 @@ class GpuDnnSoftmax(GpuDnnSoftmaxBase): ...@@ -1306,14 +1327,12 @@ class GpuDnnSoftmax(GpuDnnSoftmaxBase):
""" """
Op for the cuDNN Softmax. Op for the cuDNN Softmax.
algo algo : {'fast', 'accurate', 'log'}
'fast', 'accurate' or 'log' indicating whether, respectively, Indicating whether, respectively, computations should be optimized for
computations should be optimized for speed, for accuracy, or if CuDNN speed, for accuracy, or if CuDNN should rather compute the log-softmax instead.
should rather compute the log-softmax instead. mode : {'instance', 'channel'}
mode Indicating whether the softmax should be computed per image across 'c01'
'instance' or 'channel' indicating whether the softmax should be or per spatial location '01' per image across 'c'.
computed per image across 'c01' or per spatial location '01' per
image across 'c'.
""" """
direction = "forward" direction = "forward"
......
...@@ -51,6 +51,8 @@ c_set_tensorNd(PyGpuArrayObject *var, cudnnTensorDescriptor_t desc) { ...@@ -51,6 +51,8 @@ c_set_tensorNd(PyGpuArrayObject *var, cudnnTensorDescriptor_t desc) {
static int static int
c_set_filter(PyGpuArrayObject *var, cudnnFilterDescriptor_t desc) { c_set_filter(PyGpuArrayObject *var, cudnnFilterDescriptor_t desc) {
cudnnDataType_t dt; cudnnDataType_t dt;
cudnnStatus_t err;
if (!GpuArray_IS_C_CONTIGUOUS(&var->ga)) { if (!GpuArray_IS_C_CONTIGUOUS(&var->ga)) {
PyErr_SetString(PyExc_ValueError, PyErr_SetString(PyExc_ValueError,
"Only contiguous filters (kernels) are supported."); "Only contiguous filters (kernels) are supported.");
...@@ -86,7 +88,12 @@ c_set_filter(PyGpuArrayObject *var, cudnnFilterDescriptor_t desc) { ...@@ -86,7 +88,12 @@ c_set_filter(PyGpuArrayObject *var, cudnnFilterDescriptor_t desc) {
dims[i] = PyGpuArray_DIM(var, i); dims[i] = PyGpuArray_DIM(var, i);
} }
cudnnStatus_t err = cudnnSetFilterNdDescriptor(desc, dt, nd, dims); #if CUDNN_VERSION >= 5000
err = cudnnSetFilterNdDescriptor(desc, dt, CUDNN_TENSOR_NCHW, nd, dims);
#else
err = cudnnSetFilterNdDescriptor(desc, dt, nd, dims);
#endif
if (err != CUDNN_STATUS_SUCCESS) { if (err != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError, PyErr_Format(PyExc_RuntimeError,
"Could not set filter descriptor: %s.", "Could not set filter descriptor: %s.",
......
...@@ -92,12 +92,12 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns, ...@@ -92,12 +92,12 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns,
} }
algo = choice.algo; algo = choice.algo;
#else #else
size_t free = 0, total = 0; size_t free;
cudaError_t err2 = cudaMemGetInfo(&free, &total); int err2 = c->ops->property(c->ctx, NULL, NULL, GA_CTX_PROP_FREE_GMEM, &free);
if (err2 != cudaSuccess) {
if (err2 != GA_NO_ERROR) {
PyErr_Format(PyExc_RuntimeError, "Error when trying to find the " PyErr_Format(PyExc_RuntimeError, "Error when trying to find the "
"memory information on the GPU: %s\n", "memory information on the GPU");
cudaGetErrorString(err2));
cuda_exit(c->ctx); cuda_exit(c->ctx);
return 1; return 1;
} }
...@@ -154,7 +154,7 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns, ...@@ -154,7 +154,7 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns,
int upscale[2]; int upscale[2];
cudnnConvolutionMode_t mode; cudnnConvolutionMode_t mode;
cudnnDataType_t data_type; cudnnDataType_t data_type;
err = cudnnGetConvolutionNdDescriptor_v3(desc, 2, &nd, pad, stride, err = cudnnGetConvolutionNdDescriptor(desc, 2, &nd, pad, stride,
upscale, &mode, &data_type); upscale, &mode, &data_type);
if (err != CUDNN_STATUS_SUCCESS) { if (err != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError, PyErr_Format(PyExc_RuntimeError,
...@@ -193,6 +193,21 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns, ...@@ -193,6 +193,21 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns,
APPLY_SPECIFIC(output), APPLY_SPECIFIC(output),
algo, algo,
&worksize); &worksize);
if (err == CUDNN_STATUS_NOT_SUPPORTED) {
// Fallback to none algo if not supported
// TODO: Print a warning
algo = CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM;
err = cudnnGetConvolutionForwardWorkspaceSize(APPLY_SPECIFIC(_handle),
APPLY_SPECIFIC(input),
APPLY_SPECIFIC(kerns),
desc,
APPLY_SPECIFIC(output),
algo,
&worksize);
}
if (err != CUDNN_STATUS_SUCCESS) { if (err != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError, PyErr_Format(PyExc_RuntimeError,
"error getting worksize: %s", "error getting worksize: %s",
......
...@@ -91,12 +91,12 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output, ...@@ -91,12 +91,12 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output,
algo = choice.algo; algo = choice.algo;
#else #else
size_t free = 0, total = 0; size_t free;
cudaError_t err2 = cudaMemGetInfo(&free, &total); int err2 = c->ops->property(c->ctx, NULL, NULL, GA_CTX_PROP_FREE_GMEM, &free);
if (err2 != cudaSuccess){
cudaGetLastError(); if (err2 != GA_NO_ERROR) {
PyErr_Format(PyExc_RuntimeError, "Error when trying to find the memory " PyErr_Format(PyExc_RuntimeError, "Error when trying to find the "
"information on the GPU: %s\n", cudaGetErrorString(err2)); "memory information on the GPU");
cuda_exit(c->ctx); cuda_exit(c->ctx);
return 1; return 1;
} }
...@@ -146,7 +146,7 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output, ...@@ -146,7 +146,7 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output,
int upscale[2]; int upscale[2];
cudnnConvolutionMode_t mode; cudnnConvolutionMode_t mode;
cudnnDataType_t data_type; cudnnDataType_t data_type;
err = cudnnGetConvolutionNdDescriptor_v3(desc, 2, &nd, pad, stride, err = cudnnGetConvolutionNdDescriptor(desc, 2, &nd, pad, stride,
upscale, &mode, &data_type); upscale, &mode, &data_type);
if (err != CUDNN_STATUS_SUCCESS) { if (err != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError, PyErr_Format(PyExc_RuntimeError,
...@@ -203,7 +203,7 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output, ...@@ -203,7 +203,7 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output,
cuda_wait(output->ga.data, GPUARRAY_CUDA_WAIT_READ); cuda_wait(output->ga.data, GPUARRAY_CUDA_WAIT_READ);
cuda_wait((*input)->ga.data, GPUARRAY_CUDA_WAIT_WRITE); cuda_wait((*input)->ga.data, GPUARRAY_CUDA_WAIT_WRITE);
err = cudnnConvolutionBackwardData_v3( err = cudnnConvolutionBackwardData(
APPLY_SPECIFIC(_handle), APPLY_SPECIFIC(_handle),
alpha_p, alpha_p,
APPLY_SPECIFIC(kerns), PyGpuArray_DEV_DATA(kerns), APPLY_SPECIFIC(kerns), PyGpuArray_DEV_DATA(kerns),
......
...@@ -92,12 +92,12 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output, ...@@ -92,12 +92,12 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output,
algo = choice.algo; algo = choice.algo;
#else #else
size_t free = 0, total = 0; size_t free;
cudaError_t err2 = cudaMemGetInfo(&free, &total); int err2 = c->ops->property(c->ctx, NULL, NULL, GA_CTX_PROP_FREE_GMEM, &free);
if (err2 != cudaSuccess){
cudaGetLastError(); if (err2 != GA_NO_ERROR) {
PyErr_Format(PyExc_RuntimeError, "Error when trying to find the memory " PyErr_Format(PyExc_RuntimeError, "Error when trying to find the "
"information on the GPU: %s\n", cudaGetErrorString(err2)); "memory information on the GPU");
cuda_exit(c->ctx); cuda_exit(c->ctx);
return 1; return 1;
} }
...@@ -146,7 +146,7 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output, ...@@ -146,7 +146,7 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output,
int upscale[2]; int upscale[2];
cudnnConvolutionMode_t mode; cudnnConvolutionMode_t mode;
cudnnDataType_t data_type; cudnnDataType_t data_type;
err = cudnnGetConvolutionNdDescriptor_v3(desc, 2, &nd, pad, stride, err = cudnnGetConvolutionNdDescriptor(desc, 2, &nd, pad, stride,
upscale, &mode, &data_type); upscale, &mode, &data_type);
if (err != CUDNN_STATUS_SUCCESS) { if (err != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError, PyErr_Format(PyExc_RuntimeError,
...@@ -190,7 +190,7 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output, ...@@ -190,7 +190,7 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output,
cuda_wait(output->ga.data, GPUARRAY_CUDA_WAIT_READ); cuda_wait(output->ga.data, GPUARRAY_CUDA_WAIT_READ);
cuda_wait((*kerns)->ga.data, GPUARRAY_CUDA_WAIT_WRITE); cuda_wait((*kerns)->ga.data, GPUARRAY_CUDA_WAIT_WRITE);
err = cudnnConvolutionBackwardFilter_v3( err = cudnnConvolutionBackwardFilter(
APPLY_SPECIFIC(_handle), APPLY_SPECIFIC(_handle),
alpha_p, alpha_p,
APPLY_SPECIFIC(input), PyGpuArray_DEV_DATA(input), APPLY_SPECIFIC(input), PyGpuArray_DEV_DATA(input),
......
...@@ -24,7 +24,7 @@ if ((APPLY_SPECIFIC(err) = cudnnCreateTensorDescriptor(&APPLY_SPECIFIC(output))) ...@@ -24,7 +24,7 @@ if ((APPLY_SPECIFIC(err) = cudnnCreateTensorDescriptor(&APPLY_SPECIFIC(output)))
} }
if ((APPLY_SPECIFIC(err) = cudnnCreatePoolingDescriptor(&APPLY_SPECIFIC(pool))) != CUDNN_STATUS_SUCCESS) { if ((APPLY_SPECIFIC(err) = cudnnCreatePoolingDescriptor(&APPLY_SPECIFIC(pool))) != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_MemoryError, "could not allocate pooling descriptor" PyErr_Format(PyExc_MemoryError, "could not allocate pooling descriptor"
"(pool): %s", cudnnGetErrorString(APPLY_SPECIFIC(err))); "(pool): %s", cudnnGetErrorString(APPLY_SPECIFIC(err)));
FAIL; FAIL;
} }
...@@ -38,7 +38,7 @@ if (APPLY_SPECIFIC(pool) != NULL) { cudnnDestroyPoolingDescriptor(APPLY_SPECIFIC ...@@ -38,7 +38,7 @@ if (APPLY_SPECIFIC(pool) != NULL) { cudnnDestroyPoolingDescriptor(APPLY_SPECIFIC
#section support_code_struct #section support_code_struct
int APPLY_SPECIFIC(dnn_pool)(PyGpuArrayObject *img, int APPLY_SPECIFIC(dnn_pool)(PyGpuArrayObject *img,
PyArrayObject *ws, PyArrayObject *ws,
PyArrayObject *stride, PyArrayObject *stride,
PyArrayObject *pad, PyArrayObject *pad,
PyGpuArrayObject **out, PyGpuArrayObject **out,
...@@ -69,7 +69,12 @@ int APPLY_SPECIFIC(dnn_pool)(PyGpuArrayObject *img, ...@@ -69,7 +69,12 @@ int APPLY_SPECIFIC(dnn_pool)(PyGpuArrayObject *img,
for(int i = 0; i < ndims; i++) { for(int i = 0; i < ndims; i++) {
s[i] = *((npy_intp*)PyArray_GETPTR1(stride, i)); s[i] = *((npy_intp*)PyArray_GETPTR1(stride, i));
} }
#if CUDNN_VERSION >= 5000
err = cudnnSetPoolingNdDescriptor(APPLY_SPECIFIC(pool), MODE_FLAG, CUDNN_PROPAGATE_NAN, ndims, w, p, s);
#else
err = cudnnSetPoolingNdDescriptor(APPLY_SPECIFIC(pool), MODE_FLAG, ndims, w, p, s); err = cudnnSetPoolingNdDescriptor(APPLY_SPECIFIC(pool), MODE_FLAG, ndims, w, p, s);
#endif
if (err != CUDNN_STATUS_SUCCESS) { if (err != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError, "could not set op descriptor %s", cudnnGetErrorString(err)); PyErr_Format(PyExc_RuntimeError, "could not set op descriptor %s", cudnnGetErrorString(err));
......
...@@ -42,7 +42,7 @@ APPLY_SPECIFIC(pool) = NULL; ...@@ -42,7 +42,7 @@ APPLY_SPECIFIC(pool) = NULL;
} }
if ((err = cudnnCreatePoolingDescriptor(&APPLY_SPECIFIC(pool))) != CUDNN_STATUS_SUCCESS) { if ((err = cudnnCreatePoolingDescriptor(&APPLY_SPECIFIC(pool))) != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_MemoryError, "could not allocate pooling descriptor" PyErr_Format(PyExc_MemoryError, "could not allocate pooling descriptor"
"(pool): %s", cudnnGetErrorString(err)); "(pool): %s", cudnnGetErrorString(err));
FAIL; FAIL;
} }
} }
...@@ -60,7 +60,7 @@ if (APPLY_SPECIFIC(pool) != NULL) { cudnnDestroyPoolingDescriptor(APPLY_SPECIFIC ...@@ -60,7 +60,7 @@ if (APPLY_SPECIFIC(pool) != NULL) { cudnnDestroyPoolingDescriptor(APPLY_SPECIFIC
int APPLY_SPECIFIC(dnn_pool_grad)(PyGpuArrayObject *inp, int APPLY_SPECIFIC(dnn_pool_grad)(PyGpuArrayObject *inp,
PyGpuArrayObject *out, PyGpuArrayObject *out,
PyGpuArrayObject *out_grad, PyGpuArrayObject *out_grad,
PyArrayObject *ws, PyArrayObject *ws,
PyArrayObject *stride, PyArrayObject *stride,
PyArrayObject *pad, PyArrayObject *pad,
PyGpuArrayObject **inp_grad, PyGpuArrayObject **inp_grad,
...@@ -109,7 +109,12 @@ int APPLY_SPECIFIC(dnn_pool_grad)(PyGpuArrayObject *inp, ...@@ -109,7 +109,12 @@ int APPLY_SPECIFIC(dnn_pool_grad)(PyGpuArrayObject *inp,
for(int i = 0; i < ndims; i++) { for(int i = 0; i < ndims; i++) {
s[i] = *((npy_intp*)PyArray_GETPTR1(stride, i)); s[i] = *((npy_intp*)PyArray_GETPTR1(stride, i));
} }
#if CUDNN_VERSION >= 5000
err = cudnnSetPoolingNdDescriptor(APPLY_SPECIFIC(pool), MODE_FLAG, CUDNN_PROPAGATE_NAN, ndims, w, p, s);
#else
err = cudnnSetPoolingNdDescriptor(APPLY_SPECIFIC(pool), MODE_FLAG, ndims, w, p, s); err = cudnnSetPoolingNdDescriptor(APPLY_SPECIFIC(pool), MODE_FLAG, ndims, w, p, s);
#endif
if (err != CUDNN_STATUS_SUCCESS) { if (err != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError, "could not set op descriptor %s", cudnnGetErrorString(err)); PyErr_Format(PyExc_RuntimeError, "could not set op descriptor %s", cudnnGetErrorString(err));
......
import logging import logging
from nose.plugins.skip import SkipTest from nose.plugins.skip import SkipTest
from nose_parameterized import parameterized
import numpy import numpy
from itertools import product from itertools import product, chain
import theano import theano
from six import StringIO from six import StringIO
...@@ -18,6 +19,8 @@ from ..basic_ops import GpuAllocEmpty ...@@ -18,6 +19,8 @@ from ..basic_ops import GpuAllocEmpty
from .config import mode_with_gpu, mode_without_gpu, test_ctx_name from .config import mode_with_gpu, mode_without_gpu, test_ctx_name
from . import test_nnet from . import test_nnet
from theano.configdefaults import SUPPORTED_DNN_CONV_ALGO_FWD
def test_dnn_conv_desc_merge(): def test_dnn_conv_desc_merge():
if not dnn.dnn_available(test_ctx_name): if not dnn.dnn_available(test_ctx_name):
...@@ -393,6 +396,9 @@ def test_dnn_tag(): ...@@ -393,6 +396,9 @@ def test_dnn_tag():
class TestDnnInferShapes(utt.InferShapeTester): class TestDnnInferShapes(utt.InferShapeTester):
border_modes = ['valid', 'full', 'half']
conv_modes = ['conv', 'cross']
def setUp(self): def setUp(self):
super(TestDnnInferShapes, self).setUp() super(TestDnnInferShapes, self).setUp()
self.mode = mode_with_gpu self.mode = mode_with_gpu
...@@ -427,37 +433,25 @@ class TestDnnInferShapes(utt.InferShapeTester): ...@@ -427,37 +433,25 @@ class TestDnnInferShapes(utt.InferShapeTester):
dnn.GpuDnnSoftmaxGrad dnn.GpuDnnSoftmaxGrad
) )
def test_conv(self): def _test_conv(self, img, kerns, out, img_val, kern_vals, border_mode, conv_mode, subsamples, algo):
if not dnn.dnn_available(test_ctx_name): if not dnn.dnn_available(test_ctx_name):
raise SkipTest(dnn.dnn_available.msg) raise SkipTest(dnn.dnn_available.msg)
img = T.ftensor4('img')
kerns = T.ftensor4('kerns')
out = T.ftensor4('out')
img_val = numpy.asarray(
numpy.random.rand(7, 2, 6, 4),
dtype='float32'
)
kern_vals = numpy.asarray(
numpy.random.rand(8, 2, 4, 3),
dtype='float32'
)
for params in product( img_val = numpy.asarray(img_val, dtype='float32')
['valid', 'full', 'half'], kern_vals = numpy.asarray(kern_vals, dtype='float32')
[(1, 1), (2, 2)],
['conv', 'cross'] for subsample in subsamples:
):
out_vals = numpy.zeros( out_vals = numpy.zeros(
dnn.GpuDnnConv.get_out_shape(img_val.shape, kern_vals.shape, dnn.GpuDnnConv.get_out_shape(img_val.shape, kern_vals.shape,
border_mode=params[0], border_mode=border_mode,
subsample=params[1]), subsample=subsample),
dtype='float32') dtype='float32')
desc = dnn.GpuDnnConvDesc( desc = dnn.GpuDnnConvDesc(
border_mode=params[0], border_mode=border_mode,
subsample=params[1], subsample=subsample,
conv_mode=params[2] conv_mode=conv_mode
)(kerns.shape) )(kerns.shape)
conv = dnn.GpuDnnConv()(img, kerns, out, desc) conv = dnn.GpuDnnConv(algo=algo)(img, kerns, out, desc)
self._compile_and_check( self._compile_and_check(
[img, kerns, out], [img, kerns, out],
[conv], [conv],
...@@ -465,54 +459,92 @@ class TestDnnInferShapes(utt.InferShapeTester): ...@@ -465,54 +459,92 @@ class TestDnnInferShapes(utt.InferShapeTester):
dnn.GpuDnnConv dnn.GpuDnnConv
) )
def test_conv_gradw(self): @parameterized.expand(chain(product([SUPPORTED_DNN_CONV_ALGO_FWD[0]],
border_modes,
conv_modes),
product(SUPPORTED_DNN_CONV_ALGO_FWD[1:],
[border_modes[0]],
[conv_modes[0]])),
testcase_func_name=utt.custom_name_func)
def test_conv(self, algo, border_mode, conv_mode):
if algo == 'winograd' and dnn.version() < 5000:
raise SkipTest(dnn.dnn_available.msg)
self._test_conv(T.ftensor4('img'),
T.ftensor4('kerns'),
T.ftensor4('out'),
numpy.random.rand(7, 2, 8, 4),
numpy.random.rand(8, 2, 4, 3),
border_mode,
conv_mode,
[(1, 1), (2, 2)],
algo)
@parameterized.expand(product(border_modes, conv_modes), utt.custom_name_func)
def test_conv3d_none(self, border_mode, conv_mode):
ftensor5 = T.TensorType(dtype="float32", broadcastable=(False,) * 5)
self._test_conv(ftensor5('img'),
ftensor5('kerns'),
ftensor5('out'),
numpy.random.rand(10, 2, 6, 4, 11),
numpy.random.rand(8, 2, 4, 3, 1),
border_mode,
conv_mode,
[(1, 1, 1), (2, 2, 2)],
'none')
def _test_conv_gradw(self, img, kerns, out, img_val, kern_vals, border_mode, conv_mode, subsample):
if not dnn.dnn_available(test_ctx_name): if not dnn.dnn_available(test_ctx_name):
raise SkipTest(dnn.dnn_available.msg) raise SkipTest(dnn.dnn_available.msg)
img = T.ftensor4('img')
kerns = T.ftensor4('kerns')
out = T.ftensor4('out')
img_val = numpy.asarray( img_val = numpy.asarray(
numpy.random.rand(2, 5, 6, 8), img_val,
dtype='float32' dtype='float32'
) )
kern_vals = numpy.asarray( kern_vals = numpy.asarray(
numpy.random.rand(2, 1, 5, 6), kern_vals,
dtype='float32' dtype='float32'
) )
for params in product( temp_img = img.dimshuffle(1, 0, 2, 3)
['valid', 'full', 'half'], temp_kerns = kerns
[(1, 1)], # strides besides (1, 1) if conv_mode == 'conv':
['conv', 'cross'] temp_kerns = temp_kerns[:, :, ::-1, ::-1]
): temp_kerns = temp_kerns.dimshuffle(1, 0, 2, 3)
temp_img = img.dimshuffle(1, 0, 2, 3) shape = (
temp_kerns = kerns kern_vals.shape[1], img_val.shape[1],
if params[2] == 'conv': img_val.shape[2] - kern_vals.shape[2] + 1,
temp_kerns = temp_kerns[:, :, ::-1, ::-1] img_val.shape[3] - kern_vals.shape[3] + 1
temp_kerns = temp_kerns.dimshuffle(1, 0, 2, 3) )
shape = ( out_vals = numpy.zeros(shape, dtype='float32')
kern_vals.shape[1], img_val.shape[1], desc = dnn.GpuDnnConvDesc(
img_val.shape[2] - kern_vals.shape[2] + 1, border_mode=border_mode,
img_val.shape[3] - kern_vals.shape[3] + 1 subsample=subsample,
) conv_mode=conv_mode
out_vals = numpy.zeros(shape, dtype='float32') )(out.shape)
desc = dnn.GpuDnnConvDesc( conv_grad_w = dnn.GpuDnnConvGradW()(
border_mode=params[0], temp_img,
subsample=params[1], temp_kerns,
conv_mode=params[2] out,
)(out.shape) desc,
conv_grad_w = dnn.GpuDnnConvGradW()( )
temp_img, self._compile_and_check(
temp_kerns, [temp_img, temp_kerns, out],
out, [conv_grad_w],
desc, [img_val, kern_vals, out_vals],
) dnn.GpuDnnConvGradW
self._compile_and_check( )
[temp_img, temp_kerns, out],
[conv_grad_w], @parameterized.expand(product(border_modes, conv_modes), utt.custom_name_func)
[img_val, kern_vals, out_vals], def test_conv_gradw(self, border_mode, conv_mode):
dnn.GpuDnnConvGradW self._test_conv_gradw(T.ftensor4('img'),
) T.ftensor4('kerns'),
T.ftensor4('out'),
numpy.random.rand(2, 5, 6, 8),
numpy.random.rand(2, 1, 5, 6),
border_mode,
conv_mode,
(1, 1))
def test_conv_gradi(self): def test_conv_gradi(self):
if not dnn.dnn_available(test_ctx_name): if not dnn.dnn_available(test_ctx_name):
......
...@@ -4,6 +4,7 @@ from functools import wraps ...@@ -4,6 +4,7 @@ from functools import wraps
import logging import logging
import sys import sys
import unittest import unittest
from nose_parameterized import parameterized
from six import integer_types from six import integer_types
from six.moves import StringIO from six.moves import StringIO
...@@ -31,6 +32,13 @@ except ImportError: ...@@ -31,6 +32,13 @@ except ImportError:
_logger = logging.getLogger("theano.tests.unittest_tools") _logger = logging.getLogger("theano.tests.unittest_tools")
def custom_name_func(testcase_func, param_num, param):
return "%s_%s" % (
testcase_func.__name__,
parameterized.to_safe_name("_".join(str(x) for x in param.args)),
)
def fetch_seed(pseed=None): def fetch_seed(pseed=None):
""" """
Returns the seed to use for running the unit tests. Returns the seed to use for running the unit tests.
...@@ -96,6 +104,7 @@ verify_grad.E_grad = T.verify_grad.E_grad ...@@ -96,6 +104,7 @@ verify_grad.E_grad = T.verify_grad.E_grad
class TestOptimizationMixin(object): class TestOptimizationMixin(object):
def assertFunctionContains(self, f, op, min=1, max=sys.maxsize): def assertFunctionContains(self, f, op, min=1, max=sys.maxsize):
toposort = f.maker.fgraph.toposort() toposort = f.maker.fgraph.toposort()
matches = [node for node in toposort if node.op == op] matches = [node for node in toposort if node.op == op]
...@@ -172,6 +181,7 @@ class T_OpContractMixin(object): ...@@ -172,6 +181,7 @@ class T_OpContractMixin(object):
class InferShapeTester(unittest.TestCase): class InferShapeTester(unittest.TestCase):
def setUp(self): def setUp(self):
seed_rng() seed_rng()
# Take into account any mode that may be defined in a child class # Take into account any mode that may be defined in a child class
...@@ -311,6 +321,7 @@ def str_diagnostic(expected, value, rtol, atol): ...@@ -311,6 +321,7 @@ def str_diagnostic(expected, value, rtol, atol):
class WrongValue(Exception): class WrongValue(Exception):
def __init__(self, expected_val, val, rtol, atol): def __init__(self, expected_val, val, rtol, atol):
Exception.__init__(self) # to be compatible with python2.4 Exception.__init__(self) # to be compatible with python2.4
self.val1 = expected_val self.val1 = expected_val
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论