提交 c6ffa460 authored 作者: Pascal Lamblin's avatar Pascal Lamblin 提交者: GitHub

Merge pull request #5357 from nouiz/abergeron-dnn_mem

Select the dnn convolution algorithm using actually available memory
...@@ -8,7 +8,7 @@ set -x ...@@ -8,7 +8,7 @@ set -x
# Anaconda python # Anaconda python
export PATH=/usr/local/miniconda2/bin:$PATH export PATH=/usr/local/miniconda2/bin:$PATH
# CUDA # CUDA
export PATH=/usr/local/cuda/bin:$PATH export PATH=/usr/local/cuda/bin:$PATH
export LD_LIBRARY_PATH=/usr/local/cuda/lib64:$LD_LIBRARY_PATH export LD_LIBRARY_PATH=/usr/local/cuda/lib64:$LD_LIBRARY_PATH
export LIBRARY_PATH=/usr/local/cuda/lib64:$LIBRARY_PATH export LIBRARY_PATH=/usr/local/cuda/lib64:$LIBRARY_PATH
...@@ -38,13 +38,13 @@ echo "===== Testing gpuarray backend" ...@@ -38,13 +38,13 @@ echo "===== Testing gpuarray backend"
GPUARRAY_CONFIG="Release" GPUARRAY_CONFIG="Release"
DEVICE=cuda0 DEVICE=cuda0
LIBDIR=~/tmp/local LIBDIR=${WORKSPACE}/local
# Make fresh clones of libgpuarray (with no history since we don't need it) # Make fresh clones of libgpuarray (with no history since we don't need it)
rm -rf libgpuarray rm -rf libgpuarray
git clone --depth 1 "https://github.com/Theano/libgpuarray.git" git clone --depth 1 "https://github.com/Theano/libgpuarray.git"
# Clean up previous installs (to make sure no old files are left) # Clean up previous installs (to make sure no old files are left)
rm -rf $LIBDIR rm -rf $LIBDIR
mkdir $LIBDIR mkdir $LIBDIR
...@@ -52,25 +52,25 @@ mkdir $LIBDIR ...@@ -52,25 +52,25 @@ mkdir $LIBDIR
mkdir libgpuarray/build mkdir libgpuarray/build
(cd libgpuarray/build && cmake .. -DCMAKE_BUILD_TYPE=${GPUARRAY_CONFIG} -DCMAKE_INSTALL_PREFIX=$LIBDIR && make) (cd libgpuarray/build && cmake .. -DCMAKE_BUILD_TYPE=${GPUARRAY_CONFIG} -DCMAKE_INSTALL_PREFIX=$LIBDIR && make)
# Finally install # Finally install
(cd libgpuarray/build && make install) (cd libgpuarray/build && make install)
# Export paths # Export paths
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$LIBDIR/lib64/
export LIBRARY_PATH=$LIBRARY_PATH:$LIBDIR/lib64/
export CPATH=$CPATH:$LIBDIR/include export CPATH=$CPATH:$LIBDIR/include
export LIBRARY_PATH=$LIBRARY_PATH:$LIBDIR/lib export LIBRARY_PATH=$LIBRARY_PATH:$LIBDIR/lib
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$LIBDIR/lib export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$LIBDIR/lib
# Build the pygpu modules # Build the pygpu modules
(cd libgpuarray && python setup.py build_ext --inplace -I$LIBDIR/include -L$LIBDIR/lib) (cd libgpuarray && python setup.py build_ext --inplace -I$LIBDIR/include -L$LIBDIR/lib)
ls $LIBDIR ls $LIBDIR
mkdir $LIBDIR/lib/python mkdir $LIBDIR/lib/python
export PYTHONPATH=${PYTHONPATH}:$LIBDIR/lib/python export PYTHONPATH=${PYTHONPATH}:$LIBDIR/lib/python
# Then install # Then install
(cd libgpuarray && python setup.py install --home=$LIBDIR) (cd libgpuarray && python setup.py install --home=$LIBDIR)
# Testing theano (the gpuarray parts) python -c 'import pygpu; print(pygpu.__file__)'
# Testing theano (the gpuarray parts)
THEANO_GPUARRAY_TESTS="theano/gpuarray/tests \ THEANO_GPUARRAY_TESTS="theano/gpuarray/tests \
theano/sandbox/tests/test_rng_mrg.py:test_consistency_GPUA_serial \ theano/sandbox/tests/test_rng_mrg.py:test_consistency_GPUA_serial \
theano/sandbox/tests/test_rng_mrg.py:test_consistency_GPUA_parallel \ theano/sandbox/tests/test_rng_mrg.py:test_consistency_GPUA_parallel \
......
...@@ -27,7 +27,7 @@ except ImportError: ...@@ -27,7 +27,7 @@ except ImportError:
# This is for documentation not to depend on the availability of pygpu # This is for documentation not to depend on the availability of pygpu
from .type import (GpuArrayType, GpuArrayVariable, GpuArrayConstant, from .type import (GpuArrayType, GpuArrayVariable, GpuArrayConstant,
GpuArraySharedVariable, gpuarray_shared_constructor, GpuArraySharedVariable, gpuarray_shared_constructor,
reg_context, get_context, ContextNotDefined, _get_props) reg_context, get_context, ContextNotDefined)
from .basic_ops import as_gpuarray_variable from .basic_ops import as_gpuarray_variable
from . import fft, dnn, opt, nerv, extra_ops, multinomial, reduction from . import fft, dnn, opt, nerv, extra_ops, multinomial, reduction
...@@ -46,63 +46,66 @@ def init_dev(dev, name=None): ...@@ -46,63 +46,66 @@ def init_dev(dev, name=None):
if not config.cxx: if not config.cxx:
raise RuntimeError("The new gpu-backend need a c++ compiler.") raise RuntimeError("The new gpu-backend need a c++ compiler.")
if (pygpu.version.major, pygpu.version.minor) < (0, 6): if (pygpu.version.major, pygpu.version.minor) < (0, 6):
raise ValueError("Your installed version of pygpu is too old, please upgrade to 0.6 or later") raise ValueError(
"Your installed version of pygpu is too old, please upgrade to 0.6 or later")
# This is for the C headers API
if pygpu.gpuarray.api_version()[0] < 0:
raise ValueError(
"Your installed libgpuarray is too old, please update")
if dev not in init_dev.devmap: if dev not in init_dev.devmap:
ctx = pygpu.init(dev, context = pygpu.init(
disable_alloc_cache=config.gpuarray.preallocate < 0, dev,
single_stream=config.gpuarray.single_stream, disable_alloc_cache=config.gpuarray.preallocate < 0,
sched=config.gpuarray.sched) single_stream=config.gpuarray.single_stream,
init_dev.devmap[dev] = ctx sched=config.gpuarray.sched)
context.dev = dev
init_dev.devmap[dev] = context
reg_context(name, context)
if dev.startswith('cuda'):
avail = dnn.dnn_available(name)
if avail:
context.cudnn_handle = dnn._make_handle(context)
if config.print_active_device:
if avail:
print("Using cuDNN version %d on context %s" % (dnn.version(), name),
file=sys.stderr)
else:
print("Can not use cuDNN on context %s: %s" % (name, dnn.dnn_available.msg),
file=sys.stderr)
if config.gpuarray.preallocate < 0: if config.gpuarray.preallocate < 0:
print("Disabling allocation cache on %s" % (dev,)) print("Disabling allocation cache on %s" % (dev,))
elif config.gpuarray.preallocate > 0: elif config.gpuarray.preallocate > 0:
MB = (1024 * 1024) MB = (1024 * 1024)
if config.gpuarray.preallocate <= 1: if config.gpuarray.preallocate <= 1:
gmem = min(config.gpuarray.preallocate, 0.95) * ctx.total_gmem gmem = min(config.gpuarray.preallocate, 0.95) * context.total_gmem
else: else:
gmem = config.gpuarray.preallocate * MB gmem = config.gpuarray.preallocate * MB
if gmem > context.free_gmem - 50 * MB:
print(
"WARNING: Preallocating too much memory can prevent cudnn and cublas from working properly")
# This will allocate and immediatly free an object of size gmem # This will allocate and immediatly free an object of size gmem
# which will reserve that amount of memory on the GPU. # which will reserve that amount of memory on the GPU.
pygpu.empty((gmem,), dtype='int8', context=ctx) pygpu.empty((gmem,), dtype='int8', context=context)
if config.print_active_device: if config.print_active_device:
print("Preallocating %d/%d Mb (%f) on %s" % print("Preallocating %d/%d Mb (%f) on %s" %
(gmem//MB, ctx.total_gmem//MB, gmem/ctx.total_gmem, dev), (gmem//MB, context.total_gmem//MB,
gmem/context.total_gmem, dev),
file=sys.stderr) file=sys.stderr)
context = init_dev.devmap[dev] else:
context = init_dev.devmap[dev]
# This will map the context name to the real context object. # This will map the context name to the real context object.
reg_context(name, context)
if config.print_active_device: if config.print_active_device:
try: try:
pcibusid = context.pcibusid pcibusid = '(' + context.pcibusid + ')'
except pygpu.gpuarray.UnsupportedException: except pygpu.gpuarray.UnsupportedException:
pcibusid = '(unsupported for device %s)' % dev pcibusid = ''
except Exception:
warnings.warn('Unable to get PCI Bus ID. Please consider updating libgpuarray and pygpu.')
pcibusid = 'unknown'
print("Mapped name %s to device %s: %s" % print("Mapped name %s to device %s: %s %s" %
(name, dev, context.devname), (name, dev, context.devname, pcibusid),
file=sys.stderr) file=sys.stderr)
print("PCI Bus ID:", pcibusid, file=sys.stderr)
pygpu_activated = True pygpu_activated = True
ctx_props = _get_props(name)
ctx_props['dev'] = dev
if dev.startswith('cuda'):
if 'cudnn_version' not in ctx_props:
try:
ctx_props['cudnn_version'] = dnn.version()
# 5200 should not print warning with cudnn 5.1 final.
if ctx_props['cudnn_version'] >= 5200:
warnings.warn("Your cuDNN version is more recent than "
"Theano. If you encounter problems, try "
"updating Theano or downgrading cuDNN to "
"version 5.1.")
if config.print_active_device:
print("Using cuDNN version %d on context %s" %
(ctx_props['cudnn_version'], name), file=sys.stderr)
ctx_props['cudnn_handle'] = dnn._make_handle(context)
except Exception:
pass
# This maps things like 'cuda0' to the context object on that device. # This maps things like 'cuda0' to the context object on that device.
init_dev.devmap = {} init_dev.devmap = {}
...@@ -119,7 +122,8 @@ if pygpu: ...@@ -119,7 +122,8 @@ if pygpu:
elif (config.init_gpu_device.startswith('cuda') or elif (config.init_gpu_device.startswith('cuda') or
config.init_gpu_device.startswith('opencl')): config.init_gpu_device.startswith('opencl')):
if config.device != 'cpu': if config.device != 'cpu':
raise ValueError('you must set device=cpu to use init_gpu_device.') raise ValueError(
'you must set device=cpu to use init_gpu_device.')
if config.contexts != '': if config.contexts != '':
print("Using contexts will make init_gpu_device act like device and move all computations by default, which might not be what you want.") print("Using contexts will make init_gpu_device act like device and move all computations by default, which might not be what you want.")
init_dev(config.init_gpu_device) init_dev(config.init_gpu_device)
...@@ -147,4 +151,5 @@ else: ...@@ -147,4 +151,5 @@ else:
config.device.startswith('opencl') or config.device.startswith('opencl') or
config.device.startswith('cuda') or config.device.startswith('cuda') or
config.contexts != ''): config.contexts != ''):
error("pygpu was configured but could not be imported or is too old (version 0.6 or higher required)", exc_info=True) error("pygpu was configured but could not be imported or is too old (version 0.6 or higher required)",
exc_info=True)
差异被折叠。
...@@ -115,11 +115,7 @@ c_set_filter(PyGpuArrayObject *var, cudnnFilterDescriptor_t desc) { ...@@ -115,11 +115,7 @@ c_set_filter(PyGpuArrayObject *var, cudnnFilterDescriptor_t desc) {
if (nd < 3) if (nd < 3)
nd = 3; nd = 3;
#if CUDNN_VERSION >= 5000
err = cudnnSetFilterNdDescriptor(desc, dt, CUDNN_TENSOR_NCHW, nd, dims); err = cudnnSetFilterNdDescriptor(desc, dt, CUDNN_TENSOR_NCHW, nd, dims);
#else
err = cudnnSetFilterNdDescriptor(desc, dt, nd, dims);
#endif
if (err != CUDNN_STATUS_SUCCESS) { if (err != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError, PyErr_Format(PyExc_RuntimeError,
......
...@@ -98,12 +98,37 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns, ...@@ -98,12 +98,37 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns,
#endif #endif
if (!reuse_algo) { if (!reuse_algo) {
size_t free;
int err2 = gpucontext_property(c->ctx, GA_CTX_PROP_LARGEST_MEMBLOCK, &free);
if (err2 != GA_NO_ERROR) {
PyErr_Format(PyExc_RuntimeError, "Error when trying to find the "
"memory information on the GPU");
cuda_exit(c->ctx);
return 1;
}
// Guess 4Mb if the info is not available
if (free == 0) free = 4 * 1024 * 1024;
#ifdef CHOOSE_TIME #ifdef CHOOSE_TIME
int count; int count;
cudnnConvolutionFwdAlgoPerf_t choice; cudnnConvolutionFwdAlgoPerf_t choice;
err = cudnnFindConvolutionForwardAlgorithm( gpudata *tmpmem;
_handle, APPLY_SPECIFIC(input), APPLY_SPECIFIC(kerns),
desc, APPLY_SPECIFIC(output), 1, &count, &choice); tmpmem = gpudata_alloc(c->ctx, free, NULL, 0, NULL);
if (tmpmem == NULL) {
PyErr_SetString(PyExc_MemoryError, "Could not allocate working GPU memory");
return -1;
}
// We don't sync the buffer as we don't care about the values.
err = cudnnFindConvolutionForwardAlgorithmEx(
_handle, APPLY_SPECIFIC(input), PyGpuArray_DEV_DATA(input),
APPLY_SPECIFIC(kerns), PyGpuArray_DEV_DATA(kerns),
desc, APPLY_SPECIFIC(output), PyGpuArray_DEV_DATA(*output),
1, &count, &choice, *(void **)tmpmem,
free);
gpudata_release(tmpmem);
if (err != CUDNN_STATUS_SUCCESS) { if (err != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError, PyErr_Format(PyExc_RuntimeError,
...@@ -114,16 +139,6 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns, ...@@ -114,16 +139,6 @@ APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns,
} }
algo = choice.algo; algo = choice.algo;
#else #else
size_t free;
int err2 = gpucontext_property(c->ctx, GA_CTX_PROP_FREE_GMEM, &free);
if (err2 != GA_NO_ERROR) {
PyErr_Format(PyExc_RuntimeError, "Error when trying to find the "
"memory information on the GPU");
cuda_exit(c->ctx);
return 1;
}
err = cudnnGetConvolutionForwardAlgorithm( err = cudnnGetConvolutionForwardAlgorithm(
_handle, APPLY_SPECIFIC(input), APPLY_SPECIFIC(kerns), _handle, APPLY_SPECIFIC(input), APPLY_SPECIFIC(kerns),
desc, APPLY_SPECIFIC(output), desc, APPLY_SPECIFIC(output),
......
...@@ -140,13 +140,34 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output, ...@@ -140,13 +140,34 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output,
#endif #endif
if (!reuse_algo) { if (!reuse_algo) {
size_t free;
int err2 = gpucontext_property(c->ctx, GA_CTX_PROP_LARGEST_MEMBLOCK, &free);
if (err2 != GA_NO_ERROR) {
PyErr_Format(PyExc_RuntimeError, "Error when trying to find the "
"memory information on the GPU");
cuda_exit(c->ctx);
return 1;
}
// Guess 4Mb if the info is not available
if (free == 0) free = 4 * 1024 * 1024;
#ifdef CHOOSE_TIME #ifdef CHOOSE_TIME
int count; int count;
cudnnConvolutionBwdDataAlgoPerf_t choice; cudnnConvolutionBwdDataAlgoPerf_t choice;
gpudata *tmpmem;
err = cudnnFindConvolutionBackwardDataAlgorithm( tmpmem = gpudata_alloc(c->ctx, free, NULL, 0, NULL);
if (tmpmem == NULL) {
PyErr_SetString(PyExc_MemoryError, "Could not allocate working GPU memory");
return -1;
}
err = cudnnFindConvolutionBackwardDataAlgorithmEx(
_handle, APPLY_SPECIFIC(kerns), APPLY_SPECIFIC(output), desc, _handle, APPLY_SPECIFIC(kerns), APPLY_SPECIFIC(output), desc,
APPLY_SPECIFIC(input), 1, &count, &choice); APPLY_SPECIFIC(input), 1, &count, &choice, *(void **)tmpmem, free);
gpudata_release(tmpmem);
if (err != CUDNN_STATUS_SUCCESS) { if (err != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError, "error selecting convolution algo: %s", PyErr_Format(PyExc_RuntimeError, "error selecting convolution algo: %s",
...@@ -157,16 +178,6 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output, ...@@ -157,16 +178,6 @@ APPLY_SPECIFIC(conv_gi)(PyGpuArrayObject *kerns, PyGpuArrayObject *output,
algo = choice.algo; algo = choice.algo;
#else #else
size_t free;
int err2 = gpucontext_property(c->ctx, GA_CTX_PROP_FREE_GMEM, &free);
if (err2 != GA_NO_ERROR) {
PyErr_Format(PyExc_RuntimeError, "Error when trying to find the "
"memory information on the GPU");
cuda_exit(c->ctx);
return 1;
}
err = cudnnGetConvolutionBackwardDataAlgorithm( err = cudnnGetConvolutionBackwardDataAlgorithm(
_handle, APPLY_SPECIFIC(kerns), APPLY_SPECIFIC(output), _handle, APPLY_SPECIFIC(kerns), APPLY_SPECIFIC(output),
desc, APPLY_SPECIFIC(input), desc, APPLY_SPECIFIC(input),
......
...@@ -140,13 +140,34 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output, ...@@ -140,13 +140,34 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output,
#endif #endif
if (!reuse_algo) { if (!reuse_algo) {
size_t free;
int err2 = gpucontext_property(c->ctx, GA_CTX_PROP_LARGEST_MEMBLOCK, &free);
if (err2 != GA_NO_ERROR) {
PyErr_Format(PyExc_RuntimeError, "Error when trying to find the "
"memory information on the GPU");
cuda_exit(c->ctx);
return 1;
}
// Guess 4Mb if the info is not available
if (free == 0) free = 4 * 1024 * 1024;
#ifdef CHOOSE_TIME #ifdef CHOOSE_TIME
int count; int count;
cudnnConvolutionBwdFilterAlgoPerf_t choice; cudnnConvolutionBwdFilterAlgoPerf_t choice;
gpudata *tmpmem;
err = cudnnFindConvolutionBackwardFilterAlgorithm( tmpmem = gpudata_alloc(c->ctx, free, NULL, 0, NULL);
if (tmpmem == NULL) {
PyErr_SetString(PyExc_MemoryError, "Could not allocate working GPU memory");
return -1;
}
err = cudnnFindConvolutionBackwardFilterAlgorithmEx(
_handle, APPLY_SPECIFIC(input), APPLY_SPECIFIC(output), desc, _handle, APPLY_SPECIFIC(input), APPLY_SPECIFIC(output), desc,
APPLY_SPECIFIC(kerns), 1, &count, &choice); APPLY_SPECIFIC(kerns), 1, &count, &choice, *(void **)tmpmem, free);
gpudata_release(tmpmem);
if (err != CUDNN_STATUS_SUCCESS) { if (err != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError, PyErr_Format(PyExc_RuntimeError,
...@@ -158,16 +179,6 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output, ...@@ -158,16 +179,6 @@ APPLY_SPECIFIC(conv_gw)(PyGpuArrayObject *input, PyGpuArrayObject *output,
algo = choice.algo; algo = choice.algo;
#else #else
size_t free;
int err2 = gpucontext_property(c->ctx, GA_CTX_PROP_FREE_GMEM, &free);
if (err2 != GA_NO_ERROR) {
PyErr_Format(PyExc_RuntimeError, "Error when trying to find the "
"memory information on the GPU");
cuda_exit(c->ctx);
return 1;
}
err = cudnnGetConvolutionBackwardFilterAlgorithm( err = cudnnGetConvolutionBackwardFilterAlgorithm(
_handle, APPLY_SPECIFIC(input), APPLY_SPECIFIC(output), _handle, APPLY_SPECIFIC(input), APPLY_SPECIFIC(output),
desc, APPLY_SPECIFIC(kerns), desc, APPLY_SPECIFIC(kerns),
......
...@@ -71,11 +71,7 @@ int APPLY_SPECIFIC(dnn_pool)(PyGpuArrayObject *img, ...@@ -71,11 +71,7 @@ int APPLY_SPECIFIC(dnn_pool)(PyGpuArrayObject *img,
s[i] = *((npy_intp*)PyArray_GETPTR1(stride, i)); s[i] = *((npy_intp*)PyArray_GETPTR1(stride, i));
} }
#if CUDNN_VERSION >= 5000
err = cudnnSetPoolingNdDescriptor(APPLY_SPECIFIC(pool), MODE_FLAG, CUDNN_PROPAGATE_NAN, ndims, w, p, s); err = cudnnSetPoolingNdDescriptor(APPLY_SPECIFIC(pool), MODE_FLAG, CUDNN_PROPAGATE_NAN, ndims, w, p, s);
#else
err = cudnnSetPoolingNdDescriptor(APPLY_SPECIFIC(pool), MODE_FLAG, ndims, w, p, s);
#endif
if (err != CUDNN_STATUS_SUCCESS) { if (err != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError, "could not set op descriptor %s", cudnnGetErrorString(err)); PyErr_Format(PyExc_RuntimeError, "could not set op descriptor %s", cudnnGetErrorString(err));
......
...@@ -111,11 +111,7 @@ int APPLY_SPECIFIC(dnn_pool_grad)(PyGpuArrayObject *inp, ...@@ -111,11 +111,7 @@ int APPLY_SPECIFIC(dnn_pool_grad)(PyGpuArrayObject *inp,
s[i] = *((npy_intp*)PyArray_GETPTR1(stride, i)); s[i] = *((npy_intp*)PyArray_GETPTR1(stride, i));
} }
#if CUDNN_VERSION >= 5000
err = cudnnSetPoolingNdDescriptor(APPLY_SPECIFIC(pool), MODE_FLAG, CUDNN_PROPAGATE_NAN, ndims, w, p, s); err = cudnnSetPoolingNdDescriptor(APPLY_SPECIFIC(pool), MODE_FLAG, CUDNN_PROPAGATE_NAN, ndims, w, p, s);
#else
err = cudnnSetPoolingNdDescriptor(APPLY_SPECIFIC(pool), MODE_FLAG, ndims, w, p, s);
#endif
if (err != CUDNN_STATUS_SUCCESS) { if (err != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError, "could not set op descriptor %s", cudnnGetErrorString(err)); PyErr_Format(PyExc_RuntimeError, "could not set op descriptor %s", cudnnGetErrorString(err));
......
...@@ -604,9 +604,6 @@ class TestDnnInferShapes(utt.InferShapeTester): ...@@ -604,9 +604,6 @@ class TestDnnInferShapes(utt.InferShapeTester):
[conv_modes[0]])), [conv_modes[0]])),
testcase_func_name=utt.custom_name_func) testcase_func_name=utt.custom_name_func)
def test_conv(self, algo, border_mode, conv_mode): def test_conv(self, algo, border_mode, conv_mode):
if algo == 'winograd' and dnn.version(raises=False) < 5000:
raise SkipTest(dnn.dnn_available.msg)
self._test_conv(T.tensor4('img'), self._test_conv(T.tensor4('img'),
T.tensor4('kerns'), T.tensor4('kerns'),
T.tensor4('out'), T.tensor4('out'),
...@@ -1361,8 +1358,6 @@ class test_SoftMax(test_nnet.test_SoftMax): ...@@ -1361,8 +1358,6 @@ class test_SoftMax(test_nnet.test_SoftMax):
def test_dnn_batchnorm_train(): def test_dnn_batchnorm_train():
if not dnn.dnn_available(test_ctx_name): if not dnn.dnn_available(test_ctx_name):
raise SkipTest(dnn.dnn_available.msg) raise SkipTest(dnn.dnn_available.msg)
if dnn.version(raises=False) < 5000:
raise SkipTest("batch normalization requires cudnn v5+")
utt.seed_rng() utt.seed_rng()
for mode in ('per-activation', 'spatial'): for mode in ('per-activation', 'spatial'):
...@@ -1416,8 +1411,6 @@ def test_dnn_batchnorm_train(): ...@@ -1416,8 +1411,6 @@ def test_dnn_batchnorm_train():
def test_batchnorm_inference(): def test_batchnorm_inference():
if not dnn.dnn_available(test_ctx_name): if not dnn.dnn_available(test_ctx_name):
raise SkipTest(dnn.dnn_available.msg) raise SkipTest(dnn.dnn_available.msg)
if dnn.version(raises=False) < 5000:
raise SkipTest("batch normalization requires cudnn v5+")
utt.seed_rng() utt.seed_rng()
for mode in ('per-activation', 'spatial'): for mode in ('per-activation', 'spatial'):
......
...@@ -68,7 +68,6 @@ def reg_context(name, ctx): ...@@ -68,7 +68,6 @@ def reg_context(name, ctx):
if not isinstance(ctx, gpuarray.GpuContext): if not isinstance(ctx, gpuarray.GpuContext):
raise TypeError("context is not GpuContext") raise TypeError("context is not GpuContext")
_context_reg[name] = ctx _context_reg[name] = ctx
_props_map[ctx] = dict()
def get_context(name): def get_context(name):
...@@ -97,26 +96,6 @@ def list_contexts(): ...@@ -97,26 +96,6 @@ def list_contexts():
""" """
return _context_reg.keys() return _context_reg.keys()
# Mappings of properties to contexts. Please never use this if you
# can avoid it.
# This is basically a way to store "global" variables that depend on
# the context.
_props_map = {}
def _get_props(name):
ctx = get_context(name)
return _props_map[ctx]
def get_prop(name, k):
return _get_props(name)[k]
def set_prop(name, k, v):
_get_props(name)[k] = v
# Private method # Private method
def _name_for_ctx(ctx): def _name_for_ctx(ctx):
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论