提交 0f7d5930 authored 作者: Arnaud Bergeron's avatar Arnaud Bergeron

Get dnn_available() and version() to work.

上级 b49ac076
...@@ -2,7 +2,7 @@ import os ...@@ -2,7 +2,7 @@ import os
import numpy import numpy
import theano import theano
from theano import Apply, tensor, config, Variable from theano import Op, Apply, tensor, config, Variable
from theano.scalar import as_scalar, constant from theano.scalar import as_scalar, constant
from theano.gradient import DisconnectedType, grad_not_implemented from theano.gradient import DisconnectedType, grad_not_implemented
from theano.gof import Optimizer, local_optimizer, COp from theano.gof import Optimizer, local_optimizer, COp
...@@ -13,37 +13,45 @@ from theano.configparser import AddConfigVar, EnumStr ...@@ -13,37 +13,45 @@ from theano.configparser import AddConfigVar, EnumStr
from theano.tensor.nnet import SoftmaxGrad from theano.tensor.nnet import SoftmaxGrad
from theano.tensor.signal.downsample import ( from theano.tensor.signal.downsample import (
DownsampleFactorMax, DownsampleFactorMaxGrad) DownsampleFactorMax, DownsampleFactorMaxGrad)
from theano.sandbox.cuda import GpuOp
from theano.sandbox.cuda.basic_ops import (as_cuda_ndarray_variable, from . import pygpu, init_dev
from .basic_ops import (as_gpuarray_variable,
host_from_gpu, host_from_gpu,
gpu_contiguous, HostFromGpu, gpu_contiguous, HostFromGpu,
gpu_alloc_empty, GpuAllocEmpty) # No GpuAllocEmpty (yet)
from theano.sandbox.cuda.blas import (GpuConv, GpuDownsampleFactorMax, gpu_alloc, GpuAlloc)
GpuDownsampleFactorMaxGrad) from .conv import GpuConv
from theano.sandbox.cuda.nnet import GpuSoftmax
from theano.sandbox.cuda.opt_util import alpha_merge, output_merge
from theano.sandbox.cuda import gpu_seqopt, register_opt
from theano.sandbox.cuda.nvcc_compiler import NVCC_compiler # These don't exist in gpuarray
# GpuDownsampleFactorMax, GpuDownsampleFactorMaxGrad
from .nnet import GpuSoftmax
from .opt import gpu_seqopt, register_opt
from .opt_util import alpha_merge, output_merge
from .comp import NVCC_compiler
def dnn_available(): def dnn_available():
if dnn_available.avail is None: if dnn_available.avail is not None:
if not theano.sandbox.cuda.cuda_available: return dnn_available.avail
dnn_available.msg = "CUDA not available" if pygpu is None:
dnn_available.msg = "PyGPU not available"
dnn_available.avail = False
return False
if not init_dev.device.startswith('cuda'):
dnn_available.msg = "Not on a CUDA device"
dnn_available.avail = False dnn_available.avail = False
return False return False
dev = theano.sandbox.cuda.active_device_number() # This is a hack because bin_id is in the from of
if theano.sandbox.cuda.device_properties(dev)['major'] < 3: # "sm_<major><minor>" for cuda devices.
if pygpu.get_default_context().bin_id < 'sm_30':
dnn_available.msg = "Device not supported by cuDNN" dnn_available.msg = "Device not supported by cuDNN"
dnn_available.avail = False dnn_available.avail = False
else:
preambule = """ preambule = """
#include <stdio.h> #include <stdio.h>
#include <cuda.h> #include <cuda.h>
#include <cudnn.h> #include <cudnn.h>
#include <cudnn_helper.h> #include <cudnn_helper.h>
""" """
body = """ body = """
cudnnHandle_t _handle = NULL; cudnnHandle_t _handle = NULL;
...@@ -87,8 +95,6 @@ if ((err = cudnnCreate(&_handle)) != CUDNN_STATUS_SUCCESS) { ...@@ -87,8 +95,6 @@ if ((err = cudnnCreate(&_handle)) != CUDNN_STATUS_SUCCESS) {
" Update to CuDNN v2 final version.") " Update to CuDNN v2 final version.")
raise RuntimeError(dnn_available.msg) raise RuntimeError(dnn_available.msg)
return dnn_available.avail
dnn_available.avail = None dnn_available.avail = None
dnn_available.msg = None dnn_available.msg = None
...@@ -124,11 +130,10 @@ if (%(err)s != CUDNN_STATUS_SUCCESS) { ...@@ -124,11 +130,10 @@ if (%(err)s != CUDNN_STATUS_SUCCESS) {
%(fail)s %(fail)s
} }
} }
""" % dict(var=var, err=err, desc=desc, fail=fail) """ % dict(var=var, err=err, desc=desc, fail=fail)
class DnnBase(GpuOp, COp): class DnnBase(COp):
""" """
Creates a handle for cudnn and pulls in the cudnn libraries and headers. Creates a handle for cudnn and pulls in the cudnn libraries and headers.
""" """
...@@ -140,16 +145,18 @@ class DnnBase(GpuOp, COp): ...@@ -140,16 +145,18 @@ class DnnBase(GpuOp, COp):
COp.__init__(self, "dnn_base.c") COp.__init__(self, "dnn_base.c")
def c_headers(self): def c_headers(self):
return ['cudnn.h', 'cudnn_helper.h'] return ['cudnn.h', 'cudnn_helper.h',
'gpuarray/types.h', 'gpuarray/array.h',
'gpuarray_api.h']
def c_header_dirs(self): def c_header_dirs(self):
return [os.path.dirname(__file__)] return [os.path.dirname(__file__), pygpu.get_include()]
def c_libraries(self): def c_libraries(self):
return ['cudnn'] return ['cudnn', 'gpuarray']
class DnnVersion(GpuOp): class DnnVersion(Op):
def c_compiler(self): def c_compiler(self):
return NVCC_compiler return NVCC_compiler
...@@ -210,7 +217,7 @@ def version(): ...@@ -210,7 +217,7 @@ def version():
version.v = None version.v = None
class GpuDnnConvDesc(GpuOp): class GpuDnnConvDesc(Op):
"""This Op builds a convolution descriptor for use in the other """This Op builds a convolution descriptor for use in the other
convolution operations. convolution operations.
...@@ -343,9 +350,11 @@ class GpuDnnConvDesc(GpuOp): ...@@ -343,9 +350,11 @@ class GpuDnnConvDesc(GpuOp):
def c_code_cache_version(self): def c_code_cache_version(self):
return (2, version()) return (2, version())
# This is to avoid conflict with the one in cuda/dnn.py
AddConfigVar('dnn.conv.workmem', if not hasattr(config, 'dnn'):
"Default value for the workmem attribute of cudnn convolutions.", AddConfigVar('dnn.conv.workmem',
"Default value for the workmem attribute of cudnn "
"convolutions.",
EnumStr('small', 'none', 'large'), EnumStr('small', 'none', 'large'),
in_c_key=False) in_c_key=False)
...@@ -566,7 +575,7 @@ class GpuDnnConvGradW(DnnBase, COp): ...@@ -566,7 +575,7 @@ class GpuDnnConvGradW(DnnBase, COp):
return [shape[2]] return [shape[2]]
class GpuDnnConvGradI(DnnBase, COp): class GpuDnnConvGradI(DnnBase):
""" """
The convolution gradient with respect to the inputs. The convolution gradient with respect to the inputs.
...@@ -719,7 +728,7 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1), ...@@ -719,7 +728,7 @@ def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1),
return GpuDnnConv(workmem=workmem)(img, kerns, out, desc) return GpuDnnConv(workmem=workmem)(img, kerns, out, desc)
class GpuDnnPoolDesc(GpuOp): class GpuDnnPoolDesc(Op):
""" """
This Op builds a pooling descriptor for use in the other This Op builds a pooling descriptor for use in the other
pooling operations. pooling operations.
...@@ -1488,7 +1497,7 @@ err%(name)s = cudnnSoftmaxBackward( ...@@ -1488,7 +1497,7 @@ err%(name)s = cudnnSoftmaxBackward(
# Intentation for history # Intentation for history
if True: if False:
# @register_opt('cudnn') # this optimizer is registered in opt.py instead. # @register_opt('cudnn') # this optimizer is registered in opt.py instead.
@local_optimizer([GpuConv]) @local_optimizer([GpuConv])
def local_conv_dnn(node): def local_conv_dnn(node):
......
...@@ -2,60 +2,81 @@ ...@@ -2,60 +2,81 @@
static cudnnHandle_t _handle = NULL; static cudnnHandle_t _handle = NULL;
static int static int
c_set_tensor4d(CudaNdarray *var, cudnnTensorDescriptor_t desc) { c_set_tensor4d(PyGpuArrayObject *var, cudnnTensorDescriptor_t desc) {
cudnnDataType_t dt;
switch (var->ga.typecode) {
case GA_FLOAT:
dt = CUDNN_DATA_FLOAT;
break;
case GA_DOUBLE:
dt = CUDNN_DATA_DOUBLE;
break;
case GA_HALF:
dt = CUDNN_DATA_HALF;
break;
default:
PyErr_SetString(PyExc_TypeError, "Non-float datatype in c_set_tensor4d");
return -1;
}
cudnnStatus_t err = cudnnSetTensor4dDescriptorEx( cudnnStatus_t err = cudnnSetTensor4dDescriptorEx(
desc, CUDNN_DATA_FLOAT, desc, dt,
CudaNdarray_HOST_DIMS(var)[0], PyGpuArray_DIM(var, 0), PyGpuArray_DIM(var, 1),
CudaNdarray_HOST_DIMS(var)[1], PyGpuArray_DIM(var, 2), PyGpuArray_DIM(var, 3),
CudaNdarray_HOST_DIMS(var)[2], PyGpuArray_STRIDE(var, 0), PyGpuArray_STRIDE(var, 1),
CudaNdarray_HOST_DIMS(var)[3], PyGpuArray_STRIDE(var, 2), PyGpuArray_STRIDE(var, 3));
CudaNdarray_HOST_STRIDES(var)[0]?CudaNdarray_HOST_STRIDES(var)[0]:CudaNdarray_HOST_DIMS(var)[2]*CudaNdarray_HOST_DIMS(var)[3]*CudaNdarray_HOST_DIMS(var)[1],
CudaNdarray_HOST_STRIDES(var)[1]?CudaNdarray_HOST_STRIDES(var)[1]:CudaNdarray_HOST_DIMS(var)[2]*CudaNdarray_HOST_DIMS(var)[3],
CudaNdarray_HOST_STRIDES(var)[2]?CudaNdarray_HOST_STRIDES(var)[2]:CudaNdarray_HOST_DIMS(var)[3],
CudaNdarray_HOST_STRIDES(var)[3]?CudaNdarray_HOST_STRIDES(var)[3]:1
);
if (err != CUDNN_STATUS_SUCCESS) { if (err != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError, PyErr_Format(PyExc_RuntimeError,
"Could not set tensor4d descriptor: %s" "Could not set tensor4d descriptor: %s"
"shapes=%d %d %d %d strides=%d %d %d %d", "shapes=%d %d %d %d strides=%d %d %d %d",
cudnnGetErrorString(err), cudnnGetErrorString(err),
CudaNdarray_HOST_DIMS(var)[0], PyGpuArray_DIMS(var)[0],
CudaNdarray_HOST_DIMS(var)[1], PyGpuArray_DIMS(var)[1],
CudaNdarray_HOST_DIMS(var)[2], PyGpuArray_DIMS(var)[2],
CudaNdarray_HOST_DIMS(var)[3], PyGpuArray_DIMS(var)[3],
CudaNdarray_HOST_STRIDES(var)[0]?CudaNdarray_HOST_STRIDES(var)[0]:CudaNdarray_HOST_DIMS(var)[2]*CudaNdarray_HOST_DIMS(var)[3]*CudaNdarray_HOST_DIMS(var)[1], PyGpuArray_STRIDES(var)[0],
CudaNdarray_HOST_STRIDES(var)[1]?CudaNdarray_HOST_STRIDES(var)[1]:CudaNdarray_HOST_DIMS(var)[2]*CudaNdarray_HOST_DIMS(var)[3], PyGpuArray_STRIDES(var)[1],
CudaNdarray_HOST_STRIDES(var)[2]?CudaNdarray_HOST_STRIDES(var)[2]:CudaNdarray_HOST_DIMS(var)[3], PyGpuArray_STRIDES(var)[2],
CudaNdarray_HOST_STRIDES(var)[3]?CudaNdarray_HOST_STRIDES(var)[3]:1 PyGpuArray_STRIDES(var)[3]);
);
return -1; return -1;
} }
return 0; return 0;
} }
static int static int
c_set_filter(CudaNdarray *var, cudnnFilterDescriptor_t desc) { c_set_filter(PyGpuArrayObject *var, cudnnFilterDescriptor_t desc) {
if (!CudaNdarray_is_c_contiguous(var)) { cudnnDataType_t dt;
if (!GpuArray_IS_C_CONTIGUOUS(&var->ga))
PyErr_SetString(PyExc_ValueError, PyErr_SetString(PyExc_ValueError,
"Only contiguous filters (kernels) are supported."); "Only contiguous filters (kernels) are supported.");
return -1; return -1;
} }
switch (var->ga.typecode) {
case GA_FLOAT:
dt = CUDNN_DATA_FLOAT;
break;
case GA_DOUBLE:
dt = CUDNN_DATA_DOUBLE;
break;
case GA_HALF:
dt = CUDNN_DATA_HALF;
break;
default:
PyErr_SetString(PyExc_TypeError, "Non-float datatype in c_set_filter");
return -1;
}
cudnnStatus_t err = cudnnSetFilter4dDescriptor( cudnnStatus_t err = cudnnSetFilter4dDescriptor(
desc, CUDNN_DATA_FLOAT, desc, dt,
CudaNdarray_HOST_DIMS(var)[0], PyGpuArray_DIMS(var)[0], PyGpuArray_DIMS(var)[1],
CudaNdarray_HOST_DIMS(var)[1], PyGpuArray_DIMS(var)[2], PyGpuArray_DIMS(var)[3]);
CudaNdarray_HOST_DIMS(var)[2],
CudaNdarray_HOST_DIMS(var)[3]
);
if (err != CUDNN_STATUS_SUCCESS) { if (err != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError, PyErr_Format(PyExc_RuntimeError,
"Could not set filter descriptor: %s." "Could not set filter descriptor: %s."
" dims= %d %d %d %d", " dims= %d %d %d %d",
cudnnGetErrorString(err), cudnnGetErrorString(err),
CudaNdarray_HOST_DIMS(var)[0], PyGpuArray_DIMS(var)[0],
CudaNdarray_HOST_DIMS(var)[1], PyGpuArray_DIMS(var)[1],
CudaNdarray_HOST_DIMS(var)[2], PyGpuArray_DIMS(var)[2],
CudaNdarray_HOST_DIMS(var)[3]); PyGpuArray_DIMS(var)[3]);
return -1; return -1;
} }
return 0; return 0;
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论