Commit 4dea62db, authored by Arnaud Bergeron

Add algorithm selection in terms of workspace size.

上级 7c70b97b
......@@ -335,13 +335,31 @@ class GpuDnnConv(DnnBase, COp):
:param image:
:param kernel:
:param descr: the convolution descriptor
"""
__props__ = ()
__props__ = ('workmem',)
def __init__(self):
def __init__(self, workmem='small'):
    """
    Set up the op with its C source files and a workspace-memory setting.

    :param workmem: either 'none', 'small' or 'large'. Default is 'small'.
    """
    # C sources handed to COp, followed by the name of the C entry point.
    c_sources = ["dnn_base.c", "dnn_conv_base.c", "dnn_fwd.c"]
    COp.__init__(self, c_sources, "APPLY_SPECIFIC(conv_fwd)")
    self.workmem = workmem
    # Only these three settings have an algorithm mapping in get_op_params.
    assert self.workmem in ('none', 'small', 'large')
def __setstate__(self, d):
self.__dict__.update(d)
if not hasattr(self, 'workmem'):
self.workmem = 'small'
def get_op_params(self):
    """
    Translate ``self.workmem`` into the ``CONV_ALGO`` op parameter.

    Each supported setting is mapped to the name of a cuDNN forward
    convolution algorithm constant. The C code shown alongside this method
    uses ``CONV_ALGO`` as the algorithm argument (presumably substituted as
    a macro by COp -- confirm against the COp implementation).

    :return: a one-element list ``[('CONV_ALGO', <algorithm name>)]``.
    :raises ValueError: if ``self.workmem`` is not one of 'none', 'small'
        or 'large'.
    """
    algo_by_workmem = {
        'none': 'CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM',
        'small': 'CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM',
        'large': 'CUDNN_CONVOLUTION_FWD_ALGO_GEMM',
    }
    try:
        alg = algo_by_workmem[self.workmem]
    except (KeyError, TypeError):
        # An unexpected value can still reach this point (asserts disabled
        # with -O, or arbitrary state restored through __setstate__). Fail
        # with a clear message instead of an unbound-local error.
        raise ValueError(
            "workmem must be one of 'none', 'small' or 'large', got %r"
            % (self.workmem,))
    return [('CONV_ALGO', alg)]
def make_node(self, img, kern, desc):
img = as_cuda_ndarray_variable(img)
......
......@@ -33,24 +33,27 @@ APPLY_SPECIFIC(conv_fwd)(CudaNdarray *input, CudaNdarray *kerns,
return 1;
{
cudnnConvolutionFwdAlgo_t algo;
err = cudnnGetConvolutionForwardAlgorithm(
_handle,
APPLY_SPECIFIC(input),
APPLY_SPECIFIC(kerns),
desc,
APPLY_SPECIFIC(output),
CUDNN_CONVOLUTION_FWD_NO_WORKSPACE, // TODO: add op param
0,
&algo);
size_t worksize;
void *workspace;
err = cudnnGetConvolutionForwardWorkspaceSize(_handle,
APPLY_SPECIFIC(input),
APPLY_SPECIFIC(kerns),
desc,
APPLY_SPECIFIC(output),
CONV_ALGO,
&worksize);
if (err != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError,
"GpuDnnConv: Couldn't select convolution algorithm: %s",
cudnnGetErrorString(err));
"GpuDnnConv: error getting worksize: %s",
cudnnGetErrorString(err));
return 1;
}
workspace = get_work_mem(worksize);
if (workspace == NULL && worksize != 0)
return 1;
const float alpha = 1;
const float beta = 0;
......@@ -60,8 +63,8 @@ APPLY_SPECIFIC(conv_fwd)(CudaNdarray *input, CudaNdarray *kerns,
APPLY_SPECIFIC(input), CudaNdarray_DEV_DATA(input),
APPLY_SPECIFIC(kerns), CudaNdarray_DEV_DATA(kerns),
desc,
algo,
NULL, 0,
CONV_ALGO,
workspace, worksize,
(void *)&beta,
APPLY_SPECIFIC(output), CudaNdarray_DEV_DATA(*output));
}
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论