提交 e200ca29 作者: Frederic

start new cudnn v4 convolution mode

上级 bcb9318e
...@@ -477,8 +477,15 @@ class GpuDnnConv(DnnBase, COp): ...@@ -477,8 +477,15 @@ class GpuDnnConv(DnnBase, COp):
alg = 'CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM' alg = 'CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM'
elif self.algo == 'large': elif self.algo == 'large':
alg = 'CUDNN_CONVOLUTION_FWD_ALGO_GEMM' alg = 'CUDNN_CONVOLUTION_FWD_ALGO_GEMM'
elif self.algo == 'direct':
# need v2
alg = 'CUDNN_CONVOLUTION_FWD_ALGO_DIRECT'
elif self.algo == 'fft': elif self.algo == 'fft':
# need v3
alg = 'CUDNN_CONVOLUTION_FWD_ALGO_FFT' alg = 'CUDNN_CONVOLUTION_FWD_ALGO_FFT'
elif self.algo == 'fft_tiliing':
# need v4
alg = 'CUDNN_CONVOLUTION_FWD_ALGO_FFT_TILING'
elif self.algo in ['guess_once', 'guess_on_shape_change']: elif self.algo in ['guess_once', 'guess_on_shape_change']:
# The convolution implementation should be chosen according # The convolution implementation should be chosen according
# to a heuristic # to a heuristic
...@@ -724,11 +731,15 @@ class GpuDnnConvGradW(DnnBase, COp): ...@@ -724,11 +731,15 @@ class GpuDnnConvGradW(DnnBase, COp):
alg = "0" alg = "0"
else: else:
if self.algo == 'none': if self.algo == 'none':
# non-deterministic
alg = 'CUDNN_CONVOLUTION_BWD_FILTER_ALGO_0' alg = 'CUDNN_CONVOLUTION_BWD_FILTER_ALGO_0'
elif self.algo == 'deterministic': elif self.algo == 'deterministic':
alg = 'CUDNN_CONVOLUTION_BWD_FILTER_ALGO_1' alg = 'CUDNN_CONVOLUTION_BWD_FILTER_ALGO_1'
elif self.algo == 'fft': elif self.algo == 'fft':
alg = 'CUDNN_CONVOLUTION_BWD_FILTER_ALGO_FFT' alg = 'CUDNN_CONVOLUTION_BWD_FILTER_ALGO_FFT'
elif self.algo == 'none2':
# need v3, non-deterministic, small workspace
alg = 'CUDNN_CONVOLUTION_BWD_FILTER_ALGO_3'
elif self.algo in ['guess_once', 'guess_on_shape_change']: elif self.algo in ['guess_once', 'guess_on_shape_change']:
# The convolution implementation should be chosen according # The convolution implementation should be chosen according
# to a heuristic # to a heuristic
...@@ -936,7 +947,11 @@ class GpuDnnConvGradI(DnnBase, COp): ...@@ -936,7 +947,11 @@ class GpuDnnConvGradI(DnnBase, COp):
elif self.algo == 'deterministic': elif self.algo == 'deterministic':
alg = 'CUDNN_CONVOLUTION_BWD_DATA_ALGO_1' alg = 'CUDNN_CONVOLUTION_BWD_DATA_ALGO_1'
elif self.algo == 'fft': elif self.algo == 'fft':
# need v3, big workspace
alg = 'CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT' alg = 'CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT'
elif self.algo == 'fft':
# need v4, big workspace, but less than fft
alg = 'CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT_TILING'
elif self.algo in ['guess_once', 'guess_on_shape_change']: elif self.algo in ['guess_once', 'guess_on_shape_change']:
# The convolution implementation should be chosen according # The convolution implementation should be chosen according
# to a heuristic # to a heuristic
......
Markdown 格式
0%
您已将 0 人添加到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论