提交 822c06d4 作者: Nicolas Ballas 提交者: --global

fix guess/time workmem bug

上级 7f78fce1
...@@ -713,11 +713,11 @@ class GpuDnnConv3d(GpuDnnConv): ...@@ -713,11 +713,11 @@ class GpuDnnConv3d(GpuDnnConv):
def __init__(self, workmem=None, inplace=False): def __init__(self, workmem=None, inplace=False):
""" """
:param workmem: either 'none', 'small', 'large', 'fft', 'time' or :param workmem: either 'none', 'time' or 'guess'.
'guess'. Default is the value of :attr:`config.dnn.conv.workmem`. Default is the value of :attr:`config.dnn.conv.workmem`.
""" """
### Only workmem = 'none' work with cudnn conv 3d super(GpuDnnConv3d, self).__init__(workmem='guess', inplace=inplace)
super(GpuDnnConv3d, self).__init__(workmem='none', inplace=inplace) assert self.workmem in ['none' 'time','guess']
def make_node(self, img, kern, output, desc, alpha=None, beta=None, nb_dim=None): def make_node(self, img, kern, output, desc, alpha=None, beta=None, nb_dim=None):
......
...@@ -36,17 +36,17 @@ APPLY_SPECIFIC(conv_fwd)(CudaNdarray *input, CudaNdarray *kerns, ...@@ -36,17 +36,17 @@ APPLY_SPECIFIC(conv_fwd)(CudaNdarray *input, CudaNdarray *kerns,
void *workspace; void *workspace;
cudnnConvolutionFwdAlgo_t chosen_algo; cudnnConvolutionFwdAlgo_t chosen_algo;
if (CHOOSE_ALGO) if (CHOOSE_ALGO)
{ {
// Check if the input and the kernels have the same shape as they have // Check if the input and the kernels have the same shape as they have
// last time the apply node was executed // last time the apply node was executed
bool same_shapes = true; bool same_shapes = true;
for (int i = 0; (i < nb_dim) && same_shapes; i++) for (int i = 0; (i < nb_dim) && same_shapes; i++)
{ {
same_shapes &= (CudaNdarray_HOST_DIMS(input)[i] != same_shapes &= (CudaNdarray_HOST_DIMS(input)[i] ==
APPLY_SPECIFIC(previous_input_shape)[i]); APPLY_SPECIFIC(previous_input_shape)[i]);
same_shapes &= (CudaNdarray_HOST_DIMS(kerns)[i] != same_shapes &= (CudaNdarray_HOST_DIMS(kerns)[i] ==
APPLY_SPECIFIC(previous_kerns_shape)[i]); APPLY_SPECIFIC(previous_kerns_shape)[i]);
} }
...@@ -67,6 +67,7 @@ APPLY_SPECIFIC(conv_fwd)(CudaNdarray *input, CudaNdarray *kerns, ...@@ -67,6 +67,7 @@ APPLY_SPECIFIC(conv_fwd)(CudaNdarray *input, CudaNdarray *kerns,
return 1; return 1;
} }
// Obtain a convolution algorithm appropriate for the input and kernel // Obtain a convolution algorithm appropriate for the input and kernel
// shapes. Either by choosing one according to heuristics or by making // shapes. Either by choosing one according to heuristics or by making
// CuDNN time every implementation and choose the best one. // CuDNN time every implementation and choose the best one.
...@@ -131,7 +132,6 @@ APPLY_SPECIFIC(conv_fwd)(CudaNdarray *input, CudaNdarray *kerns, ...@@ -131,7 +132,6 @@ APPLY_SPECIFIC(conv_fwd)(CudaNdarray *input, CudaNdarray *kerns,
// be used here // be used here
chosen_algo = APPLY_SPECIFIC(previous_algo); chosen_algo = APPLY_SPECIFIC(previous_algo);
} }
} }
else else
{ {
...@@ -179,7 +179,6 @@ APPLY_SPECIFIC(conv_fwd)(CudaNdarray *input, CudaNdarray *kerns, ...@@ -179,7 +179,6 @@ APPLY_SPECIFIC(conv_fwd)(CudaNdarray *input, CudaNdarray *kerns,
} }
} }
err = cudnnGetConvolutionForwardWorkspaceSize(_handle, err = cudnnGetConvolutionForwardWorkspaceSize(_handle,
APPLY_SPECIFIC(input), APPLY_SPECIFIC(input),
APPLY_SPECIFIC(kerns), APPLY_SPECIFIC(kerns),
...@@ -188,7 +187,6 @@ APPLY_SPECIFIC(conv_fwd)(CudaNdarray *input, CudaNdarray *kerns, ...@@ -188,7 +187,6 @@ APPLY_SPECIFIC(conv_fwd)(CudaNdarray *input, CudaNdarray *kerns,
chosen_algo, chosen_algo,
&worksize); &worksize);
if (err != CUDNN_STATUS_SUCCESS) { if (err != CUDNN_STATUS_SUCCESS) {
std::cout << "here" << std::endl;
PyErr_Format(PyExc_RuntimeError, PyErr_Format(PyExc_RuntimeError,
"GpuDnnConv: error getting worksize: %s", "GpuDnnConv: error getting worksize: %s",
cudnnGetErrorString(err)); cudnnGetErrorString(err));
......
...@@ -12,11 +12,6 @@ APPLY_SPECIFIC(conv_gw)(CudaNdarray *input, CudaNdarray *output, ...@@ -12,11 +12,6 @@ APPLY_SPECIFIC(conv_gw)(CudaNdarray *input, CudaNdarray *output,
return 1; return 1;
} }
/* if (c_set_tensor4d(input, APPLY_SPECIFIC(input)) == -1) */
/* return 1; */
/* if (c_set_tensor4d(output, APPLY_SPECIFIC(output)) == -1) */
/* return 1; */
if (c_set_tensorNd(input, nb_dim, APPLY_SPECIFIC(input)) == -1) if (c_set_tensorNd(input, nb_dim, APPLY_SPECIFIC(input)) == -1)
return 1; return 1;
if (c_set_tensorNd(output, nb_dim, APPLY_SPECIFIC(output)) == -1) if (c_set_tensorNd(output, nb_dim, APPLY_SPECIFIC(output)) == -1)
...@@ -33,8 +28,6 @@ APPLY_SPECIFIC(conv_gw)(CudaNdarray *input, CudaNdarray *output, ...@@ -33,8 +28,6 @@ APPLY_SPECIFIC(conv_gw)(CudaNdarray *input, CudaNdarray *output,
return 1; return 1;
#endif #endif
/* if (c_set_filter(*kerns, APPLY_SPECIFIC(kerns)) == -1) */
/* return 1; */
if (c_set_filterNd(*kerns, nb_dim, APPLY_SPECIFIC(kerns)) == -1) if (c_set_filterNd(*kerns, nb_dim, APPLY_SPECIFIC(kerns)) == -1)
return 1; return 1;
...@@ -50,9 +43,9 @@ APPLY_SPECIFIC(conv_gw)(CudaNdarray *input, CudaNdarray *output, ...@@ -50,9 +43,9 @@ APPLY_SPECIFIC(conv_gw)(CudaNdarray *input, CudaNdarray *output,
bool same_shapes = true; bool same_shapes = true;
for (int i = 0; (i < nb_dim) && same_shapes; i++) for (int i = 0; (i < nb_dim) && same_shapes; i++)
{ {
same_shapes &= (CudaNdarray_HOST_DIMS(input)[i] != same_shapes &= (CudaNdarray_HOST_DIMS(input)[i] ==
APPLY_SPECIFIC(previous_input_shape)[i]); APPLY_SPECIFIC(previous_input_shape)[i]);
same_shapes &= (CudaNdarray_HOST_DIMS(output)[i] != same_shapes &= (CudaNdarray_HOST_DIMS(output)[i] ==
APPLY_SPECIFIC(previous_output_shape)[i]); APPLY_SPECIFIC(previous_output_shape)[i]);
} }
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论