提交 5895d5a2 作者: abergeron

Merge pull request #2357 from nouiz/cudnn_repair_r1

[CRASH] Cudnn repair r1 softmax
...@@ -1029,13 +1029,13 @@ class GpuDnnSoftmaxBase(DnnBase): ...@@ -1029,13 +1029,13 @@ class GpuDnnSoftmaxBase(DnnBase):
def _define_tensor4d_desc(self, name, id): def _define_tensor4d_desc(self, name, id):
return """ return """
cudnnTensor4dDescriptor_t %(name)s_%(id)s; cudnnTensor4dDescriptor_t %(id)s_%(name)s;
""" % dict(name=name, id=id) """ % dict(name=name, id=id)
def _init_tensor4d_desc(self, name, id, fail): def _init_tensor4d_desc(self, name, id, fail):
return """ return """
%(name)s_%(id)s = NULL; %(id)s_%(name)s = NULL;
if ((err%(name)s = cudnnCreateTensor4dDescriptor(&%(name)s_%(id)s)) != CUDNN_STATUS_SUCCESS) { if ((err%(name)s = cudnnCreateTensor4dDescriptor(&%(id)s_%(name)s)) != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_MemoryError, "could not allocate tensor4d descriptor " PyErr_Format(PyExc_MemoryError, "could not allocate tensor4d descriptor "
": %%s", cudnnGetErrorString(err%(name)s)); ": %%s", cudnnGetErrorString(err%(name)s));
%(fail)s %(fail)s
...@@ -1044,14 +1044,14 @@ if ((err%(name)s = cudnnCreateTensor4dDescriptor(&%(name)s_%(id)s)) != CUDNN_STA ...@@ -1044,14 +1044,14 @@ if ((err%(name)s = cudnnCreateTensor4dDescriptor(&%(name)s_%(id)s)) != CUDNN_STA
def _clean_tensor4d_desc(self, name, id): def _clean_tensor4d_desc(self, name, id):
return """ return """
if(%(name)s_%(name)s!= NULL) if(%(id)s_%(name)s!= NULL)
cudnnDestroyTensor4dDescriptor(%(name)s_%(id)s); cudnnDestroyTensor4dDescriptor(%(id)s_%(name)s);
""" % dict(name=name, id=id) """ % dict(name=name, id=id)
def c_support_code_struct(self, node, name): def c_support_code_struct(self, node, name):
result = '' result = ''
for name in self.tensor_4d_descs: for id in self.tensor_4d_descs:
result += self._define_tensor4d_desc(name, name) result += self._define_tensor4d_desc(name, id)
return result return result
def c_init_code_struct(self, node, name, sub): def c_init_code_struct(self, node, name, sub):
...@@ -1059,14 +1059,14 @@ if(%(name)s_%(name)s!= NULL) ...@@ -1059,14 +1059,14 @@ if(%(name)s_%(name)s!= NULL)
cudnnStatus_t err%(name)s; cudnnStatus_t err%(name)s;
""" % dict(name=name) """ % dict(name=name)
for name in self.tensor_4d_descs: for id in self.tensor_4d_descs:
result += self._init_tensor4d_desc(name, name, sub['fail']) result += self._init_tensor4d_desc(name, id, sub['fail'])
return result return result
def c_cleanup_code_struct(self, node, name): def c_cleanup_code_struct(self, node, name):
result = '' result = ''
for name in self.tensor_4d_descs: for id in self.tensor_4d_descs:
result += self._clean_tensor4d_desc(name, name) result += self._clean_tensor4d_desc(name, id)
return result return result
def c_code(self, node, name, inputs, outputs, sub): def c_code(self, node, name, inputs, outputs, sub):
...@@ -1107,28 +1107,15 @@ if (%(mode)d == 1) ...@@ -1107,28 +1107,15 @@ if (%(mode)d == 1)
# Validate the input and build the input variables. # Validate the input and build the input variables.
for input_idx, input_name in enumerate(self.softmax_inputs): for input_idx, input_name in enumerate(self.softmax_inputs):
result += """ result += c_set_tensor4d(ins[input_idx], input_name + "_" + name,
if (!CudaNdarray_is_c_contiguous(%(ins)s)) { "err" + name, sub['fail'])
PyErr_SetString(PyExc_ValueError, "Only contiguous inputs are supported.");
%(fail)s
}
err%(name)s = cudnnSetTensor4dDescriptor( subs = dict(ins=ins[-1], outs=outs, fail=sub['fail'],
%(input_name)s_%(name)s, name=name)
format%(name)s,
CUDNN_DATA_FLOAT, for idx, softmax_input in enumerate(self.softmax_inputs):
CudaNdarray_HOST_DIMS(%(ins)s)[0], subs['name%d' % idx] = softmax_input
CudaNdarray_HOST_DIMS(%(ins)s)[1], subs['ins%d' % idx] = inputs[idx]
CudaNdarray_HOST_DIMS(%(ins)s)[2],
CudaNdarray_HOST_DIMS(%(ins)s)[3]
);
if (err%(name)s != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError, "could not set tensor4d descriptor: %%%%s",
cudnnGetErrorString(err%(name)s));
%(fail)s
}
""" % dict(name=name, input_name=input_name,
ins=ins[input_idx], fail=sub['fail'])
# Build and prepare the output variable. # Build and prepare the output variable.
result += """ result += """
...@@ -1136,34 +1123,15 @@ if (CudaNdarray_prep_output(&%(outs)s, 4, CudaNdarray_HOST_DIMS(%(ins)s)) != 0) ...@@ -1136,34 +1123,15 @@ if (CudaNdarray_prep_output(&%(outs)s, 4, CudaNdarray_HOST_DIMS(%(ins)s)) != 0)
{ {
%(fail)s %(fail)s
} }
""" % subs
err%(name)s = cudnnSetTensor4dDescriptor( result += c_set_tensor4d(outs,
softmax_output_%(name)s, "softmax_output_" + name,
format%(name)s, "err" + name, sub['fail'])
CUDNN_DATA_FLOAT,
CudaNdarray_HOST_DIMS(%(outs)s)[0],
CudaNdarray_HOST_DIMS(%(outs)s)[1],
CudaNdarray_HOST_DIMS(%(outs)s)[2],
CudaNdarray_HOST_DIMS(%(outs)s)[3]
);
if (err%(name)s != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError, "could not set out descriptor: %%%%s",
cudnnGetErrorString(err%(name)s));
%(fail)s
}
"""
# Add on a call to the method that does the actual work. # Add on a call to the method that does the actual work.
result += self.method() result += self.method() % subs
subs = dict(ins=ins[-1], outs=outs, fail=sub['fail'], return result
name=name)
for idx, softmax_input in enumerate(self.softmax_inputs):
subs['name%d' % idx] = softmax_input
subs['ins%d' % idx] = inputs[idx]
return result % subs
def c_code_cache_version(self): def c_code_cache_version(self):
return (0, 6) return (0, 6)
......
Markdown 格式
0%
您即将添加 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论