提交 e34f6e59 authored 作者: abergeron's avatar abergeron

Merge pull request #2355 from nouiz/cudnn_repair_r1

[CRASH] Repair crash with cudnn r-1 following the removing of struct_id
...@@ -249,42 +249,42 @@ class GpuDnnConvDesc(GpuOp): ...@@ -249,42 +249,42 @@ class GpuDnnConvDesc(GpuOp):
class GpuDnnConvBase(DnnBase): class GpuDnnConvBase(DnnBase):
__props__ = () __props__ = ()
def c_support_code_struct(self, node, struct_id): def c_support_code_struct(self, node, name):
return """ return """
cudnnTensor4dDescriptor_t input%(id)d; cudnnTensor4dDescriptor_t input%(name)s;
cudnnTensor4dDescriptor_t output%(id)d; cudnnTensor4dDescriptor_t output%(name)s;
cudnnFilterDescriptor_t kerns%(id)d; cudnnFilterDescriptor_t kerns%(name)s;
""" % dict(id=struct_id) """ % dict(name=name)
def c_init_code_struct(self, node, struct_id, sub): def c_init_code_struct(self, node, name, sub):
return """ return """
cudnnStatus_t err%(id)d; cudnnStatus_t err%(name)s;
input%(id)d = NULL; input%(name)s = NULL;
output%(id)d = NULL; output%(name)s = NULL;
kerns%(id)d = NULL; kerns%(name)s = NULL;
if ((err%(id)d = cudnnCreateTensor4dDescriptor(&input%(id)d)) != CUDNN_STATUS_SUCCESS) { if ((err%(name)s = cudnnCreateTensor4dDescriptor(&input%(name)s)) != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_MemoryError, "could not allocate tensor4d descriptor " PyErr_Format(PyExc_MemoryError, "could not allocate tensor4d descriptor "
"(inp): %%s", cudnnGetErrorString(err%(id)d)); "(inp): %%s", cudnnGetErrorString(err%(name)s));
%(fail)s %(fail)s
} }
if ((err%(id)d = cudnnCreateTensor4dDescriptor(&output%(id)d)) != CUDNN_STATUS_SUCCESS) { if ((err%(name)s = cudnnCreateTensor4dDescriptor(&output%(name)s)) != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_MemoryError, "could not allocate tensor4d descriptor " PyErr_Format(PyExc_MemoryError, "could not allocate tensor4d descriptor "
"(out): %%s", cudnnGetErrorString(err%(id)d)); "(out): %%s", cudnnGetErrorString(err%(name)s));
%(fail)s %(fail)s
} }
if ((err%(id)d = cudnnCreateFilterDescriptor(&kerns%(id)d)) != CUDNN_STATUS_SUCCESS) { if ((err%(name)s = cudnnCreateFilterDescriptor(&kerns%(name)s)) != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_MemoryError, "could not allocate filter descriptor: %%s", PyErr_Format(PyExc_MemoryError, "could not allocate filter descriptor: %%s",
cudnnGetErrorString(err%(id)d)); cudnnGetErrorString(err%(name)s));
%(fail)s %(fail)s
} }
""" % dict(id=struct_id, fail=sub['fail']) """ % dict(name=name, fail=sub['fail'])
def c_cleanup_code_struct(self, node, struct_id): def c_cleanup_code_struct(self, node, name):
return """ return """
if (input%(id)d != NULL) {cudnnDestroyTensor4dDescriptor(input%(id)d);} if (input%(name)s != NULL) {cudnnDestroyTensor4dDescriptor(input%(name)s);}
if (output%(id)d != NULL) {cudnnDestroyTensor4dDescriptor(output%(id)d);} if (output%(name)s != NULL) {cudnnDestroyTensor4dDescriptor(output%(name)s);}
if (kerns%(id)d != NULL) {cudnnDestroyFilterDescriptor(kerns%(id)d);} if (kerns%(name)s != NULL) {cudnnDestroyFilterDescriptor(kerns%(name)s);}
""" % dict(id=struct_id) """ % dict(name=name)
def c_set_filter(self, var, desc, err, fail): def c_set_filter(self, var, desc, err, fail):
return """ return """
...@@ -320,11 +320,11 @@ if (!CudaNdarray_is_c_contiguous(%s)) { ...@@ -320,11 +320,11 @@ if (!CudaNdarray_is_c_contiguous(%s)) {
sets = [] sets = []
for p, v, d in zip(inputs[:2], self.conv_inputs, self.conv_types[:2]): for p, v, d in zip(inputs[:2], self.conv_inputs, self.conv_types[:2]):
sets.append(getattr(self, 'c_set_'+d)(p, v + str(sub['struct_id']), sets.append(getattr(self, 'c_set_'+d)(p, v + name,
'err' + name, sub['fail'])) 'err' + name, sub['fail']))
set_out = getattr(self, 'c_set_' + self.conv_types[2])( set_out = getattr(self, 'c_set_' + self.conv_types[2])(
out, self.conv_output + str(sub['struct_id']), 'err' + name, out, self.conv_output + name, 'err' + name,
sub['fail']) sub['fail'])
return """ return """
...@@ -377,12 +377,12 @@ if (err%(name)s != CUDNN_STATUS_SUCCESS) { ...@@ -377,12 +377,12 @@ if (err%(name)s != CUDNN_STATUS_SUCCESS) {
cudnnGetErrorString(err%(name)s)); cudnnGetErrorString(err%(name)s));
%(fail)s %(fail)s
} }
""" % dict(out=out, desc=desc, fail=sub['fail'], id=sub['struct_id'], """ % dict(out=out, desc=desc, fail=sub['fail'],
name=name, checks='\n'.join(checks), sets='\n'.join(sets), name=name, checks='\n'.join(checks), sets='\n'.join(sets),
set_out=set_out, input1=inputs[0], input2=inputs[1], set_out=set_out, input1=inputs[0], input2=inputs[1],
input1_desc=self.conv_inputs[0]+str(sub['struct_id']), input1_desc=self.conv_inputs[0]+name,
input2_desc=self.conv_inputs[1]+str(sub['struct_id']), input2_desc=self.conv_inputs[1]+name,
output_desc=self.conv_output+str(sub['struct_id']), output_desc=self.conv_output+name,
method=self.conv_op, path=self.path_flag) method=self.conv_op, path=self.path_flag)
def c_code_cache_version(self): def c_code_cache_version(self):
...@@ -667,7 +667,7 @@ class GpuDnnPoolDesc(GpuOp): ...@@ -667,7 +667,7 @@ class GpuDnnPoolDesc(GpuOp):
err = cudnnSetPoolingDescriptor( err = cudnnSetPoolingDescriptor(
%(desc)s, %(desc)s,
%(mode_flag)s, %(mode_flag)s,
%(wsX)d, %(wsY)d, %(wsX)d, %(wsY)d,
%(stridex)d, %(stridey)d %(stridex)d, %(stridey)d
); );
...@@ -707,43 +707,43 @@ class GpuDnnPool(DnnBase): ...@@ -707,43 +707,43 @@ class GpuDnnPool(DnnBase):
return Apply(self, [img, desc], return Apply(self, [img, desc],
[img.type()]) [img.type()])
def c_support_code_struct(self, node, struct_id): def c_support_code_struct(self, node, name):
return """ return """
cudnnTensor4dDescriptor_t input%(id)d; cudnnTensor4dDescriptor_t input%(name)s;
cudnnTensor4dDescriptor_t output%(id)d; cudnnTensor4dDescriptor_t output%(name)s;
""" % dict(id=struct_id) """ % dict(name=name)
def c_init_code_struct(self, node, struct_id, sub): def c_init_code_struct(self, node, name, sub):
return """ return """
cudnnStatus_t err%(id)d; cudnnStatus_t err%(name)s;
input%(id)d = NULL; input%(name)s = NULL;
output%(id)d = NULL; output%(name)s = NULL;
if ((err%(id)d = cudnnCreateTensor4dDescriptor(&input%(id)d)) != CUDNN_STATUS_SUCCESS) { if ((err%(name)s = cudnnCreateTensor4dDescriptor(&input%(name)s)) != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_MemoryError, "could not allocate tensor4d descriptor " PyErr_Format(PyExc_MemoryError, "could not allocate tensor4d descriptor "
"(inp): %%s", cudnnGetErrorString(err%(id)d)); "(inp): %%s", cudnnGetErrorString(err%(name)s));
%(fail)s %(fail)s
} }
if ((err%(id)d = cudnnCreateTensor4dDescriptor(&output%(id)d)) != CUDNN_STATUS_SUCCESS) { if ((err%(name)s = cudnnCreateTensor4dDescriptor(&output%(name)s)) != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_MemoryError, "could not allocate tensor4d descriptor " PyErr_Format(PyExc_MemoryError, "could not allocate tensor4d descriptor "
"(out): %%s", cudnnGetErrorString(err%(id)d)); "(out): %%s", cudnnGetErrorString(err%(name)s));
%(fail)s %(fail)s
} }
""" % dict(id=struct_id, fail=sub['fail']) """ % dict(name=name, fail=sub['fail'])
def c_cleanup_code_struct(self, node, struct_id): def c_cleanup_code_struct(self, node, name):
return """ return """
if (input%(id)d != NULL) { cudnnDestroyTensor4dDescriptor(input%(id)d); } if (input%(name)s != NULL) { cudnnDestroyTensor4dDescriptor(input%(name)s); }
if (output%(id)d != NULL) { cudnnDestroyTensor4dDescriptor(output%(id)d); } if (output%(name)s != NULL) { cudnnDestroyTensor4dDescriptor(output%(name)s); }
""" % dict(id=struct_id) """ % dict(name=name)
def c_code(self, node, name, inputs, outputs, sub): def c_code(self, node, name, inputs, outputs, sub):
desc = inputs[1] desc = inputs[1]
out, = outputs out, = outputs
set_in = c_set_tensor4d(inputs[0], "input" + str(sub['struct_id']), set_in = c_set_tensor4d(inputs[0], "input" + str(name),
'err' + name, sub['fail']) 'err' + name, sub['fail'])
set_out = c_set_tensor4d(out, "output" + str(sub['struct_id']), set_out = c_set_tensor4d(out, "output" + str(name),
'err' + name, sub['fail']) 'err' + name, sub['fail'])
return """ return """
...@@ -794,11 +794,11 @@ if (err%(name)s != CUDNN_STATUS_SUCCESS) { ...@@ -794,11 +794,11 @@ if (err%(name)s != CUDNN_STATUS_SUCCESS) {
cudnnGetErrorString(err%(name)s)); cudnnGetErrorString(err%(name)s));
%(fail)s %(fail)s
} }
""" % dict(out=out, desc=desc, fail=sub['fail'], id=sub['struct_id'], """ % dict(out=out, desc=desc, fail=sub['fail'],
name=name, set_in=set_in, name=name, set_in=set_in,
set_out=set_out, input=inputs[0], set_out=set_out, input=inputs[0],
input_desc="input"+str(sub['struct_id']), input_desc="input"+name,
output_desc="output"+str(sub['struct_id'])) output_desc="output"+name)
def grad(self, inp, grads): def grad(self, inp, grads):
img, desc = inp img, desc = inp
...@@ -851,54 +851,54 @@ class GpuDnnPoolGrad(DnnBase): ...@@ -851,54 +851,54 @@ class GpuDnnPoolGrad(DnnBase):
return Apply(self, [inp, out, inp_grad, desc], return Apply(self, [inp, out, inp_grad, desc],
[inp.type()]) [inp.type()])
def c_support_code_struct(self, node, struct_id): def c_support_code_struct(self, node, name):
return """ return """
cudnnTensor4dDescriptor_t input%(id)d; cudnnTensor4dDescriptor_t input%(name)s;
cudnnTensor4dDescriptor_t input_grad%(id)d; cudnnTensor4dDescriptor_t input_grad%(name)s;
cudnnTensor4dDescriptor_t output%(id)d; cudnnTensor4dDescriptor_t output%(name)s;
cudnnTensor4dDescriptor_t output_grad%(id)d; cudnnTensor4dDescriptor_t output_grad%(name)s;
""" % dict(id=struct_id) """ % dict(name=name)
def c_init_code_struct(self, node, struct_id, sub): def c_init_code_struct(self, node, name, sub):
return """ return """
cudnnStatus_t err%(id)d; cudnnStatus_t err%(name)s;
input%(id)d = NULL; input%(name)s = NULL;
input_grad%(id)d = NULL; input_grad%(name)s = NULL;
output%(id)d = NULL; output%(name)s = NULL;
output_grad%(id)d = NULL; output_grad%(name)s = NULL;
if ((err%(id)d = cudnnCreateTensor4dDescriptor(&input%(id)d)) != CUDNN_STATUS_SUCCESS) { if ((err%(name)s = cudnnCreateTensor4dDescriptor(&input%(name)s)) != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_MemoryError, PyErr_Format(PyExc_MemoryError,
"GpuDnnPoolGrad: could not allocate tensor4d descriptor " "GpuDnnPoolGrad: could not allocate tensor4d descriptor "
"(input): %%s", cudnnGetErrorString(err%(id)d)); "(input): %%s", cudnnGetErrorString(err%(name)s));
%(fail)s %(fail)s
} }
if ((err%(id)d = cudnnCreateTensor4dDescriptor(&input_grad%(id)d)) != CUDNN_STATUS_SUCCESS) { if ((err%(name)s = cudnnCreateTensor4dDescriptor(&input_grad%(name)s)) != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_MemoryError, PyErr_Format(PyExc_MemoryError,
"GpuDnnPoolGrad: could not allocate tensor4d descriptor " "GpuDnnPoolGrad: could not allocate tensor4d descriptor "
"(input_grad): %%s", cudnnGetErrorString(err%(id)d)); "(input_grad): %%s", cudnnGetErrorString(err%(name)s));
%(fail)s %(fail)s
} }
if ((err%(id)d = cudnnCreateTensor4dDescriptor(&output%(id)d)) != CUDNN_STATUS_SUCCESS) { if ((err%(name)s = cudnnCreateTensor4dDescriptor(&output%(name)s)) != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_MemoryError, PyErr_Format(PyExc_MemoryError,
"GpuDnnPoolGrad: could not allocate tensor4d descriptor " "GpuDnnPoolGrad: could not allocate tensor4d descriptor "
"(output): %%s", cudnnGetErrorString(err%(id)d)); "(output): %%s", cudnnGetErrorString(err%(name)s));
%(fail)s %(fail)s
} }
if ((err%(id)d = cudnnCreateTensor4dDescriptor(&output_grad%(id)d)) != CUDNN_STATUS_SUCCESS) { if ((err%(name)s = cudnnCreateTensor4dDescriptor(&output_grad%(name)s)) != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_MemoryError, PyErr_Format(PyExc_MemoryError,
"GpuDnnPoolGrad: could not allocate tensor4d descriptor " "GpuDnnPoolGrad: could not allocate tensor4d descriptor "
"(output_grad): %%s", cudnnGetErrorString(err%(id)d)); "(output_grad): %%s", cudnnGetErrorString(err%(name)s));
%(fail)s %(fail)s
} }
""" % dict(id=struct_id, fail=sub['fail']) """ % dict(name=name, fail=sub['fail'])
def c_cleanup_code_struct(self, node, struct_id): def c_cleanup_code_struct(self, node, name):
return """ return """
if (input%(id)d != NULL) { cudnnDestroyTensor4dDescriptor(input%(id)d); } if (input%(name)s != NULL) { cudnnDestroyTensor4dDescriptor(input%(name)s); }
if (input_grad%(id)d != NULL) { cudnnDestroyTensor4dDescriptor(input_grad%(id)d); } if (input_grad%(name)s != NULL) { cudnnDestroyTensor4dDescriptor(input_grad%(name)s); }
if (output%(id)d != NULL) { cudnnDestroyTensor4dDescriptor(output%(id)d); } if (output%(name)s != NULL) { cudnnDestroyTensor4dDescriptor(output%(name)s); }
if (output_grad%(id)d != NULL) { cudnnDestroyTensor4dDescriptor(output_grad%(id)d); } if (output_grad%(name)s != NULL) { cudnnDestroyTensor4dDescriptor(output_grad%(name)s); }
""" % dict(id=struct_id) """ % dict(name=name)
def c_code(self, node, name, inputs, outputs, sub): def c_code(self, node, name, inputs, outputs, sub):
# Here the name out and inp are based on the cudnn definition. # Here the name out and inp are based on the cudnn definition.
...@@ -908,15 +908,15 @@ if (output_grad%(id)d != NULL) { cudnnDestroyTensor4dDescriptor(output_grad%(id) ...@@ -908,15 +908,15 @@ if (output_grad%(id)d != NULL) { cudnnDestroyTensor4dDescriptor(output_grad%(id)
out_grad, = outputs out_grad, = outputs
set_in = "\n".join([ set_in = "\n".join([
c_set_tensor4d(inp, "input" + str(sub['struct_id']), c_set_tensor4d(inp, "input" + name,
'err' + name, sub['fail']), 'err' + name, sub['fail']),
c_set_tensor4d(inp_grad, "input_grad" + str(sub['struct_id']), c_set_tensor4d(inp_grad, "input_grad" + name,
'err' + name, sub['fail']), 'err' + name, sub['fail']),
c_set_tensor4d(out, "output" + str(sub['struct_id']), c_set_tensor4d(out, "output" + name,
'err' + name, sub['fail']) 'err' + name, sub['fail'])
]) ])
set_out = c_set_tensor4d(out, "output_grad" + str(sub['struct_id']), set_out = c_set_tensor4d(out, "output_grad" + name,
'err' + name, sub['fail']) 'err' + name, sub['fail'])
return """ return """
...@@ -965,13 +965,13 @@ if (err%(name)s != CUDNN_STATUS_SUCCESS) { ...@@ -965,13 +965,13 @@ if (err%(name)s != CUDNN_STATUS_SUCCESS) {
%(fail)s %(fail)s
} }
""" % dict(output_grad=out_grad, desc=desc, """ % dict(output_grad=out_grad, desc=desc,
fail=sub['fail'], id=sub['struct_id'], fail=sub['fail'],
name=name, set_in=set_in, name=name, set_in=set_in,
set_out=set_out, input=inp, input_grad=inp_grad, output=out, set_out=set_out, input=inp, input_grad=inp_grad, output=out,
input_desc="input"+str(sub['struct_id']), input_desc="input"+name,
input_grad_desc="input_grad"+str(sub['struct_id']), input_grad_desc="input_grad"+name,
output_desc="output"+str(sub['struct_id']), output_desc="output"+name,
output_grad_desc="output_grad"+str(sub['struct_id'])) output_grad_desc="output_grad"+name)
def c_code_cache_version(self): def c_code_cache_version(self):
return (4,) return (4,)
...@@ -1029,44 +1029,44 @@ class GpuDnnSoftmaxBase(DnnBase): ...@@ -1029,44 +1029,44 @@ class GpuDnnSoftmaxBase(DnnBase):
def _define_tensor4d_desc(self, name, id): def _define_tensor4d_desc(self, name, id):
return """ return """
cudnnTensor4dDescriptor_t %(name)s_%(id)d; cudnnTensor4dDescriptor_t %(name)s_%(id)s;
""" % dict(name=name, id=id) """ % dict(name=name, id=id)
def _init_tensor4d_desc(self, name, id, fail): def _init_tensor4d_desc(self, name, id, fail):
return """ return """
%(name)s_%(id)d = NULL; %(name)s_%(id)s = NULL;
if ((err%(id)d = cudnnCreateTensor4dDescriptor(&%(name)s_%(id)d)) != CUDNN_STATUS_SUCCESS) { if ((err%(name)s = cudnnCreateTensor4dDescriptor(&%(name)s_%(id)s)) != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_MemoryError, "could not allocate tensor4d descriptor " PyErr_Format(PyExc_MemoryError, "could not allocate tensor4d descriptor "
": %%s", cudnnGetErrorString(err%(id)d)); ": %%s", cudnnGetErrorString(err%(name)s));
%(fail)s %(fail)s
} }
""" % dict(name=name, id=id, fail=fail) """ % dict(name=name, id=id, fail=fail)
def _clean_tensor4d_desc(self, name, id): def _clean_tensor4d_desc(self, name, id):
return """ return """
if(%(name)s_%(id)d!= NULL) if(%(name)s_%(name)s!= NULL)
cudnnDestroyTensor4dDescriptor(%(name)s_%(id)d); cudnnDestroyTensor4dDescriptor(%(name)s_%(id)s);
""" % dict(name=name, id=id) """ % dict(name=name, id=id)
def c_support_code_struct(self, node, struct_id): def c_support_code_struct(self, node, name):
result = '' result = ''
for name in self.tensor_4d_descs: for name in self.tensor_4d_descs:
result += self._define_tensor4d_desc(name, struct_id) result += self._define_tensor4d_desc(name, name)
return result return result
def c_init_code_struct(self, node, struct_id, sub): def c_init_code_struct(self, node, name, sub):
result = """ result = """
cudnnStatus_t err%(id)d; cudnnStatus_t err%(name)s;
""" % dict(id=struct_id) """ % dict(name=name)
for name in self.tensor_4d_descs: for name in self.tensor_4d_descs:
result += self._init_tensor4d_desc(name, struct_id, sub['fail']) result += self._init_tensor4d_desc(name, name, sub['fail'])
return result return result
def c_cleanup_code_struct(self, node, struct_id): def c_cleanup_code_struct(self, node, name):
result = '' result = ''
for name in self.tensor_4d_descs: for name in self.tensor_4d_descs:
result += self._clean_tensor4d_desc(name, struct_id) result += self._clean_tensor4d_desc(name, name)
return result return result
def c_code(self, node, name, inputs, outputs, sub): def c_code(self, node, name, inputs, outputs, sub):
...@@ -1091,18 +1091,18 @@ cudnnStatus_t err%(id)d; ...@@ -1091,18 +1091,18 @@ cudnnStatus_t err%(id)d;
# Setup configuration variables. # Setup configuration variables.
result = """ result = """
cudnnStatus_t err%(name)s; cudnnStatus_t err%(name)s;
cudnnTensorFormat_t format%(id)d = CUDNN_TENSOR_NCHW; cudnnTensorFormat_t format%(name)s = CUDNN_TENSOR_NCHW;
if (%(tensor_format)d == 1) if (%(tensor_format)d == 1)
format%(id)d = CUDNN_TENSOR_NHWC; format%(name)s = CUDNN_TENSOR_NHWC;
cudnnSoftmaxAlgorithm_t algo%(id)d = CUDNN_SOFTMAX_ACCURATE; cudnnSoftmaxAlgorithm_t algo%(name)s = CUDNN_SOFTMAX_ACCURATE;
if (%(algo)d == 1) if (%(algo)d == 1)
algo%(id)d = CUDNN_SOFTMAX_FAST; algo%(name)s = CUDNN_SOFTMAX_FAST;
cudnnSoftmaxMode_t mode%(id)d = CUDNN_SOFTMAX_MODE_CHANNEL; cudnnSoftmaxMode_t mode%(name)s = CUDNN_SOFTMAX_MODE_CHANNEL;
if (%(mode)d == 1) if (%(mode)d == 1)
mode%(id)d = CUDNN_SOFTMAX_MODE_INSTANCE; mode%(name)s = CUDNN_SOFTMAX_MODE_INSTANCE;
""" % dict(id=sub['struct_id'], name=name, """ % dict(name=name,
tensor_format=tensor_format, mode=mode, algo=algo) tensor_format=tensor_format, mode=mode, algo=algo)
# Validate the input and build the input variables. # Validate the input and build the input variables.
...@@ -1114,8 +1114,8 @@ if (!CudaNdarray_is_c_contiguous(%(ins)s)) { ...@@ -1114,8 +1114,8 @@ if (!CudaNdarray_is_c_contiguous(%(ins)s)) {
} }
err%(name)s = cudnnSetTensor4dDescriptor( err%(name)s = cudnnSetTensor4dDescriptor(
%(input_name)s_%(id)d, %(input_name)s_%(name)s,
format%(id)d, format%(name)s,
CUDNN_DATA_FLOAT, CUDNN_DATA_FLOAT,
CudaNdarray_HOST_DIMS(%(ins)s)[0], CudaNdarray_HOST_DIMS(%(ins)s)[0],
CudaNdarray_HOST_DIMS(%(ins)s)[1], CudaNdarray_HOST_DIMS(%(ins)s)[1],
...@@ -1127,7 +1127,7 @@ if (err%(name)s != CUDNN_STATUS_SUCCESS) { ...@@ -1127,7 +1127,7 @@ if (err%(name)s != CUDNN_STATUS_SUCCESS) {
cudnnGetErrorString(err%(name)s)); cudnnGetErrorString(err%(name)s));
%(fail)s %(fail)s
} }
""" % dict(id=sub['struct_id'], name=name, input_name=input_name, """ % dict(name=name, input_name=input_name,
ins=ins[input_idx], fail=sub['fail']) ins=ins[input_idx], fail=sub['fail'])
# Build and prepare the output variable. # Build and prepare the output variable.
...@@ -1138,8 +1138,8 @@ if (CudaNdarray_prep_output(&%(outs)s, 4, CudaNdarray_HOST_DIMS(%(ins)s)) != 0) ...@@ -1138,8 +1138,8 @@ if (CudaNdarray_prep_output(&%(outs)s, 4, CudaNdarray_HOST_DIMS(%(ins)s)) != 0)
} }
err%(name)s = cudnnSetTensor4dDescriptor( err%(name)s = cudnnSetTensor4dDescriptor(
softmax_output_%(id)d, softmax_output_%(name)s,
format%(id)d, format%(name)s,
CUDNN_DATA_FLOAT, CUDNN_DATA_FLOAT,
CudaNdarray_HOST_DIMS(%(outs)s)[0], CudaNdarray_HOST_DIMS(%(outs)s)[0],
CudaNdarray_HOST_DIMS(%(outs)s)[1], CudaNdarray_HOST_DIMS(%(outs)s)[1],
...@@ -1157,7 +1157,7 @@ if (err%(name)s != CUDNN_STATUS_SUCCESS) { ...@@ -1157,7 +1157,7 @@ if (err%(name)s != CUDNN_STATUS_SUCCESS) {
result += self.method() result += self.method()
subs = dict(ins=ins[-1], outs=outs, fail=sub['fail'], subs = dict(ins=ins[-1], outs=outs, fail=sub['fail'],
id=sub['struct_id'], name=name) name=name)
for idx, softmax_input in enumerate(self.softmax_inputs): for idx, softmax_input in enumerate(self.softmax_inputs):
subs['name%d' % idx] = softmax_input subs['name%d' % idx] = softmax_input
...@@ -1184,11 +1184,11 @@ class GpuDnnSoftmax(GpuDnnSoftmaxBase): ...@@ -1184,11 +1184,11 @@ class GpuDnnSoftmax(GpuDnnSoftmaxBase):
return """ return """
err%(name)s = cudnnSoftmaxForward( err%(name)s = cudnnSoftmaxForward(
_handle, _handle,
algo%(id)d, algo%(name)s,
mode%(id)d, mode%(name)s,
softmax_input_%(id)d, softmax_input_%(name)s,
CudaNdarray_DEV_DATA(%(ins)s), CudaNdarray_DEV_DATA(%(ins)s),
softmax_output_%(id)d, softmax_output_%(name)s,
CudaNdarray_DEV_DATA(%(outs)s) CudaNdarray_DEV_DATA(%(outs)s)
); );
""" """
...@@ -1218,13 +1218,13 @@ class GpuDnnSoftmaxGrad(GpuDnnSoftmaxBase): ...@@ -1218,13 +1218,13 @@ class GpuDnnSoftmaxGrad(GpuDnnSoftmaxBase):
return """ return """
err%(name)s = cudnnSoftmaxBackward( err%(name)s = cudnnSoftmaxBackward(
_handle, _handle,
algo%(id)d, algo%(name)s,
mode%(id)d, mode%(name)s,
%(name1)s_%(id)d, %(name1)s_%(name)s,
CudaNdarray_DEV_DATA(%(ins1)s), CudaNdarray_DEV_DATA(%(ins1)s),
%(name0)s_%(id)d, %(name0)s_%(name)s,
CudaNdarray_DEV_DATA(%(ins0)s), CudaNdarray_DEV_DATA(%(ins0)s),
softmax_output_%(id)d, softmax_output_%(name)s,
CudaNdarray_DEV_DATA(%(outs)s) CudaNdarray_DEV_DATA(%(outs)s)
); );
""" """
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论