提交 18385728 作者: Arnaud Bergeron

Set the proper context when destroying the cudnn handle.

This also requires adding the extra sub parameter to c_cleanup_code_struct().
上级 511b7591
...@@ -820,7 +820,8 @@ class CLinker(link.Linker): ...@@ -820,7 +820,8 @@ class CLinker(link.Linker):
pass pass
try: try:
struct_cleanup = op.c_cleanup_code_struct(node, name) struct_cleanup = op.c_cleanup_code_struct(node, name,
sub_struct)
assert isinstance(struct_cleanup, string_types), ( assert isinstance(struct_cleanup, string_types), (
str(node.op) + str(node.op) +
" didn't return a string for c_cleanup_code_struct") " didn't return a string for c_cleanup_code_struct")
......
...@@ -402,7 +402,7 @@ class CLinkerOp(CLinkerObject): ...@@ -402,7 +402,7 @@ class CLinkerOp(CLinkerObject):
node : an Apply instance in the graph being compiled node : an Apply instance in the graph being compiled
name : str name : str
A unique name to distinguish variables from those of other nodes. A unique name to distinguish variables from those of other nodes.
sub sub : dict
A dictionary of values to substitute in the code. A dictionary of values to substitute in the code.
Most notably it contains a 'fail' entry that you should place in Most notably it contains a 'fail' entry that you should place in
your code after setting a python exception to indicate an error. your code after setting a python exception to indicate an error.
...@@ -438,7 +438,7 @@ class CLinkerOp(CLinkerObject): ...@@ -438,7 +438,7 @@ class CLinkerOp(CLinkerObject):
raise utils.MethodNotDefined("c_support_code_struct", raise utils.MethodNotDefined("c_support_code_struct",
type(self), self.__class__.__name__) type(self), self.__class__.__name__)
def c_cleanup_code_struct(self, node, name): def c_cleanup_code_struct(self, node, name, sub):
""" """
Optional: return a code string specific to the apply to be Optional: return a code string specific to the apply to be
inserted in the struct cleanup code. inserted in the struct cleanup code.
...@@ -448,6 +448,10 @@ class CLinkerOp(CLinkerObject): ...@@ -448,6 +448,10 @@ class CLinkerOp(CLinkerObject):
node : an Apply instance in the graph being compiled node : an Apply instance in the graph being compiled
name : str name : str
A unique name to distinguish variables from those of other nodes. A unique name to distinguish variables from those of other nodes.
sub : dict
A dictionary of values to substitute in the code.
Most notably it contains a 'fail' entry that you should place in
your code after setting a python exception to indicate an error.
Raises Raises
------ ------
...@@ -1343,7 +1347,6 @@ class COp(Op): ...@@ -1343,7 +1347,6 @@ class COp(Op):
c_support_code = simple_meth('support_code') c_support_code = simple_meth('support_code')
c_support_code_apply = apply_meth('support_code_apply') c_support_code_apply = apply_meth('support_code_apply')
c_support_code_struct = apply_meth('support_code_struct') c_support_code_struct = apply_meth('support_code_struct')
c_cleanup_code_struct = apply_meth('cleanup_code_struct')
def format_c_function_args(self, inp, out): def format_c_function_args(self, inp, out):
# Generate an string containing the arguments sent to the external C # Generate an string containing the arguments sent to the external C
...@@ -1449,6 +1452,20 @@ class COp(Op): ...@@ -1449,6 +1452,20 @@ class COp(Op):
raise utils.MethodNotDefined( raise utils.MethodNotDefined(
'c_init_code_struct', type(self), type(self).__name__) 'c_init_code_struct', type(self), type(self).__name__)
def c_cleanup_code_struct(self, node, name, sub):
if 'cleanup_code_struct' in self.code_sections:
op_code = self.code_sections['cleanup_code_struct']
def_macros, undef_macros = self.get_c_macros(node, name)
def_sub, undef_sub = self.get_sub_macros(sub)
return os.linesep.join(['', def_macros, def_sub,
op_code,
undef_sub, undef_macros])
else:
raise utils.MethodNotDefined(
'c_cleanup_code_struct', type(self), type(self).__name__)
def c_code(self, node, name, inp, out, sub): def c_code(self, node, name, inp, out, sub):
if self.func_name is not None: if self.func_name is not None:
assert 'code' not in self.code_sections assert 'code' not in self.code_sections
......
...@@ -1532,7 +1532,7 @@ if ((err%(name)s = cudnnCreateTensorDescriptor(&output%(name)s)) != CUDNN_STATUS ...@@ -1532,7 +1532,7 @@ if ((err%(name)s = cudnnCreateTensorDescriptor(&output%(name)s)) != CUDNN_STATUS
} }
""" % dict(name=name, fail=sub['fail']) """ % dict(name=name, fail=sub['fail'])
def c_cleanup_code_struct(self, node, name): def c_cleanup_code_struct(self, node, name, sub):
return """ return """
if (input%(name)s != NULL) { cudnnDestroyTensorDescriptor(input%(name)s); } if (input%(name)s != NULL) { cudnnDestroyTensorDescriptor(input%(name)s); }
if (output%(name)s != NULL) { cudnnDestroyTensorDescriptor(output%(name)s); } if (output%(name)s != NULL) { cudnnDestroyTensorDescriptor(output%(name)s); }
...@@ -1714,7 +1714,7 @@ if ((err%(name)s = cudnnCreateTensorDescriptor(&output_grad%(name)s)) != CUDNN_S ...@@ -1714,7 +1714,7 @@ if ((err%(name)s = cudnnCreateTensorDescriptor(&output_grad%(name)s)) != CUDNN_S
} }
""" % dict(name=name, fail=sub['fail']) """ % dict(name=name, fail=sub['fail'])
def c_cleanup_code_struct(self, node, name): def c_cleanup_code_struct(self, node, name, sub):
return """ return """
if (input%(name)s != NULL) { cudnnDestroyTensorDescriptor(input%(name)s); } if (input%(name)s != NULL) { cudnnDestroyTensorDescriptor(input%(name)s); }
if (input_grad%(name)s != NULL) { cudnnDestroyTensorDescriptor(input_grad%(name)s); } if (input_grad%(name)s != NULL) { cudnnDestroyTensorDescriptor(input_grad%(name)s); }
...@@ -1938,7 +1938,7 @@ cudnnStatus_t err%(name)s; ...@@ -1938,7 +1938,7 @@ cudnnStatus_t err%(name)s;
result += self._init_tensor4d_desc(name, id, sub['fail']) result += self._init_tensor4d_desc(name, id, sub['fail'])
return result return result
def c_cleanup_code_struct(self, node, name): def c_cleanup_code_struct(self, node, name, sub):
result = '' result = ''
for id in self.tensor_4d_descs: for id in self.tensor_4d_descs:
result += self._clean_tensor4d_desc(name, id) result += self._clean_tensor4d_desc(name, id)
......
...@@ -284,7 +284,7 @@ class GpuKernelBase(object): ...@@ -284,7 +284,7 @@ class GpuKernelBase(object):
def _generate_kernel_cleanup(self, k): def _generate_kernel_cleanup(self, k):
return "GpuKernel_clear(&%(ovar)s);" % dict(ovar=k.objvar) return "GpuKernel_clear(&%(ovar)s);" % dict(ovar=k.objvar)
def c_cleanup_code_struct(self, node, name): def c_cleanup_code_struct(self, node, name, sub):
kernels = self.gpu_kernels(node, name) kernels = self.gpu_kernels(node, name)
cleanups = '\n'.join(self._generate_kernel_cleanup(k) for k in kernels) cleanups = '\n'.join(self._generate_kernel_cleanup(k) for k in kernels)
return cleanups return cleanups
......
...@@ -128,4 +128,6 @@ cudnnHandle_t APPLY_SPECIFIC(_handle); ...@@ -128,4 +128,6 @@ cudnnHandle_t APPLY_SPECIFIC(_handle);
#section cleanup_code_struct #section cleanup_code_struct
cuda_enter(PARAMS->ctx);
cudnnDestroy(APPLY_SPECIFIC(_handle)); cudnnDestroy(APPLY_SPECIFIC(_handle));
cuda_exit(PARAMS->ctx);
...@@ -139,7 +139,7 @@ if (GpuKernel_init(&k_%(name)s, c->ops, c->ctx, 1, &bcode, &sz, ...@@ -139,7 +139,7 @@ if (GpuKernel_init(&k_%(name)s, c->ops, c->ctx, 1, &bcode, &sz,
codel.append(self.init_gpukernel(name, sub['fail'])) codel.append(self.init_gpukernel(name, sub['fail']))
return '\n'.join(codel) return '\n'.join(codel)
def c_cleanup_code_struct(self, node, nodename): def c_cleanup_code_struct(self, node, nodename, sub):
codel = [] codel = []
for name in self.KERN_NAMES: for name in self.KERN_NAMES:
codel.append("GpuKernel_clear(&k_{0});".format(name)) codel.append("GpuKernel_clear(&k_{0});".format(name))
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论