提交 51637afd 作者: nouiz

Merge pull request #808 from jaberg/cuda_clear_error

FIX: cuda: clear error flag when handling error
...@@ -2180,16 +2180,30 @@ class GpuAlloc(GpuOp): ...@@ -2180,16 +2180,30 @@ class GpuAlloc(GpuOp):
str += "if(%(out)s==NULL\n" % locals() str += "if(%(out)s==NULL\n" % locals()
for idx,sh in enumerate(shps): for idx,sh in enumerate(shps):
str += "||CudaNdarray_HOST_DIMS(%(out)s)[%(idx)s]!=dims[%(idx)s]" % locals() str += "||CudaNdarray_HOST_DIMS(%(out)s)[%(idx)s]!=dims[%(idx)s]" % locals()
str+="""){ str += """){
Py_XDECREF(%(out)s); Py_XDECREF(%(out)s);
%(out)s= (CudaNdarray*)CudaNdarray_New(); %(out)s = (CudaNdarray*)CudaNdarray_New();
CudaNdarray_alloc_contiguous(%(out)s, %(nd)s, dims); if (!%(out)s)
} {
if (CudaNdarray_CopyFromCudaNdarray(%(out)s, %(value)s, true)) // exception already set
{ %(fail)s;
%(fail)s; }
} if (CudaNdarray_alloc_contiguous(%(out)s, %(nd)s, dims))
""" % locals() {
// exception already set
Py_XDECREF(%(out)s);
%(out)s = NULL;
%(fail)s;
}
}
if (CudaNdarray_CopyFromCudaNdarray(%(out)s, %(value)s, true))
{
// exception already set
Py_XDECREF(%(out)s);
%(out)s = NULL;
%(fail)s;
}
""" % locals()
return str return str
def infer_shape(self, node, input_shapes): def infer_shape(self, node, input_shapes):
...@@ -2200,7 +2214,7 @@ class GpuAlloc(GpuOp): ...@@ -2200,7 +2214,7 @@ class GpuAlloc(GpuOp):
return [None for i in inputs] return [None for i in inputs]
def c_code_cache_version(self): def c_code_cache_version(self):
return (3,) return (4,)
def do_constant_folding(self, node): def do_constant_folding(self, node):
for client in node.outputs[0].clients: for client in node.outputs[0].clients:
......
...@@ -48,6 +48,11 @@ void * device_malloc(size_t size) ...@@ -48,6 +48,11 @@ void * device_malloc(size_t size)
cudaError_t err = cudaMalloc(&rval, size); cudaError_t err = cudaMalloc(&rval, size);
if (cudaSuccess != err) if (cudaSuccess != err)
{ {
// Clear the error flag, cudaMalloc doesn't do it.
// Currently this returns the same thing as err, but if in future
// it returns something else I still don't see why we should ignore
// it. All we want to do here is reset the flag.
cudaGetLastError();
#if COMPUTE_GPU_MEM_USED #if COMPUTE_GPU_MEM_USED
fprintf(stderr, "Error allocating %li bytes of device memory (%s). new total bytes allocated: %d\n", (long)size, cudaGetErrorString(err),_allocated_size); fprintf(stderr, "Error allocating %li bytes of device memory (%s). new total bytes allocated: %d\n", (long)size, cudaGetErrorString(err),_allocated_size);
#else #else
...@@ -86,6 +91,11 @@ int device_free(void *ptr) ...@@ -86,6 +91,11 @@ int device_free(void *ptr)
cudaError_t err = cudaFree(ptr); cudaError_t err = cudaFree(ptr);
if (cudaSuccess != err) if (cudaSuccess != err)
{ {
// Clear the error flag, cudaFree doesn't do it.
// Currently this returns the same thing as err, but if in future
// it returns something else I still don't see why we should ignore
// it. All we want to do here is reset the flag.
cudaGetLastError();
#if COMPUTE_GPU_MEM_USED #if COMPUTE_GPU_MEM_USED
fprintf(stderr, "Error freeing device pointer %p (%s).%d byte already allocated\n", ptr, cudaGetErrorString(err), _allocated_size); fprintf(stderr, "Error freeing device pointer %p (%s).%d byte already allocated\n", ptr, cudaGetErrorString(err), _allocated_size);
#else #else
...@@ -910,6 +920,11 @@ CudaNdarray_TakeFrom(CudaNdarray * self, PyObject *args){ ...@@ -910,6 +920,11 @@ CudaNdarray_TakeFrom(CudaNdarray * self, PyObject *args){
} }
cudaError_t err = cudaMemset((void*)err_var, 0, sizeof(int)); cudaError_t err = cudaMemset((void*)err_var, 0, sizeof(int));
if (cudaSuccess != err) { if (cudaSuccess != err) {
// Clear the error flag, cudaMemset doesn't do it.
// Currently this returns the same thing as err, but if in future
// it returns something else I still don't see why we should ignore
// it. All we want to do here is reset the flag.
cudaGetLastError();
PyErr_Format(PyExc_RuntimeError, PyErr_Format(PyExc_RuntimeError,
"Error setting device error code to 0. %s", "Error setting device error code to 0. %s",
cudaGetErrorString(err)); cudaGetErrorString(err));
...@@ -2129,6 +2144,11 @@ CudaNdarray_setitem(PyObject *o, PyObject *key, PyObject *value) ...@@ -2129,6 +2144,11 @@ CudaNdarray_setitem(PyObject *o, PyObject *key, PyObject *value)
Py_XDECREF(rval); Py_XDECREF(rval);
if (err) if (err)
{ {
// Clear the error flag, cudaMemset doesn't do it.
// Currently this returns the same thing as err, but if in future
// it returns something else I still don't see why we should ignore
// it. All we want to do here is reset the flag.
cudaGetLastError();
PyErr_SetString(PyExc_RuntimeError, PyErr_SetString(PyExc_RuntimeError,
"CudaNdarray.__setitem__: cudaMemset failed"); "CudaNdarray.__setitem__: cudaMemset failed");
return -1; return -1;
...@@ -2401,6 +2421,11 @@ GetDeviceMemInfo(PyObject* _unused, PyObject* dummy) ...@@ -2401,6 +2421,11 @@ GetDeviceMemInfo(PyObject* _unused, PyObject* dummy)
cudaError_t err = cudaMemGetInfo(&free, &total); cudaError_t err = cudaMemGetInfo(&free, &total);
if (err != cudaSuccess){ if (err != cudaSuccess){
// Clear the error flag, cudaMemGetInfo doesn't do it.
// Currently this returns the same thing as err, but if in future
// it returns something else I still don't see why we should ignore
// it. All we want to do here is reset the flag.
cudaGetLastError();
PyErr_Format(PyExc_RuntimeError, PyErr_Format(PyExc_RuntimeError,
"Error while getting memory info about the gpu: %s", "Error while getting memory info about the gpu: %s",
cudaGetErrorString(err)); cudaGetErrorString(err));
......
...@@ -36,8 +36,14 @@ class SupportCodeError(Exception): ...@@ -36,8 +36,14 @@ class SupportCodeError(Exception):
class NaiveAlgo(object): class NaiveAlgo(object):
verbose = 0 # 1, 2 or 3 for more verbose output. verbose = 0 # 1, 2 or 3 for more verbose output.
#cache_version = ()
cache_version = (15, verbose) @property
def cache_version(self):
ver = self.scalar_op.c_code_cache_version()
if ver:
return (16, self.verbose, self.sync, ver)
else:
return ver
def __init__(self, scalar_op, sync=True, inplace_pattern=None): def __init__(self, scalar_op, sync=True, inplace_pattern=None):
""" """
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论