Merge pull request #808 from jaberg/cuda_clear_error

FIX: cuda: clear error flag when handling error

Merge pull request #808 from jaberg/cuda_clear_error
51637afd · nouiz · d1e06e3a · 032279b8 · 51637afd · 51637afd
--- a/theano/sandbox/cuda/basic_ops.py
+++ b/theano/sandbox/cuda/basic_ops.py
@@ -2180,16 +2180,30 @@ class GpuAlloc(GpuOp):
        str += "if(%(out)s==NULL\n" % locals()
        for idx,sh in enumerate(shps):
            str += "||CudaNdarray_HOST_DIMS(%(out)s)[%(idx)s]!=dims[%(idx)s]" % locals()
-        str+="""){
+        str += """){
-        Py_XDECREF(%(out)s);
+            Py_XDECREF(%(out)s);
-        %(out)s= (CudaNdarray*)CudaNdarray_New();
+            %(out)s = (CudaNdarray*)CudaNdarray_New();
-        CudaNdarray_alloc_contiguous(%(out)s, %(nd)s, dims);
+            if (!%(out)s)
-    }
+            {
-    if (CudaNdarray_CopyFromCudaNdarray(%(out)s, %(value)s, true))
+                // exception already set
-    {
+                %(fail)s;
-    %(fail)s;
+            }
-    }
+            if (CudaNdarray_alloc_contiguous(%(out)s, %(nd)s, dims))
-""" % locals()
+            {
+                // exception already set
+                Py_XDECREF(%(out)s);
+                %(out)s = NULL;
+                %(fail)s;
+            }
+        }
+        if (CudaNdarray_CopyFromCudaNdarray(%(out)s, %(value)s, true))
+        {
+            // exception already set
+            Py_XDECREF(%(out)s);
+            %(out)s = NULL;
+            %(fail)s;
+        }
+        """ % locals()
        return str
    def infer_shape(self, node, input_shapes):
@@ -2200,7 +2214,7 @@ class GpuAlloc(GpuOp):
        return [None for i in inputs]
    def c_code_cache_version(self):
-        return (3,)
+        return (4,)
    def do_constant_folding(self, node):
        for client in node.outputs[0].clients:

--- a/theano/sandbox/cuda/cuda_ndarray.cu
+++ b/theano/sandbox/cuda/cuda_ndarray.cu
@@ -48,6 +48,11 @@ void * device_malloc(size_t size)
    cudaError_t err = cudaMalloc(&rval, size);
    if (cudaSuccess != err)
    {
+        // Clear the error flag, cudaMalloc doesn't do it.
+        // Currently this returns the same thing as err.  Even if a future
+        // CUDA version returns something else, the return value is
+        // deliberately ignored: all we want to do here is reset the flag.
+        cudaGetLastError();
 #if COMPUTE_GPU_MEM_USED
        fprintf(stderr, "Error allocating %li bytes of device memory (%s). new total bytes allocated: %d\n", (long)size, cudaGetErrorString(err),_allocated_size);
 #else
@@ -86,6 +91,11 @@ int device_free(void *ptr)
    cudaError_t err =  cudaFree(ptr);
    if (cudaSuccess != err)
    {
+        // Clear the error flag, cudaFree doesn't do it.
+        // Currently this returns the same thing as err.  Even if a future
+        // CUDA version returns something else, the return value is
+        // deliberately ignored: all we want to do here is reset the flag.
+        cudaGetLastError();
 #if COMPUTE_GPU_MEM_USED
        fprintf(stderr, "Error freeing device pointer %p (%s).%d byte already allocated\n", ptr, cudaGetErrorString(err), _allocated_size);
 #else
@@ -910,6 +920,11 @@ CudaNdarray_TakeFrom(CudaNdarray * self, PyObject *args){
        }
        cudaError_t err = cudaMemset((void*)err_var, 0, sizeof(int));
        if (cudaSuccess != err) {
+            // Clear the error flag, cudaMemset doesn't do it.
+            // Currently this returns the same thing as err.  Even if a future
+            // CUDA version returns something else, the return value is
+            // deliberately ignored: all we want to do here is reset the flag.
+            cudaGetLastError();
            PyErr_Format(PyExc_RuntimeError,
                         "Error setting device error code to 0. %s",
                         cudaGetErrorString(err));
@@ -2129,6 +2144,11 @@ CudaNdarray_setitem(PyObject *o, PyObject  *key, PyObject  *value)
            Py_XDECREF(rval);
            if (err)
            {
+                // Clear the error flag, cudaMemset doesn't do it.
+                // Currently this returns the same thing as err.  Even if a
+                // future CUDA version returns something else, the return value is
+                // deliberately ignored: all we want to do here is reset the flag.
+                cudaGetLastError();
                PyErr_SetString(PyExc_RuntimeError,
                                "CudaNdarray.__setitem__: cudaMemset failed");
                return -1;
@@ -2401,6 +2421,11 @@ GetDeviceMemInfo(PyObject* _unused, PyObject* dummy)
    cudaError_t err = cudaMemGetInfo(&free, &total);
    if (err != cudaSuccess){
+        // Clear the error flag, cudaMemGetInfo doesn't do it.
+        // Currently this returns the same thing as err.  Even if a future
+        // CUDA version returns something else, the return value is
+        // deliberately ignored: all we want to do here is reset the flag.
+        cudaGetLastError();
        PyErr_Format(PyExc_RuntimeError,
                     "Error while getting memory info about the gpu: %s",
                     cudaGetErrorString(err));

--- a/theano/sandbox/cuda/elemwise.py
+++ b/theano/sandbox/cuda/elemwise.py
@@ -36,8 +36,14 @@ class SupportCodeError(Exception):
 class NaiveAlgo(object):
    verbose = 0 # 1, 2 or 3 for more verbose output.
-    #cache_version = ()
-    cache_version = (15, verbose)
+    @property
+    def cache_version(self):
+        ver = self.scalar_op.c_code_cache_version()
+        if ver:
+            return (16, self.verbose, self.sync, ver)
+        else:
+            return ver
    def __init__(self, scalar_op, sync=True, inplace_pattern=None):
        """