提交 e3523c06 authored 作者: Josh Bleecher Snyder's avatar Josh Bleecher Snyder

Provide more detailed error messages when CUDA initialization fails; make cuda_ndarray.cu more DRY.

上级 5751083b
...@@ -90,7 +90,13 @@ except Exception, e: ...@@ -90,7 +90,13 @@ except Exception, e:
set_cuda_disabled() set_cuda_disabled()
if cuda_available: if cuda_available:
cuda_available=device_available() try:
gpu_init()
cuda_available = True
cuda_initialization_error_message = ""
except EnvironmentError, e:
cuda_available = False
cuda_initialization_error_message = e.message
if cuda_available: if cuda_available:
    #check if there is an old cuda_ndarray that was loaded instead of the one we compiled! #check if there is an old cuda_ndarray that was loaded instead of the one we compiled!
...@@ -121,9 +127,10 @@ if cuda_available: ...@@ -121,9 +127,10 @@ if cuda_available:
def use(device, force=False): def use(device, force=False):
global cuda_enabled global cuda_enabled
if force and not cuda_available and device.startswith('gpu'): if force and not cuda_available and device.startswith('gpu'):
raise Exception("You force to use a gpu device but cuda is not installed or their is no usable gpu device") raise EnvironmentError("You forced use of device %s, but CUDA initialization failed "
"with error:\n%s" % (device, cuda_initialization_error_message))
if not cuda_available: if not cuda_available:
warning('CUDA is installed, but GPU device is not available') warning('CUDA is installed, but device %s is not available' % device)
return return
if device == 'gpu': if device == 'gpu':
...@@ -150,8 +157,8 @@ def use(device, force=False): ...@@ -150,8 +157,8 @@ def use(device, force=False):
handle_shared_float32(True) handle_shared_float32(True)
use.device_number = device use.device_number = device
cuda_enabled = True cuda_enabled = True
except RuntimeError, e: except (EnvironmentError, ValueError), e:
_logger.error("ERROR: Not using GPU. Initialisation of device %i failed. %s" %(device, e)) _logger.error("ERROR: Not using GPU. Initialisation of device %i failed:\n%s" % (device, e))
cuda_enabled = False cuda_enabled = False
if force: if force:
e.args+=("You asked to force this device and it failed. No fallback to the cpu or other gpu device.",) e.args+=("You asked to force this device and it failed. No fallback to the cpu or other gpu device.",)
......
...@@ -1724,80 +1724,67 @@ static PyTypeObject CudaNdarrayType = ...@@ -1724,80 +1724,67 @@ static PyTypeObject CudaNdarrayType =
}; };
//This fct return True it is able to find a cuda card and query its properti // Initialize the gpu.
//Otherwise we return False // Takes one optional parameter, the device number.
// If provided, it sets that device to be the active device.
// If not provided (usually just to test whether the gpu is available at all),
// it does not set an active device.
// Raises EnvironmentError or ValueError (as appropriate) if the initialization failed.
PyObject * PyObject *
device_available(PyObject* _unsed, PyObject * args) CudaNdarray_gpu_init(PyObject* _unused, PyObject* args)
{ {
int deviceCount; int card_nb = 0;
int card_number_provided = 1;
cudaError err = cudaGetDeviceCount(&deviceCount); PyArg_ParseTuple(args, "|i", &card_nb); // if we're given something wildly invalid, this will throw a TypeError
if( cudaSuccess != err) {
Py_RETURN_FALSE;
}
if (deviceCount <= 0) {
Py_RETURN_FALSE;
}
cudaDeviceProp deviceProp; if(PyTuple_Size(args) == 0) {
err=cudaGetDeviceProperties(&deviceProp, 0); card_number_provided = 0;
if( cudaSuccess != err) { card_nb = 0;
Py_RETURN_FALSE;
} }
if(deviceProp.major == 9999 && deviceProp.minor == 9999 ){
Py_RETURN_FALSE;
}
Py_RETURN_TRUE;
}
PyObject *
CudaNdarray_gpu_init(PyObject* _unsed, PyObject * args)
{
int card_nb=0;
if (! PyArg_ParseTuple(args, "|i", &card_nb))
return NULL;
int deviceCount; int deviceCount;
cudaError err = cudaGetDeviceCount(&deviceCount); cudaError err = cudaGetDeviceCount(&deviceCount);
if( cudaSuccess != err) { if(cudaSuccess != err) {
//TODO: put this as a warning and let theano continue on the cpu... return PyErr_Format(PyExc_EnvironmentError,
PyErr_Format(PyExc_RuntimeError, "ERROR: Not able to get the number of gpu available."); "Unable to get the number of gpus available: %s",
return NULL; cudaGetErrorString(cudaGetLastError()));
} }
if (deviceCount <= 0) { if(deviceCount <= 0) {
//TODO: put this as a warning and let theano continue on the cpu... return PyErr_Format(PyExc_EnvironmentError,
PyErr_Format(PyExc_RuntimeError, "ERROR: Can't use the GPU, no devices supporting CUDA.\n"); "Can't use the GPU, no devices support CUDA");
return NULL;
} }
if(card_nb<0 || card_nb>(deviceCount-1)){ if(card_number_provided && (card_nb < 0 || card_nb > (deviceCount - 1))) {
PyErr_Format(PyExc_RuntimeError, "ERROR: bad device number %d. Their is only %d device available\n", return PyErr_Format(PyExc_ValueError,
    card_nb, deviceCount); "Bad device number %d. There are only %d devices available.",
return NULL; card_nb,
deviceCount);
} }
cudaDeviceProp deviceProp; cudaDeviceProp deviceProp;
err=cudaGetDeviceProperties(&deviceProp, card_nb); err = cudaGetDeviceProperties(&deviceProp, card_nb);
if( cudaSuccess != err) { if(cudaSuccess != err) {
PyErr_Format(PyExc_RuntimeError, "ERROR: Was not able to get the property of the gpu %i.", return PyErr_Format(PyExc_EnvironmentError,
card_nb); "Unable to get properties of gpu %i: %s",
exit(-1); card_nb,
cudaGetErrorString(cudaGetLastError()));
} }
if(deviceProp.major == 9999 && deviceProp.minor == 9999 ){ if(deviceProp.major == 9999 && deviceProp.minor == 9999 ){
PyErr_Format(PyExc_RuntimeError, "WARNING: Their is no device that support CUDA.\n"); return PyErr_Format(PyExc_EnvironmentError,
return NULL; "There is no device that supports CUDA");
} }
if(card_number_provided) {
fprintf(stderr, "Using gpu device %d: %s\n", card_nb, deviceProp.name); fprintf(stderr, "Using gpu device %d: %s\n", card_nb, deviceProp.name);
err = cudaSetDevice(card_nb); err = cudaSetDevice(card_nb);
if( cudaSuccess != err) { if(cudaSuccess != err) {
PyErr_Format(PyExc_RuntimeError, "ERROR: Was not able to set the device. %s\n", cudaGetErrorString(err)); return PyErr_Format(PyExc_EnvironmentError,
return NULL; "Unable to set device %i: %s",
card_nb,
cudaGetErrorString(cudaGetLastError()));
}
} }
Py_INCREF(Py_None); Py_INCREF(Py_None);
...@@ -1958,8 +1945,7 @@ filter(PyObject* __unsed_self, PyObject *args) // args = (data, broadcastable, s ...@@ -1958,8 +1945,7 @@ filter(PyObject* __unsed_self, PyObject *args) // args = (data, broadcastable, s
static PyMethodDef module_methods[] = { static PyMethodDef module_methods[] = {
{"dot", CudaNdarray_Dot, METH_VARARGS, "Returns the matrix product of two CudaNdarray arguments."}, {"dot", CudaNdarray_Dot, METH_VARARGS, "Returns the matrix product of two CudaNdarray arguments."},
{"device_available", device_available, METH_VARARGS, "Return Py_True if a cuda card is available."}, {"gpu_init", CudaNdarray_gpu_init, METH_VARARGS, "Select the gpu card to use; also usable to test whether CUDA is available."},
{"gpu_init", CudaNdarray_gpu_init, METH_VARARGS, "Allow to select the gpu card to use."},
{"filter", filter, METH_VARARGS, "filter(obj, broadcastable, strict, storage) returns a CudaNdarray initialized to obj if it matches the constraints of broadcastable. strict=True prevents any numeric casting. If storage is a CudaNdarray it may be overwritten and used as the return value."}, {"filter", filter, METH_VARARGS, "filter(obj, broadcastable, strict, storage) returns a CudaNdarray initialized to obj if it matches the constraints of broadcastable. strict=True prevents any numeric casting. If storage is a CudaNdarray it may be overwritten and used as the return value."},
{"outstanding_mallocs", outstanding_mallocs, METH_VARARGS, "how many more mallocs have been called than free's"}, {"outstanding_mallocs", outstanding_mallocs, METH_VARARGS, "how many more mallocs have been called than free's"},
{NULL, NULL, NULL, NULL} /* Sentinel */ {NULL, NULL, NULL, NULL} /* Sentinel */
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论