提交 0f07b4a5，作者：Frédéric Bastien

Merge pull request #1966 from abergeron/cublas_v2

Switch to cublas v2
...@@ -40,7 +40,7 @@ ...@@ -40,7 +40,7 @@
#endif #endif
#include <cublas.h> #include <cublas_v2.h>
#ifdef _WIN32 #ifdef _WIN32
#ifdef _CUDA_NDARRAY_C #ifdef _CUDA_NDARRAY_C
...@@ -81,6 +81,9 @@ typedef float real; ...@@ -81,6 +81,9 @@ typedef float real;
#define VERBOSE_DEVICE_MALLOC 1 #define VERBOSE_DEVICE_MALLOC 1
#define NO_VERBOSE_DEVICE_MALLOC 0 #define NO_VERBOSE_DEVICE_MALLOC 0
/* Use this handle to make cublas calls */
extern cublasHandle_t handle;
/** /**
* Allocation and freeing of device memory should go through these functions so that the lib can track memory usage. * Allocation and freeing of device memory should go through these functions so that the lib can track memory usage.
* *
...@@ -365,8 +368,8 @@ static int CudaNdarray_alloc_contiguous(CudaNdarray *self, const int nd, ...@@ -365,8 +368,8 @@ static int CudaNdarray_alloc_contiguous(CudaNdarray *self, const int nd,
//Detect overflow on unsigned integer //Detect overflow on unsigned integer
if (dim[i] != 0 && size > (SIZE_MAX / dim[i])) { if (dim[i] != 0 && size > (SIZE_MAX / dim[i])) {
PyErr_Format(PyExc_AssertionError, PyErr_Format(PyExc_AssertionError,
"Can't store in size_t for the bytes requested", "Can't store in size_t for the bytes requested %llu",
size); (unsigned long long)size);
return -1; return -1;
} }
size = size * dim[i]; size = size * dim[i];
...@@ -382,8 +385,8 @@ static int CudaNdarray_alloc_contiguous(CudaNdarray *self, const int nd, ...@@ -382,8 +385,8 @@ static int CudaNdarray_alloc_contiguous(CudaNdarray *self, const int nd,
//Detect overflow on unsigned integer //Detect overflow on unsigned integer
if (dim[i] != 0 && size > (SIZE_MAX / dim[i])) { if (dim[i] != 0 && size > (SIZE_MAX / dim[i])) {
PyErr_Format(PyExc_AssertionError, PyErr_Format(PyExc_AssertionError,
"Can't store in size_t for the bytes requested", "Can't store in size_t for the bytes requested %llu",
size); (unsigned long long)size);
return -1; return -1;
} }
size = size * dim[i]; size = size * dim[i];
...@@ -583,23 +586,33 @@ DllExport int CudaNdarray_inplace_elemwise(PyObject* py_self, PyObject * py_othe ...@@ -583,23 +586,33 @@ DllExport int CudaNdarray_inplace_elemwise(PyObject* py_self, PyObject * py_othe
DllExport int CudaNdarray_prep_output(CudaNdarray ** arr, int nd, DllExport int CudaNdarray_prep_output(CudaNdarray ** arr, int nd,
const int * dims, int fortran = 0); const int * dims, int fortran = 0);
DllExport inline const char* ALWAYS_INLINE cublasGetErrorString(cublasStatus err){ DllExport inline const char* ALWAYS_INLINE cublasGetErrorString(cublasStatus_t err){
if(CUBLAS_STATUS_SUCCESS == err) switch(err) {
case CUBLAS_STATUS_SUCCESS:
return "success"; return "success";
else if(CUBLAS_STATUS_NOT_INITIALIZED == err) case CUBLAS_STATUS_NOT_INITIALIZED:
return "the library was not initialized"; return "the library was not initialized";
else if(CUBLAS_STATUS_ALLOC_FAILED == err) case CUBLAS_STATUS_ALLOC_FAILED:
return "the resource allocation failed"; return "the resource allocation failed";
else if(CUBLAS_STATUS_INVALID_VALUE == err) case CUBLAS_STATUS_INVALID_VALUE:
return "the parameters n<0 or incx,incy=0"; return "the parameters n<0 or incx,incy=0";
else if(CUBLAS_STATUS_MAPPING_ERROR == err) #ifdef CUBLAS_STATUS_ARCH_MISMATCH
case CUBLAS_STATUS_ARCH_MISMATCH:
return "required device feature not present";
#endif
case CUBLAS_STATUS_MAPPING_ERROR:
return "an access to GPU memory space failed"; return "an access to GPU memory space failed";
else if(CUBLAS_STATUS_EXECUTION_FAILED == err) case CUBLAS_STATUS_EXECUTION_FAILED:
return "the function failed to launch on the GPU"; return "the function failed to launch on the GPU";
else if(CUBLAS_STATUS_INTERNAL_ERROR == err) case CUBLAS_STATUS_INTERNAL_ERROR:
return "an internal operation failed"; return "an internal operation failed";
else #ifdef CUBLAS_STATUS_NOT_SUPPORTED
case CUBLAS_STATUS_NOT_SUPPORTED:
return "unsupported function";
#endif
default:
return "unknow code"; return "unknow code";
}
} }
#endif #endif
......
Markdown 格式
0%
您将添加 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
请 注册 或者 登录 后发表评论