提交 08578830 authored 作者: James Bergstra's avatar James Bergstra

merge

...@@ -21,8 +21,8 @@ def debug(*msg): ...@@ -21,8 +21,8 @@ def debug(*msg):
# printed and this module will not be working properly (we set `cuda_available` # printed and this module will not be working properly (we set `cuda_available`
# to False). # to False).
# This variable is True by default, and set to False if something goes wrong # This variable is True by default, and set to False if nvcc is not available or
# when trying to initialize cuda. # their is no cuda card or something goes wrong when trying to initialize cuda.
cuda_available = True cuda_available = True
# Global variable to avoid displaying the same warning multiple times. # Global variable to avoid displaying the same warning multiple times.
...@@ -89,6 +89,9 @@ except Exception, e: ...@@ -89,6 +89,9 @@ except Exception, e:
error( "Failed to compile cuda_ndarray.cu: %s" % str(e)) error( "Failed to compile cuda_ndarray.cu: %s" % str(e))
set_cuda_disabled() set_cuda_disabled()
if cuda_available:
cuda_available=device_available()
if cuda_available: if cuda_available:
#check if their is an old cuda_ndarray that was loading instead of the one we compiled! #check if their is an old cuda_ndarray that was loading instead of the one we compiled!
import cuda_ndarray.cuda_ndarray import cuda_ndarray.cuda_ndarray
......
...@@ -1604,6 +1604,35 @@ static PyTypeObject CudaNdarrayType = ...@@ -1604,6 +1604,35 @@ static PyTypeObject CudaNdarrayType =
CudaNdarray_new, /* tp_new */ CudaNdarray_new, /* tp_new */
}; };
// Return Py_True if at least one CUDA device is present and its
// properties can be queried; Py_False otherwise. Never raises a
// Python exception — callers get a plain boolean answer.
PyObject *
device_available(PyObject* _unsed, PyObject * args)
{
    int n_devices = 0;

    // No driver / no runtime, or zero cards installed.
    if (cudaGetDeviceCount(&n_devices) != cudaSuccess)
        Py_RETURN_FALSE;
    if (n_devices <= 0)
        Py_RETURN_FALSE;

    // Make sure device 0 is actually usable by querying its properties.
    cudaDeviceProp prop;
    if (cudaGetDeviceProperties(&prop, 0) != cudaSuccess)
        Py_RETURN_FALSE;

    // Compute capability 9999.9999 is the sentinel the CUDA runtime
    // reports for the device-emulation pseudo-device, i.e. no real card.
    if (prop.major == 9999 && prop.minor == 9999)
        Py_RETURN_FALSE;

    Py_RETURN_TRUE;
}
PyObject * PyObject *
CudaNdarray_gpu_init(PyObject* _unsed, PyObject * args) CudaNdarray_gpu_init(PyObject* _unsed, PyObject * args)
{ {
...@@ -1810,6 +1839,7 @@ filter(PyObject* __unsed_self, PyObject *args) // args = (data, broadcastable, s ...@@ -1810,6 +1839,7 @@ filter(PyObject* __unsed_self, PyObject *args) // args = (data, broadcastable, s
static PyMethodDef module_methods[] = { static PyMethodDef module_methods[] = {
{"dot", CudaNdarray_Dot, METH_VARARGS, "Returns the matrix product of two CudaNdarray arguments."}, {"dot", CudaNdarray_Dot, METH_VARARGS, "Returns the matrix product of two CudaNdarray arguments."},
{"device_available", device_available, METH_VARARGS, "Return Py_True if a cuda card is available."},
{"gpu_init", CudaNdarray_gpu_init, METH_VARARGS, "Allow to select the gpu card to use."}, {"gpu_init", CudaNdarray_gpu_init, METH_VARARGS, "Allow to select the gpu card to use."},
{"filter", filter, METH_VARARGS, "filter(obj, broadcastable, strict, storage) returns a CudaNdarray initialized to obj if it matches the constraints of broadcastable. strict=True prevents any numeric casting. If storage is a CudaNdarray it may be overwritten and used as the return value."}, {"filter", filter, METH_VARARGS, "filter(obj, broadcastable, strict, storage) returns a CudaNdarray initialized to obj if it matches the constraints of broadcastable. strict=True prevents any numeric casting. If storage is a CudaNdarray it may be overwritten and used as the return value."},
{"outstanding_mallocs", outstanding_mallocs, METH_VARARGS, "how many more mallocs have been called than free's"}, {"outstanding_mallocs", outstanding_mallocs, METH_VARARGS, "how many more mallocs have been called than free's"},
......
...@@ -22,7 +22,6 @@ from theano.tests import unittest_tools as utt ...@@ -22,7 +22,6 @@ from theano.tests import unittest_tools as utt
#TODO: make tests work when no flags gived. Now need: THEANO_FLAGS=device=gpu0,floatX=float32 #TODO: make tests work when no flags gived. Now need: THEANO_FLAGS=device=gpu0,floatX=float32
# Partly done, in test_consistency_GPU_{serial,parallel} # Partly done, in test_consistency_GPU_{serial,parallel}
#TODO: bug fix test_normal0, in normal() fct, n_samples currently need to be numpy.prod(size) not self.n_streams(size)
mode = config.mode mode = config.mode
mode_with_gpu = theano.compile.mode.get_default_mode().including('gpu') mode_with_gpu = theano.compile.mode.get_default_mode().including('gpu')
...@@ -324,7 +323,7 @@ def test_uniform(): ...@@ -324,7 +323,7 @@ def test_uniform():
sample_size = (10,100) sample_size = (10,100)
steps = 50 steps = 50
else: else:
sample_size = (500,100) sample_size = (500,50)
steps = int(1e3) steps = int(1e3)
x = tensor.matrix() x = tensor.matrix()
...@@ -383,7 +382,7 @@ def test_binomial(): ...@@ -383,7 +382,7 @@ def test_binomial():
sample_size = (10,50) sample_size = (10,50)
steps = 70 steps = 70
else: else:
sample_size = (500,100) sample_size = (500,50)
steps = int(1e3) steps = int(1e3)
x = tensor.matrix() x = tensor.matrix()
...@@ -430,9 +429,9 @@ def test_normal0(): ...@@ -430,9 +429,9 @@ def test_normal0():
steps = 50 steps = 50
if mode in ['DEBUG_MODE','FAST_COMPILE']: if mode in ['DEBUG_MODE','FAST_COMPILE']:
sample_size = (99,100) sample_size = (99,30)
else: else:
sample_size = (999,100) sample_size = (999,50)
print '' print ''
print 'ON CPU:' print 'ON CPU:'
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论