Commit 0f1d6881 authored by James Bergstra

merge

...@@ -7,14 +7,28 @@ from .var import (CudaNdarrayVariable, ...@@ -7,14 +7,28 @@ from .var import (CudaNdarrayVariable,
import basic_ops import basic_ops
import opt import opt
import cuda_ndarray
import theano.compile.sandbox import theano.compile.sandbox
import logging, os
def use():
    """Enable GPU support: register CudaNdarray handling for shared float32 arrays.

    Convenience wrapper that simply calls handle_shared_float32(True).
    """
    handle_shared_float32(True)
def handle_shared_float32(tf):
    """Set the CudaNdarrayType as the default handler for shared float32 arrays.

    Use use() instead, as this is a bad name.

    :param tf: if True, initialise the GPU (device number taken from the
        THEANO_GPU environment variable, defaulting to device 0) and register
        the CudaNdarray shared constructor.  Passing False is not implemented.
    :raises NotImplementedError: if tf is False (removing the handler is
        not supported).
    """
    if tf:
        try:
            # Default must be the string "0", not the int 0, so int(v)
            # always receives the same type whether or not the variable
            # is set in the environment.
            v = os.getenv("THEANO_GPU", "0")
            cuda_ndarray.gpu_init(int(v))
            theano.compile.sandbox.shared_constructor(shared_constructor)
        except RuntimeError as e:
            # GPU initialisation is best-effort: warn and fall back to CPU
            # rather than aborting.  Lazy %-args avoid building the message
            # unless the record is actually emitted.
            logging.getLogger('theano_cuda_ndarray').warning(
                "WARNING: Won't use the GPU as the initialisation failed.%s",
                str(e))
    else:
        raise NotImplementedError('removing our handler')
...@@ -880,6 +880,8 @@ class NaiveAlgo(object): ...@@ -880,6 +880,8 @@ class NaiveAlgo(object):
print >> sio, """ print >> sio, """
int dims[%(nd)s] = {%(initial_dims)s}; int dims[%(nd)s] = {%(initial_dims)s};
""" %locals() """ %locals()
#check that all inputs have valid dimensions
for iname in inputs: for iname in inputs:
print >> sio, """ print >> sio, """
//std::cerr << "C_CODE %(opname)s checking input %(iname)s\\n"; //std::cerr << "C_CODE %(opname)s checking input %(iname)s\\n";
...@@ -904,6 +906,7 @@ class NaiveAlgo(object): ...@@ -904,6 +906,7 @@ class NaiveAlgo(object):
} }
""" %locals() """ %locals()
#check that all outputs have valid dimensions
for oname in outputs: for oname in outputs:
print >> sio, """ print >> sio, """
for (int i = 0; (i< %(nd)s) && (cnda_%(oname)s); ++i) { for (int i = 0; (i< %(nd)s) && (cnda_%(oname)s); ++i) {
......
...@@ -34,10 +34,12 @@ def nvcc_module_compile_str(module_name, src_code, location=None, include_dirs=[ ...@@ -34,10 +34,12 @@ def nvcc_module_compile_str(module_name, src_code, location=None, include_dirs=[
preargs= [] if preargs is None else list(preargs) preargs= [] if preargs is None else list(preargs)
preargs.append('-fPIC') preargs.append('-fPIC')
no_opt = False no_opt = False
cuda_root = os.getenv('CUDA_ROOT')
include_dirs = std_include_dirs() + include_dirs include_dirs = std_include_dirs() + include_dirs
libs = std_libs() + ['cudart'] + libs libs = std_libs() + ['cudart'] + libs
lib_dirs = std_lib_dirs() + [os.path.join(os.getenv('CUDA_ROOT'), 'lib')] + lib_dirs lib_dirs = std_lib_dirs() + lib_dirs
if cuda_root:
lib_dirs.append(os.path.join(cuda_root, 'lib'))
cppfilename = os.path.join(location, 'mod.cu') cppfilename = os.path.join(location, 'mod.cu')
cppfile = file(cppfilename, 'w') cppfile = file(cppfilename, 'w')
...@@ -83,7 +85,7 @@ def nvcc_module_compile_str(module_name, src_code, location=None, include_dirs=[ ...@@ -83,7 +85,7 @@ def nvcc_module_compile_str(module_name, src_code, location=None, include_dirs=[
print '===============================' print '==============================='
for i, l in enumerate(src_code.split('\n')): for i, l in enumerate(src_code.split('\n')):
print i+1, l print i+1, l
raise Exception('nvcc return status', p.returncode) raise Exception('nvcc return status', p.returncode, 'for file',cppfilename)
#touch the __init__ file #touch the __init__ file
file(os.path.join(location, "__init__.py"),'w').close() file(os.path.join(location, "__init__.py"),'w').close()
......
...@@ -175,7 +175,7 @@ def local_gpu_flatten(node): ...@@ -175,7 +175,7 @@ def local_gpu_flatten(node):
host_input = node.inputs[0] host_input = node.inputs[0]
if host_input.owner and isinstance(host_input.owner.op, tensor.Flatten): if host_input.owner and isinstance(host_input.owner.op, tensor.Flatten):
outdim = host_input.owner.op.outdim outdim = host_input.owner.op.outdim
return [GpuFlatten(outdim)(gpu_from_host(host_input.inputs[0]))] return [GpuFlatten(outdim)(gpu_from_host(host_input.owner.inputs[0]))]
if isinstance(node.op, tensor.Flatten): if isinstance(node.op, tensor.Flatten):
x, = node.inputs x, = node.inputs
outdim = node.op.outdim outdim = node.op.outdim
......
...@@ -52,8 +52,14 @@ class CudaNdarrayType(Type): ...@@ -52,8 +52,14 @@ class CudaNdarrayType(Type):
def filter(self, data, strict=False):
    """Validate/convert `data` to a value acceptable for this type.

    Delegates entirely to the C-level type_support_filter, passing this
    type's broadcastable pattern along.
    """
    bcast = self.broadcastable
    return type_support_filter(data, bcast, strict)
@staticmethod
def values_eq(a, b):
    """Return exact equality of a and b, compared as host-side numpy arrays."""
    # TODO: make the comparison without the device-to-host transfer.
    return tensor.TensorType.values_eq(numpy.asarray(a), numpy.asarray(b))
@staticmethod
def values_eq_approx(a, b):
    """Return approximate equality of a and b, compared as host-side numpy arrays."""
    # TODO: make the comparison without the device-to-host transfer.
    host_a = numpy.asarray(a)
    host_b = numpy.asarray(b)
    return tensor.TensorType.values_eq_approx(host_a, host_b)
def dtype_specs(self): def dtype_specs(self):
...@@ -229,12 +235,18 @@ class CudaNdarrayType(Type): ...@@ -229,12 +235,18 @@ class CudaNdarrayType(Type):
def c_header_dirs(self):
    """Override `CLinkerOp.c_headers` """
    # Always search next to the compiled cuda_ndarray extension; add the
    # CUDA toolkit's include dir only when CUDA_ROOT is set and non-empty.
    dirs = [os.path.dirname(cuda_ndarray.__file__)]
    root = os.getenv("CUDA_ROOT")
    if root:
        dirs.append(os.path.join(root, 'include'))
    return dirs
def c_lib_dirs(self):
    """Return library search dirs: the cuda_ndarray extension's directory,
    plus the CUDA toolkit's lib dir when CUDA_ROOT is set and non-empty."""
    dirs = [os.path.dirname(cuda_ndarray.__file__)]
    root = os.getenv("CUDA_ROOT")
    if root:
        dirs.append(os.path.join(root, 'lib'))
    return dirs
def c_libraries(self):
    """Libraries to link against: the cuda_ndarray extension and the
    CUDA runtime (cudart)."""
    return ['cuda_ndarray', 'cudart']
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论