提交 376ca250 authored 作者: Arnaud Bergeron's avatar Arnaud Bergeron

Fix problems in sandbox/cuda.

上级 405ccc5d
......@@ -236,8 +236,11 @@ optdb['canonicalize'].register('local_cut_gpu_host_gpu',
# 'float64', 'complex128' and 'complex64' are not supported in elemwise
# on the gpu.
elemwise_cuda_dtype_supported = ['float32', 'uint8', 'int8', 'uint16', 'int16',
'uint32', 'int32', 'uint64', 'int64']
elemwise_cuda_dtype_supported = ['float32', 'bool',
'uint8', 'int8',
'uint16', 'int16',
'uint32', 'int32',
'uint64', 'int64']
def dtype_in_elemwise_supported(op):
......@@ -297,8 +300,8 @@ def local_gpu_elemwise_0(node):
return False
# first establish that float32 can store all inputs
upcastable = set(['float32', 'int8', 'int16', 'uint8',
'uint16'])
upcastable = set(['float32', 'bool', 'int8', 'int16',
'uint8', 'uint16',])
# case 1 - all inputs are already float32
if all([i.type.dtype == 'float32' for i in node.inputs]):
# TODO: change this when fusion makes Elemwise with
......
......@@ -28,7 +28,7 @@ class CURAND_Base(GpuOp):
CURAND. This Op uses a generic-typed shared variable to point to a CObject
that encapsulates this opaque reference.
Each random variable is created with a generator of False.
Each random variable is created with a generator of None.
The actual random number generator is allocated from the seed, on the first
call to allocate random numbers (see c_code).
......@@ -210,7 +210,7 @@ class CURAND_Base(GpuOp):
%(fail)s;
}
%(o_generator)s = PyCObject_FromVoidPtr(gen, &free_generator);
assert (%(i_generator)s == Py_False);
assert (%(i_generator)s == Py_None);
}
else if (%(destructive)s)
{
......@@ -244,7 +244,7 @@ class CURAND_Base(GpuOp):
return code
def c_code_cache_version(self):
return (4,)
return (5,)
class CURAND_Normal(CURAND_Base):
......@@ -328,7 +328,7 @@ class CURAND_RandomStreams(object):
else:
msg = "size must be a tuple of int or a Theano variable"
assert isinstance(size, Variable) and size.ndim == 1, msg
generator = theano.shared(False) # makes a generic
generator = theano.shared(None) # makes a generic
s_size = theano.tensor.as_tensor_variable(size)
u = CURAND_Uniform.new_auto_update(generator, ndim, dtype, s_size,
self.next_seed())
......@@ -360,7 +360,7 @@ class CURAND_RandomStreams(object):
else:
msg = "size must be a tuple of int or a Theano variable"
assert isinstance(size, Variable) and size.ndim == 1, msg
generator = theano.shared(False) # makes a generic
generator = theano.shared(None) # makes a generic
s_size = theano.tensor.as_tensor_variable(size)
u = CURAND_Normal.new_auto_update(generator, ndim, dtype, s_size,
self.next_seed())
......
......@@ -82,7 +82,7 @@ def test_nvcc_cast():
https://groups.google.com/d/topic/theano-dev/LzHtP2OWeRE/discussion
"""
var = theano.tensor.fvector()
f = theano.function([var], -1. * (var > 0), mode=mode_with_gpu)
f = theano.function([var], -1. * (var > 0).astype('int8'), mode=mode_with_gpu)
if not numpy.allclose(f([-1, 0, 1]), [0, 0, -1]):
raise Exception(
"The version of nvcc that Theano detected on your system "
......
Markdown 格式
0%
您将 0 人添加到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论