提交 376ca250 authored 作者: Arnaud Bergeron's avatar Arnaud Bergeron

Fix problems in sandbox/cuda.

上级 405ccc5d
...@@ -236,8 +236,11 @@ optdb['canonicalize'].register('local_cut_gpu_host_gpu', ...@@ -236,8 +236,11 @@ optdb['canonicalize'].register('local_cut_gpu_host_gpu',
# 'float64', 'complex128' and 'complex64' are not supported in elemwise # 'float64', 'complex128' and 'complex64' are not supported in elemwise
# on the gpu. # on the gpu.
elemwise_cuda_dtype_supported = ['float32', 'uint8', 'int8', 'uint16', 'int16', elemwise_cuda_dtype_supported = ['float32', 'bool',
'uint32', 'int32', 'uint64', 'int64'] 'uint8', 'int8',
'uint16', 'int16',
'uint32', 'int32',
'uint64', 'int64']
def dtype_in_elemwise_supported(op): def dtype_in_elemwise_supported(op):
...@@ -297,8 +300,8 @@ def local_gpu_elemwise_0(node): ...@@ -297,8 +300,8 @@ def local_gpu_elemwise_0(node):
return False return False
# first establish that float32 can store all inputs # first establish that float32 can store all inputs
upcastable = set(['float32', 'int8', 'int16', 'uint8', upcastable = set(['float32', 'bool', 'int8', 'int16',
'uint16']) 'uint8', 'uint16',])
# case 1 - all inputs are already float32 # case 1 - all inputs are already float32
if all([i.type.dtype == 'float32' for i in node.inputs]): if all([i.type.dtype == 'float32' for i in node.inputs]):
# TODO: change this when fusion makes Elemwise with # TODO: change this when fusion makes Elemwise with
......
...@@ -28,7 +28,7 @@ class CURAND_Base(GpuOp): ...@@ -28,7 +28,7 @@ class CURAND_Base(GpuOp):
CURAND. This Op uses a generic-typed shared variable to point to a CObject CURAND. This Op uses a generic-typed shared variable to point to a CObject
that encapsulates this opaque reference. that encapsulates this opaque reference.
Each random variable is created with a generator of False. Each random variable is created with a generator of None.
The actual random number generator is allocated from the seed, on the first The actual random number generator is allocated from the seed, on the first
call to allocate random numbers (see c_code). call to allocate random numbers (see c_code).
...@@ -210,7 +210,7 @@ class CURAND_Base(GpuOp): ...@@ -210,7 +210,7 @@ class CURAND_Base(GpuOp):
%(fail)s; %(fail)s;
} }
%(o_generator)s = PyCObject_FromVoidPtr(gen, &free_generator); %(o_generator)s = PyCObject_FromVoidPtr(gen, &free_generator);
assert (%(i_generator)s == Py_False); assert (%(i_generator)s == Py_None);
} }
else if (%(destructive)s) else if (%(destructive)s)
{ {
...@@ -244,7 +244,7 @@ class CURAND_Base(GpuOp): ...@@ -244,7 +244,7 @@ class CURAND_Base(GpuOp):
return code return code
def c_code_cache_version(self): def c_code_cache_version(self):
return (4,) return (5,)
class CURAND_Normal(CURAND_Base): class CURAND_Normal(CURAND_Base):
...@@ -328,7 +328,7 @@ class CURAND_RandomStreams(object): ...@@ -328,7 +328,7 @@ class CURAND_RandomStreams(object):
else: else:
msg = "size must be a tuple of int or a Theano variable" msg = "size must be a tuple of int or a Theano variable"
assert isinstance(size, Variable) and size.ndim == 1, msg assert isinstance(size, Variable) and size.ndim == 1, msg
generator = theano.shared(False) # makes a generic generator = theano.shared(None) # makes a generic
s_size = theano.tensor.as_tensor_variable(size) s_size = theano.tensor.as_tensor_variable(size)
u = CURAND_Uniform.new_auto_update(generator, ndim, dtype, s_size, u = CURAND_Uniform.new_auto_update(generator, ndim, dtype, s_size,
self.next_seed()) self.next_seed())
...@@ -360,7 +360,7 @@ class CURAND_RandomStreams(object): ...@@ -360,7 +360,7 @@ class CURAND_RandomStreams(object):
else: else:
msg = "size must be a tuple of int or a Theano variable" msg = "size must be a tuple of int or a Theano variable"
assert isinstance(size, Variable) and size.ndim == 1, msg assert isinstance(size, Variable) and size.ndim == 1, msg
generator = theano.shared(False) # makes a generic generator = theano.shared(None) # makes a generic
s_size = theano.tensor.as_tensor_variable(size) s_size = theano.tensor.as_tensor_variable(size)
u = CURAND_Normal.new_auto_update(generator, ndim, dtype, s_size, u = CURAND_Normal.new_auto_update(generator, ndim, dtype, s_size,
self.next_seed()) self.next_seed())
......
...@@ -82,7 +82,7 @@ def test_nvcc_cast(): ...@@ -82,7 +82,7 @@ def test_nvcc_cast():
https://groups.google.com/d/topic/theano-dev/LzHtP2OWeRE/discussion https://groups.google.com/d/topic/theano-dev/LzHtP2OWeRE/discussion
""" """
var = theano.tensor.fvector() var = theano.tensor.fvector()
f = theano.function([var], -1. * (var > 0), mode=mode_with_gpu) f = theano.function([var], -1. * (var > 0).astype('int8'), mode=mode_with_gpu)
if not numpy.allclose(f([-1, 0, 1]), [0, 0, -1]): if not numpy.allclose(f([-1, 0, 1]), [0, 0, -1]):
raise Exception( raise Exception(
"The version of nvcc that Theano detected on your system " "The version of nvcc that Theano detected on your system "
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论