提交 dc0ad48c authored 作者: abergeron's avatar abergeron

Merge pull request #1990 from nouiz/gpu_red

disable complex support in gpu reduce.
...@@ -647,6 +647,10 @@ class GpuCAReduceCuda(HideC, CAReduceDtype): ...@@ -647,6 +647,10 @@ class GpuCAReduceCuda(HideC, CAReduceDtype):
if (x.type.ndim != len(self.reduce_mask)): if (x.type.ndim != len(self.reduce_mask)):
raise TypeError("x must have rank %i" % len(self.reduce_mask)) raise TypeError("x must have rank %i" % len(self.reduce_mask))
if ("complex" in x.dtype or
"complex" in ret.outputs[0].dtype or
"complex" in self._acc_dtype(x.dtype)):
raise NotImplementedError("We don't support complex in gpu reduction")
return Apply(self, [x], [GpuArrayType(ret.outputs[0].dtype, return Apply(self, [x], [GpuArrayType(ret.outputs[0].dtype,
ret.outputs[0].type.broadcastable)()]) ret.outputs[0].type.broadcastable)()])
...@@ -717,8 +721,12 @@ class GpuCAReduceCuda(HideC, CAReduceDtype): ...@@ -717,8 +721,12 @@ class GpuCAReduceCuda(HideC, CAReduceDtype):
nd_in = node.inputs[0].type.ndim nd_in = node.inputs[0].type.ndim
nd_out = node.outputs[0].type.ndim nd_out = node.outputs[0].type.ndim
in_dtype = "npy_" + node.inputs[0].dtype # For complex, we need to use theano_complex* in the c code to
out_dtype = "npy_" + node.outputs[0].dtype # have it run. But libgpuarray doesn't understand it.
in_dtype = node.inputs[0].type.dtype_specs()[1]
out_dtype = node.outputs[0].type.dtype_specs()[1]
gin_dtype = "npy_" + node.inputs[0].dtype
gout_dtype = "npy_" + node.outputs[0].dtype
assert nd_in - nd_out == sum(self.reduce_mask) assert nd_in - nd_out == sum(self.reduce_mask)
sio = StringIO() sio = StringIO()
...@@ -782,7 +790,7 @@ class GpuCAReduceCuda(HideC, CAReduceDtype): ...@@ -782,7 +790,7 @@ class GpuCAReduceCuda(HideC, CAReduceDtype):
if not self.reduce_mask[i]: if not self.reduce_mask[i]:
print >> sio, 'new_dims[%(j)s] = PyGpuArray_DIMS(%(x)s)[%(i)s];' % locals() print >> sio, 'new_dims[%(j)s] = PyGpuArray_DIMS(%(x)s)[%(i)s];' % locals()
j += 1 j += 1
out_typecode = dtype_to_typecode(out_dtype[4:]) out_typecode = dtype_to_typecode(gout_dtype[4:])
print >> sio, """ print >> sio, """
Py_XDECREF(%(z)s); Py_XDECREF(%(z)s);
%(z)s = pygpu_empty(%(nd_out)s, new_dims, %(z)s = pygpu_empty(%(nd_out)s, new_dims,
...@@ -1001,7 +1009,9 @@ class GpuCAReduceCuda(HideC, CAReduceDtype): ...@@ -1001,7 +1009,9 @@ class GpuCAReduceCuda(HideC, CAReduceDtype):
return sio.getvalue() return sio.getvalue()
def _k_init(self, node, nodename): def _k_init(self, node, nodename):
acc_dtype = "npy_" + self._acc_dtype(node.inputs[0].dtype) acc_dtype = self._acc_dtype(node.inputs[0].dtype)
# We need to use theano_complex* and not npy_complex*
acc_dtype = theano.scalar.basic.Scalar(acc_dtype).dtype_specs()[1]
return """ return """
const int threadCount = blockDim.x * blockDim.y * blockDim.z; const int threadCount = blockDim.x * blockDim.y * blockDim.z;
......
...@@ -167,6 +167,10 @@ class T_gpureduce_dtype(T_reduce_dtype): ...@@ -167,6 +167,10 @@ class T_gpureduce_dtype(T_reduce_dtype):
op = GpuCAReduceCuda op = GpuCAReduceCuda
#Currently we don't support reduction on 0 axis #Currently we don't support reduction on 0 axis
axes = [None, 0, 1, 1, [0], [1], [0, 1]] axes = [None, 0, 1, 1, [0], [1], [0, 1]]
#We don't support complex dtype
dtypes = ['int8', 'int16', 'int32', 'int64',
'uint8', 'uint16', 'uint32', 'uint64',
'float32', 'float64']
def speed_reduce10(): def speed_reduce10():
......
...@@ -146,6 +146,33 @@ class GpuArrayType(Type): ...@@ -146,6 +146,33 @@ class GpuArrayType(Type):
def __str__(self): def __str__(self):
return "GpuArray<%s>" % (self.dtype,) return "GpuArray<%s>" % (self.dtype,)
def dtype_specs(self):
    """Describe ``self.dtype`` for the C code generator.

    Returns a 3-tuple ``(python type, C type name, numpy typenum name)``,
    for example ``(float, 'npy_float32', 'NPY_FLOAT32')``.

    Real and integer dtypes use the ``npy_*`` C type names; complex
    dtypes use ``theano_complex*`` instead.

    Raises TypeError when ``self.dtype`` is not one of the supported
    dtype names.
    """
    # Each row: (dtype family, python type, C type prefix, bit widths).
    families = (
        ('float', float, 'npy_', (32, 64)),
        ('uint', int, 'npy_', (8, 16, 32, 64)),
        ('int', int, 'npy_', (8, 16, 32, 64)),
        ('complex', complex, 'theano_', (64, 128)),
    )
    specs = {}
    for family, py_type, c_prefix, widths in families:
        for width in widths:
            name = '%s%d' % (family, width)
            # The numpy typenum name is always NPY_<DTYPE IN UPPER CASE>.
            specs[name] = (py_type, c_prefix + name, 'NPY_' + name.upper())
    try:
        return specs[self.dtype]
    except KeyError:
        raise TypeError("Unsupported dtype for %s: %s" %
                        (self.__class__.__name__, self.dtype))
def get_shape_info(self, obj): def get_shape_info(self, obj):
return obj.shape return obj.shape
......
...@@ -738,6 +738,7 @@ class T_reduce_dtype(unittest.TestCase): ...@@ -738,6 +738,7 @@ class T_reduce_dtype(unittest.TestCase):
op = CAReduce op = CAReduce
axes = [None, 0, 1, [], [0], [1], [0, 1]] axes = [None, 0, 1, [], [0], [1], [0, 1]]
methods = ['sum', 'prod'] methods = ['sum', 'prod']
dtypes = imap(str, theano.scalar.all_types)
def test_reduce_default_dtype(self): def test_reduce_default_dtype(self):
""" """
...@@ -745,7 +746,7 @@ class T_reduce_dtype(unittest.TestCase): ...@@ -745,7 +746,7 @@ class T_reduce_dtype(unittest.TestCase):
""" """
# We try multiple axis combinations even though axis should not matter. # We try multiple axis combinations even though axis should not matter.
for method in self.methods: for method in self.methods:
for idx, dtype in enumerate(imap(str, theano.scalar.all_types)): for idx, dtype in enumerate(self.dtypes):
axis = self.axes[idx % len(self.axes)] axis = self.axes[idx % len(self.axes)]
x = tensor.matrix(dtype=dtype) x = tensor.matrix(dtype=dtype)
s = getattr(x, method)(axis=axis) s = getattr(x, method)(axis=axis)
...@@ -768,7 +769,7 @@ class T_reduce_dtype(unittest.TestCase): ...@@ -768,7 +769,7 @@ class T_reduce_dtype(unittest.TestCase):
##Test the default acc_dtype of a reduce(). ##Test the default acc_dtype of a reduce().
# We try multiple axis combinations even though axis should not matter. # We try multiple axis combinations even though axis should not matter.
for method in self.methods: for method in self.methods:
for idx, dtype in enumerate(imap(str, theano.scalar.all_types)): for idx, dtype in enumerate(self.dtypes):
axis = self.axes[idx % len(self.axes)] axis = self.axes[idx % len(self.axes)]
x = tensor.matrix(dtype=dtype) x = tensor.matrix(dtype=dtype)
s = getattr(x, method)(axis=axis) s = getattr(x, method)(axis=axis)
...@@ -797,9 +798,9 @@ class T_reduce_dtype(unittest.TestCase): ...@@ -797,9 +798,9 @@ class T_reduce_dtype(unittest.TestCase):
# We try multiple axis combinations even though axis should not matter. # We try multiple axis combinations even though axis should not matter.
idx = 0 idx = 0
for method in self.methods: for method in self.methods:
for input_dtype in imap(str, theano.scalar.all_types): for input_dtype in self.dtypes:
x = tensor.matrix(dtype=input_dtype) x = tensor.matrix(dtype=input_dtype)
for output_dtype in imap(str, theano.scalar.all_types): for output_dtype in self.dtypes:
# If the output is a complex, the gradient of the reduce will # If the output is a complex, the gradient of the reduce will
# cast the complex to the input dtype. We can't call the normal # cast the complex to the input dtype. We can't call the normal
# cast on a complex to a not complex as this is ambiguous. # cast on a complex to a not complex as this is ambiguous.
...@@ -831,9 +832,9 @@ class T_reduce_dtype(unittest.TestCase): ...@@ -831,9 +832,9 @@ class T_reduce_dtype(unittest.TestCase):
# We try multiple axis combinations even though axis should not matter. # We try multiple axis combinations even though axis should not matter.
idx = 0 idx = 0
for method in self.methods: for method in self.methods:
for input_dtype in imap(str, theano.scalar.all_types): for input_dtype in self.dtypes:
x = tensor.matrix(dtype=input_dtype) x = tensor.matrix(dtype=input_dtype)
for acc_dtype in imap(str, theano.scalar.all_types): for acc_dtype in self.dtypes:
# If the accumulator is a complex, the gradient of the reduce will # If the accumulator is a complex, the gradient of the reduce will
# cast the complex to the input dtype. We can't call the normal # cast the complex to the input dtype. We can't call the normal
# cast on a complex to a not complex as this is ambiguous. # cast on a complex to a not complex as this is ambiguous.
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论