Merge pull request #1990 from nouiz/gpu_red

disable complex support in gpu reduce.

Merge pull request #1990 from nouiz/gpu_red
dc0ad48c · abergeron · c1458cc1 · c01b28dd · dc0ad48c · dc0ad48c
--- a/theano/sandbox/gpuarray/elemwise.py
+++ b/theano/sandbox/gpuarray/elemwise.py
@@ -647,6 +647,10 @@ class GpuCAReduceCuda(HideC, CAReduceDtype):

        if (x.type.ndim != len(self.reduce_mask)):
            raise TypeError("x must have rank %i" % len(self.reduce_mask))
+        if ("complex" in x.dtype or
+            "complex" in ret.outputs[0].dtype or
+            "complex" in self._acc_dtype(x.dtype)):
+            raise NotImplementedError("We don't support complex in gpu reduction")
        return Apply(self, [x], [GpuArrayType(ret.outputs[0].dtype,
                                              ret.outputs[0].type.broadcastable)()])

@@ -717,8 +721,12 @@ class GpuCAReduceCuda(HideC, CAReduceDtype):

        nd_in = node.inputs[0].type.ndim
        nd_out = node.outputs[0].type.ndim
-        in_dtype = "npy_" + node.inputs[0].dtype
-        out_dtype = "npy_" + node.outputs[0].dtype
+        # For complex, we need to use theano_complex* in the c code to
+        # have it run. But libgpuarray doesn't understand it.
+        in_dtype = node.inputs[0].type.dtype_specs()[1]
+        out_dtype = node.outputs[0].type.dtype_specs()[1]
+        gin_dtype = "npy_" + node.inputs[0].dtype
+        gout_dtype = "npy_" + node.outputs[0].dtype
        assert nd_in - nd_out == sum(self.reduce_mask)

        sio = StringIO()
@@ -782,7 +790,7 @@ class GpuCAReduceCuda(HideC, CAReduceDtype):
            if not self.reduce_mask[i]:
                print >> sio, 'new_dims[%(j)s] = PyGpuArray_DIMS(%(x)s)[%(i)s];' % locals()
                j += 1
-        out_typecode = dtype_to_typecode(out_dtype[4:])
+        out_typecode = dtype_to_typecode(gout_dtype[4:])
        print >> sio, """
            Py_XDECREF(%(z)s);
            %(z)s = pygpu_empty(%(nd_out)s, new_dims,
@@ -1001,7 +1009,9 @@ class GpuCAReduceCuda(HideC, CAReduceDtype):
        return sio.getvalue()

    def _k_init(self, node, nodename):
-        acc_dtype = "npy_" + self._acc_dtype(node.inputs[0].dtype)
+        acc_dtype = self._acc_dtype(node.inputs[0].dtype)
+        # We need to use theano_complex* and not npy_complex*
+        acc_dtype = theano.scalar.basic.Scalar(acc_dtype).dtype_specs()[1]

        return """
                const int threadCount = blockDim.x * blockDim.y * blockDim.z;

--- a/theano/sandbox/gpuarray/tests/test_elemwise.py
+++ b/theano/sandbox/gpuarray/tests/test_elemwise.py
@@ -167,6 +167,10 @@ class T_gpureduce_dtype(T_reduce_dtype):
    op = GpuCAReduceCuda
    #Currently we don't support reduction on 0 axis
    axes = [None, 0, 1, 1, [0], [1], [0, 1]]
+    #We don't support complex dtype
+    dtypes = ['int8', 'int16', 'int32', 'int64',
+              'uint8', 'uint16', 'uint32', 'uint64',
+              'float32', 'float64']


 def speed_reduce10():

--- a/theano/sandbox/gpuarray/type.py
+++ b/theano/sandbox/gpuarray/type.py
@@ -146,6 +146,33 @@ class GpuArrayType(Type):
    def __str__(self):
        return "GpuArray<%s>" % (self.dtype,)

+    def dtype_specs(self):
+        """Return a tuple (python type, c type, numpy typenum) that corresponds
+        to self.dtype.
+
+        This function is used internally as part of C code generation.
+        """
+        # TODO: add more type correspondences for e.g. int32, int64, float32,
+        # complex64, etc.
+        try:
+            return {
+                'float32': (float, 'npy_float32', 'NPY_FLOAT32'),
+                'float64': (float, 'npy_float64', 'NPY_FLOAT64'),
+                'uint8': (int, 'npy_uint8', 'NPY_UINT8'),
+                'int8': (int, 'npy_int8', 'NPY_INT8'),
+                'uint16': (int, 'npy_uint16', 'NPY_UINT16'),
+                'int16': (int, 'npy_int16', 'NPY_INT16'),
+                'uint32': (int, 'npy_uint32', 'NPY_UINT32'),
+                'int32': (int, 'npy_int32', 'NPY_INT32'),
+                'uint64': (int, 'npy_uint64', 'NPY_UINT64'),
+                'int64': (int, 'npy_int64', 'NPY_INT64'),
+                'complex128': (complex, 'theano_complex128', 'NPY_COMPLEX128'),
+                'complex64': (complex, 'theano_complex64', 'NPY_COMPLEX64')
+                }[self.dtype]
+        except KeyError:
+            raise TypeError("Unsupported dtype for %s: %s" %
+                            (self.__class__.__name__, self.dtype))
+
    def get_shape_info(self, obj):
        return obj.shape


--- a/theano/tensor/tests/test_elemwise.py
+++ b/theano/tensor/tests/test_elemwise.py
@@ -738,6 +738,7 @@ class T_reduce_dtype(unittest.TestCase):
    op = CAReduce
    axes = [None, 0, 1, [], [0], [1], [0, 1]]
    methods = ['sum', 'prod']
+    dtypes = imap(str, theano.scalar.all_types)

    def test_reduce_default_dtype(self):
        """
@@ -745,7 +746,7 @@ class T_reduce_dtype(unittest.TestCase):
        """
        # We try multiple axis combinations even though axis should not matter.
        for method in self.methods:
-            for idx, dtype in enumerate(imap(str, theano.scalar.all_types)):
+            for idx, dtype in enumerate(self.dtypes):
                axis = self.axes[idx % len(self.axes)]
                x = tensor.matrix(dtype=dtype)
                s = getattr(x, method)(axis=axis)
@@ -768,7 +769,7 @@ class T_reduce_dtype(unittest.TestCase):
        ##Test the default acc_dtype of a reduce().
        # We try multiple axis combinations even though axis should not matter.
        for method in self.methods:
-            for idx, dtype in enumerate(imap(str, theano.scalar.all_types)):
+            for idx, dtype in enumerate(self.dtypes):
                axis = self.axes[idx % len(self.axes)]
                x = tensor.matrix(dtype=dtype)
                s = getattr(x, method)(axis=axis)
@@ -797,9 +798,9 @@ class T_reduce_dtype(unittest.TestCase):
        # We try multiple axis combinations even though axis should not matter.
        idx = 0
        for method in self.methods:
-            for input_dtype in imap(str, theano.scalar.all_types):
+            for input_dtype in self.dtypes:
                x = tensor.matrix(dtype=input_dtype)
-                for output_dtype in imap(str, theano.scalar.all_types):
+                for output_dtype in self.dtypes:
                # If the output is a complex, the gradient of the reduce will
                # cast the complex to the input dtype. We can't call the normal
                # cast on a complex to a not complex as this is ambiguous.
@@ -831,9 +832,9 @@ class T_reduce_dtype(unittest.TestCase):
        # We try multiple axis combinations even though axis should not matter.
        idx = 0
        for method in self.methods:
-            for input_dtype in imap(str, theano.scalar.all_types):
+            for input_dtype in self.dtypes:
                x = tensor.matrix(dtype=input_dtype)
-                for acc_dtype in imap(str, theano.scalar.all_types):
+                for acc_dtype in self.dtypes:
                # If the accumulator is a complex, the gradient of the reduce will
                # cast the complex to the input dtype. We can't call the normal
                # cast on a complex to a not complex as this is ambiguous.