Commit c1dfa69c, authored by abergeron, committed via GitHub.

Merge pull request #4932 from nouiz/no_complex

Don't move complex to the GPU in the new back-end.
...@@ -626,6 +626,8 @@ class GpuFromHost(Op): ...@@ -626,6 +626,8 @@ class GpuFromHost(Op):
def make_node(self, x): def make_node(self, x):
if not isinstance(x.type, tensor.TensorType): if not isinstance(x.type, tensor.TensorType):
raise TypeError(x) raise TypeError(x)
if "complex" in x.dtype:
raise TypeError("complex not supported in the new gpuarray back-end.", x)
return Apply(self, [x], [GpuArrayType(broadcastable=x.broadcastable, return Apply(self, [x], [GpuArrayType(broadcastable=x.broadcastable,
context_name=self.context_name, context_name=self.context_name,
dtype=x.dtype)()]) dtype=x.dtype)()])
......
...@@ -1270,7 +1270,7 @@ def dnn_pool(img, ws, stride=(1, 1), mode='max', pad=(0, 0)): ...@@ -1270,7 +1270,7 @@ def dnn_pool(img, ws, stride=(1, 1), mode='max', pad=(0, 0)):
Subsampling window size. Subsampling window size.
stride : tuple stride : tuple
Subsampling stride (default: (1, 1)). Subsampling stride (default: (1, 1)).
mode : {'max', 'average_inc_pad', 'average_exc_pad'} mode : {'max', 'average_inc_pad', 'average_exc_pad', 'sum'}
pad : tuple pad : tuple
(padX, padY) or (padX, padY, padZ) (padX, padY) or (padX, padY, padZ)
default: (0, 0) default: (0, 0)
...@@ -1285,6 +1285,11 @@ def dnn_pool(img, ws, stride=(1, 1), mode='max', pad=(0, 0)): ...@@ -1285,6 +1285,11 @@ def dnn_pool(img, ws, stride=(1, 1), mode='max', pad=(0, 0)):
""" """
img = gpu_contiguous(img) img = gpu_contiguous(img)
if mode == "sum":
ret = GpuDnnPool(mode="average_inc_pad")(img, ws, stride, pad)
context_name = ret.type.context_name
window_elem = theano.tensor.prod(ws).astype(ret.dtype)
return as_gpuarray_variable(ret * window_elem, context_name)
return GpuDnnPool(mode=mode)(img, ws, stride, pad) return GpuDnnPool(mode=mode)(img, ws, stride, pad)
......
...@@ -191,8 +191,9 @@ def op_lifter(OP, cuda_only=False): ...@@ -191,8 +191,9 @@ def op_lifter(OP, cuda_only=False):
# Check if we should replace # Check if we should replace
if (not replace or if (not replace or
(cuda_only and (cuda_only and
get_context(context_name).kind != b'cuda')): get_context(context_name).kind != b'cuda') or
any(["complex" in i.dtype for i in node.inputs])):
return False return False
# tag the inputs with the context in case # tag the inputs with the context in case
...@@ -298,7 +299,8 @@ class GraphToGPU(Optimizer): ...@@ -298,7 +299,8 @@ class GraphToGPU(Optimizer):
for i in fgraph.inputs: for i in fgraph.inputs:
# Do not move *int* scalar to the GPU. # Do not move *int* scalar to the GPU.
if (isinstance(i.type, tensor.TensorType) and if (isinstance(i.type, tensor.TensorType) and
(i.ndim > 0 or 'int' not in i.dtype)): (i.ndim > 0 or 'int' not in i.dtype) and
"complex" not in i.dtype):
mapping[i] = i.transfer(getattr(i.tag, 'target', target)) mapping[i] = i.transfer(getattr(i.tag, 'target', target))
else: else:
mapping[i] = i mapping[i] = i
...@@ -344,6 +346,10 @@ class GraphToGPU(Optimizer): ...@@ -344,6 +346,10 @@ class GraphToGPU(Optimizer):
self.local_optimizers_map.get(type(c.op), []))): self.local_optimizers_map.get(type(c.op), []))):
move_to_GPU = True move_to_GPU = True
new_ops = None new_ops = None
if move_to_GPU and any(["complex" in getattr(i, 'dtype', "")
for i in node.inputs]):
move_to_GPU = False
# Apply the lifter # Apply the lifter
if move_to_GPU: if move_to_GPU:
for lopt in (self.local_optimizers_map.get(node.op, []) + for lopt in (self.local_optimizers_map.get(node.op, []) +
......
...@@ -356,6 +356,18 @@ def test_pooling_opt(): ...@@ -356,6 +356,18 @@ def test_pooling_opt():
f(numpy.zeros((10, 10), dtype='float32')) f(numpy.zeros((10, 10), dtype='float32'))
# Test sum pooling
f = theano.function(
[x],
pool_2d(x, ds=(2, 3), mode='sum',
ignore_border=True),
mode=mode_with_gpu)
assert any([isinstance(n.op, dnn.GpuDnnPool)
for n in f.maker.fgraph.toposort()])
data = numpy.random.rand(10, 10).astype('float32')
f(data)
def test_dnn_tag(): def test_dnn_tag():
""" """
......
...@@ -472,3 +472,12 @@ def test_local_assert_no_cpu_op(): ...@@ -472,3 +472,12 @@ def test_local_assert_no_cpu_op():
theano.function([], out, mode=mode_local_assert) theano.function([], out, mode=mode_local_assert)
finally: finally:
theano.config.assert_no_cpu_op = old theano.config.assert_no_cpu_op = old
def test_no_complex():
    # Regression test: compiling a graph that contains a complex
    # intermediate must not crash under the GPU mode — complex ops are
    # expected to stay on the CPU in the gpuarray back-end.
    width = tensor.cscalar()
    freq = tensor.fscalar()
    signal = tensor.fscalar()
    out = tensor.exp(width * freq) * signal
    theano.function([width, freq, signal], out,
                    mode=mode_with_gpu)
...@@ -1991,7 +1991,7 @@ def dnn_pool(img, ws, stride=(1, 1), mode='max', pad=(0, 0)): ...@@ -1991,7 +1991,7 @@ def dnn_pool(img, ws, stride=(1, 1), mode='max', pad=(0, 0)):
Subsampling window size. Subsampling window size.
stride stride
Subsampling stride (default: (1, 1)). Subsampling stride (default: (1, 1)).
mode : {'max', 'average_inc_pad', 'average_exc_pad} mode : {'max', 'average_inc_pad', 'average_exc_pad, 'sum'}
pad : pad :
(pad_h, pad_w) padding information. (pad_h, pad_w) padding information.
pad_h is the number of zero-valued pixels added to each of the top and pad_h is the number of zero-valued pixels added to each of the top and
...@@ -2009,6 +2009,11 @@ def dnn_pool(img, ws, stride=(1, 1), mode='max', pad=(0, 0)): ...@@ -2009,6 +2009,11 @@ def dnn_pool(img, ws, stride=(1, 1), mode='max', pad=(0, 0)):
""" """
img = gpu_contiguous(img) img = gpu_contiguous(img)
if mode == "sum":
ret = GpuDnnPool(mode="average_inc_pad")(img, ws, stride, pad)
window_elem = theano.tensor.prod(ws).astype(ret.dtype)
return as_cuda_ndarray_variable(ret * window_elem)
return GpuDnnPool(mode=mode)(img, ws, stride, pad) return GpuDnnPool(mode=mode)(img, ws, stride, pad)
......
...@@ -545,6 +545,17 @@ def test_pooling_opt(): ...@@ -545,6 +545,17 @@ def test_pooling_opt():
for n in f.maker.fgraph.toposort()]) for n in f.maker.fgraph.toposort()])
f(numpy.zeros((10, 10), dtype='float32')) f(numpy.zeros((10, 10), dtype='float32'))
# Test sum pooling
f = theano.function(
[x],
pool_2d(x, ds=(2, 3), mode='sum',
ignore_border=True),
mode=mode_with_gpu)
assert any([isinstance(n.op, dnn.GpuDnnPool)
for n in f.maker.fgraph.toposort()])
data = numpy.random.rand(10, 10).astype('float32')
f(data)
class test_DnnSoftMax(test_nnet.test_SoftMax): class test_DnnSoftMax(test_nnet.test_SoftMax):
......
Markdown format
0%
You are attaching 0 files to this discussion. Please proceed with caution.
Please finish editing this comment first!
Sign up or sign in to post a comment.