Merge pull request #2054 from nouiz/mixed

[ENH] OpenMPOp, doc and cleanup

Merge pull request #2054 from nouiz/mixed
165cbf6c · Pascal Lamblin · af6bd549 · a9d053ec · 165cbf6c · 165cbf6c
--- a/doc/library/tensor/nnet/conv.txt
+++ b/doc/library/tensor/nnet/conv.txt
@@ -54,8 +54,9 @@ TODO: Give examples for how to use these things! They are pretty complicated.
      not be call directly as it does not implement a grad function.
      You can enable it by setting THEANO_FLAGS to
      'optimizer_including=conv3d_fft:convgrad3d_fft:convtransp3d_fft'
-      This is not enabled by default because it has some restrictions on
-      input and uses more memory. Also note that it requires CUDA >= 5.0,
+      It does not support strides.
+      This is not enabled by default because it uses more memory.
+      Also note that it requires CUDA >= 5.0,
      scikits.cuda >= 0.5.0 and PyCUDA to run.
      To enable for just one Theano function:


--- a/theano/gof/op.py
+++ b/theano/gof/op.py
@@ -817,6 +817,12 @@ class OpenMPOp(Op):
            return ['-fopenmp']
        return []

+    def c_headers(self):
+        self.update_self_openmp()
+        if self.openmp:
+            return ["omp.h"]
+        return []
+
    @staticmethod
    def test_gxx_support():
        code = """
@@ -845,11 +851,12 @@ int main( int argc, const char* argv[] )
            if OpenMPOp.gxx_support_openmp is None:
                OpenMPOp.gxx_support_openmp = OpenMPOp.test_gxx_support()
                if not OpenMPOp.gxx_support_openmp:
-                    #We want to warn only once.
+                    # We want to warn only once.
                    warnings.warn(
                        "Your g++ compiler fails to compile OpenMP code. We"
                        " know this happen with some version of the EPD mingw"
-                        " compiler. We disable openmp everywhere in Theano."
+                        " compiler and LLVM compiler on Mac OS X."
+                        " We disable openmp everywhere in Theano."
                        " To remove this warning set the theano flags `openmp`"
                        " to False.",
                        stacklevel=3)

--- a/theano/sandbox/cuda/opt.py
+++ b/theano/sandbox/cuda/opt.py
@@ -43,6 +43,7 @@ from theano.sandbox.cuda.var import CudaNdarrayConstant
 from theano.scan_module import scan_utils, scan_op, scan_opt
 from theano.tensor.blas import _is_real_vector, _is_real_matrix
 from theano.tensor import nlinalg
+from theano.tensor.nnet.Conv3D import Conv3D

 #optdb.print_summary()  # shows what is currently registered

@@ -1236,17 +1237,18 @@ def local_conv_fft_full(node):
 gpu_optimizer.register("conv_fft_valid", local_conv_fft_valid)
 gpu_optimizer.register("conv_fft_full", local_conv_fft_full)

-from theano.tensor.nnet.Conv3D import Conv3D
+
 @local_optimizer([Conv3D])
 def local_conv3d_fft(node):
+    if not isinstance(node.op, Conv3D):
+        return
    try:
        stride_x = tensor.get_scalar_constant_value(node.inputs[3][0])
        stride_y = tensor.get_scalar_constant_value(node.inputs[3][1])
        stride_z = tensor.get_scalar_constant_value(node.inputs[3][2])
    except tensor.NotScalarConstantError:
        return False
-    if (isinstance(node.op, Conv3D) and
-        (stride_x, stride_y, stride_z) == (1, 1, 1)):
+    if (stride_x, stride_y, stride_z) == (1, 1, 1):
        # we import conv3d_fft locally to avoid pycuda warnings
        from theano.sandbox.cuda.fftconv import conv3d_fft
        # Shuffle inputs signal from (b, 0, 1, t, c) to (b, c, 0, 1, t)
@@ -1256,7 +1258,7 @@ def local_conv3d_fft(node):
        f = node.inputs[1]
        f = gpu_from_host(f.dimshuffle(0, 4, 1, 2, 3))
        # filter flip
-        f = f[:,:,::-1,::-1,::-1]
+        f = f[:, :, ::-1, ::-1, ::-1]
        rval = conv3d_fft(x, f, border_mode='valid', pad_last_dim=True)
        # Shuffle from (oc, c, 0, 1, t) to (oc, 0, 1, t, c)
        return [rval.dimshuffle(0, 2, 3, 4, 1) + node.inputs[2]]