Commit 1e126b35 authored by lamblin

Merge pull request #1044 from nouiz/gpu_conv_logical

Move convolution to the GPU when the image shape and logical image shape...
......@@ -128,11 +128,12 @@ compile_cuda_ndarray = True
if not compile_cuda_ndarray:
compile_cuda_ndarray = not try_import()
if not nvcc_compiler.is_nvcc_available():
# It can happen that there the file cuda_ndarray.so is already compiled
if not nvcc_compiler.is_nvcc_available() or not theano.config.cxx:
# It can happen that the file cuda_ndarray.so is already compiled
# but nvcc is not available. In that case we need to disable the CUDA
# back-end as we won't be able to compile any new op and we can't only
# use already compiled GPU op and not the others.
# Also, if cxx is not available, we need to disable all GPU code.
set_cuda_disabled()
if compile_cuda_ndarray and cuda_available:
......
......@@ -1011,11 +1011,7 @@ def local_gpu_conv(node):
"""
def GpuConvOp_from_ConvOp(op):
logical_img_hw = None
if op.imshp_logical is not None:
logical_img_hw = op.imshp_logical[1:3]
if logical_img_hw != op.imshp[1:3]:
# this case is not implemented
return None
if op.kshp_logical is not None and op.kshp_logical != op.kshp:
return None
#print op.kshp, op.imshp[1:3]
......@@ -1033,6 +1029,23 @@ def local_gpu_conv(node):
#HACK to print the number of MFlops in the profiler output.
if hasattr(op, 'flops'):
ret.flops = op.flops
if op.imshp_logical is not None:
logical_img_hw = op.imshp_logical[1:3]
if logical_img_hw != op.imshp[1:3]:
# this case is not implemented
#return None
rstride = int(numpy.ceil(op.imshp_logical[1] /
float(op.imshp[1])))
cstride = int(numpy.ceil(op.imshp_logical[2] /
float(op.imshp[2])))
def make_graph(img, kern):
buf = tensor.alloc(numpy.asarray(0, dtype=img.dtype),
img.shape[0], *op.imshp_logical)
img = tensor.set_subtensor(buf[:, :, ::rstride, ::cstride],
img)
img = gpu_from_host(img)
return ret(img, kern)
return make_graph
return ret
if node.op == gpu_from_host:
......
......@@ -17,6 +17,7 @@ except ImportError:
pass
import theano
from theano import tensor
# Skip test if cuda_ndarray is not available.
import theano.sandbox.cuda as cuda_ndarray
......@@ -707,14 +708,41 @@ def test_subsample():
exec_conv(version_full, shapes, verbose, random, 'full',
print_=print_, ones=ones)
## See #616
#def test_logical_shapes():
# # implement when
# print >> sys.stderr, ("WARNING TODO: test_logical_shapes not implemented"
# " (i.e. imshp_logical, kshp_logical, kshp_logical_top_aligned)")
class TestConv2DGPU(unittest.TestCase):
def test_logical_shapes(self):
    """Regression test for #616: conv2d with logical shapes on the GPU.

    When ``imshp_logical`` differs from the physical image shape by an
    integer upsampling factor (``stride``), the ``local_gpu_conv``
    optimization should still be able to move the convolution to the
    GPU instead of bailing out.  We check that a ``GpuConv`` node
    appears in the compiled graph and that the function runs.
    """
    # Try several upsampling factors; stride == 1 is the degenerate
    # case where logical and physical shapes coincide.
    for stride in range(1, 4):
        kshp = (10, 2, 10, 10)
        featshp = (3, 10, 11, 11)
        a = tensor.ftensor4()
        A = tensor.ftensor4()
        # Need to transpose first two dimensions of kernel, and reverse
        # index kernel image dims (for correlation)
        kernel_rotated = tensor.transpose(A, axes=[1, 0, 2, 3])
        # Logical feature-map shape: physical spatial dims upsampled
        # by `stride` along both rows and columns.
        featshp_logical = (featshp[0], featshp[1], featshp[2] * stride,
                           featshp[3] * stride)
        kshp_rotated = (kshp[1], kshp[0], kshp[2], kshp[3])
        print featshp, kshp_rotated, featshp_logical[1:], kshp[2:]
        # conv2d is given the physical shapes plus the logical ones;
        # imshp_logical/kshp_logical drop the batch/stack dimension.
        image_estimate = tensor.nnet.conv2d(a, kernel_rotated,
                                            border_mode='full',
                                            image_shape=featshp,
                                            filter_shape=kshp_rotated,
                                            imshp_logical=featshp_logical[1:],
                                            kshp_logical=kshp[2:])
        # NOTE(review): `theano_mode` is a module-level name defined
        # outside this view — presumably a GPU-enabled compilation mode.
        func = theano.function([a, A], image_estimate, mode=theano_mode)
        theano.printing.debugprint(func,)
        # The point of the test: the convolution must have been moved
        # to the GPU, i.e. a GpuConv node exists in the final graph.
        assert any([isinstance(node.op, theano.sandbox.cuda.blas.GpuConv)
                    for node in func.maker.fgraph.toposort()])
        # Finally make sure the compiled function actually executes.
        a_in = numpy.random.randn(*featshp).astype("float32")
        A_in = numpy.random.randn(*kshp).astype("float32")
        func(a_in, A_in)
def test_invalid_input_shape(self):
"""
Tests that when the shape gived at build time is not the same as
......
Markdown 格式
0%
You are adding 0 people to this discussion. Please proceed with caution.
请先完成此评论的编辑!
注册 或者 后发表评论