added tests for the conv3d_fft

Conflicts: doc/library/tensor/nnet/conv.txt

added tests for the conv3d_fft
c40b1bd9 · Guillaume Alain · Nicolas Ballas · 47c947ab · c40b1bd9 · c40b1bd9
--- a/doc/library/tensor/nnet/conv.txt
+++ b/doc/library/tensor/nnet/conv.txt
@@ -34,6 +34,8 @@ TODO: Give examples for how to use these things! They are pretty complicated.
      in your environement.  This is not enabled by default because it
      has some restrictions on input and uses more memory.  Also note
      that it requires CUDA >= 5.0, scikits.cuda >= 0.5.0 and PyCUDA to run.
+    - :func:`conv3d_fft <theano.sandbox.cuda.fftconv.conv3d_fft>`
+      This is the same as conv2d_fft, but for 3D data.
    - :func:`conv3D <theano.tensor.nnet.Conv3D.conv3D>`. Doesn't work on the GPU.
    - :func:`conv3d2d <theano.tensor.nnet.conv3d2d.conv3d>`
      Another conv3d implementation that uses the conv2d with data reshaping.

--- a/theano/sandbox/cuda/fftconv.py
+++ b/theano/sandbox/cuda/fftconv.py
@@ -517,10 +517,16 @@ def conv3d_fft(input, filters, image_shape=None, filter_shape=None,
    """
    Perform a convolution through fft.

-    Only support input which will be even on the last dimension
-    (width).  All other dimensions can be anything and the filters can
-    have an even or odd width.
-
+    Only supports input whose last dimension has an even size.
+    All other dimensions can have any size, and the filters can
+    have an even or odd last dimension.
+
+    The semantics associated with the last three dimensions
+    are not important as long as they are in the same order between
+    the inputs and the filters. For example, when the convolution
+    is done on a sequence of images, they could be either
+    (duration, height, width) or (height, width, duration).
+    
    If you must use input which has an odd width, you can either pad
    it or use the `pad_last_dim` argument which will do it for you and
    take care to strip the padding before returning.  Don't use this

--- a/theano/sandbox/cuda/tests/test_fftconv.py
+++ b/theano/sandbox/cuda/tests/test_fftconv.py
@@ -118,3 +118,123 @@ class TestConv2dFFT(unittest.TestCase):
        res_fft = f_fft()

        utt.assert_allclose(res_ref, res_fft)
+
+
+
+
+
+
+class TestConv3dFFT(unittest.TestCase):
+    """Check conv3d_fft against a reference built from 2d FFT convolutions."""
+
+    @staticmethod
+    def perform_conv2d_fft(inputs, filters, border_mode, function_mode):
+        """Apply conv2d_fft to 4d numpy arrays and return a numpy array.
+
+        `function_mode` is the compilation mode given to theano.function
+        (run_conv passes mode_with_gpu).
+        """
+        assert border_mode in ('valid', 'full')
+        # Ask for padding when the last dimension has an odd size.
+        pad_last_dim = inputs.shape[-1] % 2 == 1
+
+        sym_inputs = theano.tensor.tensor4()
+        sym_filters = theano.tensor.tensor4()
+        sym_outputs = theano.sandbox.cuda.fftconv.conv2d_fft(
+            sym_inputs, sym_filters, image_shape=inputs.shape, filter_shape=filters.shape,
+            border_mode=border_mode, pad_last_dim=pad_last_dim)
+        # Compile with the caller's mode instead of the default one.
+        f = theano.function([sym_inputs, sym_filters], sym_outputs, mode=function_mode)
+        return numpy.array(f(inputs, filters))
+
+    @staticmethod
+    def perform_conv3d_through_multiple_conv2d_fft(inputs, filters, border_mode, function_mode):
+        """Reference 3d convolution computed as a sum of 2d FFT convolutions.
+
+        `inputs` and `filters` are 5d numpy arrays.  The last axis is handled
+        by explicit summation over perform_conv2d_fft results, which convolve
+        the two axes before it.  Returns a float32 numpy array.
+        """
+        assert border_mode in ('valid', 'full')
+        (nbr_images, nbr_channels, image_height, image_width, image_duration) = inputs.shape
+        (nbr_filters, _, filter_height, filter_width, filter_duration) = filters.shape
+
+        if border_mode == 'full':
+            # Zero-pad height, width and duration by (filter size - 1) on both
+            # sides, then compute a 'valid' convolution of the padded input.
+            padded_inputs = numpy.zeros((nbr_images, nbr_channels,
+                                         image_height + 2 * (filter_height - 1),
+                                         image_width + 2 * (filter_width - 1),
+                                         image_duration + 2 * (filter_duration - 1)), dtype=numpy.float32)
+            padded_inputs[:, :, filter_height - 1:filter_height - 1 + image_height,
+                          filter_width - 1:filter_width - 1 + image_width,
+                          filter_duration - 1:filter_duration - 1 + image_duration] = inputs
+            return TestConv3dFFT.perform_conv3d_through_multiple_conv2d_fft(
+                padded_inputs, filters, border_mode='valid', function_mode=function_mode)
+
+        out_duration = image_duration - filter_duration + 1
+        outputs = numpy.zeros((nbr_images, nbr_filters,
+                               image_height - filter_height + 1,
+                               image_width - filter_width + 1,
+                               out_duration), dtype=numpy.float32)
+        for t in range(out_duration):
+            for sub_t in range(filter_duration):
+                # Filter slices are taken in reverse order along the last axis.
+                outputs[:, :, :, :, t] += TestConv3dFFT.perform_conv2d_fft(
+                    inputs[:, :, :, :, t + sub_t].copy(),
+                    filters[:, :, :, :, filter_duration - 1 - sub_t].copy(),
+                    border_mode, function_mode)
+        return outputs
+
+    @staticmethod
+    def perform_fftconv3d(inputs, filters, border_mode, function_mode):
+        """Apply conv3d_fft to 5d numpy arrays and return a numpy array.
+
+        `function_mode` is the compilation mode given to theano.function.
+        """
+        assert border_mode in ('valid', 'full')
+        pad_last_dim = inputs.shape[-1] % 2 == 1  # pad odd last dimensions
+
+        tensor5 = theano.tensor.TensorType('float32', (False,) * 5)
+        sym_inputs = tensor5()
+        sym_filters = tensor5()
+        sym_outputs = theano.sandbox.cuda.fftconv.conv3d_fft(
+            sym_inputs, sym_filters, image_shape=inputs.shape, filter_shape=filters.shape,
+            border_mode=border_mode, pad_last_dim=pad_last_dim)
+        # Compile with the caller's mode instead of the default one.
+        f = theano.function([sym_inputs, sym_filters], sym_outputs, mode=function_mode)
+        return numpy.array(f(inputs, filters))
+
+    def run_conv(self, inputs_shape, filters_shape, border_mode):
+        """Compare conv3d_fft with the 2d-based reference on random data."""
+        inputs_val = numpy.random.random(inputs_shape).astype('float32')
+        filters_val = numpy.random.random(filters_shape).astype('float32')
+        res_ref = TestConv3dFFT.perform_conv3d_through_multiple_conv2d_fft(
+            inputs_val, filters_val, border_mode, mode_with_gpu)
+        res_fft = TestConv3dFFT.perform_fftconv3d(inputs_val, filters_val, border_mode, mode_with_gpu)
+        utt.assert_allclose(res_ref, res_fft)
+
+    def test_valid(self):
+        """Run 'valid' mode with each convolved dimension at two sizes."""
+        for offset1 in range(2):
+            for offset2 in range(2):
+                for offset3 in range(2):
+                    self.run_conv(inputs_shape=(5, 3, 5 + offset1, 6 + offset2, 4 + offset3),
+                                  filters_shape=(2, 3, 3 + offset1, 3 + offset2, 2 + offset3),
+                                  border_mode='valid')
+
+    def test_full(self):
+        """Run 'full' mode with each convolved dimension at two sizes."""
+        for offset1 in range(2):
+            for offset2 in range(2):
+                for offset3 in range(2):
+                    self.run_conv(inputs_shape=(5, 3, 5 + offset1, 6 + offset2, 4 + offset3),
+                                  filters_shape=(2, 3, 3 + offset1, 3 + offset2, 3 + offset3),
+                                  border_mode='full')
+
+
+
+
+
+
+