update tests, fix cudnn call in abstract op

6178a7a9 · Nicolas Ballas · Pascal Lamblin · 7cc4e783 · 6178a7a9 · 6178a7a9
--- a/theano/sandbox/cuda/dnn.py
+++ b/theano/sandbox/cuda/dnn.py
@@ -1279,7 +1279,7 @@ def dnn_conv3d(img, kerns, border_mode='valid', subsample=(1, 1, 1),
 def dnn_gradweight(img, topgrad,
                   kerns_shp,
                   border_mode='valid', subsample=(1, 1),
-                   conv_mode='conv', workmem=None):
+                   conv_mode='conv'):
    """
    GPU convolution gradient with respect to weight using cuDNN from NVIDIA.

@@ -1295,16 +1295,16 @@ def dnn_gradweight(img, topgrad,

    img = gpu_contiguous(img)
    topgrad = gpu_contiguous(topgrad)
+    kerns_shp = theano.tensor.as_tensor_variable(kerns_shp) 
    desc = GpuDnnConvDesc(border_mode=border_mode, subsample=subsample,
                          conv_mode=conv_mode)(img.shape, kerns_shp)
-
-    out = gpu_alloc_empty(*kern_shp)
-    return GpuDnnConvGradW(workmem=workmem)(img, topgrad, out, desc)
+    out = gpu_alloc_empty(*kerns_shp)
+    return GpuDnnConvGradW()(img, topgrad, out, desc)

 def dnn_gradinput(kerns, topgrad,
-                  img_shape,
+                  img_shp,
                  border_mode='valid', subsample=(1, 1),
-                  conv_mode='conv', workmem=None):
+                  conv_mode='conv'):
    """
    GPU convolution gradient with respect to input using cuDNN from NVIDIA.

@@ -1320,11 +1320,12 @@ def dnn_gradinput(kerns, topgrad,

    kerns = gpu_contiguous(kerns)
    topgrad = gpu_contiguous(topgrad)
+    img_shp = theano.tensor.as_tensor_variable(img_shp)
    desc = GpuDnnConvDesc(border_mode=border_mode, subsample=subsample,
                          conv_mode=conv_mode)(img_shp, kerns.shape)

    out = gpu_alloc_empty(*img_shp)
-    return GpuDnnConvGradI(workmem=workmem)(kerns, topgrad, out, desc)
+    return GpuDnnConvGradI()(kerns, topgrad, out, desc)


 class GpuDnnPoolDesc(GpuOp):

--- a/theano/tensor/nnet/abstract_conv2d.py
+++ b/theano/tensor/nnet/abstract_conv2d.py
@@ -25,7 +25,7 @@ from theano.sandbox.cuda.basic_ops import (
    GpuFromHost, HostFromGpu
    )
 from theano.sandbox.cuda.type import CudaNdarrayType
-from theano.sandbox.cuda.dnn import dnn_available, dnn_conv
+from theano.sandbox.cuda.dnn import dnn_available, dnn_conv, dnn_gradweight, dnn_gradinput
 from theano.sandbox.cuda.blas import GpuCorrMM, GpuCorrMM_gradWeights, GpuCorrMM_gradInputs
 from theano.sandbox.cuda.opt import values_eq_approx_high_tol

@@ -45,7 +45,7 @@ def conv2d(inputs,
           batch_size=None,
           border_mode='valid',
           subsample=(1, 1),
-           filter_flip=True):
+           filters_flip=True):
    """
    This function will build the symbolic graph for convolving a mini-batch of a
    stack of 2D inputs with a set of 2D filters. The implementation is modelled
@@ -92,8 +92,8 @@ def conv2d(inputs,
    :param subsample: factor by which to subsample the output.
        Also called strides elsewhere.

-    :type filter_flip: bool
-    :param filter_flip: If ``True``, will flip the filter rows and columns
+    :type filters_flip: bool
+    :param filters_flip: If ``True``, will flip the filter rows and columns
        before sliding them over the input. This operation is normally referred
        to as a convolution, and this is the default. If ``False``, the filters
        are not flipped and the operation is referred to as a cross-correlation.
@@ -109,7 +109,7 @@ def conv2d(inputs,
                             bsize=batch_size,
                             border_mode=border_mode,
                             subsample=subsample,
-                             filter_flip = filter_flip)
+                             filters_flip = filters_flip)
    return conv_op(inputs, filters)


@@ -120,12 +120,12 @@ class BaseAbstractConv2d(Op):
    FIXME
    """
    check_broadcast = False
-    __props__ = ('border_mode', 'subsample', 'filter_flip', 'imshp', 'kshp', 'bsize')
+    __props__ = ('border_mode', 'subsample', 'filters_flip', 'imshp', 'kshp', 'bsize')

    def __init__(self,
                 imshp=None, kshp=None, bsize=None,
                 border_mode="valid", subsample=(1, 1),
-                 filter_flip = True):
+                 filters_flip = True):
        if isinstance(border_mode, int):
            border_mode = (border_mode, border_mode)
        if isinstance(border_mode, tuple):
@@ -142,7 +142,7 @@ class BaseAbstractConv2d(Op):
        self.kshp = kshp
        self.bsize = bsize
        self.border_mode = border_mode
-        self.filter_flip = filter_flip
+        self.filters_flip = filters_flip

        if len(subsample) != 2:
            raise ValueError("subsample must have two elements")
@@ -175,9 +175,9 @@ class AbstractConv2d(BaseAbstractConv2d):
                 bsize=None,
                 border_mode="valid",
                 subsample=(1, 1),
-                 filter_flip = True):
+                 filters_flip = True):
        super(AbstractConv2d, self).__init__(imshp, kshp, bsize,
-                                             border_mode, subsample, filter_flip)
+                                             border_mode, subsample, filters_flip)

    def make_node(self, img, kern):
        if img.type.ndim != 4:
@@ -203,13 +203,13 @@ class AbstractConv2d(BaseAbstractConv2d):
                                             self.bsize,
                                             self.border_mode,
                                             self.subsample,
-                                             self.filter_flip)(
+                                             self.filters_flip)(
            weights, top, bottom.shape[-2:])
        d_weights = AbstractConv2d_gradWeights(self.imshp, self.kshp,
                                               self.bsize,
                                               self.border_mode,
                                               self.subsample,
-                                               self.filter_flip)(
+                                               self.filters_flip)(
            bottom, top, weights.shape[-2:])
        return d_bottom, d_weights

@@ -222,16 +222,15 @@ class AbstractConv2d_gradWeights(BaseAbstractConv2d):
           use it as needed.

    """
-
    def __init__(self,
                 imshp=None,
                 kshp=None,
                 bsize=None,
                 border_mode="valid",
                 subsample=(1, 1),
-                 filter_flip=True):
+                 filters_flip=True):
        super(AbstractConv2d_gradWeights, self).__init__(imshp, kshp, bsize,
-                                                         border_mode, subsample, filter_flip)
+                                                         border_mode, subsample, filters_flip)

    ## Update shape/height_width
    def make_node(self, img, topgrad, shape):
@@ -261,13 +260,13 @@ class AbstractConv2d_gradWeights(BaseAbstractConv2d):
                                             self.bsize,
                                             self.border_mode,
                                             self.subsample,
-                                             self.filter_flip)(weights, top, bottom.shape[-2:])
+                                             self.filters_flip)(weights, top, bottom.shape[-2:])
        d_top = AbstractConv2d(self.imshp,
                               self.kshp,
                               self.bsize,
                               self.border_mode,
                               self.subsample,
-                               self.filter_flip)(bottom, weights)
+                               self.filters_flip)(bottom, weights)
        d_height_width = (theano.gradient.DisconnectedType()(),)
        return (d_bottom, d_top) + d_height_width

@@ -290,9 +289,9 @@ class AbstractConv2d_gradInputs(BaseAbstractConv2d):
                 bsize=None,
                 border_mode="valid",
                 subsample=(1, 1),
-                 filter_flip=True):
+                 filters_flip=True):
        super(AbstractConv2d_gradInputs, self).__init__(imshp, kshp, bsize,
-                                                        border_mode, subsample, filter_flip)
+                                                        border_mode, subsample, filters_flip)

    ## Update shape/height_width
    def make_node(self, kern, topgrad, shape):
@@ -336,7 +335,8 @@ class AbstractConv2d_gradInputs(BaseAbstractConv2d):
 ### move to Gpu optimization
 ### Do not replace the AbstractOpt only the inputs
 ### Abstract Ops is replaced layer by device_specialized opt
-@local_optimizer([gpu_from_host, BaseAbstractConv2d])
+@local_optimizer([gpu_from_host,
+                  AbstractConv2d, AbstractConv2d_gradWeights, AbstractConv2d_gradInputs])
 def local_conv2d_gpu_conv(node):
    """
    gpu_from_host(AbstractConv) -> AbstractConv(gpu_from_host)
@@ -381,13 +381,12 @@ def local_conv2d_gpu_conv(node):
                node.outputs[0].broadcastable)
            out.values_eq_approx = values_eq_approx_high_tol
            return [as_tensor_variable(out)]
-# We register the optimizer that moves convolutions to the GPU.
 register_gpu()(local_conv2d_gpu_conv)



 ### Call dnn conv class directly
-@local_optimizer([BaseAbstractConv2d])
+@local_optimizer([AbstractConv2d, AbstractConv2d_gradWeights, AbstractConv2d_gradInputs])
 def local_conv2d_cudnn(node):

    inp1 = node.inputs[0]
@@ -399,7 +398,7 @@ def local_conv2d_cudnn(node):
    if not dnn_available():
        return None

-    if node.op.filter_flip:
+    if node.op.filters_flip:
        conv_mode = 'conv'
    else:
        conv_mode = 'cross'
@@ -411,20 +410,20 @@ def local_conv2d_cudnn(node):
                        conv_mode = conv_mode)
        return [rval]
    if (isinstance(node.op, AbstractConv2d_gradWeights)):
-        shape = node.inputs[2]
+        shape = (inp2.shape[1], inp1.shape[1], node.inputs[2][0], node.inputs[2][1])
        rval = dnn_gradweight(inp1, inp2, shape,
                              border_mode=node.op.border_mode,
                              subsample=node.op.subsample,
                              conv_mode = conv_mode)
        return [rval]
    if (isinstance(node.op, AbstractConv2d_gradInputs)):
-        shape = node.inputs[2]
-        rval = dnn_gradinput(inp1, inp2, shape
+        shape = (inp2.shape[0], inp1.shape[1], node.inputs[2][0], node.inputs[2][1])
+        rval = dnn_gradinput(inp1, inp2, shape,
                             border_mode=node.op.border_mode,
                             subsample=node.op.subsample,
                             conv_mode = conv_mode)
        return [rval]
-register_specialize_device(local_conv2d_cudnn)
+register_specialize_device(local_conv2d_cudnn, 'cudnn')


 @local_optimizer([AbstractConv2d])
@@ -441,7 +440,7 @@ def local_conv2d_corrmm(node):
        subsample = node.op.subsample
        if (border_mode == 'valid') or (subsample != (1,1)):
            # need to flip the kernel for valid convolution
-            if node.op.filter_flip:
+            if node.op.filters_flip:
                kern = kern[:, :, ::-1, ::-1]
            # By default use GpuCorrMM
            rval = GpuCorrMM(border_mode, subsample)(gpu_contiguous(img),
@@ -484,7 +483,7 @@ def local_conv2d_corrmm(node):
            rval = GpuCorrMM_gradInputs('valid', subsample)(
                    gpu_contiguous(kern), gpu_contiguous(img))
        return [rval]
-#register_specialize_device(local_conv2d_corrmm)
+register_specialize_device(local_conv2d_corrmm, 'conv_gemm')

 @local_optimizer([AbstractConv2d_gradWeights])
 def local_conv2d_gradweight_corrmm(node):
@@ -494,13 +493,13 @@ def local_conv2d_gradweight_corrmm(node):
    if not isinstance(img.type, CudaNdarrayType) or \
            not isinstance(topgrad.type, CudaNdarrayType):
        return None
-    if node.op.filter_flip:
+    if node.op.filters_flip:
        img = img[:, :, ::-1, ::-1]
    rval = GpuCorrMM_gradWeights(border_mode=node.op.border_mode,
                                 subsample=node.op.subsample)(
        gpu_contiguous(img), gpu_contiguous(topgrad), shape)
    return [rval]
-#register_specialize_device(local_conv2d_gradweight_corrmm)
+register_specialize_device(local_conv2d_gradweight_corrmm, 'conv_gemm')

 @local_optimizer([AbstractConv2d_gradInputs])
 def local_conv2d_gradinputs_corrmm(node):
@@ -510,14 +509,14 @@ def local_conv2d_gradinputs_corrmm(node):
            not isinstance(topgrad.type, CudaNdarrayType):
        return None

-    if node.op.filter_flip:
+    if node.op.filters_flip:
        kern = kern[:, :, ::-1, ::-1]

    rval =  GpuCorrMM_gradInputs(border_mode=node.op.border_mode,
    subsample=node.op.subsample)(
        gpu_contiguous(kern), gpu_contiguous(topgrad), shape)
    return [rval]
-#register_specialize_device(local_conv2d_gradinputs_corrmm)
+register_specialize_device(local_conv2d_gradinputs_corrmm, 'conv_gemm')



@@ -553,7 +552,7 @@ def local_conv2d_gradweight_cpu(node):
    if node.op.border_mode not in ['full', 'valid']:
        return None

-    if not node.op.filter_flip:
+    if not node.op.filters_flip:
        # Not tested yet
        return

@@ -617,7 +616,7 @@ def local_conv2d_gradweight_cpu(node):
        raise NotImplementedError(
            'Only [full,valid] modes are currently supported.')

-    if node.op.filter_flip:
+    if node.op.filters_flip:
        filters = filters[:, :, ::-1, ::-1]  # flip them

    dw = ConvOp(imshp, kshp, nkern, bsize, 1, 1, output_mode='valid',
@@ -645,7 +644,7 @@ def local_conv2d_gradinputs_cpu(node):
    if node.op.border_mode not in ['full', 'valid']:
        return None

-    if not node.op.filter_flip:
+    if not node.op.filters_flip:
        # Not tested yet
        return None


--- a/theano/tensor/nnet/tests/test_abstractconv.py
+++ b/theano/tensor/nnet/tests/test_abstractconv.py
@@ -3,6 +3,7 @@ import numpy
 import copy

 import theano
+import theano.tensor as T
 from theano.tests import unittest_tools as utt

 from nose.plugins.skip import SkipTest
@@ -13,7 +14,7 @@ from theano.sandbox.cuda import float32_shared_constructor as gpu_shared
 from theano.compile import shared as cpu_shared

 from theano.sandbox.cuda.tests.test_conv_cuda_ndarray import py_conv
-#from theano.sandbox.cuda.dnn import dnn_available
+from theano.sandbox.cuda.dnn import dnn_available, dnn_conv, dnn_gradweight, dnn_gradinput


 if theano.config.mode == 'FAST_COMPILE':
@@ -26,249 +27,239 @@ else:

 class TestConv2d(unittest.TestCase):

-    def run_fwd(self,
-                inputs_shape,
-                filters_shape,
-                subsample=(1, 1),
-                verify_grad=True,
-                mode=mode_without_gpu,
-                border_mode='valid',
-                device='gpu',
-                provide_shape=False):
+    def setUp(self):
+        super(TestConv2d, self).setUp()
+
+        self.inputs_shapes = [(16, 1, 12, 12), (16, 1, 18, 18), (16, 1, 24, 24),
+                              (16, 1, 20, 20), (16, 1, 32, 20), (10, 5, 32, 32)]
+        self.filters_shapes = [(10, 1, 2, 2), (10, 1, 3, 3), (10, 1, 2, 2),
+                               (1, 1, 2, 5), (5, 1, 2, 2), (15, 5, 2, 2)]
+        self.subsamples = [(1, 1), (2, 2), (2, 4)]
+        self.border_modes = ["valid", "full", (0, 0), (1, 1), (5, 5), (5, 2)]
+
+
+    def get_output_shape(self, inputs_shape, filters_shape, subsample, border_mode):
+        """Return the (batch, num_filters, rows, cols) shape of the conv output."""
+        if border_mode == "valid":
+            border_mode = (0, 0)
+        if border_mode == "full":
+            border_mode = (filters_shape[2] - 1, filters_shape[3] - 1)
+        batch_size = inputs_shape[0]
+        # Filters are laid out (num_filters, channels, rows, cols): axis 0, not 1.
+        num_filters = filters_shape[0]
+        return (batch_size, num_filters,) + \
+                tuple(None if i is None or k is None
+                      else ((i + 2*pad - k) // d + 1)
+                      for i, k, d, pad in zip(inputs_shape[2:], filters_shape[2:],
+                                              subsample, border_mode))
+
+    def run_fwd(self, inputs_shape, filters_shape, ref=dnn_conv,
+                subsample=(1, 1), verify_grad=True, mode=mode_without_gpu,
+                border_mode='valid', filters_flip=True, device='cpu', provide_shape=False):

        inputs_val = numpy.random.random(inputs_shape).astype('float32')
        filters_val = numpy.random.random(filters_shape).astype('float32')
-
        if device == 'gpu':
            inputs = gpu_shared(inputs_val)
            filters = gpu_shared(filters_val)
        else:
-            inputs = cpu_shared(inputs_val)
-            filters = cpu_shared(filters_val)
+            inputs = theano.tensor.as_tensor_variable(cpu_shared(inputs_val))
+            filters = theano.tensor.as_tensor_variable(cpu_shared(filters_val))
        if provide_shape:
            imshp = inputs_shape
            kshp = filters_shape
        else:
            imshp = None
            kshp = None
+        if filters_flip:
+            conv_mode = 'conv'
+        else:
+            conv_mode = 'cross'

-        c_ref = conv_ref.conv2d(inputs, filters,
-                                border_mode=border_mode,
-                                subsample=subsample)
+        c_ref = ref(inputs, filters,
+                    border_mode=border_mode,
+                    subsample=subsample,
+                    conv_mode = conv_mode)
        c = conv.conv2d(inputs, filters,
-                        border_mode=border_mode, subsample=subsample)
-
+                        border_mode=border_mode,
+                        subsample=subsample,
+                        filters_flip=filters_flip,
+                        inputs_shape=imshp,
+                        filters_shape=kshp)
        f_ref = theano.function([], c_ref, mode=mode)
        f = theano.function([], c, mode)
-
        res_ref = f_ref()
        res = f()
-        print res_ref.shape, res.shape
        utt.assert_allclose(res_ref, res)
        if verify_grad:
-            utt.verify_grad(conv.AbstractConv2d(border_mode="valid",
-                                                imshp=imshp,
-                                                kshp=kshp,
-                                                bsize=inputs_shape[0],
-                                                subsample=subsample),
+            utt.verify_grad(conv.AbstractConv2d(border_mode="valid", imshp=imshp, kshp=kshp,
+                                                bsize=inputs_shape[0], subsample=subsample),
                            [inputs_val, filters_val])

-
-    def run_gradweight(self,
-                       inputs_shape,
-                       filters_shape,
-                       output_shape,
-                       subsample=(1, 1),
-                       verify_grad=True,
-                       mode=mode_without_gpu,
-                       border_mode='valid',
-                       device='gpu',
-                       provide_shape = False):
+    def run_gradweight(self, inputs_shape, filters_shape, output_shape,
+                       ref=dnn_gradweight, subsample=(1, 1), filters_flip=True,
+                       verify_grad=True, mode=mode_without_gpu, border_mode='valid',
+                       device='cpu', provide_shape = False):

        inputs_val = numpy.random.random(inputs_shape).astype('float32')
        output_val = numpy.random.random(output_shape).astype('float32')
-
        if device == 'gpu':
            inputs = gpu_shared(inputs_val)
            output = gpu_shared(output_val)
        else:
-            inputs = cpu_shared(inputs_val)
-            output = cpu_shared(output_val)
-
+            inputs = theano.tensor.as_tensor_variable(cpu_shared(inputs_val))
+            output = theano.tensor.as_tensor_variable(cpu_shared(output_val))
        if provide_shape:
            imshp = inputs_shape
            kshp = filters_shape
        else:
            imshp = None
            kshp = None
-
+        if filters_flip:
+            conv_mode = 'conv'
+        else:
+            conv_mode = 'cross'
        c = conv.AbstractConv2d_gradWeights(border_mode=border_mode,
+                                            filters_flip=filters_flip,
                                            subsample=subsample,
                                            imshp = imshp, kshp = kshp)
        c = c(inputs, output, filters_shape[-2:])
+        c_ref = ref(inputs, output,
+                    filters_shape,
+                    border_mode=border_mode,
+                    subsample=subsample,
+                    conv_mode=conv_mode)
        f = theano.function([], c, mode)
-        res_ref = py_conv(inputs_val.transpose((1, 0, 2, 3)),
-                          output_val.transpose((1, 0, 2, 3))[:, :, ::-1, ::-1],
-                          'valid', subsample).transpose((1, 0, 2, 3))[:, :, ::-1, ::-1]
-        res = numpy.array(f())
-        print res_ref.shape, res.shape
-
+        f_ref = theano.function([], c_ref, mode)
+        res = f()
+        res_ref = f_ref()
        utt.assert_allclose(res_ref, res)

        def abstract_conv2d_gradweight(inputs_val, output_val):
-            conv_op = conv.AbstractConv2d_gradInputs(border_mode=border_mode,
-                                                     subsample=subsample)
+            conv_op = conv.AbstractConv2d_gradInputs(border_mode=border_mode, subsample=subsample)
            return conv_op(inputs_val, output_val, filters_shape[-2:])
        if verify_grad:
-            utt.verify_grad(abstract_conv2d_gradweight,
-                           [inputs_val, output_val])
+            utt.verify_grad(abstract_conv2d_gradweight, [inputs_val, output_val])


-    def run_gradinput(self,
-                      inputs_shape,
-                      filters_shape,
-                      output_shape,
-                      subsample=(1, 1),
-                      verify_grad=True,
-                      mode=mode_without_gpu,
-                      border_mode='valid',
-                      device='gpu',
-                      provide_shape = False):
+    def run_gradinput(self, inputs_shape, filters_shape, output_shape, ref=dnn_gradinput,
+                      subsample=(1, 1), filters_flip=True, verify_grad=True, mode=mode_without_gpu,
+                      border_mode='valid', device='cpu', provide_shape = False):

        output_val = numpy.random.random(output_shape).astype('float32')
        filters_val = numpy.random.random(filters_shape).astype('float32')
-
-
        if device == 'gpu':
            output = gpu_shared(output_val)
            filters = gpu_shared(filters_val)
        else:
-            output = cpu_shared(output_val)
-            filters = cpu_shared(filters_val)
+            output = theano.tensor.as_tensor_variable(cpu_shared(output_val))
+            filters = theano.tensor.as_tensor_variable(cpu_shared(filters_val))
        if provide_shape:
            imshp = inputs_shape
            kshp = filters_shape
        else:
            imshp = None
            kshp = None
-
-        c = conv.AbstractConv2d_gradInputs(border_mode="valid",
+        if filters_flip:
+            conv_mode = 'conv'
+        else:
+            conv_mode = 'cross'
+        c = conv.AbstractConv2d_gradInputs(border_mode=border_mode,
                                           subsample=subsample,
-                                           imshp = imshp, kshp = kshp)
+                                           filters_flip=filters_flip,
+                                           imshp=imshp, kshp=kshp)
        c = c(filters, output, inputs_shape[-2:])
+        c_ref = ref(filters, output, inputs_shape,
+                    border_mode=border_mode, subsample=subsample,
+                    conv_mode=conv_mode)
        f = theano.function([], c, mode)
-        res_ref = py_conv(output_val,
-                          filters_val.transpose(1, 0, 2, 3)[:, :, ::-1, ::-1],
-                          'full', subsample)
-        print filters_val.shape, output_val.shape, inputs_shape
-        res = numpy.array(f())
-        print "2, ", res_ref.shape, res.shape
-
+        f_ref = theano.function([], c_ref, mode)
+        res = f()
+        res_ref = f_ref()
        utt.assert_allclose(res_ref, res)

        def abstract_conv2d_gradinputs(filters_val, output_val):
-            conv_op = conv.AbstractConv2d_gradInputs(border_mode=border_mode,
-                                                     subsample=subsample)
+            conv_op = conv.AbstractConv2d_gradInputs(border_mode=border_mode, subsample=subsample)
            return conv_op(filters_val, output_val, inputs_shape[-2:])
-
        if verify_grad:
-            utt.verify_grad(abstract_conv2d_gradinputs,
-                            [filters_val, output_val])
-
+            utt.verify_grad(abstract_conv2d_gradinputs, [filters_val, output_val])
+
+
+    def test_dnn_conv(self):
+        """Compare the abstract conv ops against the cuDNN references on GPU."""
+        if not dnn_available():
+            raise SkipTest("cuDNN is not available")
+        mode = mode_with_gpu
+        inputs_shapes =  self.inputs_shapes
+        filters_shapes = self.filters_shapes
+        subsamples = self.subsamples
+        border_modes = self.border_modes
+        for i, f in zip(inputs_shapes[0:1], filters_shapes[0:1]):
+            for s in subsamples:
+                for b in border_modes:
+                    o = self.get_output_shape(i, f, s, b)
+                    for provide_shape in [False, True]:
+                        self.run_fwd(inputs_shape=i, filters_shape=f, subsample=s,
+                                     verify_grad=True, mode=mode, device='gpu',
+                                     provide_shape=provide_shape, border_mode=b)
+                        self.run_gradweight(inputs_shape=i, filters_shape=f,
+                                            output_shape=o, subsample=s,
+                                            verify_grad=True, mode=mode, device='gpu',
+                                            provide_shape=provide_shape, border_mode=b)
+                        self.run_gradinput(inputs_shape=i, filters_shape=f,
+                                           output_shape=o, subsample=s,
+                                           verify_grad=False, mode=mode, device='gpu',
+                                           provide_shape=provide_shape, border_mode=b)
+
+    def test_cormm_conv(self):
+        """Run forward and both gradient checks on GPU with 'cudnn' excluded."""
+        mode = mode_with_gpu.excluding('cudnn')
+        inputs_shapes =  self.inputs_shapes
+        filters_shapes = self.filters_shapes
+        subsamples = self.subsamples
+        border_modes = self.border_modes
+        for i, f in zip(inputs_shapes, filters_shapes):
+            for s in subsamples:
+                for b in border_modes:
+                    o = self.get_output_shape(i, f, s, b)
+                    for provide_shape in [False, True]:
+                        self.run_fwd(inputs_shape=i, filters_shape=f, subsample=s,
+                                     verify_grad=True, mode=mode, device='gpu',
+                                     provide_shape=provide_shape, border_mode=b)
+                        self.run_gradweight(inputs_shape=i, filters_shape=f,
+                                            output_shape=o, subsample=s,
+                                            verify_grad=True, mode=mode, device='gpu',
+                                            provide_shape=provide_shape, border_mode=b)
+                        self.run_gradinput(inputs_shape=i, filters_shape=f,
+                                           output_shape=o, subsample=s,
+                                           verify_grad=True, mode=mode, device='gpu',
+                                           provide_shape=provide_shape, border_mode=b)


-    def test_corrmm(self):
-       mode = mode_with_gpu
-       mode = mode.excluding('cudnn')
-       self.run_fwd(inputs_shape=(16, 1, 2, 2),
-                    filters_shape=(10, 1, 2, 2),
-                    verify_grad=False, mode=mode)
-        self.run_gradweight(inputs_shape=(16, 1, 2, 2),
-                            filters_shape=(10, 1, 2, 2),
-                            verify_grad=False, mode=mode)
-        self.run_gradinput(inputs_shape=(1, 1, 2, 2),
-                           filters_shape=(10, 1, 2, 2),
-                           verify_grad=False, mode=mode)



    def test_cpu_conv(self):
-
-        inputs_shapes =  [(16, 1, 2, 2), (16, 1, 8, 8), (16, 1, 4, 4)]
-        filters_shapes = [(10, 1, 2, 2), (10, 1, 2, 2), (10, 1, 2, 2),]
-        output_shapes =  [(16, 10, 1, 1), (16, 10, 7, 7), (16, 10, 3, 3)]
-        subsamples =     [(1, 1), (1, 1), (1, 1)]
-
-        border_mode= 'valid'
-        for i, f, o, s in zip(inputs_shapes[0:1], filters_shapes[0:1], output_shapes[0:1], subsamples[0:1]):
-            for provide_shape in [False, True]:
-                self.run_fwd(inputs_shape=i, filters_shape=f, subsample=s,
-                             verify_grad=True, mode=mode_without_gpu, device='cpu',
-                             provide_shape=provide_shape, border_mode=border_mode)
-        return
-        ### No reference implementation of full available yet
-        border_mode= 'full'
-        provide_shape = True
-        self.run_gradweight(inputs_shape=(16, 1, 2, 2),
-                            filters_shape=(10, 1, 2, 2),
-                            output_shape=(16, 10, 3, 3),
-                            subsample=(1, 1),
-                            verify_grad=True, mode=mode_without_gpu, device='cpu',
-                            provide_shape=provide_shape, border_mode=border_mode)
-
-
-
-
-    def test_cpu_grad_weight(self):
-
-        ### FIXME subsample
-        inputs_shapes =  [(16, 1, 2, 2), (16, 1, 8, 8), (16, 1, 4, 4)]
-        filters_shapes = [(10, 1, 2, 2), (10, 1, 2, 2), (10, 1, 2, 2),]
-        output_shapes =  [(16, 10, 1, 1), (16, 10, 7, 7), (16, 10, 3, 3)]
-        subsamples =     [(1, 1), (1, 1), (1, 1)]
-
-        border_mode = 'valid'
-        for i, f, o, s in zip(inputs_shapes[:], filters_shapes[:], output_shapes[:], subsamples[:]):
-            for provide_shape in [False, True]:
-                self.run_gradweight(inputs_shape=i, filters_shape=f,
-                                    output_shape=o, subsample=s,
-                                    verify_grad=True, mode=mode_without_gpu, device='cpu',
-                                    provide_shape=provide_shape, border_mode=border_mode)
-        return
-        ### No reference implementation of full available yet
-        border_mode= 'full'
-        provide_shape = True
-        self.run_gradweight(inputs_shape=(16, 1, 2, 2),
-                            filters_shape=(10, 1, 2, 2),
-                            output_shape=(16, 10, 3, 3),
-                            subsample=(1, 1),
-                            verify_grad=True, mode=mode_without_gpu, device='cpu',
-                            provide_shape=provide_shape, border_mode=border_mode)
-
-
-    def test_cpu_grad_input(self):
-
-        ### FIXME subsample
-        inputs_shapes =  [(16, 1, 2, 2), (16, 1, 8, 8), (16, 1, 4, 4)]
-        filters_shapes = [(10, 1, 2, 2), (10, 1, 2, 2), (10, 1, 2, 2),]
-        output_shapes =  [(16, 10, 1, 1), (16, 10, 7, 7), (16, 10, 3, 3)]
-        subsamples =     [(1, 1), (1, 1), (1, 1)]
-
-        border_mode= 'valid'
-        for i, f, o, s in zip(inputs_shapes[:], filters_shapes[:], output_shapes[:], subsamples[:]):
-            for provide_shape in [True, False]:
-                self.run_gradinput(inputs_shape=i, filters_shape=f,
-                                   output_shape=o, subsample=s,
-                                   verify_grad=True, mode=mode_without_gpu, device='cpu',
-                                   provide_shape=provide_shape, border_mode=border_mode)
-        return
-        ### No reference implementation of full available yet
-        border_mode= 'full'
-        provide_shape = True
-        self.run_gradweight(inputs_shape=(16, 1, 2, 2),
-                            filters_shape=(10, 1, 2, 2),
-                            output_shape=(16, 10, 3, 3),
-                            subsample=(1, 1),
-                            verify_grad=True, mode=mode_without_gpu, device='cpu',
-                            provide_shape=provide_shape, border_mode=border_mode)
-
+        """Run forward and both gradient checks on CPU ('valid' and 'full' only)."""
+        mode = mode_without_gpu
+        inputs_shapes =  self.inputs_shapes
+        filters_shapes = self.filters_shapes
+        subsamples = self.subsamples
+        border_modes = self.border_modes[:2] # only valid and full are supported
+
+        for i, f in zip(inputs_shapes, filters_shapes):
+            for s in subsamples:
+                for b in border_modes:
+                    o = self.get_output_shape(i, f, s, b)
+                    for provide_shape in [False, True]:
+                        self.run_fwd(inputs_shape=i, filters_shape=f, subsample=s,
+                                     verify_grad=True, mode=mode, device='cpu',
+                                     provide_shape=provide_shape, border_mode=b)
+                        self.run_gradweight(inputs_shape=i, filters_shape=f,
+                                            output_shape=o, subsample=s,
+                                            verify_grad=True, mode=mode, device='cpu',
+                                            provide_shape=provide_shape, border_mode=b)
+                        self.run_gradinput(inputs_shape=i, filters_shape=f,
+                                           output_shape=o, subsample=s,
+                                           verify_grad=True, mode=mode, device='cpu',
+                                           provide_shape=provide_shape, border_mode=b)