Merge pull request #2222 from SinaHonari/issue2196

DownsampleFactorMax support strides: issue #2196

Merge pull request #2222 from SinaHonari/issue2196
c2895dcf · Pascal Lamblin · 0bee6bb1 · 7518621d · c2895dcf · c2895dcf
--- a/theano/tensor/signal/downsample.py
+++ b/theano/tensor/signal/downsample.py
@@ -29,8 +29,8 @@ def max_pool_2d(input, ds, ignore_border=False):
    :param input: input images. Max pooling will be done over the 2 last
        dimensions.
    :type ds: tuple of length 2
-    :param ds: factor by which to downscale (vertical ds, horizontal ds). 
+    :param ds: factor by which to downscale (vertical ds, horizontal ds).
-        (2,2) will halve the image in each dimension. 
+        (2,2) will halve the image in each dimension.
    :param ignore_border: boolean value. When True, (5,5) input with ds=(2,2)
        will generate a (2,2) output. (3,3) otherwise.
    """
@@ -68,7 +68,7 @@ class DownsampleFactorMax(Op):
    """
    @staticmethod
-    def out_shape(imgshape, ds, ignore_border=False):
+    def out_shape(imgshape, ds, ignore_border=False, st=None):
        """Return the shape of the output from this op, for input of given
        shape and flags.
@@ -78,8 +78,13 @@ class DownsampleFactorMax(Op):
            scalar Theano variable.
        :param ds: downsample factor over rows and columns
+                   this parameter indicates the size of the pooling region
        :type ds: list or tuple of two ints
+        :param st: the stride size. This is the distance between the pooling
+                   regions. If it's set to None, it equals ds.
+        :type st: list or tuple of two ints
        :param ignore_border: if ds doesn't divide imgshape, do we include an
            extra row/col of partial downsampling (False) or ignore it (True).
        :type ignore_border: bool
@@ -92,25 +97,58 @@ class DownsampleFactorMax(Op):
        if len(imgshape) < 2:
            raise TypeError('imgshape must have at least two elements '
                            '(rows, cols)')
+        if st is None:
+            st = ds
        r, c = imgshape[-2:]
-        rval = list(imgshape[:-2]) + [r // ds[0], c // ds[1]]
-        if not ignore_border:
+        if ignore_border:
+            out_r = (r - ds[0]) // st[0] + 1
+            out_c = (c - ds[1]) // st[1] + 1
            if isinstance(r, theano.Variable):
-                rval[-2] = tensor.switch(r % ds[0], rval[-2] + 1, rval[-2])
+                nr = tensor.maximum(out_r, 0)
-            elif r % ds[0]:
+            else:
-                rval[-2] += 1
+                nr = numpy.maximum(out_r, 0)
            if isinstance(c, theano.Variable):
-                rval[-1] = tensor.switch(c % ds[1], rval[-1] + 1, rval[-1])
+                nc = tensor.maximum(out_c, 0)
-            elif c % ds[1]:
+            else:
-                rval[-1] += 1
+                nc = numpy.maximum(out_c, 0)
+        else:
+            if isinstance(r, theano.Variable):
+                nr = tensor.switch(tensor.ge(st[0], ds[0]),
+                                   (r - 1) // st[0] + 1,
+                                   tensor.maximum(0, (r - 1 - ds[0])
+                                                  // st[0] + 1) + 1)
+            elif st[0] >= ds[0]:
+                nr = (r - 1) // st[0] + 1
+            else:
+                nr = max(0, (r - 1 - ds[0]) // st[0] + 1) + 1
+            if isinstance(c, theano.Variable):
+                nc = tensor.switch(tensor.ge(st[1], ds[1]),
+                                   (c - 1) // st[1] + 1,
+                                   tensor.maximum(0, (c - 1 - ds[1])
+                                                  // st[1] + 1) + 1)
+            elif st[1] >= ds[1]:
+                nc = (c - 1) // st[1] + 1
+            else:
+                nc = max(0, (c - 1 - ds[1]) // st[1] + 1) + 1
+        rval = list(imgshape[:-2]) + [nr, nc]
        return rval
-    def __init__(self, ds, ignore_border=False):
+    def __init__(self, ds, ignore_border=False, st=None):
        """
-        :param ds: downsample factor over rows and columns
+        :param ds: downsample factor over rows and columns.
+                   ds indicates the pool region size.
        :type ds: list or tuple of two ints
+        :param st: stride size, which is the number of shifts
+            over rows/cols to get the next pool region.
+            If st is None, it is considered equal to ds
+            (no overlap on pooling regions).
+        :type st: list or tuple of two ints
        :param ignore_border: if ds doesn't divide imgshape, do we include
            an extra row/col of partial downsampling (False) or
            ignore it (True).
@@ -123,19 +161,24 @@ class DownsampleFactorMax(Op):
            raise ValueError(
                "DownsampleFactorMax downsample parameters must be ints."
                " Got %s" % str(ds))
+        if st is None:
+            st = ds
+        self.st = tuple(st)
        self.ignore_border = ignore_border
    def __eq__(self, other):
        return (type(self) == type(other) and
                self.ds == other.ds and
+                self.st == other.st and
                self.ignore_border == other.ignore_border)
    def __hash__(self):
-        return hash(type(self)) ^ hash(self.ds) ^ hash(self.ignore_border)
+        return hash(type(self)) ^ hash(self.ds) ^ \
+            hash(self.st) ^ hash(self.ignore_border)
    def __str__(self):
-        return '%s{%s,%s}' % (self.__class__.__name__,
+        return '%s{%s,%s,%s}' % (self.__class__.__name__,
-                              self.ds, self.ignore_border)
+                                 self.ds, self.st, self.ignore_border)
    def make_node(self, x):
        if x.type.ndim != 4:
@@ -151,46 +194,57 @@ class DownsampleFactorMax(Op):
        if len(x.shape) != 4:
            raise NotImplementedError(
                'DownsampleFactorMax requires 4D input for now')
-        z_shape = self.out_shape(x.shape, self.ds, self.ignore_border)
+        z_shape = self.out_shape(x.shape, self.ds, self.ignore_border, self.st)
        if (z[0] is None) or (z[0].shape != z_shape):
            z[0] = numpy.zeros(self.out_shape(x.shape, self.ds,
-                                              self.ignore_border))
+                                              self.ignore_border, self.st))
            z[0] = theano._asarray(z[0], dtype=x.dtype)
        zz = z[0]
        ## zz needs to be initialized with -inf for the following to work
        zz -= numpy.inf
+        #number of pooling output rows
+        pr = zz.shape[-2]
+        #number of pooling output cols
+        pc = zz.shape[-1]
        ds0, ds1 = self.ds
-        if self.ignore_border:
+        st0, st1 = self.st
-            x_usable2 = (x.shape[2] // ds0 * ds0)
+        img_rows = x.shape[-2]
-        else:
+        img_cols = x.shape[-1]
-            x_usable2 = x.shape[2]
-        if self.ignore_border:
-            x_usable3 = (x.shape[3] // ds1 * ds1)
-        else:
-            x_usable3 = x.shape[3]
        for n in xrange(x.shape[0]):
            for k in xrange(x.shape[1]):
-                for i in xrange(x_usable2):
+                for r in xrange(pr):
-                    zi = i / ds0
+                    row_st = r * st0
-                    for j in xrange(x_usable3):
+                    row_end = __builtin__.min(row_st + ds0, img_rows)
-                        zj = j / ds1
+                    for c in xrange(pc):
-                        zz[n, k, zi, zj] = __builtin__.max(zz[n, k, zi, zj],
+                        col_st = c * st1
-                                                           x[n, k, i, j])
+                        col_end = __builtin__.min(col_st + ds1, img_cols)
+                        for row_ind in xrange(row_st, row_end):
+                            for col_ind in xrange(col_st, col_end):
+                                zz[n, k, r, c] = \
+                                    __builtin__.max(zz[n, k, r, c],
+                                                    x[n, k, row_ind, col_ind])
    def infer_shape(self, node, in_shapes):
-        shp = self.out_shape(in_shapes[0], self.ds, self.ignore_border)
+        shp = self.out_shape(in_shapes[0], self.ds,
+                             self.ignore_border, self.st)
        return [shp]
    def grad(self, inp, grads):
        x, = inp
        gz, = grads
        maxout = self(x)
+        if self.st != self.ds:
+            return [theano.gradient.grad_not_implemented(self, 0, x)]
        return [DownsampleFactorMaxGrad(self.ds,
-                                        ignore_border=self.ignore_border)(
+                                        ignore_border=self.ignore_border,
+                                        st=self.st)(
                                            x, maxout, gz)]
    def c_code(self, node, name, inp, out, sub):
+        if self.ds != self.st:
+           raise theano.gof.utils.MethodNotDefined()
        x, = inp
        z, = out
        fail = sub['fail']
@@ -268,21 +322,26 @@ class DownsampleFactorMax(Op):
 class DownsampleFactorMaxGrad(Op):
-    def __init__(self, ds, ignore_border):
+    def __init__(self, ds, ignore_border, st=None):
        self.ds = tuple(ds)
        self.ignore_border = ignore_border
+        if st is None:
+            st = ds
+        self.st = tuple(st)
    def __eq__(self, other):
        return (type(self) == type(other) and
                self.ds == other.ds and
+                self.st == other.st and
                self.ignore_border == other.ignore_border)
    def __hash__(self):
-        return hash(type(self)) ^ hash(self.ds) ^ hash(self.ignore_border)
+        return hash(type(self)) ^ hash(self.ds) ^ \
+            hash(self.st) ^ hash(self.ignore_border)
    def __str__(self):
-        return '%s{%s,%s}' % (self.__class__.__name__,
+        return '%s{%s,%s,%s}' % (self.__class__.__name__,
-                              self.ds, self.ignore_border)
+                                 self.ds, self.st, self.ignore_border)
    def make_node(self, x, maxout, gz):
        # make_node should only be called by the grad function of
@@ -298,22 +357,27 @@ class DownsampleFactorMaxGrad(Op):
        gx_stg, = out
        gx = numpy.zeros_like(x)
+        #number of pooling output rows
+        pr = maxout.shape[-2]
+        #number of pooling output cols
+        pc = maxout.shape[-1]
        ds0, ds1 = self.ds
-        shape2 = (x.shape[2] // ds0 * ds0)
+        st0, st1 = self.st
-        if not self.ignore_border:
+        img_rows = x.shape[-2]
-            shape2 = x.shape[2]
+        img_cols = x.shape[-1]
-        shape3 = (x.shape[3] // ds1 * ds1)
-        if not self.ignore_border:
-            shape3 = x.shape[3]
        for n in xrange(x.shape[0]):
            for k in xrange(x.shape[1]):
-                for i in xrange(shape2):
+                for r in xrange(pr):
-                    zi = i // ds0
+                    row_st = r * st0
-                    for j in xrange(shape3):
+                    row_end = __builtin__.min(row_st + ds0, img_rows)
-                        zj = j // ds1
+                    for c in xrange(pc):
-                        if (maxout[n, k, zi, zj] == x[n, k, i, j]):
+                        col_st = c * st1
-                            gx[n, k, i, j] = gz[n, k, zi, zj]
+                        col_end = __builtin__.min(col_st + ds1, img_cols)
-                        # No else clause needed as it is allocated with zeros
+                        for row_ind in xrange(row_st, row_end):
+                            for col_ind in xrange(col_st, col_end):
+                                if (maxout[n, k, r, c] == x[n, k, row_ind, col_ind]):
+                                    gx[n, k, row_ind, col_ind] += gz[n, k, r, c]
        gx_stg[0] = gx
    def infer_shape(self, node, in_shapes):
@@ -322,10 +386,14 @@ class DownsampleFactorMaxGrad(Op):
    def grad(self, inp, grads):
        x, maxout, gz = inp
        ggx, = grads
+        if self.st != self.ds:
+            return [theano.gradient.grad_not_implemented(self, 0, x),
+                    theano.gradient.grad_not_implemented(self, 1, maxout),
+                    theano.gradient.grad_not_implemented(self, 2, gz)]
        return [theano.tensor.zeros_like(x),
                theano.tensor.zeros_like(maxout),
                DownsampleFactorMaxGradGrad(
-                    self.ds, ignore_border=self.ignore_border)(x, maxout, ggx)]
+                    self.ds, ignore_border=self.ignore_border, st=self.st)(x, maxout, ggx)]
    def c_code(self, node, name, inp, out, sub):
        x, z, gz = inp
@@ -426,7 +494,7 @@ class DownsampleFactorMaxGrad(Op):
 class DownsampleFactorMaxGradGrad(Op):
    @staticmethod
-    def out_shape(imgshape, ds, ignore_border=False):
+    def out_shape(imgshape, ds, ignore_border=False, st=None):
        """Return the shape of the output from this op, for input of given
        shape and flags.
@@ -436,11 +504,15 @@ class DownsampleFactorMaxGradGrad(Op):
            scalar Theano variable.
        :param ds: downsample factor over rows and columns
+                   this parameter indicates the size of the pooling region
        :type ds: list or tuple of two ints
-        :param ignore_border: if ds doesn't divide imgshape, do we include
+        :param st: the stride size. This is the distance between the pooling
-            an extra row/col of partial downsampling (False) or ignore
+                   regions. If it's set to None, it equals ds.
-            it (True).
+        :type st: list or tuple of two ints
+        :param ignore_border: if ds doesn't divide imgshape, do we include an
+            extra row/col of partial downsampling (False) or ignore it (True).
        :type ignore_border: bool
        :rtype: list
@@ -451,35 +523,66 @@ class DownsampleFactorMaxGradGrad(Op):
        if len(imgshape) < 2:
            raise TypeError('imgshape must have at least two elements '
                            '(rows, cols)')
+        if st is None:
+            st = ds
        r, c = imgshape[-2:]
-        rval = list(imgshape[:-2]) + [r // ds[0], c // ds[1]]
-        if not ignore_border:
+        if ignore_border:
+            out_r = (r - ds[0]) // st[0] + 1
+            out_c = (c - ds[1]) // st[1] + 1
+            if isinstance(r, theano.Variable):
+                nr = tensor.maximum(out_r, 0)
+            else:
+                nr = numpy.maximum(out_r, 0)
+            if isinstance(c, theano.Variable):
+                nc = tensor.maximum(out_c, 0)
+            else:
+                nc = numpy.maximum(out_c, 0)
+        else:
            if isinstance(r, theano.Variable):
-                rval[-2] = tensor.switch(r % ds[0], rval[-2] + 1, rval[-2])
+                nr = tensor.switch(tensor.ge(st[0], ds[0]),
-            elif r % ds[0]:
+                                   (r - 1) // st[0] + 1,
-                rval[-2] += 1
+                                   tensor.maximum(0, (r - 1 - ds[0])
+                                                  // st[0] + 1) + 1)
+            elif st[0] >= ds[0]:
+                nr = (r - 1) // st[0] + 1
+            else:
+                nr = max(0, (r - 1 - ds[0]) // st[0] + 1) + 1
            if isinstance(c, theano.Variable):
-                rval[-1] = tensor.switch(c % ds[1], rval[-1] + 1, rval[-1])
+                nc = tensor.switch(tensor.ge(st[1], ds[1]),
-            elif c % ds[1]:
+                                   (c - 1) // st[1] + 1,
-                rval[-1] += 1
+                                   tensor.maximum(0, (c - 1 - ds[1])
+                                                  // st[1] + 1) + 1)
+            elif st[1] >= ds[1]:
+                nc = (c - 1) // st[1] + 1
+            else:
+                nc = max(0, (c - 1 - ds[1]) // st[1] + 1) + 1
+        rval = list(imgshape[:-2]) + [nr, nc]
        return rval
-    def __init__(self, ds, ignore_border):
+    def __init__(self, ds, ignore_border, st=None):
        self.ds = tuple(ds)
        self.ignore_border = ignore_border
+        if st is None:
+            st = ds
+        self.st = tuple(st)
    def __eq__(self, other):
        return (type(self) == type(other)
                and self.ds == other.ds
+                and self.st == other.st
                and self.ignore_border == other.ignore_border)
    def __hash__(self):
-        return hash(type(self)) ^ hash(self.ds) ^ hash(self.ignore_border)
+        return hash(type(self)) ^ hash(self.ds) ^ \
+            hash(self.st) ^ hash(self.ignore_border)
    def __str__(self):
-        return '%s{%s,%s}' % (self.__class__.__name__, self.ds,
+        return '%s{%s,%s,%s}' % (self.__class__.__name__,
-                              self.ignore_border)
+                                 self.ds, self.st, self.ignore_border)
    def make_node(self, x, maxout, gz):
        # make_node should only be called by the grad function of
@@ -491,38 +594,40 @@ class DownsampleFactorMaxGradGrad(Op):
        return Apply(self, [x, maxout, gz], [x.type()])
    def perform(self, node, inp, out):
        x, maxout, ggx = inp
        z, = out
        if len(x.shape) != 4:
            raise NotImplementedError(
                'DownsampleFactorMaxGradGrad requires 4D input for now')
-        z_shape = self.out_shape(x.shape, self.ds, self.ignore_border)
+        z_shape = self.out_shape(x.shape, self.ds, self.ignore_border, self.st)
        if (z[0] is None) or (z[0].shape != z_shape):
-            z[0] = numpy.zeros(
+            z[0] = numpy.zeros(self.out_shape(x.shape, self.ds,
-                self.out_shape(x.shape, self.ds, self.ignore_border))
+                                              self.ignore_border, self.st))
            z[0] = theano._asarray(z[0], dtype=x.dtype)
        ggz = z[0]
+        #number of pooling output rows
+        pr = ggz.shape[-2]
+        #number of pooling output cols
+        pc = ggz.shape[-1]
        ds0, ds1 = self.ds
-        if self.ignore_border:
+        st0, st1 = self.st
-            x_usable2 = (x.shape[2] // ds0 * ds0)
+        img_rows = x.shape[-2]
-        else:
+        img_cols = x.shape[-1]
-            x_usable2 = x.shape[2]
-        if self.ignore_border:
-            x_usable3 = (x.shape[3] // ds1 * ds1)
-        else:
-            x_usable3 = x.shape[3]
        for n in xrange(x.shape[0]):
            for k in xrange(x.shape[1]):
-                for i in xrange(x_usable2):
+                for r in xrange(pr):
-                    zi = i // ds0
+                    row_st = r * st0
-                    for j in xrange(x_usable3):
+                    row_end = __builtin__.min(row_st + ds0, img_rows)
-                        zj = j // ds1
+                    for c in xrange(pc):
-                        if (maxout[n, k, zi, zj] == x[n, k, i, j]):
+                        col_st = c * st1
-                            ggz[n, k, zi, zj] = ggx[n, k, i, j]
+                        col_end = __builtin__.min(col_st + ds1, img_cols)
+                        for row_ind in xrange(row_st, row_end):
+                            for col_ind in xrange(col_st, col_end):
+                                if (maxout[n, k, r, c] == x[n, k, row_ind, col_ind]):
+                                    ggz[n, k, r, c] = ggx[n, k, row_ind, col_ind]
    def infer_shape(self, node, in_shapes):
        return [in_shapes[0]]
--- a/theano/tensor/signal/tests/test_downsample.py
+++ b/theano/tensor/signal/tests/test_downsample.py
 import unittest
+import __builtin__
 import numpy
 import theano.tensor as tensor
 from theano.tests import unittest_tools as utt
@@ -14,8 +15,8 @@ class TestDownsampleFactorMax(utt.InferShapeTester):
        '''Helper function, implementing max_pool_2d in pure numpy'''
        if len(input.shape) < 2:
            raise NotImplementedError('input should have at least 2 dim,'
-                                      ' shape is %s'\
+                                      ' shape is %s'
-                    % str(input.shape))
+                                      % str(input.shape))
        xi = 0
        yi = 0
        if not ignore_border:
@@ -37,6 +38,64 @@ class TestDownsampleFactorMax(utt.InferShapeTester):
                    output_val[k][i, j] = numpy.max(patch)
        return output_val
+    @staticmethod
+    def numpy_max_pool_2d_stride(input, ds, ignore_border=False, st=None):
+        '''Helper function, implementing max_pool_2d in pure numpy
+           this function provides st input to indicate the stride size
+           for the pooling regions. if not indicated, st == ds.'''
+        if len(input.shape) < 2:
+            raise NotImplementedError('input should have at least 2 dim,'
+                                      ' shape is %s'
+                                      % str(input.shape))
+        if st is None:
+            st = ds
+        xi = 0
+        yi = 0
+        img_rows = input.shape[-2]
+        img_cols = input.shape[-1]
+        out_r = 0
+        out_c = 0
+        if img_rows - ds[0] >= 0:
+            out_r = (img_rows - ds[0]) // st[0] + 1
+        if img_cols - ds[1] >= 0:
+            out_c = (img_cols - ds[1]) // st[1] + 1
+        if not ignore_border:
+            if out_r > 0:
+                if img_rows - ((out_r - 1) * st[0] + ds[0]) > 0:
+                    rr = img_rows - out_r * st[0]
+                    if rr > 0:
+                        out_r += 1
+            else:
+                if img_rows > 0:
+                        out_r += 1
+            if out_c > 0:
+                if img_cols - ((out_c - 1) * st[1] + ds[1]) > 0:
+                    cr = img_cols - out_c * st[1]
+                    if cr > 0:
+                        out_c += 1
+            else:
+                if img_cols > 0:
+                        out_c += 1
+        out_shp = list(input.shape[:-2])
+        out_shp.append(out_r)
+        out_shp.append(out_c)
+        output_val = numpy.zeros(out_shp)
+        for k in numpy.ndindex(*input.shape[:-2]):
+            for i in range(output_val.shape[-2]):
+                ii_st = i * st[0]
+                ii_end = __builtin__.min(ii_st + ds[0], img_rows)
+                for j in range(output_val.shape[-1]):
+                    jj_st = j * st[1]
+                    jj_end = __builtin__.min(jj_st + ds[1], img_cols)
+                    patch = input[k][ii_st:ii_end, jj_st:jj_end]
+                    output_val[k][i, j] = numpy.max(patch)
+        return output_val
    def test_DownsampleFactorMax(self):
        rng = numpy.random.RandomState(utt.fetch_seed())
        # generate random images
@@ -59,10 +118,83 @@ class TestDownsampleFactorMax(utt.InferShapeTester):
                #DownsampleFactorMax op
                maxpool_op = DownsampleFactorMax(maxpoolshp,
-                                                 ignore_border=ignore_border)(images)
+                                                 ignore_border=
+                                                 ignore_border)(images)
                f = function([images], maxpool_op)
                output_val = f(imval)
-                assert (numpy.abs(output_val - numpy_output_val) < 1e-5).all()
+                utt.assert_allclose(output_val, numpy_output_val)
+    def test_DownsampleFactorMaxStride(self):
+        rng = numpy.random.RandomState(utt.fetch_seed())
+        maxpoolshps = ((1, 1), (3, 3), (5, 3))
+        stridesizes = ((1, 1), (3, 3), (5, 7))
+        # generate random images
+        imval = rng.rand(4, 10, 16, 16)
+        outputshps = ((4, 10, 16, 16), (4, 10, 6, 6), (4, 10, 4, 3),
+                      (4, 10, 16, 16), (4, 10, 6, 6), (4, 10, 4, 3),
+                      (4, 10, 14, 14), (4, 10, 5, 5), (4, 10, 3, 2),
+                      (4, 10, 14, 14), (4, 10, 6, 6), (4, 10, 4, 3),
+                      (4, 10, 12, 14), (4, 10, 4, 5), (4, 10, 3, 2),
+                      (4, 10, 12, 14), (4, 10, 5, 6), (4, 10, 4, 3))
+        images = tensor.dtensor4()
+        indx = 0
+        for maxpoolshp in maxpoolshps:
+            for ignore_border in [True, False]:
+                for stride in stridesizes:
+                    outputshp = outputshps[indx]
+                    indx += 1
+                    #DownsampleFactorMax op
+                    numpy_output_val = \
+                        self.numpy_max_pool_2d_stride(imval, maxpoolshp,
+                                                      ignore_border, stride)
+                    assert numpy_output_val.shape == outputshp, (
+                        "outshape is %s, calculated shape is %s"
+                        % (outputshp, numpy_output_val.shape))
+                    maxpool_op = \
+                        DownsampleFactorMax(maxpoolshp,
+                                            ignore_border=ignore_border,
+                                            st=stride)(images)
+                    f = function([images], maxpool_op)
+                    output_val = f(imval)
+                    utt.assert_allclose(output_val, numpy_output_val)
+    def test_DownsampleFactorMaxStrideExtra(self):
+        rng = numpy.random.RandomState(utt.fetch_seed())
+        maxpoolshps = ((5, 3), (5, 3), (5, 3), (5, 5), (3, 2), (7, 7), (9, 9))
+        stridesizes = ((3, 2), (7, 5), (10, 6), (1, 1),
+                       (2, 3), (10, 10), (1, 1))
+        imvsizs = ((16, 16), (16, 16), (16, 16), (8, 5),
+                   (8, 5), (8, 5), (8, 5))
+        outputshps = ((4, 10, 4, 7), (4, 10, 5, 8), (4, 10, 2, 3),
+                      (4, 10, 3, 4), (4, 10, 2, 3), (4, 10, 2, 3),
+                      (4, 10, 4, 1), (4, 10, 4, 1), (4, 10, 3, 2),
+                      (4, 10, 4, 2), (4, 10, 1, 0), (4, 10, 1, 1),
+                      (4, 10, 0, 0), (4, 10, 1, 1))
+        images = tensor.dtensor4()
+        for indx in numpy.arange(len(maxpoolshps)):
+            imvsize = imvsizs[indx]
+            imval = rng.rand(4, 10, imvsize[0], imvsize[1])
+            stride = stridesizes[indx]
+            maxpoolshp = maxpoolshps[indx]
+            for ignore_border in [True, False]:
+                indx_out = indx * 2
+                if not ignore_border:
+                    indx_out += 1
+                outputshp = outputshps[indx_out]
+                #DownsampleFactorMax op
+                numpy_output_val = \
+                    self.numpy_max_pool_2d_stride(imval, maxpoolshp,
+                                                  ignore_border, stride)
+                assert numpy_output_val.shape == outputshp, (
+                    "outshape is %s, calculated shape is %s"
+                    % (outputshp, numpy_output_val.shape))
+                maxpool_op = \
+                    DownsampleFactorMax(maxpoolshp,
+                                        ignore_border=ignore_border,
+                                        st=stride)(images)
+                f = function([images], maxpool_op)
+                output_val = f(imval)
+                utt.assert_allclose(output_val, numpy_output_val)
    def test_DownsampleFactorMax_grad(self):
        rng = numpy.random.RandomState(utt.fetch_seed())
@@ -76,7 +208,8 @@ class TestDownsampleFactorMax(utt.InferShapeTester):
                #print 'ignore_border =', ignore_border
                def mp(input):
                    return DownsampleFactorMax(maxpoolshp,
-                                    ignore_border=ignore_border)(input)
+                                               ignore_border=
+                                               ignore_border)(input)
                utt.verify_grad(mp, [imval], rng=rng)
    def test_DownsampleFactorMaxGrad_grad(self):
@@ -133,7 +266,10 @@ class TestDownsampleFactorMax(utt.InferShapeTester):
                                                          ignore_border)
                output = max_pool_2d(images, maxpoolshp, ignore_border)
                output_val = function([images], output)(imval)
-                assert numpy.all(output_val == numpy_output_val)
+                assert numpy.all(output_val == numpy_output_val), (
+                    "output_val is %s, numpy_output_val is %s"
+                    % (output_val, numpy_output_val))
                def mp(input):
                    return max_pool_2d(input, maxpoolshp, ignore_border)
                utt.verify_grad(mp, [imval], rng=rng)
@@ -152,15 +288,17 @@ class TestDownsampleFactorMax(utt.InferShapeTester):
                                                          ignore_border)
                output = max_pool_2d(images, maxpoolshp, ignore_border)
                output_val = function([images], output)(imval)
-                assert numpy.all(output_val == numpy_output_val)
+                assert numpy.all(output_val == numpy_output_val), (
+                    "output_val is %s, numpy_output_val is %s"
+                    % (output_val, numpy_output_val))
                c = tensor.sum(output)
                c_val = function([images], c)(imval)
                g = tensor.grad(c, images)
                g_val = function([images],
-                        [g.shape,
+                                 [g.shape,
-                            tensor.min(g, axis=(0, 1, 2)),
+                                 tensor.min(g, axis=(0, 1, 2)),
-                            tensor.max(g, axis=(0, 1, 2))]
+                                 tensor.max(g, axis=(0, 1, 2))]
-                        )(imval)
+                                 )(imval)
 #removed as already tested in test_max_pool_2d_2D
 #This make test in debug mode too slow.
@@ -209,19 +347,20 @@ class TestDownsampleFactorMax(utt.InferShapeTester):
                # checking shapes generated by DownsampleFactorMax
                self._compile_and_check([image],
-                        [DownsampleFactorMax(maxpoolshp,
+                                        [DownsampleFactorMax(maxpoolshp,
-                        ignore_border=ignore_border)(image)],
+                                        ignore_border=ignore_border)(image)],
-                        [image_val], DownsampleFactorMax)
+                                        [image_val], DownsampleFactorMax)
                # checking shapes generated by DownsampleFactorMaxGrad
                maxout_val = rng.rand(*out_shapes[i][j])
                gz_val = rng.rand(*out_shapes[i][j])
                self._compile_and_check([image, maxout, gz],
-                        [DownsampleFactorMaxGrad(maxpoolshp,
+                                        [DownsampleFactorMaxGrad(maxpoolshp,
-                        ignore_border=ignore_border)(image, maxout, gz)],
+                                        ignore_border=ignore_border)
-                        [image_val, maxout_val, gz_val],
+                                        (image, maxout, gz)],
+                                        [image_val, maxout_val, gz_val],
                                        DownsampleFactorMaxGrad,
-                        warn=False)
+                                        warn=False)
 if __name__ == '__main__':