Commit f8bd5b80 authored by abergeron

Merge pull request #2376 from daemonmaker/cudnn2

Cudnn2
...@@ -368,6 +368,36 @@ class GpuDnnConv(DnnBase, COp): ...@@ -368,6 +368,36 @@ class GpuDnnConv(DnnBase, COp):
# not connected to desc # not connected to desc
return [[1], [1], [0]] return [[1], [1], [0]]
def infer_shape(self, node, shape):
    """Infer the output shape of the cuDNN forward convolution.

    shape[0] is the image shape (batch, channels, height, width) and
    shape[1] is the kernel shape (nb filters, channels, kh, kw).
    node.inputs[2] must be the output of a GpuDnnConvDesc, which
    carries the border mode and subsampling needed for the formula.
    """
    b = shape[0][0]   # number of inputs (batch size)
    h = shape[0][2]   # height of input feature maps
    w = shape[0][3]   # width of input feature maps
    nb = shape[1][0]  # number of output feature maps
    kh = shape[1][2]  # height of each filter
    kw = shape[1][3]  # width of each filter
    padh = 0
    padw = 0
    if (
        not node.inputs[2].owner
        or not isinstance(node.inputs[2].owner.op, GpuDnnConvDesc)
    ):
        # Fixed: was "ShareError", which does not exist in
        # theano.tensor.basic and would itself raise AttributeError.
        raise theano.tensor.basic.ShapeError(
            "case not implemented and probably not needed")
    desc = node.inputs[2].owner.op
    sh, sw = desc.subsample
    if desc.border_mode == 'full':
        padh = kh - 1
        padw = kw - 1
    elif isinstance(desc.border_mode, tuple):
        padh, padw = desc.border_mode
    else:
        assert desc.border_mode == 'valid'
    # Standard convolution output size: (in + 2*pad - kernel)/stride + 1
    return [(
        b, nb,
        (h + 2*padh - kh)/sh + 1,
        (w + 2*padw - kw)/sw + 1
    )]
class GpuDnnConvGradW(DnnBase, COp): class GpuDnnConvGradW(DnnBase, COp):
""" """
...@@ -423,6 +453,40 @@ class GpuDnnConvGradW(DnnBase, COp): ...@@ -423,6 +453,40 @@ class GpuDnnConvGradW(DnnBase, COp):
return Apply(self, [img, topgrad, desc, h, w], return Apply(self, [img, topgrad, desc, h, w],
[CudaNdarrayType(broadcastable)()]) [CudaNdarrayType(broadcastable)()])
def infer_shape(self, node, shape):
    """Infer the shape of the gradient with respect to the filters.

    shape[0] is the image shape and shape[1] the topgrad shape, both
    (batch, channels, height, width).  Only unit strides are supported:
    with subsampling, the filter size cannot be recovered from the
    image and topgrad sizes alone.
    """
    h = shape[0][2]   # height of input feature maps
    w = shape[0][3]   # width of input feature maps
    kh = shape[1][2]  # height of the gradient wrt the output
    kw = shape[1][3]  # width of the gradient wrt the output
    desc = node.inputs[2].owner.op
    sh, sw = desc.subsample
    # We don't have the information necessary, namely the weight size,
    # so we cannot infer the shape.
    if sh != 1 or sw != 1:
        # Qualified for consistency with GpuDnnConv.infer_shape; a
        # bare ShapeError would be a NameError unless imported.
        raise theano.tensor.basic.ShapeError(
            'Unable to infer shape for stride (%d, %d)' % (sh, sw)
        )
    if desc.border_mode == 'full':
        out3 = 2 - h + (kh - 1) * sh
        out4 = 2 - w + (kw - 1) * sw
    else:
        assert desc.border_mode == 'valid'
        out3 = h - (kh - 1) * sh
        out4 = w - (kw - 1) * sw
    return [(
        shape[1][1],
        shape[0][1],
        out3,
        out4
    )]
class GpuDnnConvGradI(DnnBase, COp): class GpuDnnConvGradI(DnnBase, COp):
""" """
...@@ -477,6 +541,38 @@ class GpuDnnConvGradI(DnnBase, COp): ...@@ -477,6 +541,38 @@ class GpuDnnConvGradI(DnnBase, COp):
return Apply(self, [kern, topgrad, desc, h, w], return Apply(self, [kern, topgrad, desc, h, w],
[CudaNdarrayType(broadcastable)()]) [CudaNdarrayType(broadcastable)()])
def infer_shape(self, node, shape):
    """Infer the shape of the gradient with respect to the image.

    shape[0] is the kernel shape and shape[1] the topgrad shape.
    Only unit strides are supported: with subsampling, the image size
    cannot be recovered from the kernel and topgrad sizes alone.
    """
    padh = 0
    padw = 0
    desc = node.inputs[2].owner.op
    sh, sw = desc.subsample
    # We don't have the information necessary, namely the image size,
    # so we cannot infer the shape.
    if sh != 1 or sw != 1:
        # Qualified for consistency with GpuDnnConv.infer_shape; a
        # bare ShapeError would be a NameError unless imported.
        raise theano.tensor.basic.ShapeError(
            'Unable to infer shape for stride (%d, %d)' % (sh, sw)
        )
    if desc.border_mode == 'full':
        padh = shape[0][2] - 1
        padw = shape[0][3] - 1
    elif isinstance(desc.border_mode, tuple):
        padh, padw = desc.border_mode
    else:
        assert desc.border_mode == 'valid'
    # Inverse of the forward output-size formula:
    # in = (out - 1)*stride + kernel - 2*pad
    out2 = (shape[1][2] - 1) * sh + shape[0][2] - 2*padh
    out3 = (shape[1][3] - 1) * sw + shape[0][3] - 2*padw
    return [(
        shape[1][0],
        shape[0][1],
        out2,
        out3
    )]
def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1), def dnn_conv(img, kerns, border_mode='valid', subsample=(1, 1),
conv_mode='conv', direction_hint=None): conv_mode='conv', direction_hint=None):
...@@ -655,6 +751,17 @@ class GpuDnnPool(DnnBase): ...@@ -655,6 +751,17 @@ class GpuDnnPool(DnnBase):
return Apply(self, [img, desc], return Apply(self, [img, desc],
[img.type()]) [img.type()])
def infer_shape(self, node, shape):
    """Infer the pooling output shape.

    Batch and channel dimensions pass through; the spatial dimensions
    shrink according to the descriptor's window size and stride.
    """
    pool_desc = node.inputs[1].owner.op
    win_h, win_w = pool_desc.ws
    stride_h, stride_w = pool_desc.stride
    nb, ch, in_h, in_w = shape[0]
    out_h = (in_h - win_h)/stride_h + 1
    out_w = (in_w - win_w)/stride_w + 1
    return [(nb, ch, out_h, out_w)]
def c_support_code_struct(self, node, name): def c_support_code_struct(self, node, name):
return """ return """
cudnnTensorDescriptor_t input%(name)s; cudnnTensorDescriptor_t input%(name)s;
...@@ -964,6 +1071,9 @@ if (err%(name)s != CUDNN_STATUS_SUCCESS) { ...@@ -964,6 +1071,9 @@ if (err%(name)s != CUDNN_STATUS_SUCCESS) {
def c_code_cache_version(self): def c_code_cache_version(self):
return (4, version()) return (4, version())
def infer_shape(self, node, shape):
    # The single output has exactly the shape of the first input.
    out_shape = shape[0]
    return [out_shape]
def dnn_pool(img, ws, stride=(1, 1), mode='max'): def dnn_pool(img, ws, stride=(1, 1), mode='max'):
""" """
...@@ -1016,6 +1126,12 @@ class GpuDnnSoftmaxBase(DnnBase): ...@@ -1016,6 +1126,12 @@ class GpuDnnSoftmaxBase(DnnBase):
for softmax_input in self.softmax_inputs] for softmax_input in self.softmax_inputs]
self.tensor_4d_descs.append('softmax_output') self.tensor_4d_descs.append('softmax_output')
def infer_shape(self, node, shape):
    # The forward softmax preserves its input shape; the gradient op's
    # output matches its second input instead.
    idx = 0 if self.direction == 'forward' else 1
    return [shape[idx]]
def _define_tensor4d_desc(self, name, id): def _define_tensor4d_desc(self, name, id):
return """ return """
cudnnTensorDescriptor_t %(id)s_%(name)s; cudnnTensorDescriptor_t %(id)s_%(name)s;
...@@ -1129,6 +1245,7 @@ if (CudaNdarray_prep_output(&%(outs)s, 4, CudaNdarray_HOST_DIMS(%(ins)s)) != 0) ...@@ -1129,6 +1245,7 @@ if (CudaNdarray_prep_output(&%(outs)s, 4, CudaNdarray_HOST_DIMS(%(ins)s)) != 0)
class GpuDnnSoftmax(GpuDnnSoftmaxBase): class GpuDnnSoftmax(GpuDnnSoftmaxBase):
direction = 'forward'
softmax_inputs = ['softmax_input'] softmax_inputs = ['softmax_input']
def make_node(self, x): def make_node(self, x):
...@@ -1179,6 +1296,7 @@ err%(name)s = cudnnSoftmaxForward( ...@@ -1179,6 +1296,7 @@ err%(name)s = cudnnSoftmaxForward(
class GpuDnnSoftmaxGrad(GpuDnnSoftmaxBase): class GpuDnnSoftmaxGrad(GpuDnnSoftmaxBase):
direction = 'backward'
softmax_inputs = ['softmax_gout', 'softmax_input'] softmax_inputs = ['softmax_gout', 'softmax_input']
def make_node(self, dy, sm): def make_node(self, dy, sm):
......
...@@ -3,6 +3,7 @@ import unittest ...@@ -3,6 +3,7 @@ import unittest
from nose.plugins.skip import SkipTest from nose.plugins.skip import SkipTest
import numpy import numpy
from itertools import product
import theano import theano
from theano.compat.six import StringIO from theano.compat.six import StringIO
...@@ -12,7 +13,8 @@ import theano.tests.unittest_tools as utt ...@@ -12,7 +13,8 @@ import theano.tests.unittest_tools as utt
from theano.sandbox.neighbours import images2neibs, neibs2images from theano.sandbox.neighbours import images2neibs, neibs2images
from theano.tensor.signal.downsample import max_pool_2d from theano.tensor.signal.downsample import max_pool_2d
from theano.tensor.signal.downsample import DownsampleFactorMaxGrad from theano.tensor.signal.downsample import DownsampleFactorMaxGrad
import theano.sandbox.cuda.dnn as dnn
from theano.sandbox.cuda.basic_ops import gpu_contiguous
# Skip test if cuda_ndarray is not available. # Skip test if cuda_ndarray is not available.
import theano.sandbox.cuda as cuda import theano.sandbox.cuda as cuda
...@@ -194,6 +196,222 @@ def test_dnn_tag(): ...@@ -194,6 +196,222 @@ def test_dnn_tag():
for n in f.maker.fgraph.toposort()]) for n in f.maker.fgraph.toposort()])
class TestDnnInferShapes(utt.InferShapeTester):
    """Shape-inference tests for the cuDNN ops: softmax, convolution
    (and its gradients) and pooling (and its gradient)."""

    def setUp(self):
        super(TestDnnInferShapes, self).setUp()
def test_softmax(self):
    """Check infer_shape for GpuDnnSoftmax and GpuDnnSoftmaxGrad."""
    t = T.tensor4('t')
    t_val = numpy.asarray(
        numpy.random.rand(5, 4, 3, 2),
        dtype=theano.config.floatX
    )
    # Forward softmax.
    softmax_out = dnn.GpuDnnSoftmax('bc01', 'accurate', 'channel')(t)
    self._compile_and_check(
        [t], [softmax_out], [t_val], dnn.GpuDnnSoftmax
    )
    # Gradient of the softmax wrt its input.
    grad_out = T.grad(
        dnn.GpuDnnSoftmax('bc01', 'accurate', 'channel')(t).mean(),
        t
    )
    self._compile_and_check(
        [t], [grad_out], [t_val], dnn.GpuDnnSoftmaxGrad
    )
def test_conv(self):
    """Check infer_shape for GpuDnnConv over border modes, strides and
    conv modes."""
    img = T.tensor4('img')
    kerns = T.tensor4('kerns')
    img_val = numpy.asarray(
        numpy.random.rand(3, 4, 5, 6),
        dtype=theano.config.floatX
    )
    kern_vals = numpy.asarray(
        numpy.random.rand(3, 4, 5, 6),
        dtype=theano.config.floatX
    )
    for params in product(
        ['valid', 'full'],
        [(1, 1), (2, 2)],
        ['conv', 'cross']
    ):
        desc = dnn.GpuDnnConvDesc(
            border_mode=params[0],
            subsample=params[1],
            conv_mode=params[2]
        )(img.shape, kerns.shape)
        # Fixed: build the op on the *symbolic* inputs.  Passing the
        # numpy values (img_val, kern_vals) here made the graph
        # constant and disconnected it from the inputs being checked.
        conv = dnn.GpuDnnConv()(img, kerns, desc)
        self._compile_and_check(
            [img, kerns],
            [conv],
            [img_val, kern_vals],
            dnn.GpuDnnConv
        )
def test_conv_gradw(self):
    """Check infer_shape for GpuDnnConvGradW (gradient wrt filters).

    Only unit strides are exercised: GpuDnnConvGradW.infer_shape
    cannot infer shapes for other strides.
    """
    img = T.tensor4('img')
    kerns = T.tensor4('kerns')
    img_val = numpy.asarray(
        numpy.random.rand(3, 4, 5, 6),
        dtype=theano.config.floatX
    )
    kern_vals = numpy.asarray(
        numpy.random.rand(3, 4, 5, 6),
        dtype=theano.config.floatX
    )
    for params in product(
        ['valid', 'full'],
        [(1, 1)],  # strides besides (1, 1)
        ['conv', 'cross']
    ):
        # Swap batch and channel axes of the image for the gradW op.
        temp_img = img.dimshuffle(1, 0, 2, 3)
        temp_kerns = kerns
        if params[2] == 'conv':
            # Presumably compensates for the kernel flip performed in
            # 'conv' mode — TODO confirm against GpuDnnConvGradW.
            temp_kerns = temp_kerns[:, :, ::-1, ::-1]
        temp_kerns = temp_kerns.dimshuffle(1, 0, 2, 3)
        # Expected filter-gradient shape for a 'valid' convolution:
        # spatial dims are (img - kern + 1).
        shape = theano.tensor.stack(
            temp_kerns.shape[1], temp_img.shape[1],
            temp_img.shape[2] - temp_kerns.shape[2] + 1,
            temp_img.shape[3] - temp_kerns.shape[3] + 1
        )
        desc = dnn.GpuDnnConvDesc(
            border_mode=params[0],
            subsample=params[1],
            conv_mode=params[2]
        )(temp_img.shape, shape)
        conv_grad_w = dnn.GpuDnnConvGradW()(
            temp_img,
            temp_kerns,
            desc,
            shape[2],
            shape[3]
        )
        self._compile_and_check(
            [temp_img, temp_kerns],
            [conv_grad_w],
            [img_val, kern_vals],
            dnn.GpuDnnConvGradW
        )
def test_conv_gradi(self):
    """Check infer_shape for GpuDnnConvGradI (gradient wrt the image).

    Only 'valid' border mode and unit strides are exercised.
    """
    img = T.tensor4('img')
    kerns = T.tensor4('kerns')
    img_val = numpy.asarray(
        numpy.random.rand(3, 4, 5, 6),
        dtype=theano.config.floatX
    )
    kern_vals = numpy.asarray(
        numpy.random.rand(3, 4, 5, 6),
        dtype=theano.config.floatX
    )
    for params in product(
        ['valid'],  # Should this work for 'full'?
        [(1, 1)],
        ['conv', 'cross']
    ):
        # (A leftover debug "print params" statement was removed here.)
        # Swap batch and channel axes of the kernels for the gradI op.
        temp_kerns = kerns.dimshuffle(1, 0, 2, 3)
        # Expected image-gradient shape for a 'valid' convolution:
        # spatial dims are (topgrad + kern - 1).
        shape = theano.tensor.stack(
            img.shape[0], temp_kerns.shape[1],
            img.shape[2] + temp_kerns.shape[2] - 1,
            img.shape[3] + temp_kerns.shape[3] - 1
        )
        desc = dnn.GpuDnnConvDesc(
            border_mode=params[0],
            subsample=params[1],
            conv_mode=params[2]
        )(shape, temp_kerns.shape)
        conv_grad_i = dnn.GpuDnnConvGradI()(
            temp_kerns,
            img,
            desc,
            shape[2],
            shape[3]
        )
        self._compile_and_check(
            [temp_kerns, img],
            [conv_grad_i],
            [kern_vals, img_val],
            dnn.GpuDnnConvGradI
        )
def test_pool(self):
    """Check infer_shape for GpuDnnPool over window sizes, strides and
    pooling modes."""
    img = T.tensor4('img')
    img_val = numpy.asarray(
        numpy.random.rand(2, 3, 4, 5),
        dtype=theano.config.floatX
    )
    for ws, stride, mode in product(
        [(1, 1), (2, 2), (3, 3)],
        [(1, 1), (2, 2), (3, 3)],
        ['max', 'average']
    ):
        desc = dnn.GpuDnnPoolDesc(ws=ws, stride=stride, mode=mode)()
        pool_out = dnn.GpuDnnPool()(img, desc)
        self._compile_and_check(
            [img],
            [pool_out],
            [img_val],
            dnn.GpuDnnPool
        )
def test_pool_grad(self):
    """Check infer_shape for GpuDnnPoolGrad over window sizes, strides
    and pooling modes."""
    img = T.tensor4('img')
    img_grad = T.tensor4('img_grad')
    out = T.tensor4('out')
    img_val = numpy.asarray(
        numpy.random.rand(2, 3, 4, 5),
        dtype=theano.config.floatX
    )
    img_grad_val = numpy.asarray(
        numpy.random.rand(2, 3, 4, 5),
        dtype=theano.config.floatX
    )
    out_val = numpy.asarray(
        numpy.random.rand(2, 3, 4, 5),
        dtype=theano.config.floatX
    )
    for ws, stride, mode in product(
        [(1, 1), (2, 2), (3, 3)],
        [(1, 1), (2, 2), (3, 3)],
        ['max', 'average']
    ):
        desc = dnn.GpuDnnPoolDesc(ws=ws, stride=stride, mode=mode)()
        # Note the op's argument order: (img, out, img_grad, desc).
        pool_grad = dnn.GpuDnnPoolGrad()(img, out, img_grad, desc)
        self._compile_and_check(
            [img, img_grad, out],
            [pool_grad],
            [img_val, img_grad_val, out_val],
            dnn.GpuDnnPoolGrad
        )
def test_version(): def test_version():
if not cuda.dnn.dnn_available(): if not cuda.dnn.dnn_available():
raise SkipTest(cuda.dnn.dnn_available.msg) raise SkipTest(cuda.dnn.dnn_available.msg)
......
...@@ -248,7 +248,7 @@ class InferShapeTester(unittest.TestCase): ...@@ -248,7 +248,7 @@ class InferShapeTester(unittest.TestCase):
numeric_outputs = outputs_function(*numeric_inputs) numeric_outputs = outputs_function(*numeric_inputs)
numeric_shapes = shapes_function(*numeric_inputs) numeric_shapes = shapes_function(*numeric_inputs)
for out, shape in zip(numeric_outputs, numeric_shapes): for out, shape in zip(numeric_outputs, numeric_shapes):
assert numpy.all(out.shape == shape) assert numpy.all(out.shape == shape), (out.shape, shape)
def str_diagnostic(expected, value, rtol, atol): def str_diagnostic(expected, value, rtol, atol):
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论