update test

33d35144 · Nicolas Ballas · Pascal Lamblin · 12cc6f02 · 33d35144 · 33d35144
--- a/theano/tensor/nnet/abstract_conv2d.py
+++ b/theano/tensor/nnet/abstract_conv2d.py
@@ -539,7 +539,6 @@ def local_conv2d_gradinputs_corrmm(node):
 @local_optimizer([AbstractConv2d])
 def local_conv2d_cpu(node):

-    import pdb; pdb.set_trace()
    if not isinstance(node.op, AbstractConv2d):
        return None

@@ -559,24 +558,29 @@ register_specialize_device(local_conv2d_cpu)
 @local_optimizer([AbstractConv2d_gradWeights])
 def local_conv2d_gradweight_cpu(node):

-    import pdb; pdb.set_trace()
-    ## len is 4 all the time
    img, topgrad, shape = node.inputs
+
    if isinstance(img.type, CudaNdarrayType) or \
            isinstance(topgrad.type, CudaNdarrayType):
        return None
+    if node.op.border_mode not in ['full', 'valid']:
+        return None

-    if (node.op.border_mode == 'valid' and node.op.subsample != (1, 1)) or \
-            node.op.imshp is None or node.op.kshp is None:
+
+    if node.op.border_mode == 'valid' and \
+            (node.op.subsample != (1, 1) or node.op.imshp is None or node.op.kshp is None):
        # Use the gradient as defined in conv3D, because the implementation
        # by Conv is slow (about 3x slower than conv3D, and probably 10x
        # slower than it could be), nad incorrect when subsample > 2.
        # build a "node", that should be equivalent to the one given by
        # self.make_node, but using convGrad3D instead.

+        if not node.op.filter_flip:
+            topgrad = topgrad[:, :, ::-1, ::-1]  # flip them
+
+
        shuffled_img = img.dimshuffle(0, 2, 3, 'x', 1)
        shuffled_topgrad = topgrad.dimshuffle(0, 2, 3, 'x', 1)
-        print shape
        rval = convGrad3D(V=shuffled_img,
                          d=(node.op.subsample[0], node.op.subsample[1], 1),
                          WShape=(shape[0], shape[2], shape[3], 1, shape[1]),
@@ -585,10 +589,11 @@ def local_conv2d_gradweight_cpu(node):
        rval = theano.tensor.addbroadcast(rval, 3)
        return [rval.dimshuffle(0, 4, 1, 2)]

+    if node.op.imshp is None or node.op.kshp is None:
+        return None
+
    ####### Determine gradient on kernels ########
    assert len(node.op.imshp) == 4 and len(node.op.kshp) == 4
-    print "here0", node.op.imshp[2:], node.op.kshp[2:]
-    import pdb; pdb.set_trace()

    outshp = ConvOp.getOutputShape(node.op.imshp[2:],
                                   node.op.kshp[2:],  node.op.subsample,
@@ -596,23 +601,19 @@ def local_conv2d_gradweight_cpu(node):
    fulloutshp = ConvOp.getOutputShape(node.op.imshp[2:],
                                       node.op.kshp[2:], (1, 1),
                                       node.op.border_mode)
-    print outshp, fulloutshp


-    #newimg = img.dimshuffle((1, 0, 2, 3))
-    #newtopgrad = topgrad.dimshuffle((1, 0, 2, 3))
-    newimg = img
-    newtopgrad = topgrad
+    newimg = img.dimshuffle((1, 0, 2, 3))
+    newtopgrad = topgrad.dimshuffle((1, 0, 2, 3))

    if node.op.border_mode == 'valid':
-        print "here1", node.op.imshp, node.op.kshp, fulloutshp
        (img, filters) = (newimg, newtopgrad)
        kshp_logical = fulloutshp
        kshp_logical_top_aligned = False
        imshp_logical = None
-        (bsize, nkern) = (node.op.imshp[0], node.op.kshp[0])
-        imshp = (bsize, node.op.imshp[1], node.op.imshp[2])
-        kshp = node.op.kshp[2:]
+        (bsize, nkern) = (node.op.imshp[1], node.op.kshp[0])
+        imshp = (node.op.imshp[0], node.op.imshp[2], node.op.imshp[3])
+        kshp = outshp
    elif node.op.border_mode == 'full':
        (img, filters) = (newtopgrad, newimg)
        kshp_logical = None
@@ -622,25 +623,20 @@ def local_conv2d_gradweight_cpu(node):
                         fulloutshp[1])
        (bsize, nkern) = (node.op.kshp[0], node.op.imshp[1])
        imshp = (node.op.imshp[0], outshp[0], outshp[1])
-        kshp = node.op.imshp[1:]
+        kshp = node.op.imshp[2:]
    else:
        raise NotImplementedError(
            'Only [full,valid] modes are currently supported.')

-    print "here2", node.op.imshp, node.op.kshp, fulloutshp
-
    if node.op.filter_flip:
        filters = filters[:, :, ::-1, ::-1]  # flip them
-
    dw = ConvOp(imshp, kshp, nkern, bsize, 1, 1, output_mode='valid',
                unroll_batch=None, unroll_kern=None, unroll_patch=None,
                imshp_logical=imshp_logical,
                kshp_logical=kshp_logical,
                kshp_logical_top_aligned=kshp_logical_top_aligned,
                direction_hint='bprop weights')
-    #dw = ConvOp(output_mode='valid')
    res = dw(img, filters)
-    print "here3", node.op.imshp, node.op.kshp, fulloutshp
    res = res.dimshuffle((1, 0, 2, 3))
    return [res]
 register_specialize_device(local_conv2d_gradweight_cpu)
@@ -649,53 +645,50 @@ register_specialize_device(local_conv2d_gradweight_cpu)
 @local_optimizer([AbstractConv2d_gradInputs])
 def local_conv2d_gradinputs_cpu(node):

-    import pdb; pdb.set_trace()
    kern, topgrad, shape = node.inputs

+
    if  isinstance(kern.type, CudaNdarrayType) or \
            isinstance(topgrad.type, CudaNdarrayType):
        return None

-    print "here4a", node.op.imshp, node.op.kshp
-
-
+    if node.op.border_mode not in ['full', 'valid']:
+        return None

-    if node.op.border_mode == 'valid' and node.op.subsample != (1, 1):
-        # Use the gradient as defined in conv3D, because the implementation
-        # by Conv is slow (about 3x slower than conv3D, and probably 10x
-        # slower than it could be), nad incorrect when subsample > 2.
-        # build a "node", that should be equivalent to the one given by
-        # self.make_node, but using convGrad3D instead.
+    ### Conv3D-based implementation: used in 'valid' mode when subsample != (1, 1) or when imshp/kshp is not provided
+    if node.op.border_mode == 'valid' and \
+            (node.op.subsample != (1, 1) or node.op.imshp is None or node.op.kshp is None):
+        if node.op.filter_flip:
+            kern = kern[:, :, ::-1, ::-1]
        shuffled_kern = kern.dimshuffle(0, 2, 3, 'x', 1)
        shuffled_topgrad = topgrad.dimshuffle(0, 2, 3, 'x', 1)
-        b = T.zeros((kern.shape[1]))
-        rval = ConvTransp3D(W=shuffled_kern, b=b,
-                            d=(op.subsample[0], op.subsample[1], 1),
+        b = theano.tensor.zeros_like(shuffled_kern[0, 0, 0, 0, :])
+        rval = convTransp3D(W=shuffled_kern, b=b,
+                            d=(node.op.subsample[0], node.op.subsample[1], 1),
                            H=shuffled_topgrad,
-                            RShape=(shape[0], shape[1], 1))
+                            RShape=(shape[2], shape[3], 1))
+        rval = theano.tensor.addbroadcast(rval, 3)
        return [rval.dimshuffle(0, 4, 1, 2)]

-    ####### Determine gradient on inputs ########
+    ### Conv2d Implementation
+    if node.op.imshp is None or node.op.kshp is None:
+        return None
    mode = 'valid'
    if not node.op.border_mode == 'full':
        mode = 'full'
-
    filters = kern.dimshuffle((1, 0, 2, 3))
+    if node.op.filter_flip:
+        filters = filters[:, :, ::-1, ::-1]
+
    outshp = ConvOp.getOutputShape(node.op.imshp[2:],
                                   node.op.kshp[2:],  node.op.subsample,
                                   node.op.border_mode)
    fulloutshp = ConvOp.getOutputShape(node.op.imshp[2:],
                                       node.op.kshp[2:], (1, 1),
                                       node.op.border_mode)
-    nkern = node.op.kshp[1]
-    imshp = (nkern, outshp[0], outshp[1])
-    imshp_logical = (nkern, fulloutshp[0], fulloutshp[1])
-
-    if node.op.filter_flip:
-        filters = filters[:, :, ::-1, ::-1]
-
-
-    print "here4",  imshp, node.op.kshp, nkern
+    nkern = node.op.imshp[1]
+    imshp = (node.op.kshp[0], outshp[0], outshp[1])
+    imshp_logical = (node.op.kshp[0], fulloutshp[0], fulloutshp[1])
    din = ConvOp(imshp,
                 node.op.kshp[2:],
                 nkern,

--- a/theano/tensor/nnet/tests/test_abstractconv.py
+++ b/theano/tensor/nnet/tests/test_abstractconv.py
@@ -9,43 +9,54 @@ from nose.plugins.skip import SkipTest
 import theano.tensor.nnet.conv as conv_ref
 import theano.tensor.nnet.abstract_conv2d as conv

-from theano.sandbox.cuda import float32_shared_constructor as shared
+from theano.sandbox.cuda import float32_shared_constructor as gpu_shared
+from theano.compile import shared as cpu_shared
+
 from theano.sandbox.cuda.tests.test_conv_cuda_ndarray import py_conv
 #from theano.sandbox.cuda.dnn import dnn_available


 if theano.config.mode == 'FAST_COMPILE':
-    #mode_with_gpu = theano.compile.mode.get_mode('FAST_RUN').including('gpu')
+    mode_with_gpu = theano.compile.mode.get_mode('FAST_RUN').including('gpu')
    mode_without_gpu = theano.compile.mode.get_mode('FAST_RUN').excluding('gpu')
 else:
-    #mode_with_gpu = theano.compile.mode.get_default_mode().including('gpu')
-    mode_without_gpu = theano.compile.mode.get_mode('FAST_RUN').excluding('gpu')
+    mode_with_gpu = theano.compile.mode.get_default_mode().including('gpu')
+    mode_without_gpu = theano.compile.get_default_mode().excluding('gpu')


 class TestConv2d(unittest.TestCase):

-    def run_conv(self,
-                 inputs_shape,
-                 filters_shape,
-                 subsample=(1, 1),
-                 verify_grad=True,
-                 mode=mode_without_gpu):
+    def run_fwd(self,
+                inputs_shape,
+                filters_shape,
+                subsample=(1, 1),
+                verify_grad=True,
+                mode=mode_without_gpu,
+                border_mode='valid',
+                device='gpu',
+                provide_shape=False):

        inputs_val = numpy.random.random(inputs_shape).astype('float32')
        filters_val = numpy.random.random(filters_shape).astype('float32')

-        ### FIXME (CPU vs GPU)
-        inputs = theano.tensor.shared(inputs_val)
-        filters = theano.tensor.shared(filters_val)
-
+        if device == 'gpu':
+            inputs = gpu_shared(inputs_val)
+            filters = gpu_shared(filters_val)
+        else:
+            inputs = cpu_shared(inputs_val)
+            filters = cpu_shared(filters_val)
+        if provide_shape:
+            imshp = inputs_shape
+            kshp = filters_shape
+        else:
+            imshp = None
+            kshp = None

        c_ref = conv_ref.conv2d(inputs, filters,
-                                border_mode="valid",
+                                border_mode=border_mode,
                                subsample=subsample)
-
        c = conv.conv2d(inputs, filters,
-                        border_mode="valid", subsample=subsample)
-
+                        border_mode=border_mode, subsample=subsample)

        f_ref = theano.function([], c_ref, mode=mode)
        f = theano.function([], c, mode)
@@ -56,8 +67,8 @@ class TestConv2d(unittest.TestCase):
        utt.assert_allclose(res_ref, res)
        if verify_grad:
            utt.verify_grad(conv.AbstractConv2d(border_mode="valid",
-                                                imshp=inputs_shape,
-                                                kshp=filters_shape,
+                                                imshp=imshp,
+                                                kshp=kshp,
                                                bsize=inputs_shape[0],
                                                subsample=subsample),
                            [inputs_val, filters_val])
@@ -70,6 +81,7 @@ class TestConv2d(unittest.TestCase):
                       subsample=(1, 1),
                       verify_grad=True,
                       mode=mode_without_gpu,
+                       border_mode='valid',
                       device='gpu',
                       provide_shape = False):

@@ -77,29 +89,30 @@ class TestConv2d(unittest.TestCase):
        output_val = numpy.random.random(output_shape).astype('float32')

        if device == 'gpu':
-            inputs = shared(inputs_val)
-            filters = shared(filters_val)
+            inputs = gpu_shared(inputs_val)
+            output = gpu_shared(output_val)
        else:
-            inputs = theano.tensor.shared(inputs_val)
-            output = theano.tensor.shared(output_val)
+            inputs = cpu_shared(inputs_val)
+            output = cpu_shared(output_val)

        if provide_shape:
            imshp = inputs_shape
            kshp = filters_shape
        else:
-            imshp = None,
+            imshp = None
            kshp = None

-        c = conv.AbstractConv2d_gradWeights(border_mode="valid",
+        c = conv.AbstractConv2d_gradWeights(border_mode=border_mode,
                                            subsample=subsample,
                                            imshp = imshp, kshp = kshp)
        c = c(inputs, output, filters_shape)
        f = theano.function([], c, mode)
        res_ref = py_conv(inputs_val.transpose((1, 0, 2, 3)),
-                          output_val.transpose((1, 0, 2, 3)),
+                          output_val.transpose((1, 0, 2, 3))[:, :, ::-1, ::-1],
                          'valid', subsample).transpose((1, 0, 2, 3))
-        print res_ref.shape, numpy.array(f()).shape
        res = numpy.array(f())
+        print res_ref.shape, res.shape
+
        utt.assert_allclose(res_ref, res)
        if verify_grad:
            utt.verify_grad(conv.AbstractConv2d(border_mode="valid",
@@ -110,37 +123,58 @@ class TestConv2d(unittest.TestCase):
    def run_gradinput(self,
                      inputs_shape,
                      filters_shape,
+                      output_shape,
                      subsample=(1, 1),
                      verify_grad=True,
-                      mode=mode_without_gpu):
+                      mode=mode_without_gpu,
+                      border_mode='valid',
+                      device='gpu',
+                      provide_shape = False):

-        inputs_val = numpy.random.random(inputs_shape).astype('float32')
+        output_val = numpy.random.random(output_shape).astype('float32')
        filters_val = numpy.random.random(filters_shape).astype('float32')

-        inputs = shared(inputs_val)
-        filters = shared(filters_val.transpose(1, 0, 2, 3)[:, :, ::-1, ::-1])
+
+        if device == 'gpu':
+            output = gpu_shared(output_val)
+            filters = gpu_shared(filters_val)
+        else:
+            output = cpu_shared(output_val)
+            filters = cpu_shared(filters_val)
+        if provide_shape:
+            imshp = inputs_shape
+            kshp = filters_shape
+        else:
+            imshp = None
+            kshp = None
+
        c = conv.AbstractConv2d_gradInputs(border_mode="valid",
-                                           subsample=subsample)
-        c = c(filters, inputs, inputs_shape)
+                                           subsample=subsample,
+                                           imshp = imshp, kshp = kshp)
+        c = c(filters, output, inputs_shape)
        f = theano.function([], c, mode)
-        res_ref = py_conv(inputs_val, filters_val, 'full', subsample)
-        res = numpy.array(f()) #.transpose((1, 0, 2, 3))
+        res_ref = py_conv(output_val,
+                          filters_val.transpose(1, 0, 2, 3)[:, :, ::-1, ::-1],
+                          'full', subsample)
+        print filters_val.shape, output_val.shape, inputs_shape
+        res = numpy.array(f())
        print "2, ", res_ref.shape, res.shape

        utt.assert_allclose(res_ref, res)
        if verify_grad:
-            utt.verify_grad(conv.AbstractConv2d(border_mode="valid",
-                                                subsample=subsample),
-                            [inputs_val, filters_val])
+            utt.verify_grad(conv.AbstractConv2d_gradInputs(border_mode=border_mode,
+                                                           subsample=subsample),
+                            [filters_val, output_val,
+                             numpy.array(inputs_shape).astype('float32')])



-    # def test_corrmm(self):
-    #     mode = mode_with_gpu
-    #     mode = mode.excluding('cudnn')
-    #     self.run_conv(inputs_shape=(16, 1, 2, 2),
-    #                   filters_shape=(10, 1, 2, 2),
-    #                   verify_grad=False, mode=mode)
+    #def test_corrmm(self):
+    #    mode = mode_with_gpu
+    #    mode = mode.excluding('cudnn')
+    #    self.run_fwd(inputs_shape=(16, 1, 2, 2),
+    #                 filters_shape=(10, 1, 2, 2),
+    #                 verify_grad=False, mode=mode)
    #     self.run_gradweight(inputs_shape=(16, 1, 2, 2),
    #                         filters_shape=(10, 1, 2, 2),
    #                         verify_grad=False, mode=mode)
@@ -149,50 +183,85 @@ class TestConv2d(unittest.TestCase):
    #                        verify_grad=False, mode=mode)


-    #def test_cpu(self):
-        #self.run_conv(inputs_shape=(16, 1, 2, 2),
-        #              filters_shape=(10, 1, 2, 2),
-        #              verify_grad=False,
-        #              mode=mode_without_gpu)
-        # self.run_gradinput(inputs_shape=(1, 1, 2, 2),
-        #                    filters_shape=(10, 1, 2, 2),
-        #                    verify_grad=False, mode=mode_without_gpu)
-
-        # mode = mode_without_gpu
-        # self.run_conv(inputs_shape=(16, 1, 2, 2),
-        #               filters_shape=(10, 1, 2, 2),
-        #               verify_grad=False, mode=mode)
-        # self.run_gradweight(inputs_shape=(16, 1, 2, 2),
-        #                     filters_shape=(10, 1, 2, 2),
-        #                     verify_grad=False, mode=mode)
-        # self.run_gradinput(inputs_shape=(1, 1, 2, 2),
-        #                    filters_shape=(10, 1, 2, 2),
-        #                    verify_grad=False, mode=mode)
-
-
-
-        # # self.run_conv(inputs_shape=(16, 1, 8, 8),
-        # #               filters_shape=(10, 1, 4, 4),
-        # #                subsample=(2, 2),
-        # #               verify_grad=False,mode=mode)
-        # # self.run_conv(inputs_shape=(16, 1, 2, 2),
-        # #               filters_shape=(10, 1, 2, 2),
-        # #               verify_grad=True,mode=mode)
-        # # self.run_conv(inputs_shape=(16, 1, 8, 8),
-        # #               filters_shape=(10, 1, 2, 2),
-        # #               subsample=(2, 2),
-        # #               verify_grad=True,mode=mode)
+
+    def test_cpu_conv(self):
+
+        inputs_shapes =  [(16, 1, 2, 2), (16, 1, 8, 8), (16, 1, 4, 4)]
+        filters_shapes = [(10, 1, 2, 2), (10, 1, 2, 2), (10, 1, 2, 2),]
+        output_shapes =  [(16, 10, 1, 1), (16, 10, 7, 7), (16, 10, 3, 3)]
+        subsamples =     [(1, 1), (1, 1), (1, 1)]
+
+        border_mode= 'valid'
+        for i, f, o, s in zip(inputs_shapes[0:1], filters_shapes[0:1], output_shapes[0:1], subsamples[0:1]):
+            for provide_shape in [True]:
+                self.run_fwd(inputs_shape=i, filters_shape=f, subsample=s,
+                             verify_grad=True, mode=mode_without_gpu, device='cpu',
+                             provide_shape=provide_shape, border_mode=border_mode)
+        return
+        ### No reference implementation is available for the 'full' border mode yet
+        border_mode= 'full'
+        provide_shape = True
+        self.run_gradweight(inputs_shape=(16, 1, 2, 2),
+                            filters_shape=(10, 1, 2, 2),
+                            output_shape=(16, 10, 3, 3),
+                            subsample=(1, 1),
+                            verify_grad=True, mode=mode_without_gpu, device='cpu',
+                            provide_shape=provide_shape, border_mode=border_mode)
+
+
+

    def test_cpu_grad_weight(self):
+
+        ### FIXME: subsample values other than (1, 1) are not tested yet
+        inputs_shapes =  [(16, 1, 2, 2), (16, 1, 8, 8), (16, 1, 4, 4)]
+        filters_shapes = [(10, 1, 2, 2), (10, 1, 2, 2), (10, 1, 2, 2),]
+        output_shapes =  [(16, 10, 1, 1), (16, 10, 7, 7), (16, 10, 3, 3)]
+        subsamples =     [(1, 1), (1, 1), (1, 1)]
+
+        border_mode = 'valid'
+        for i, f, o, s in zip(inputs_shapes[:], filters_shapes[:], output_shapes[:], subsamples[:]):
+            for provide_shape in [False, True]:
+                self.run_gradweight(inputs_shape=i, filters_shape=f,
+                                    output_shape=o, subsample=s,
+                                    verify_grad=False, mode=mode_without_gpu, device='cpu',
+                                    provide_shape=provide_shape, border_mode=border_mode)
+        return
+        ### No reference implementation is available for the 'full' border mode yet
+        border_mode= 'full'
+        provide_shape = True
        self.run_gradweight(inputs_shape=(16, 1, 2, 2),
                            filters_shape=(10, 1, 2, 2),
-                            output_shape=(16, 10, 1, 1),
-                            verify_grad=False, mode=mode_without_gpu, device='cpu')
+                            output_shape=(16, 10, 3, 3),
+                            subsample=(1, 1),
+                            verify_grad=True, mode=mode_without_gpu, device='cpu',
+                            provide_shape=provide_shape, border_mode=border_mode)
+
+
+    def test_cpu_grad_input(self):
+
+        ### FIXME: subsample values other than (1, 1) are not tested yet
+        inputs_shapes =  [(16, 1, 2, 2), (16, 1, 8, 8), (16, 1, 4, 4)]
+        filters_shapes = [(10, 1, 2, 2), (10, 1, 2, 2), (10, 1, 2, 2),]
+        output_shapes =  [(16, 10, 1, 1), (16, 10, 7, 7), (16, 10, 3, 3)]
+        subsamples =     [(1, 1), (1, 1), (1, 1)]
+
+        border_mode= 'valid'
+        for i, f, o, s in zip(inputs_shapes[:], filters_shapes[:], output_shapes[:], subsamples[:]):
+            for provide_shape in [True, False]:
+                self.run_gradinput(inputs_shape=i, filters_shape=f,
+                                   output_shape=o, subsample=s,
+                                   verify_grad=False, mode=mode_without_gpu, device='cpu',
+                                   provide_shape=provide_shape, border_mode=border_mode)
+        return
+        ### No reference implementation is available for the 'full' border mode yet
+        border_mode= 'full'
+        provide_shape = True
        self.run_gradweight(inputs_shape=(16, 1, 2, 2),
                            filters_shape=(10, 1, 2, 2),
-                            output_shape=(16, 10, 1, 1),
-                            verify_grad=False,
-                            mode=mode_without_gpu, device='cpu',
-                            provide_shape=True)
+                            output_shape=(16, 10, 3, 3),
+                            subsample=(1, 1),
+                            verify_grad=False, mode=mode_without_gpu, device='cpu',
+                            provide_shape=provide_shape, border_mode=border_mode)