Add GpuCorr3d optimization tests

b0bacd7b · Nicolas Ballas · affc0f7f · b0bacd7b · b0bacd7b
--- a/theano/sandbox/cuda/opt.py
+++ b/theano/sandbox/cuda/opt.py
@@ -1371,21 +1371,19 @@ def local_convgrad3d_gemm(node):
    except tensor.NotScalarConstantError:
        return False
    if isinstance(node.op, ConvGrad3D):
        # Shuffle inputs signal from (b, 0, 1, t, c) to (b, c, 0, 1, t)
        x = node.inputs[0]
        x = gpu_contiguous(x.dimshuffle(0, 4, 1, 2, 3))
        # Shuffle dCdH from (b, 0, 1, t, oc) to (b, oc, 0, 1, t)
-        f = node.input[3]
+        f = node.inputs[3]
        f = gpu_contiguous(f.dimshuffle(0, 4, 1, 2, 3))
-        f = node.inputs[3]
+        rval = GpuCorr3dMM_gradWeights(subsample=(sx, sy, sz))(x, f,
-        f = f.dimshuffle(4, 0, 1, 2, 3)
-        rval = Gpucorr3dMM_gradWeights(subsample=(sx, sy, sz))(x, f,
                                                               shape=node.inputs[2])
        # Shuffle from (oc, ic, 0, 1, t) to (oc, 0, 1, t, ic)
        return [rval.dimshuffle(0, 2, 3, 4, 1)]
 gpu_optimizer.register("convgrad3d_gemm", local_convgrad3d_gemm)
 @local_optimizer([ConvTransp3D])
@@ -1403,8 +1401,7 @@ def local_convtransp3d_gemm(node):
        # Shuffle dCdH from (b, 0, 1, t, oc) to (b, oc, 0, 1, t)
        f = node.inputs[3]
        f = gpu_contiguous(f.dimshuffle(0, 4, 1, 2, 3))
-        # filter flip
+        rval = GpuCorr3dMM_gradInputs(subsample=(sx, sy, sz))(kern=x, topgrad=f)
-        rval = GpuCorr3DMM(border_mode='full', subsample=(sx, sy, sz))(f, x)
        # Shuffle from (b, ic, 0, 1, t) to (b, 0, 1, t, ic)
        return [rval.dimshuffle(0, 2, 3, 4, 1) + node.inputs[1]]

--- a/theano/sandbox/cuda/tests/test_gemmcorr3d.py
+++ b/theano/sandbox/cuda/tests/test_gemmcorr3d.py
@@ -10,7 +10,7 @@ import theano.sandbox.cuda as cuda_ndarray
 if not cuda_ndarray.cuda_available:
    raise SkipTest('Optional package cuda not available')
 from theano.sandbox.cuda import float32_shared_constructor as shared
-from  theano.sandbox.cuda.blas import GpuCorr3dMM, GpuCorr3dMM_gradWeights, GpuCorr3dMM_gradInputs, GpuCorr3dMM_gradInputs
+from  theano.sandbox.cuda.blas import GpuCorr3dMM, GpuCorr3dMM_gradWeights, GpuCorr3dMM_gradInputs
 from theano.sandbox.cuda.basic_ops import gpu_contiguous
 if theano.config.mode == 'FAST_COMPILE':
@@ -157,3 +157,84 @@ class TestCorr3DMM(unittest.TestCase):
                           filters_shape=(10, 6, 12, 4, 1),
                           subsample=(3,1,2))
+    def test_opt_conv3d_gemm(self):
+        inputs_shape = (16, 20, 32, 16, 1)
+        filters_shape = (10, 6, 12, 4, 1)
+        inputs_val = numpy.random.random(inputs_shape).astype('float32')
+        filters_val = numpy.random.random(filters_shape).astype('float32')
+        inputs = shared(inputs_val)
+        filters = shared(filters_val)
+        bias = shared(numpy.zeros(filters_shape[0]).astype('float32'))
+        conv = theano.tensor.nnet.conv3D(V=inputs, W=filters,
+                                         b=bias, d=(1,1,1))
+        mode = mode_with_gpu.including('conv3d_gemm')
+        f_ref = theano.function([], conv)
+        f_gemm = theano.function([], conv, mode=mode)
+        # make sure we inserted the gemm trickery
+        topo = f_gemm.maker.fgraph.toposort()
+        assert sum(isinstance(n.op, GpuCorr3dMM) for n in topo) > 0
+        res_ref = f_ref()
+        res_gemm = f_gemm()
+        utt.assert_allclose(res_ref, res_gemm)
+    def test_opt_convgrad3d_gemm(self):
+        inputs_shape = (16, 20, 32, 16, 1)
+        filters_shape = (10, 6, 12, 4, 1)
+        dCdH_shape = (16, 15, 21, 13, 10)
+        inputs_val = numpy.random.random(inputs_shape).astype('float32')
+        dCdH_val = numpy.random.random(dCdH_shape).astype('float32')
+        inputs = shared(inputs_val)
+        dCdH = shared(dCdH_val)
+        conv = theano.tensor.nnet.convGrad3D(V=inputs, dCdH=dCdH,
+                                             WShape=filters_shape,
+                                             d=(1,1,1))
+        mode = mode_with_gpu.including('convgrad3d_gemm')
+        f_ref = theano.function([], conv)
+        f_gemm = theano.function([], conv, mode=mode)
+        # make sure we inserted the gemm trickery
+        topo = f_gemm.maker.fgraph.toposort()
+        assert sum(isinstance(n.op, GpuCorr3dMM_gradWeights) for n in topo) > 0
+        res_ref = f_ref()
+        res_gemm = f_gemm()
+        utt.assert_allclose(res_ref, res_gemm,  rtol=1e-04, atol=1e-04)
+    def test_opt_convtransp3d_gemm(self):
+        inputs_shape = (16, 15, 21, 12, 10)
+        filters_shape = (10, 6, 12, 4, 1)
+        inputs_val = numpy.random.random(inputs_shape).astype('float32')
+        filters_val = numpy.random.random(filters_shape).astype('float32')
+        bias = shared(numpy.zeros(filters_shape[4]).astype('float32'))
+        inputs = shared(inputs_val)
+        filters = shared(filters_val)
+        conv = theano.tensor.nnet.convTransp3D(W=filters, b=bias, d=(1,1,1),
+                                               H=inputs)
+        mode = mode_with_gpu.including('convtransp3d_gemm')
+        f_ref = theano.function([], conv)
+        f_gemm = theano.function([], conv, mode=mode)
+        # make sure we inserted the gemm trickery
+        topo = f_gemm.maker.fgraph.toposort()
+        assert sum(isinstance(n.op, GpuCorr3dMM_gradInputs) for n in topo) > 0
+        res_ref = f_ref()
+        res_gemm = f_gemm()
+        utt.assert_allclose(res_ref, res_gemm, rtol=1e-04, atol=1e-04)