[Conv3DFFT] Add optimization for GpuConvGrad3D and GpuConvTransp3D

25a0fa36 · Nicolas Ballas · 498f9298 · 25a0fa36
--- a/theano/sandbox/cuda/opt.py
+++ b/theano/sandbox/cuda/opt.py
@@ -1263,28 +1263,19 @@ gpu_optimizer.register("conv_fft_full", local_conv_fft_full)
 from theano.sandbox.cuda.GpuConv3D import GpuConv3D

 def _gpu_conv3d_to_fftconv(node):
-    # shared helper function for local_conv_fft_valid and local_conv_fft_full.
-    # we import conv2d_fft locally to avoid pycuda warnings
+    # we import conv3d_fft locally to avoid pycuda warnings
    from theano.sandbox.cuda.fftconv import conv3d_fft

    # Shuffle inputs signal from (b, 0, 1, t, c) to (b, c, 0, 1, t)
    x = node.inputs[0]
    x = gpu_from_host(x.dimshuffle(0, 4, 1, 2, 3))
-    # Shuflle filters from (oc, 0, 1, t, ic) to (oc, ic, 0, 1, t)
+    # Shuffle filters from (oc, 0, 1, t, ic) to (oc, ic, 0, 1, t)
    f = node.inputs[1]
    f = gpu_from_host(f.dimshuffle(0, 4, 1, 2, 3))

-
-    # TODO: If the user supplied the full nonsymbolic image_shape and
-    # filter_shape in conv2d(), we could pass it on to conv2d_fft(). However,
-    # information on batch size and channel counts is currently discarded
-    # when a ConvOp is replaced by a GpuConv, so this would need more changes.
-    #if (node.op.imshp is not None) and (None not in node.op.imshp):
-    #    kwargs['image_shape'] = (bsize, inchannels) + node.op.imshp
-    #if (node.op.kshp is not None) and (None not in node.op.kshp):
-    #    kwargs['filter_shape'] = (outchannels, inchannels) + node.op.kshp
    rval = conv3d_fft(x, f)
-    # Shuffle back (oc, c, 0, 1, t) to (oc, 0, 1, t, c)
+    
+    # Shuffle back from (b, oc, 0, 1, t) to (b, 0, 1, t, oc)
    rval = gpu_from_host(rval.dimshuffle(0, 2, 3, 4, 1))


@@ -1304,6 +1295,65 @@ def local_conv3d_fft(node):
 gpu_optimizer.register("conv3d_fft", local_conv3d_fft)


+from theano.sandbox.cuda.GpuConvGrad3D import GpuConvGrad3D
+
+def _gpu_convgrad3d_to_fftconv(node):
+    # we import conv3d_fft locally to avoid pycuda warnings
+    from theano.sandbox.cuda.fftconv import conv3d_fft
+    
+    # Shuffle input signal from (b, 0, 1, t, ic) to (ic, b, 0, 1, t)
+    x = node.inputs[0]
+    x = x.dimshuffle(4, 0, 1, 2, 3)
+    # Shuffle dCdH from (b, 0, 1, t, oc) to (oc, b, 0, 1, t)
+    f = node.inputs[3]
+    f = f.dimshuffle(4, 0, 1, 2, 3)
+
+    rval = conv3d_fft(x, f)
+    # Shuffle from (ic, oc, 0, 1, t) to (oc, 0, 1, t, ic)
+    rval = gpu_from_host(rval.dimshuffle(1, 2, 3, 4, 0))
+
+    return rval
+
+@local_optimizer([GpuConvGrad3D])
+def local_convgrad3d_fft(node):
+
+    if (isinstance(node.op, GpuConvGrad3D)# and
+        #        node.inputs[3] == (1, 1, 1)]):
+        ):
+        return [_gpu_convgrad3d_to_fftconv(node)]
+gpu_optimizer.register("convgrad3d_fft", local_convgrad3d_fft)
+
+
+from theano.sandbox.cuda.GpuConvTransp3D import GpuConvTransp3D
+
+def _gpu_convtransp3d_to_fftconv(node):
+    # we import conv3d_fft locally to avoid pycuda warnings
+    from theano.sandbox.cuda.fftconv import conv3d_fft
+    
+    # Shuffle filters from (oc, 0, 1, t, ic) to (ic, oc, 0, 1, t)
+    x = node.inputs[0]
+    x = x.dimshuffle(4, 0, 1, 2, 3)
+    # Shuffle dCdH from (b, 0, 1, t, oc) to (b, oc, 0, 1, t)
+    f = node.inputs[3]
+    f = f.dimshuffle(0, 4, 1, 2, 3)
+
+    rval = conv3d_fft(f, x, border_mode = 'full')
+    # Shuffle from (b, ic, 0, 1, t) to (b, 0, 1, t, ic)
+    rval = gpu_from_host(rval.dimshuffle(0, 2, 3, 4, 1))
+
+    return rval
+
+@local_optimizer([GpuConvTransp3D])
+def local_convtransp3d_fft(node):
+
+    if (isinstance(node.op, GpuConvTransp3D)# and
+        #        node.inputs[3] == (1, 1, 1)]):
+        ):
+        return [_gpu_convtransp3d_to_fftconv(node)]
+
+gpu_optimizer.register("convtransp3d_fft", local_convtransp3d_fft)
+
+

 import theano.tensor.signal.downsample as downsample