Commit 754dde21 authored by Frederic

Make the IncDiagonalSubtensor work on the GPU.

This needs the https://github.com/Theano/Theano/pull/1437 PR to be merged in order to work.
Parent 414b1268
...@@ -186,24 +186,49 @@ def conv3d(signals, filters, ...@@ -186,24 +186,49 @@ def conv3d(signals, filters,
return out_5d return out_5d
def make_gpu_optimizer(op, to_gpu):
    """Build and register a local optimizer that moves selected inputs
    of `op` to the GPU, for ops that work on both CPU and GPU inputs.

    The op object is re-created by calling ``op()``, so the op class
    needs sensible constructor defaults.

    We assume the same op class works on both CPU and GPU inputs.

    :param op: the op class that supports GPU inputs.
    :param to_gpu: list of input indices that are moved to the GPU.
    """
    @theano.gof.local_optimizer([])
    def local_to_gpu(node):
        """
        op(host_from_gpu()) -> host_from_gpu(op)
        gpu_from_host(op) -> op(gpu_from_host)
        """
        if isinstance(node.op, op):
            # op(host_from_gpu()) -> host_from_gpu(op)
            # If any of the inputs that should go to the GPU is already
            # on the GPU, move the whole op to the GPU.
            if any(node.inputs[idx].owner and
                   isinstance(node.inputs[idx].owner.op, cuda.HostFromGpu)
                   for idx in to_gpu):
                new_inp = list(node.inputs)
                for idx in to_gpu:
                    new_inp[idx] = cuda.gpu_from_host(new_inp[idx])
                return [cuda.host_from_gpu(op()(*new_inp))]
        if node.op == cuda.gpu_from_host:
            # gpu_from_host(op) -> op(gpu_from_host)
            host_input = node.inputs[0]
            if host_input.owner and isinstance(host_input.owner.op,
                                               op):
                op_node = host_input.owner
                new_inp = list(op_node.inputs)
                for idx in to_gpu:
                    new_inp[idx] = cuda.gpu_from_host(new_inp[idx])
                return [op()(*new_inp)]
        return False
    # Give each generated optimizer a distinct, descriptive name
    # before registering it with the cuda optimization database.
    local_to_gpu.__name__ = "local_to_gpu_" + op.__name__
    cuda.opt.register_opt()(local_to_gpu)

make_gpu_optimizer(DiagonalSubtensor, [0])
make_gpu_optimizer(IncDiagonalSubtensor, [0, 3])
...@@ -123,12 +123,24 @@ def test_conv3d(): ...@@ -123,12 +123,24 @@ def test_conv3d():
s_filters = shared(filters) s_filters = shared(filters)
s_output = shared(signals*0) s_output = shared(signals*0)
newconv3d = theano.function([], [], out = conv3d(s_signals, s_filters,
updates={s_output: conv3d(s_signals, s_filters,
signals_shape=signals.shape, signals_shape=signals.shape,
filters_shape=filters.shape)}, filters_shape=filters.shape)
newconv3d = theano.function([], [],
updates={s_output: out},
mode=mode) mode=mode)
t0 = time.time() t0 = time.time()
newconv3d() newconv3d()
print time.time() - t0 print time.time() - t0
gsignals, gfilters = theano.grad(out.sum(), [s_signals, s_filters])
gnewconv3d = theano.function([], [],
updates=[(s_filters, gfilters),
(s_signals, gsignals)],
mode=mode,
name='grad')
t0 = time.time()
gnewconv3d()
print 'grad', time.time() - t0
Markdown format
0%
You are adding 0 people to this discussion. Please proceed with caution.
Please finish editing this comment first!
Register or sign in to post a comment