Merge pull request #3 from nouiz/gpu_inc_diagonal_subtensor

Gpu inc diagonal subtensor and optimization refactoring.

Merge pull request #3 from nouiz/gpu_inc_diagonal_subtensor
d65475af · James Bergstra · cf85d105 · 92327dec · d65475af · d65475af
--- a/theanoconv3d2d/conv3d2d.py
+++ b/theanoconv3d2d/conv3d2d.py
@@ -21,7 +21,12 @@ class DiagonalSubtensor(Op):
    """
    Work on the GPU.
    """
-    def __init__(self, inplace):
+    def __str__(self):
+        if self.inplace:
+            return "%s{inplace}" % self.__class__.__name__
+        return "%s" % self.__class__.__name__
+    def __init__(self, inplace=False):
        self.inplace = inplace
        if inplace:
            self.view_map = {0: [0]}
@@ -57,7 +62,12 @@ diagonal_subtensor = DiagonalSubtensor(False)
 class IncDiagonalSubtensor(Op):
-    def __init__(self, inplace):
+    def __str__(self):
+        if self.inplace:
+            return "%s{inplace}" % self.__class__.__name__
+        return "%s" % self.__class__.__name__
+    def __init__(self, inplace=False):
        self.inplace = inplace
        if inplace:
            self.destroy_map = {0: [0]}
@@ -176,24 +186,49 @@ def conv3d(signals, filters,
    return out_5d
-@cuda.opt.register_opt()
+def make_gpu_optimizer(op, to_gpu):
-@theano.gof.local_optimizer([])
+    """This function creates an optimizer that moves some inputs to the GPU
-def local_gpu_diagonal_subtensor(node):
+    for ops that work on both CPU and GPU.
-    """
-    diagonal_subtensor(host_from_gpu()) -> host_from_gpu(diagonal_subtensor)
+    The op object is created by calling op(), so good default values
-    gpu_from_host(diagonal_subtensor) -> diagonal_subtensor(gpu_from_host)
+    are needed.
+    We assume the same op works with both CPU and GPU inputs.
+    :param op: the op that supports GPU inputs
+    :param to_gpu: a list of indices of the op inputs that are moved to the GPU.
    """
-    if isinstance(node.op, DiagonalSubtensor):
+    @theano.gof.local_optimizer([])
-        input = node.inputs[0]
+    def local_to_gpu(node):
-        if input.owner and isinstance(input.owner.op, cuda.HostFromGpu):
+        """
-            return [cuda.host_from_gpu(diagonal_subtensor(cuda.gpu_from_host(input),
+        op(host_from_gpu()) -> host_from_gpu(op)
-                                                     *node.inputs[1:]))]
+        gpu_from_host(op) -> op(gpu_from_host)
-    if node.op == cuda.gpu_from_host:
+        """
-        host_input = node.inputs[0]
+        if isinstance(node.op, op):
-        if host_input.owner and isinstance(host_input.owner.op,
+            #op(host_from_gpu()) -> host_from_gpu(op)
-                                           DiagonalSubtensor):
+            #If any of the inputs that go on the GPU are already on the GPU,
-            diag_node = host_input.owner
+            #move the op to the gpu.
-            return [tensor.diagonal_subtensor(
+            if any(node.inputs[idx].owner and
-                cuda.gpu_from_host(diag_node.inputs[0]),
+                   isinstance(node.inputs[idx].owner.op, cuda.HostFromGpu)
-                *diag_node.inputs[1:])]
+                   for idx in to_gpu):
-    return False
+                new_inp = list(node.inputs)
+                for idx in to_gpu:
+                    new_inp[idx] = cuda.gpu_from_host(new_inp[idx])
+                return [cuda.host_from_gpu(op()(*new_inp))]
+        if node.op == cuda.gpu_from_host:
+            #gpu_from_host(op) -> op(gpu_from_host)
+            host_input = node.inputs[0]
+            if host_input.owner and isinstance(host_input.owner.op,
+                                               op):
+                op_node = host_input.owner
+                new_inp = list(op_node.inputs)
+                for idx in to_gpu:
+                    new_inp[idx] = cuda.gpu_from_host(new_inp[idx])
+                return [op()(*new_inp)]
+        return False
+    local_to_gpu.__name__ = "local_to_gpu_" + op.__name__
+    cuda.opt.register_opt()(local_to_gpu)
+make_gpu_optimizer(DiagonalSubtensor, [0])
+make_gpu_optimizer(IncDiagonalSubtensor, [0, 3])
--- a/theanoconv3d2d/tests.py
+++ b/theanoconv3d2d/tests.py
@@ -123,12 +123,24 @@ def test_conv3d():
        s_filters = shared(filters)
        s_output = shared(signals*0)
+        out = conv3d(s_signals, s_filters,
+                     signals_shape=signals.shape,
+                     filters_shape=filters.shape)
        newconv3d = theano.function([], [],
-                                    updates={s_output: conv3d(s_signals, s_filters,
+                                    updates={s_output: out},
-                                                              signals_shape=signals.shape,
-                                                              filters_shape=filters.shape)},
                                    mode=mode)
        t0 = time.time()
        newconv3d()
        print time.time() - t0
+        gsignals, gfilters = theano.grad(out.sum(), [s_signals, s_filters])
+        gnewconv3d = theano.function([], [],
+                                     updates=[(s_filters, gfilters),
+                                              (s_signals, gsignals)],
+                                     mode=mode,
+                                     name='grad')
+        t0 = time.time()
+        gnewconv3d()
+        print 'grad', time.time() - t0