提交 2c001bd9 authored 作者: abergeron's avatar abergeron 提交者: GitHub

Merge pull request #6214 from ReyhaneAskari/add_sync

sync attribute added to theano function
......@@ -1006,6 +1006,15 @@ class Function(object):
"""
return [i.variable for i in self.maker.inputs if i.implicit]
def sync_shared(self):
    """Block until pending GPU updates of this function's shared
    variables have finished.

    Only has an effect when the ``gpuarray`` back-end is active; every
    updated input container holding a ``pygpu`` GPU array is
    synchronized.
    """
    backend = getattr(theano, "gpuarray", None)
    if backend is None or not backend.pygpu_activated:
        return
    import pygpu
    # Indices of the input containers that receive an update.
    updated = set(self.maker.fgraph.update_mapping.values())
    for idx, container in enumerate(self.input_storage):
        if idx in updated and isinstance(container.data,
                                         pygpu.gpuarray.GpuArray):
            container.data.sync()
# pickling/deepcopy support for Function
def _pickle_Function(f):
......@@ -1688,6 +1697,7 @@ class FunctionMaker(object):
fn = self.function_builder(_fn, _i, _o, self.indices, self.outputs,
defaults, self.unpack_single,
self.return_none, self.output_keys, self)
fn.profile = self.profile
return fn
......
......@@ -3,6 +3,7 @@ import copy
import six.moves.cPickle as pickle
import numpy as np
import unittest
import time
from theano import config, gof
......@@ -907,6 +908,71 @@ def test_empty_givens_updates():
function([theano.In(x)], y, updates={})
def test_sync_update():
    # Test that Function.sync_shared() really synchronizes.  This can
    # only be verified when a GPU is present.  We time a loop that lets
    # GPU and CPU computation overlap, then the same loop with a sync
    # after each call (which disables the overlap), and assert that the
    # synced loop is slower.
    import theano.gpuarray.tests.config
    if not theano.gpuarray.pygpu_activated:
        # NOTE(review): the original raised a bare ``SkipTest`` that is
        # not imported in the visible hunks; ``unittest.SkipTest`` is
        # honored by both nose and pytest and ``unittest`` is imported
        # at the top of this file.
        raise unittest.SkipTest(
            "Sync is only available when pygpu is activated.")

    sizes = [100, 500, 1000, 2000, 5000, 10000, 20000, 40000]
    size = sizes[0]
    w = theano.gpuarray.gpuarray_shared_constructor(
        np.random.rand(size, size).astype('float32'), 'w',
        target=theano.gpuarray.tests.config.test_ctx_name)
    # Fixed: this shared variable was mislabeled 'w' in the original.
    x = theano.gpuarray.gpuarray_shared_constructor(
        np.random.rand(size, size).astype('float32'), 'x',
        target=theano.gpuarray.tests.config.test_ctx_name)
    updates = [(w, w + np.asarray(0.001, 'float32') * T.dot(x, x))]
    f = theano.function([], updates=updates,
                        mode=theano.gpuarray.tests.config.mode_with_gpu)
    # The whole update should have been fused into a single GpuGemm.
    assert len(f.maker.fgraph.apply_nodes) == 1
    assert any(isinstance(n.op, theano.gpuarray.blas.GpuGemm)
               for n in f.maker.fgraph.apply_nodes)

    # Make sure libgpuarray has compiled all kernels before timing.
    f()
    f.sync_shared()

    # Find a size whose call takes about .5s, to make the timing
    # comparison stable across GPUs of different speed.
    size = sizes[-1]
    for candidate in sizes:
        data = np.random.rand(candidate, candidate).astype('float32')
        w.set_value(data)
        x.set_value(data)
        t0 = time.time()
        f()
        f.sync_shared()
        t1 = time.time()
        if (t1 - t0) >= 0.5:
            size = candidate
            break

    # Sync to make sure all computation is done before timing starts.
    f.sync_shared()
    t_0 = time.time()
    for _ in range(3):
        f()
        # Sync after each call to expose the slowdown from syncing.
        f.sync_shared()
        time.sleep(.5)
    t_1 = time.time()
    for _ in range(3):
        f()
        time.sleep(.5)
    # Sync once at the end so all computation is finished.
    f.sync_shared()
    t_2 = time.time()
    # Per-call syncing must be slower than syncing only at the end.
    assert (t_1 - t_0) > (t_2 - t_1)
if __name__ == '__main__':
if 1:
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论