提交 2c001bd9 authored 作者: abergeron's avatar abergeron 提交者: GitHub

Merge pull request #6214 from ReyhaneAskari/add_sync

Add a sync_shared() method to theano Function to block until GPU computations on updated shared variables finish
...@@ -1006,6 +1006,15 @@ class Function(object): ...@@ -1006,6 +1006,15 @@ class Function(object):
""" """
return [i.variable for i in self.maker.inputs if i.implicit] return [i.variable for i in self.maker.inputs if i.implicit]
def sync_shared(self):
    """Block until pending GPU computations on updated shared
    variables have finished.

    Only has an effect when the gpuarray back-end is active; on a
    CPU-only configuration this is a no-op.
    """
    if (hasattr(theano, "gpuarray") and
            theano.gpuarray.pygpu_activated):
        import pygpu
        # Hoist the updated input positions into a set once:
        # `i in dict.values()` re-scans all values on every
        # iteration (O(n) per membership test).
        updated = set(self.maker.fgraph.update_mapping.values())
        for i, inp in enumerate(self.input_storage):
            if i in updated:
                # Only GPU arrays need (and support) an explicit sync.
                if isinstance(inp.data, pygpu.gpuarray.GpuArray):
                    inp.data.sync()
# pickling/deepcopy support for Function # pickling/deepcopy support for Function
def _pickle_Function(f): def _pickle_Function(f):
...@@ -1688,6 +1697,7 @@ class FunctionMaker(object): ...@@ -1688,6 +1697,7 @@ class FunctionMaker(object):
fn = self.function_builder(_fn, _i, _o, self.indices, self.outputs, fn = self.function_builder(_fn, _i, _o, self.indices, self.outputs,
defaults, self.unpack_single, defaults, self.unpack_single,
self.return_none, self.output_keys, self) self.return_none, self.output_keys, self)
fn.profile = self.profile fn.profile = self.profile
return fn return fn
......
...@@ -3,6 +3,7 @@ import copy ...@@ -3,6 +3,7 @@ import copy
import six.moves.cPickle as pickle import six.moves.cPickle as pickle
import numpy as np import numpy as np
import unittest import unittest
import time
from theano import config, gof from theano import config, gof
...@@ -907,6 +908,71 @@ def test_empty_givens_updates(): ...@@ -907,6 +908,71 @@ def test_empty_givens_updates():
function([theano.In(x)], y, updates={}) function([theano.In(x)], y, updates={})
def test_sync_update():
    # Test that Function.sync_shared() really blocks until GPU work is
    # done.  This can only be tested when there is a GPU.  We time a
    # workload where GPU and CPU computation can overlap, then force a
    # sync after every call to disable that overlap, and assert the
    # synced variant takes longer.
    import theano.gpuarray.tests.config
    if not theano.gpuarray.pygpu_activated:
        # Guard clause; `unittest` is imported at module level.  The
        # previous bare `SkipTest` name was never imported (NameError).
        raise unittest.SkipTest(
            "Sync is only available when pygpu is activated.")

    sizes = [100, 500, 1000, 2000, 5000, 10000, 20000, 40000]
    size = sizes[0]
    w = theano.gpuarray.gpuarray_shared_constructor(
        np.random.rand(size, size).astype('float32'), 'w',
        target=theano.gpuarray.tests.config.test_ctx_name)
    # Name fixed: this shared variable was mis-labeled 'w' (copy-paste).
    x = theano.gpuarray.gpuarray_shared_constructor(
        np.random.rand(size, size).astype('float32'), 'x',
        target=theano.gpuarray.tests.config.test_ctx_name)
    updates = [(w, w + np.asarray(0.001, 'float32') * T.dot(x, x))]
    f = theano.function([], updates=updates,
                        mode=theano.gpuarray.tests.config.mode_with_gpu)
    # The whole update must have been fused into a single GpuGemm node.
    assert len(f.maker.fgraph.apply_nodes) == 1
    assert any(isinstance(n.op, theano.gpuarray.blas.GpuGemm)
               for n in f.maker.fgraph.apply_nodes)
    # Warm-up call so libgpuarray compiles all kernels before timing.
    f()
    f.sync_shared()

    # Find a matrix size whose call takes about .5s, to make the
    # timing comparison stable across GPUs of different speeds.
    size = sizes[-1]
    for candidate in sizes:
        data = np.random.rand(candidate, candidate).astype('float32')
        w.set_value(data)
        x.set_value(data)
        t0 = time.time()
        f()
        f.sync_shared()
        t1 = time.time()
        if (t1 - t0) < 0.5:
            continue
        size = candidate
        break

    # Sync to make sure all computations are done before timing.
    f.sync_shared()
    t_0 = time.time()
    for _ in range(3):
        f()
        # Sync after each call to see the slowdown from syncing.
        f.sync_shared()
        time.sleep(.5)
    t_1 = time.time()
    for _ in range(3):
        f()
        time.sleep(.5)
    # Single sync at the end lets GPU work overlap with the sleeps.
    f.sync_shared()
    t_2 = time.time()
    assert (t_1 - t_0) > (t_2 - t_1)
if __name__ == '__main__': if __name__ == '__main__':
if 1: if 1:
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论