提交 2c001bd9 authored 作者: abergeron's avatar abergeron 提交者: GitHub

Merge pull request #6214 from ReyhaneAskari/add_sync

sync attribute added to theano function
......@@ -1006,6 +1006,15 @@ class Function(object):
"""
return [i.variable for i in self.maker.inputs if i.implicit]
def sync_shared(self):
    """Block until pending GPU updates of this function's shared
    variables have finished.

    Only has an effect when the ``gpuarray`` back-end is active; every
    updated input container holding a ``pygpu`` GPU array is
    synchronized.
    """
    backend = getattr(theano, "gpuarray", None)
    if backend is None or not backend.pygpu_activated:
        return
    import pygpu
    # Indices of the input containers that receive an update.
    updated = set(self.maker.fgraph.update_mapping.values())
    for idx, container in enumerate(self.input_storage):
        if idx in updated and isinstance(container.data,
                                         pygpu.gpuarray.GpuArray):
            container.data.sync()
# pickling/deepcopy support for Function
def _pickle_Function(f):
......@@ -1688,6 +1697,7 @@ class FunctionMaker(object):
fn = self.function_builder(_fn, _i, _o, self.indices, self.outputs,
defaults, self.unpack_single,
self.return_none, self.output_keys, self)
fn.profile = self.profile
return fn
......
......@@ -3,6 +3,7 @@ import copy
import six.moves.cPickle as pickle
import numpy as np
import unittest
import time
from theano import config, gof
......@@ -907,6 +908,71 @@ def test_empty_givens_updates():
function([theano.In(x)], y, updates={})
def test_sync_update():
    # Test that Function.sync_shared() really synchronizes.  This can
    # only be verified when a GPU is present.  We time a loop that lets
    # GPU and CPU computation overlap, then the same loop with a sync
    # after each call (which disables the overlap), and assert that the
    # synced loop is slower.
    import theano.gpuarray.tests.config
    if not theano.gpuarray.pygpu_activated:
        # NOTE(review): the original raised a bare ``SkipTest`` that is
        # not imported in the visible hunks; ``unittest.SkipTest`` is
        # honored by both nose and pytest and ``unittest`` is imported
        # at the top of this file.
        raise unittest.SkipTest(
            "Sync is only available when pygpu is activated.")

    sizes = [100, 500, 1000, 2000, 5000, 10000, 20000, 40000]
    size = sizes[0]
    w = theano.gpuarray.gpuarray_shared_constructor(
        np.random.rand(size, size).astype('float32'), 'w',
        target=theano.gpuarray.tests.config.test_ctx_name)
    # Fixed: this shared variable was mislabeled 'w' in the original.
    x = theano.gpuarray.gpuarray_shared_constructor(
        np.random.rand(size, size).astype('float32'), 'x',
        target=theano.gpuarray.tests.config.test_ctx_name)
    updates = [(w, w + np.asarray(0.001, 'float32') * T.dot(x, x))]
    f = theano.function([], updates=updates,
                        mode=theano.gpuarray.tests.config.mode_with_gpu)
    # The whole update should have been fused into a single GpuGemm.
    assert len(f.maker.fgraph.apply_nodes) == 1
    assert any(isinstance(n.op, theano.gpuarray.blas.GpuGemm)
               for n in f.maker.fgraph.apply_nodes)

    # Make sure libgpuarray has compiled all kernels before timing.
    f()
    f.sync_shared()

    # Find a size whose call takes about .5s, to make the timing
    # comparison stable across GPUs of different speed.
    size = sizes[-1]
    for candidate in sizes:
        data = np.random.rand(candidate, candidate).astype('float32')
        w.set_value(data)
        x.set_value(data)
        t0 = time.time()
        f()
        f.sync_shared()
        t1 = time.time()
        if (t1 - t0) >= 0.5:
            size = candidate
            break

    # Sync to make sure all computation is done before timing starts.
    f.sync_shared()
    t_0 = time.time()
    for _ in range(3):
        f()
        # Sync after each call to expose the slowdown from syncing.
        f.sync_shared()
        time.sleep(.5)
    t_1 = time.time()
    for _ in range(3):
        f()
        time.sleep(.5)
    # Sync once at the end so all computation is finished.
    f.sync_shared()
    t_2 = time.time()
    # Per-call syncing must be slower than syncing only at the end.
    assert (t_1 - t_0) > (t_2 - t_1)
if __name__ == '__main__':
if 1:
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论