Commit 9433d5d2 authored by James Bergstra

vm linker

Parent commit: 2677e15f
......@@ -331,6 +331,7 @@ class Function(object):
self.unpack_single = unpack_single
self.return_none = return_none
self.maker = maker
self.profile = None # reassigned in FunctionMaker.create
# We will be popping stuff off this `containers` object. It is a copy.
containers = list(self.input_storage)
......
......@@ -4,7 +4,9 @@ import os, logging
import numpy, theano
from theano import gof
from theano.configparser import config, AddConfigVar, StrParam
import theano.gof.vm
from theano.configparser import config, AddConfigVar, StrParam, EnumStr
_logger = logging.getLogger('theano.compile.mode')
......@@ -55,7 +57,11 @@ predefined_linkers = {
'c' : gof.CLinker(),
'c|py' : gof.OpWiseCLinker(allow_gc=True),
'c|py_nogc' : gof.OpWiseCLinker(allow_gc=False),
'c&py' : gof.DualLinker(checker = check_equal)
'c&py' : gof.DualLinker(checker = check_equal),
'vm' : gof.vm.VM_Linker(allow_gc=True, use_cloop=False),
'cvm' : gof.vm.VM_Linker(allow_gc=True, use_cloop=True),
'vm_nogc' : gof.vm.VM_Linker(allow_gc=False, use_cloop=False),
'cvm_nogc': gof.vm.VM_Linker(allow_gc=False, use_cloop=True),
}
......@@ -249,6 +255,7 @@ class Mode(object):
self._optimizer = optimizer
self.call_time = 0
self.fn_time = 0
linker.mode = self #TODO: WHY IS THIS HERE?
self.optimizer_time = 0
self.linker_time = 0
......@@ -290,15 +297,27 @@ class Mode(object):
FAST_COMPILE = Mode('py', 'fast_compile')
FAST_RUN = Mode('c|py', 'fast_run')
FAST_RUN_NOGC = Mode("c|py_nogc", 'fast_run')
SANITY_CHECK = [Mode('c|py', None),
Mode('c|py', 'fast_run')]
STABILIZE = Mode("c|py", OPT_STABILIZE)
predefined_modes = {'FAST_COMPILE': FAST_COMPILE,
'FAST_RUN': FAST_RUN,
'FAST_RUN_NOGC':FAST_RUN_NOGC,
'SANITY_CHECK': SANITY_CHECK,
'STABILIZE': STABILIZE}
'STABILIZE': STABILIZE,
'VM':Mode('vm', 'fast_run'),
'VM_NOGC':Mode('vm_nogc', 'fast_run'),
'CVM':Mode('cvm', 'fast_run'),
'CVM_NOGC':Mode('cvm_nogc', 'fast_run'),
}
#Don't add FAST_RUN_NOGC to this list(as well as other ALL CAPS short cut)
#The way to get FAST_RUN_NOGC is with the flag 'linker=c|py_nogc'
#The old all capital letter way of working is deprecated as it is not scalable.
AddConfigVar('mode',
"Default compilation mode",
EnumStr(*(predefined_modes.keys() + [
'Mode','DEBUG_MODE', 'PROFILE_MODE'])),
in_c_key=False)
instanciated_default_mode=None
def get_mode(orig_string):
......@@ -329,7 +348,7 @@ def get_mode(orig_string):
ret = DebugMode(optimizer=config.optimizer)
else:
# The import is needed in case string is ProfileMode
from profilemode import ProfileMode
from profilemode import ProfileMode,prof_mode_instance_to_print
ret = eval(string+'(linker=config.linker, optimizer=config.optimizer)')
elif predefined_modes.has_key(string):
ret = predefined_modes[string]
......@@ -349,7 +368,6 @@ def get_mode(orig_string):
#must tell python to print the summary at the end.
if string == 'ProfileMode':
#need to import later to break circular dependency.
from profilemode import prof_mode_instance_to_print
prof_mode_instance_to_print.append(ret)
return ret
......@@ -365,3 +383,4 @@ def register_mode(name, mode):
if name in predefined_modes:
raise ValueError('Mode name already taken: %s' % name)
predefined_modes[name] = mode
......@@ -10,6 +10,8 @@ import random
import numpy.random
from theano.tests import unittest_tools as utt
import theano.tensor as T
class T_bunch_of_modes(unittest.TestCase):
......
......@@ -65,15 +65,6 @@ AddConfigVar('force_device',
BoolParam(False, allow_override=False),
in_c_key=False)
#Don't add FAST_RUN_NOGC to this list(as well as other ALL CAPS short cut)
#The way to get FAST_RUN_NOGC is with the flag 'linker=c|py_nogc'
#The old all capital letter way of working is deprecated as it is not scalable.
AddConfigVar('mode',
"Default compilation mode",
EnumStr('Mode', 'ProfileMode', 'DebugMode', 'FAST_RUN',
'FAST_COMPILE', 'PROFILE_MODE', 'DEBUG_MODE'),
in_c_key=False)
# Test whether or not gcc is present: disable C code if it is not.
# Using the dummy file descriptor below is a workaround for a crash experienced
# in an unusual Python 2.4.4 Windows environment with the default stdin=None.
......@@ -84,13 +75,15 @@ try:
# Keep the default linker the same as the one for the mode FAST_RUN
AddConfigVar('linker',
"Default linker used if the theano flags mode is Mode or ProfileMode",
EnumStr('c|py', 'py', 'c', 'c|py_nogc', 'c&py'),
EnumStr('c|py', 'py', 'c', 'c|py_nogc', 'c&py',
'vm', 'cvm', 'vm_nogc', 'cvm_nogc'),
in_c_key=False)
except OSError:
# gcc is not present, linker should default to python only
AddConfigVar('linker',
"Default linker used if the theano flags mode is Mode or ProfileMode",
EnumStr('py', 'c|py', 'c', 'c|py_nogc', 'c&py'),
EnumStr('c|py', 'py', 'c', 'c|py_nogc', 'c&py',
'vm', 'cvm', 'vm_nogc', 'cvm_nogc'),
in_c_key=False)
warning('GCC not detected ! Theano will be unable to execute optimized '+
'C-implementations (for both CPU and GPU) and will default to '+
......
差异被折叠。
# Build and import the `lazylinker_ext` C extension (used by the VM linker's
# C loop) at import time, inside Theano's per-user compilation directory.
import os
import theano
from theano import config
from theano.gof.compilelock import get_lock, release_lock
from theano.gof import cmodule

# A filesystem lock serializes concurrent builds (several processes may
# import this module at the same time and share one compiledir).
get_lock()
try:
    dirname = 'lazylinker_ext'
    # The C source ships with the package; read it verbatim.
    cfile = os.path.join(theano.__path__[0], 'gof', 'lazylinker_c.c')
    code = open(cfile).read()
    loc = os.path.join(config.compiledir, dirname)
    if not os.path.exists(loc):
        os.mkdir(loc)
    # NOTE(review): this compiles unconditionally on every import --
    # presumably acceptable at this stage; confirm whether cmodule caching
    # avoids redundant recompilation.
    cmodule.gcc_module_compile_str(dirname, code, location=loc)
    # Re-export the freshly compiled module's names at this module's level.
    from lazylinker_ext.lazylinker_ext import *
finally:
    # Release lock on compilation directory.
    release_lock()
import gc
import sys
import time
import line_profiler
import numpy
from theano import function
from theano.gof import vm,link, OpWiseCLinker
from theano.compile import Mode
from theano import tensor
from theano.lazycond import cond
import theano
def test_speed():
    # Benchmark how fast each linker executes a deep chain of elementwise
    # additions, reported as seconds per thousand ops (s/Kop).  This is a
    # manual benchmark: it only prints numbers for human inspection and
    # asserts nothing.

    def build_graph(x, depth=5):
        # Chain `depth` additions on the symbolic input: one Op per level.
        z = x
        for d in range(depth):
            z = (z + z)
        return z

    def numpy_version(x, depth):
        # Same computation performed eagerly with numpy, as a baseline.
        z = x
        for d in xrange(depth):
            z = (z+z)
        return z

    def time_numpy():
        steps_a = 5
        steps_b = 100
        x = numpy.asarray([2.0, 3.0], dtype=theano.config.floatX)
        numpy_version(x, steps_a)  # warm-up call, untimed
        t0 = time.time()
        print numpy_version(x, steps_a)
        t1 = time.time()
        t2 = time.time()
        print numpy_version(x, steps_b)
        t3 = time.time()
        t_a = t1 - t0
        t_b = t3 - t2
        # Subtracting the short run cancels fixed per-call overhead (incl.
        # the print); dividing by the depth difference gives per-op cost,
        # scaled x1000 to get cost per thousand ops.
        print "%s takes %f s/Kop" % (
            'numpy',
            (1000*(t_b-t_a) / (steps_b - steps_a)))

    def time_linker(name, linker):
        # `linker` is a zero-argument callable returning a fresh Linker.
        # Compile one shallow and one deep function, then time the *second*
        # call of each so one-time setup cost is excluded.
        steps_a = 5
        steps_b = 100
        x = tensor.vector()
        a = build_graph(x,steps_a)
        b = build_graph(x,steps_b)

        f_a = function([x], a,
                mode=Mode(optimizer=None, linker=linker()),
                #profile='f_a speed test %s'%name,
                )
        f_b = function([x], b,
                mode=Mode(optimizer=None, linker=linker()),
                #profile='f_b speed test %s'%name,
                )

        print f_a([2.0, 3.0])  # warm-up call, untimed
        t0 = time.time()
        print f_a([2.0, 3.0])
        t1 = time.time()

        print f_b([2.0, 3.0])  # warm-up call, untimed
        t2 = time.time()
        print f_b([2.0, 3.0])
        t3 = time.time()

        t_a = t1 - t0
        t_b = t3 - t2

        print "%s takes %f s/Kop" % (
            name,
            (1000*(t_b-t_a) / (steps_b - steps_a)))

    time_linker('c|py', OpWiseCLinker)
    time_linker('vmLinker', vm.VM_Linker)
    time_linker('vmLinker_nogc', lambda : vm.VM_Linker(allow_gc=False))
    time_linker('vmLinker_CLOOP', lambda : vm.VM_Linker(allow_gc=False,
        use_cloop=True))
    time_numpy()
def test_speed_lazy():
def build_graph(x, depth=5):
z = x
for d in range(depth):
z = cond(z> 0, -z, z)
return z
def time_linker(name, linker):
steps_a = 10
steps_b = 100
x = tensor.vector()
a = build_graph(x, steps_a)
b = build_graph(x, steps_b)
f_a = function([x], a,
mode=Mode(optimizer=None,
linker=linker()),
#profile='f_a lazy cond %s'%name,
)
f_b = function([x], b,
mode=Mode(optimizer=None,
linker=linker()),
#profile='f_b lazy cond %s'%name,
)
print f_a([2.0])
t0 = time.time()
print f_a([2.0])
t1 = time.time()
print f_b([2.0])
t2 = time.time()
print f_b([2.0])
t3 = time.time()
t_a = t1 - t0
t_b = t3 - t2
print "%s takes %f s/Kop" % (
name,
(1000*(t_b-t_a) / (steps_b - steps_a)))
time_linker('vmLinker', vm.VM_Linker)
time_linker('vmLinker_nogc', lambda : vm.VM_Linker(allow_gc=False))
time_linker('vmLinker_C', lambda : vm.VM_Linker(allow_gc=False,
use_cloop=True))
# Manual memory-leak probes.  They are defined only when the flag below is
# flipped to True, because they loop for a very long time while the author
# watches process memory in `top`; they must not run in the normal suite.
run_memory_usage_tests = False
if run_memory_usage_tests:
    # these are not normal unit tests, do not run them as part of standard
    # suite. I ran them while looking at top, and stopped when memory usage was
    # stable.
    def test_leak2():
        # Repeatedly wrap a small numpy array in a CudaNdarray and check
        # that constructing the wrapper leaves the array's refcount
        # unchanged (i.e. no stray reference is kept).
        import theano.sandbox.cuda as cuda
        for i in xrange(1000000):
            n = numpy.asarray([2.3, 4.5], dtype='f')
            c = sys.getrefcount(n)
            a = cuda.CudaNdarray(n)
            assert c == sys.getrefcount(n)
            # Heartbeat every 1000 iterations: progress dot plus the number
            # of objects the cyclic GC collected.
            if not i % 1000:
                print '.',
                print gc.collect(),
                print gc.collect()
            sys.stdout.flush()

    def test_no_leak_many_graphs():
        # Verify no memory leaks when creating and deleting a lot of functions
        # This isn't really a unit test, you have to run it and look at top to see
        # if there's a leak
        for i in xrange(10000):
            x = tensor.vector()
            z = x
            for d in range(10):
                z = tensor.sin(-z+ 1)
            f = function([x], z, mode=Mode(optimizer=None, linker='cvm'))
            if not i % 100:
                print gc.collect()
            sys.stdout.flush()
            gc.collect()
            # Exercise the compiled function a few times before dropping it.
            if 1:
                f([2.0])
                f([3.0])
                f([4.0])
                f([5.0])

    def test_no_leak_many_call_lazy():
        # Verify no memory leaks when calling a function a lot of times
        # This isn't really a unit test, you have to run it and look at top to see
        # if there's a leak
        def build_graph(x, depth=5):
            # Stack `depth` lazy conditionals on the input.
            z = x
            for d in range(depth):
                z = cond(z> 0, -z, z)
            return z

        def time_linker(name, linker):
            # Build a 10-deep lazy graph and call it 100k times while the
            # author watches memory usage externally.
            steps_a = 10
            x = tensor.vector()
            a = build_graph(x, steps_a)
            f_a = function([x], a,
                    mode=Mode(optimizer=None,
                        linker=linker()))
            for i in xrange(100000):
                f_a([2.0])
            if 0: # this doesn't seem to work, prints 0 for everything
                import resource
                pre = resource.getrusage(resource.RUSAGE_SELF)
                post = resource.getrusage(resource.RUSAGE_SELF)
                print pre.ru_ixrss, post.ru_ixrss
                print pre.ru_idrss, post.ru_idrss
                print pre.ru_maxrss, post.ru_maxrss
        time_linker('vmLinker_C', lambda : vm.VM_Linker(allow_gc=False, use_cloop=True))

    def test_no_leak_many_call_nonlazy():
        # Verify no memory leaks when calling a function a lot of times
        # This isn't really a unit test, you have to run it and look at top to see
        # if there's a leak
        def build_graph(x, depth=5):
            # Stack `depth` sin ops on the input (no lazy evaluation).
            z = x
            for d in range(depth):
                z = tensor.sin(-z+1)
            return z

        def time_linker(name, linker):
            # Build a 10-deep strict graph and call it 500k times while the
            # author watches memory usage externally.
            steps_a = 10
            x = tensor.vector()
            a = build_graph(x,steps_a)
            f_a = function([x], a,
                    mode=Mode(optimizer=None,
                        linker=linker()))
            for i in xrange(500000):
                f_a([2.0])
        time_linker('vmLinker_C', lambda : vm.VM_Linker(allow_gc=False, use_cloop=True))
差异被折叠。
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论