提交 9433d5d2 authored 作者: James Bergstra's avatar James Bergstra

vm linker

上级 2677e15f
...@@ -331,6 +331,7 @@ class Function(object): ...@@ -331,6 +331,7 @@ class Function(object):
self.unpack_single = unpack_single self.unpack_single = unpack_single
self.return_none = return_none self.return_none = return_none
self.maker = maker self.maker = maker
self.profile = None # reassigned in FunctionMaker.create
# We will be popping stuff off this `containers` object. It is a copy. # We will be popping stuff off this `containers` object. It is a copy.
containers = list(self.input_storage) containers = list(self.input_storage)
......
...@@ -4,7 +4,9 @@ import os, logging ...@@ -4,7 +4,9 @@ import os, logging
import numpy, theano import numpy, theano
from theano import gof from theano import gof
from theano.configparser import config, AddConfigVar, StrParam import theano.gof.vm
from theano.configparser import config, AddConfigVar, StrParam, EnumStr
_logger = logging.getLogger('theano.compile.mode') _logger = logging.getLogger('theano.compile.mode')
...@@ -55,7 +57,11 @@ predefined_linkers = { ...@@ -55,7 +57,11 @@ predefined_linkers = {
'c' : gof.CLinker(), 'c' : gof.CLinker(),
'c|py' : gof.OpWiseCLinker(allow_gc=True), 'c|py' : gof.OpWiseCLinker(allow_gc=True),
'c|py_nogc' : gof.OpWiseCLinker(allow_gc=False), 'c|py_nogc' : gof.OpWiseCLinker(allow_gc=False),
'c&py' : gof.DualLinker(checker = check_equal) 'c&py' : gof.DualLinker(checker = check_equal),
'vm' : gof.vm.VM_Linker(allow_gc=True, use_cloop=False),
'cvm' : gof.vm.VM_Linker(allow_gc=True, use_cloop=True),
'vm_nogc' : gof.vm.VM_Linker(allow_gc=False, use_cloop=False),
'cvm_nogc': gof.vm.VM_Linker(allow_gc=False, use_cloop=True),
} }
...@@ -249,6 +255,7 @@ class Mode(object): ...@@ -249,6 +255,7 @@ class Mode(object):
self._optimizer = optimizer self._optimizer = optimizer
self.call_time = 0 self.call_time = 0
self.fn_time = 0 self.fn_time = 0
linker.mode = self #TODO: WHY IS THIS HERE?
self.optimizer_time = 0 self.optimizer_time = 0
self.linker_time = 0 self.linker_time = 0
...@@ -290,15 +297,27 @@ class Mode(object): ...@@ -290,15 +297,27 @@ class Mode(object):
FAST_COMPILE = Mode('py', 'fast_compile') FAST_COMPILE = Mode('py', 'fast_compile')
FAST_RUN = Mode('c|py', 'fast_run') FAST_RUN = Mode('c|py', 'fast_run')
FAST_RUN_NOGC = Mode("c|py_nogc", 'fast_run') FAST_RUN_NOGC = Mode("c|py_nogc", 'fast_run')
SANITY_CHECK = [Mode('c|py', None),
Mode('c|py', 'fast_run')]
STABILIZE = Mode("c|py", OPT_STABILIZE) STABILIZE = Mode("c|py", OPT_STABILIZE)
predefined_modes = {'FAST_COMPILE': FAST_COMPILE, predefined_modes = {'FAST_COMPILE': FAST_COMPILE,
'FAST_RUN': FAST_RUN, 'FAST_RUN': FAST_RUN,
'FAST_RUN_NOGC':FAST_RUN_NOGC, 'FAST_RUN_NOGC':FAST_RUN_NOGC,
'SANITY_CHECK': SANITY_CHECK, 'STABILIZE': STABILIZE,
'STABILIZE': STABILIZE} 'VM':Mode('vm', 'fast_run'),
'VM_NOGC':Mode('vm_nogc', 'fast_run'),
'CVM':Mode('cvm', 'fast_run'),
'CVM_NOGC':Mode('cvm_nogc', 'fast_run'),
}
# Register the 'mode' configuration flag.
#
# The first value handed to EnumStr is used as the default, so 'Mode' is
# listed explicitly first; relying on predefined_modes.keys() would make the
# default depend on arbitrary dict ordering.  'ProfileMode' and 'DebugMode'
# stay accepted because get_mode() still handles those strings.
#
# Prefer selecting variants through the linker flag (e.g. 'linker=c|py_nogc'
# for FAST_RUN_NOGC) over adding further ALL-CAPS shortcut names: the
# all-capital shortcuts are deprecated because they do not scale.
# NOTE(review): predefined_modes currently contains FAST_RUN_NOGC, so it is
# accepted here despite the advice above -- confirm that this is intended.
AddConfigVar('mode',
        "Default compilation mode",
        EnumStr(*(['Mode', 'ProfileMode', 'DebugMode',
                   'DEBUG_MODE', 'PROFILE_MODE'] +
                  sorted(predefined_modes.keys()))),
        in_c_key=False)
instanciated_default_mode=None instanciated_default_mode=None
def get_mode(orig_string): def get_mode(orig_string):
...@@ -329,7 +348,7 @@ def get_mode(orig_string): ...@@ -329,7 +348,7 @@ def get_mode(orig_string):
ret = DebugMode(optimizer=config.optimizer) ret = DebugMode(optimizer=config.optimizer)
else: else:
# The import is needed in case string is ProfileMode # The import is needed in case string is ProfileMode
from profilemode import ProfileMode from profilemode import ProfileMode,prof_mode_instance_to_print
ret = eval(string+'(linker=config.linker, optimizer=config.optimizer)') ret = eval(string+'(linker=config.linker, optimizer=config.optimizer)')
elif predefined_modes.has_key(string): elif predefined_modes.has_key(string):
ret = predefined_modes[string] ret = predefined_modes[string]
...@@ -349,7 +368,6 @@ def get_mode(orig_string): ...@@ -349,7 +368,6 @@ def get_mode(orig_string):
#must tell python to print the summary at the end. #must tell python to print the summary at the end.
if string == 'ProfileMode': if string == 'ProfileMode':
#need to import later to break circular dependency. #need to import later to break circular dependency.
from profilemode import prof_mode_instance_to_print
prof_mode_instance_to_print.append(ret) prof_mode_instance_to_print.append(ret)
return ret return ret
...@@ -365,3 +383,4 @@ def register_mode(name, mode): ...@@ -365,3 +383,4 @@ def register_mode(name, mode):
if name in predefined_modes: if name in predefined_modes:
raise ValueError('Mode name already taken: %s' % name) raise ValueError('Mode name already taken: %s' % name)
predefined_modes[name] = mode predefined_modes[name] = mode
...@@ -10,6 +10,8 @@ import random ...@@ -10,6 +10,8 @@ import random
import numpy.random import numpy.random
from theano.tests import unittest_tools as utt from theano.tests import unittest_tools as utt
import theano.tensor as T
class T_bunch_of_modes(unittest.TestCase): class T_bunch_of_modes(unittest.TestCase):
......
...@@ -65,15 +65,6 @@ AddConfigVar('force_device', ...@@ -65,15 +65,6 @@ AddConfigVar('force_device',
BoolParam(False, allow_override=False), BoolParam(False, allow_override=False),
in_c_key=False) in_c_key=False)
#Don't add FAST_RUN_NOGC to this list(as well as other ALL CAPS short cut)
#The way to get FAST_RUN_NOGC is with the flag 'linker=c|py_nogc'
#The old all capital letter way of working is deprecated as it is not scalable.
AddConfigVar('mode',
"Default compilation mode",
EnumStr('Mode', 'ProfileMode', 'DebugMode', 'FAST_RUN',
'FAST_COMPILE', 'PROFILE_MODE', 'DEBUG_MODE'),
in_c_key=False)
# Test whether or not gcc is present: disable C code if it is not. # Test whether or not gcc is present: disable C code if it is not.
# Using the dummy file descriptor below is a workaround for a crash experienced # Using the dummy file descriptor below is a workaround for a crash experienced
# in an unusual Python 2.4.4 Windows environment with the default stdin=None. # in an unusual Python 2.4.4 Windows environment with the default stdin=None.
...@@ -84,13 +75,15 @@ try: ...@@ -84,13 +75,15 @@ try:
# Keep the default linker the same as the one for the mode FAST_RUN # Keep the default linker the same as the one for the mode FAST_RUN
AddConfigVar('linker', AddConfigVar('linker',
"Default linker used if the theano flags mode is Mode or ProfileMode", "Default linker used if the theano flags mode is Mode or ProfileMode",
EnumStr('c|py', 'py', 'c', 'c|py_nogc', 'c&py'), EnumStr('c|py', 'py', 'c', 'c|py_nogc', 'c&py',
'vm', 'cvm', 'vm_nogc', 'cvm_nogc'),
in_c_key=False) in_c_key=False)
except OSError: except OSError:
# gcc is not present, linker should default to python only # gcc is not present, linker should default to python only
AddConfigVar('linker', AddConfigVar('linker',
"Default linker used if the theano flags mode is Mode or ProfileMode", "Default linker used if the theano flags mode is Mode or ProfileMode",
EnumStr('py', 'c|py', 'c', 'c|py_nogc', 'c&py'), EnumStr('c|py', 'py', 'c', 'c|py_nogc', 'c&py',
'vm', 'cvm', 'vm_nogc', 'cvm_nogc'),
in_c_key=False) in_c_key=False)
warning('GCC not detected ! Theano will be unable to execute optimized '+ warning('GCC not detected ! Theano will be unable to execute optimized '+
'C-implementations (for both CPU and GPU) and will default to '+ 'C-implementations (for both CPU and GPU) and will default to '+
......
差异被折叠。
import os
import theano
from theano import config
from theano.gof.compilelock import get_lock, release_lock
from theano.gof import cmodule
# Build the `lazylinker_ext` extension from lazylinker_c.c and re-export its
# contents, holding the compilation-directory lock for the whole operation.
get_lock()
try:
    dirname = 'lazylinker_ext'
    cfile = os.path.join(theano.__path__[0], 'gof', 'lazylinker_c.c')
    # Close the source file explicitly instead of leaving the handle to the
    # garbage collector.  try/finally is used rather than `with` so the
    # module stays importable on old Python 2 interpreters.
    c_source = open(cfile)
    try:
        code = c_source.read()
    finally:
        c_source.close()
    loc = os.path.join(config.compiledir, dirname)
    if not os.path.exists(loc):
        os.mkdir(loc)
    cmodule.gcc_module_compile_str(dirname, code, location=loc)
    # NOTE(review): this import presumably relies on config.compiledir being
    # on sys.path -- confirm against the compiledir setup code.
    from lazylinker_ext.lazylinker_ext import *
finally:
    # Release lock on compilation directory.
    release_lock()
import gc
import sys
import time
import line_profiler
import numpy
from theano import function
from theano.gof import vm,link, OpWiseCLinker
from theano.compile import Mode
from theano import tensor
from theano.lazycond import cond
import theano
def test_speed():
    """Print the per-op cost of several linkers and of plain numpy.

    Every contender evaluates a short and a long chain of ``z + z``
    additions.  The difference between the two timings, scaled to one
    thousand ops, is printed as "<name> takes <t> s/Kop".
    """

    def build_graph(x, depth=5):
        # Chain `depth` self-additions starting from `x`.
        node = x
        for _ in range(depth):
            node = (node + node)
        return node

    def numpy_version(x, depth):
        # Same chain of self-additions, applied directly to `x`.
        acc = x
        for _ in xrange(depth):
            acc = (acc + acc)
        return acc

    def time_numpy():
        n_short = 5
        n_long = 100
        data = numpy.asarray([2.0, 3.0], dtype=theano.config.floatX)

        # One untimed call before the measurements start.
        numpy_version(data, n_short)

        short_start = time.time()
        print(numpy_version(data, n_short))
        short_end = time.time()

        long_start = time.time()
        print(numpy_version(data, n_long))
        long_end = time.time()

        short_elapsed = short_end - short_start
        long_elapsed = long_end - long_start
        print("%s takes %f s/Kop" % (
            'numpy',
            (1000*(long_elapsed-short_elapsed) / (n_long - n_short))))

    def time_linker(name, linker):
        n_short = 5
        n_long = 100
        x = tensor.vector()
        short_graph = build_graph(x, n_short)
        long_graph = build_graph(x, n_long)

        # A `profile=` argument can be passed to `function` to profile
        # either compiled function.
        f_short = function(
            [x], short_graph,
            mode=Mode(optimizer=None, linker=linker()))
        f_long = function(
            [x], long_graph,
            mode=Mode(optimizer=None, linker=linker()))

        # The first call of each function falls outside its timed window;
        # only the second call is measured.
        print(f_short([2.0, 3.0]))
        short_start = time.time()
        print(f_short([2.0, 3.0]))
        short_end = time.time()

        print(f_long([2.0, 3.0]))
        long_start = time.time()
        print(f_long([2.0, 3.0]))
        long_end = time.time()

        short_elapsed = short_end - short_start
        long_elapsed = long_end - long_start
        print("%s takes %f s/Kop" % (
            name,
            (1000*(long_elapsed-short_elapsed) / (n_long - n_short))))

    time_linker('c|py', OpWiseCLinker)
    time_linker('vmLinker', vm.VM_Linker)
    time_linker('vmLinker_nogc', lambda : vm.VM_Linker(allow_gc=False))
    time_linker('vmLinker_CLOOP',
                lambda : vm.VM_Linker(allow_gc=False, use_cloop=True))
    time_numpy()
def test_speed_lazy():
    """Print the per-op cost of the VM linkers on chains of `cond` nodes.

    A short and a long chain of ``cond(z > 0, -z, z)`` are compiled with each
    linker.  The difference between the two timings, scaled to one thousand
    ops, is printed as "<name> takes <t> s/Kop".
    """

    def build_graph(x, depth=5):
        # Stack `depth` cond nodes on top of `x`.
        node = x
        for _ in range(depth):
            node = cond(node> 0, -node, node)
        return node

    def time_linker(name, linker):
        n_short = 10
        n_long = 100
        x = tensor.vector()
        short_graph = build_graph(x, n_short)
        long_graph = build_graph(x, n_long)

        # A `profile=` argument can be passed to `function` to profile
        # either compiled function.
        f_short = function(
            [x], short_graph,
            mode=Mode(optimizer=None, linker=linker()))
        f_long = function(
            [x], long_graph,
            mode=Mode(optimizer=None, linker=linker()))

        # The first call of each function falls outside its timed window;
        # only the second call is measured.
        print(f_short([2.0]))
        short_start = time.time()
        print(f_short([2.0]))
        short_end = time.time()

        print(f_long([2.0]))
        long_start = time.time()
        print(f_long([2.0]))
        long_end = time.time()

        short_elapsed = short_end - short_start
        long_elapsed = long_end - long_start
        print("%s takes %f s/Kop" % (
            name,
            (1000*(long_elapsed-short_elapsed) / (n_long - n_short))))

    time_linker('vmLinker', vm.VM_Linker)
    time_linker('vmLinker_nogc', lambda : vm.VM_Linker(allow_gc=False))
    time_linker('vmLinker_C',
                lambda : vm.VM_Linker(allow_gc=False, use_cloop=True))
# Switch guarding the manual memory-usage checks below; while it is False
# none of them is defined.
run_memory_usage_tests = False
if run_memory_usage_tests:
    # these are not normal unit tests, do not run them as part of standard
    # suite.  I ran them while looking at top, and stopped when memory usage was
    # stable.
    # NOTE(review): the indentation of this section was reconstructed from a
    # flattened listing; confirm the nesting (in particular which statements
    # sit inside the loops) against the original file.

    def test_leak2():
        # Repeatedly wrap a fresh numpy array in a CudaNdarray and assert
        # that doing so leaves the array's reference count unchanged.
        import theano.sandbox.cuda as cuda
        for i in xrange(1000000):
            n = numpy.asarray([2.3, 4.5], dtype='f')
            c = sys.getrefcount(n)
            a = cuda.CudaNdarray(n)
            assert c == sys.getrefcount(n)
            # Every 1000 iterations print a progress dot followed by the
            # results of two garbage-collection passes.
            if not i % 1000:
                print '.',
                print gc.collect(),
                print gc.collect()
            sys.stdout.flush()

    def test_no_leak_many_graphs():
        # Verify no memory leaks when creating and deleting a lot of functions

        # This isn't really a unit test, you have to run it and look at top to see
        # if there's a leak
        for i in xrange(10000):
            x = tensor.vector()
            z = x
            # Graph of ten stacked sin(-z + 1) nodes.
            for d in range(10):
                z = tensor.sin(-z+ 1)

            f = function([x], z, mode=Mode(optimizer=None, linker='cvm'))
            # Every 100 iterations print the result of a collection pass.
            if not i % 100:
                print gc.collect()
            sys.stdout.flush()

            gc.collect()
            # Always-true switch: call the freshly compiled function a few
            # times before the next iteration replaces it.
            if 1:
                f([2.0])
                f([3.0])
                f([4.0])
                f([5.0])

    def test_no_leak_many_call_lazy():
        # Verify no memory leaks when calling a function a lot of times

        # This isn't really a unit test, you have to run it and look at top to see
        # if there's a leak

        def build_graph(x, depth=5):
            # Stack `depth` cond nodes on top of `x`.
            z = x
            for d in range(depth):
                z = cond(z> 0, -z, z)
            return z

        def time_linker(name, linker):
            # Compile a 10-node cond chain with `linker()` and call it
            # 100000 times; `name` is not used in the body.
            steps_a = 10
            x = tensor.vector()
            a = build_graph(x, steps_a)

            f_a = function([x], a,
                    mode=Mode(optimizer=None,
                        linker=linker()))

            for i in xrange(100000):
                f_a([2.0])
            # Disabled (`if 0`) resource-usage report, kept for reference.
            if 0: # this doesn't seem to work, prints 0 for everything
                import resource
                pre = resource.getrusage(resource.RUSAGE_SELF)
                post = resource.getrusage(resource.RUSAGE_SELF)
                print pre.ru_ixrss, post.ru_ixrss
                print pre.ru_idrss, post.ru_idrss
                print pre.ru_maxrss, post.ru_maxrss

        time_linker('vmLinker_C', lambda : vm.VM_Linker(allow_gc=False, use_cloop=True))

    def test_no_leak_many_call_nonlazy():
        # Verify no memory leaks when calling a function a lot of times

        # This isn't really a unit test, you have to run it and look at top to see
        # if there's a leak

        def build_graph(x, depth=5):
            # Stack `depth` sin(-z + 1) nodes on top of `x`.
            z = x
            for d in range(depth):
                z = tensor.sin(-z+1)
            return z

        def time_linker(name, linker):
            # Compile a 10-node sin chain with `linker()` and call it
            # 500000 times; `name` is not used in the body.
            steps_a = 10
            x = tensor.vector()
            a = build_graph(x,steps_a)

            f_a = function([x], a,
                    mode=Mode(optimizer=None,
                        linker=linker()))

            for i in xrange(500000):
                f_a([2.0])

        time_linker('vmLinker_C', lambda : vm.VM_Linker(allow_gc=False, use_cloop=True))
差异被折叠。
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论