提交 48ced75a authored 作者: Pascal Lamblin's avatar Pascal Lamblin

Simplify clear_old, and avoid going through all keys more than is necessary.

上级 28a50655
...@@ -496,21 +496,26 @@ class ModuleCache(object): ...@@ -496,21 +496,26 @@ class ModuleCache(object):
Older modules will be deleted in ``clear_old``. Older modules will be deleted in ``clear_old``.
""" """
def refresh(self, delete_if_problem=False): def refresh(self, age_thresh_use=None, delete_if_problem=False):
"""Update cache data by walking the cache directory structure. """Update cache data by walking the cache directory structure.
Load key.pkl files that have not been loaded yet. Load key.pkl files that have not been loaded yet.
Remove entries which have been removed from the filesystem. Remove entries which have been removed from the filesystem.
Also, remove malformed cache directories. Also, remove malformed cache directories.
:param age_thresh_use: Do not use modules older than this.
Defaults to self.age_thresh_use.
:param delete_if_problem: If True, cache entries that meet one of those :param delete_if_problem: If True, cache entries that meet one of those
two conditions are deleted: two conditions are deleted:
- Those for which unpickling the KeyData file fails with an - Those for which unpickling the KeyData file fails with an
unknown exception. unknown exception.
- Duplicated modules, regardless of their age. - Duplicated modules, regardless of their age.
:returns: a list of modules of age higher than self.age_thresh_use. :returns: a list of modules of age higher than age_thresh_use.
""" """
if age_thresh_use is None:
age_thresh_use = self.age_thresh_use
start_time = time.time() start_time = time.time()
too_old_to_use = [] too_old_to_use = []
...@@ -540,7 +545,7 @@ class ModuleCache(object): ...@@ -540,7 +545,7 @@ class ModuleCache(object):
_rmtree(root, ignore_nocleanup=True, _rmtree(root, ignore_nocleanup=True,
msg="missing module file", level="info") msg="missing module file", level="info")
continue continue
if (time_now - last_access_time(entry)) < self.age_thresh_use: if (time_now - last_access_time(entry)) < age_thresh_use:
debug('refresh adding', key_pkl) debug('refresh adding', key_pkl)
def unpickle_failure(): def unpickle_failure():
info("ModuleCache.refresh() Failed to unpickle " info("ModuleCache.refresh() Failed to unpickle "
...@@ -978,35 +983,31 @@ class ModuleCache(object): ...@@ -978,35 +983,31 @@ class ModuleCache(object):
if age_thresh_del is None: if age_thresh_del is None:
age_thresh_del = self.age_thresh_del age_thresh_del = self.age_thresh_del
# Ensure that the too_old_to_use list returned by refresh() will
# contain all modules older than age_thresh_del.
if age_thresh_del < self.age_thresh_use:
info("Clearing modules that were not deemed to old to use:",
"age_thresh_del=%d," % age_thresh_del,
"self.age_thresh_use=%d" % self.age_thresh_use)
age_thresh_use = age_thresh_del
else:
age_thresh_use = None
compilelock.get_lock() compilelock.get_lock()
try: try:
# Update the age of modules that have been accessed by other # Update the age of modules that have been accessed by other
# processes and get all modules that are too old to use # processes and get all modules that are too old to use
# (not loaded in self.entry_from_key). # (not loaded in self.entry_from_key).
too_old_to_use = self.refresh(delete_if_problem=delete_if_problem) too_old_to_use = self.refresh(
time_now = time.time() age_thresh_use=age_thresh_use,
delete_if_problem=delete_if_problem)
# Build list of module files and associated keys. for entry in too_old_to_use:
entry_to_key_data = dict((entry, None) for entry in too_old_to_use) # TODO: we are assuming that modules that haven't been
for key_data in self.module_hash_to_key_data.itervalues(): # accessed in over age_thresh_del are not currently in
entry = key_data.get_entry() # use by other processes, but that could be false for
# Since we loaded this file, it should not be in # long-running jobs, or if age_thresh_del < 0.
# too_old_to_use.
assert entry not in entry_to_key_data
entry_to_key_data[entry] = key_data
for entry, key_data in entry_to_key_data.iteritems():
age = time_now - last_access_time(entry)
if age > age_thresh_del:
# TODO: we are assuming that modules that haven't been accessed in over
# age_thresh_del are not currently in use by other processes, but that could be
# false for long-running jobs...
assert entry not in self.module_from_name assert entry not in self.module_from_name
if key_data is not None:
key_data.delete_keys_from(self.entry_from_key)
del self.module_hash_to_key_data[key_data.module_hash]
if key_data.key_pkl in self.loaded_key_pkl:
self.loaded_key_pkl.remove(key_data.key_pkl)
parent = os.path.dirname(entry) parent = os.path.dirname(entry)
assert parent.startswith(os.path.join(self.dirname, 'tmp')) assert parent.startswith(os.path.join(self.dirname, 'tmp'))
_rmtree(parent, msg='old cache directory', level='info', _rmtree(parent, msg='old cache directory', level='info',
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论