提交 b4563c5b 作者: Virgile Andreani 提交者: Ricardo Vieira

Refactor check_duplicate_key and remove mod.cu

上级 4d5aca03
...@@ -641,7 +641,7 @@ class ModuleCache: ...@@ -641,7 +641,7 @@ class ModuleCache:
The cache contains one directory for each module, containing: The cache contains one directory for each module, containing:
- the dynamic library file itself (e.g. ``.so/.pyd``), - the dynamic library file itself (e.g. ``.so/.pyd``),
- an empty ``__init__.py`` file, so Python can import it, - an empty ``__init__.py`` file, so Python can import it,
- a file containing the source code for the module (e.g. ``mod.cpp/mod.cu``), - a file containing the source code for the module (e.g. ``mod.cpp``),
- a ``key.pkl`` file, containing a KeyData object with all the keys - a ``key.pkl`` file, containing a KeyData object with all the keys
associated with that module, associated with that module,
- possibly a ``delete.me`` file, meaning this directory has been marked - possibly a ``delete.me`` file, meaning this directory has been marked
......
import os import os
import pickle import pickle
import sys import sys
from collections import Counter
from pytensor.configdefaults import config from pytensor.configdefaults import config
...@@ -15,14 +16,13 @@ if len(sys.argv) > 1: ...@@ -15,14 +16,13 @@ if len(sys.argv) > 1:
else: else:
dirs = os.listdir(config.compiledir) dirs = os.listdir(config.compiledir)
dirs = [os.path.join(config.compiledir, d) for d in dirs] dirs = [os.path.join(config.compiledir, d) for d in dirs]
keys: dict = {} # key -> nb seen keys: Counter[bytes] = Counter() # key -> nb seen
mods: dict = {} mods: dict = {}
for dir in dirs: for dir in dirs:
key = None key = None
try: try:
with open(os.path.join(dir, "key.pkl")) as f: with open(os.path.join(dir, "key.pkl"), "rb") as f:
key = f.read() key = f.read()
keys.setdefault(key, 0)
keys[key] += 1 keys[key] += 1
del f del f
except OSError: except OSError:
...@@ -30,62 +30,49 @@ for dir in dirs: ...@@ -30,62 +30,49 @@ for dir in dirs:
pass pass
try: try:
path = os.path.join(dir, "mod.cpp") path = os.path.join(dir, "mod.cpp")
if not os.path.exists(path): with open(path) as fmod:
path = os.path.join(dir, "mod.cu") mod = fmod.read()
with open(path) as f:
mod = f.read()
mods.setdefault(mod, ()) mods.setdefault(mod, ())
mods[mod] += (key,) mods[mod] += (key,)
del mod del mod
del f del fmod
del path del path
except OSError: except OSError:
print(dir, "don't have a mod.{cpp,cu} file") print(dir, "don't have a mod.cpp file")
if DISPLAY_DUPLICATE_KEYS: if DISPLAY_DUPLICATE_KEYS:
for k, v in keys.items(): for k, v in keys.items():
if v > 1: if v > 1:
print("Duplicate key (%i copies): %s" % (v, pickle.loads(k))) print("Duplicate key (%i copies): %s" % (v, pickle.loads(k)))
nbs_keys: dict = {} # nb seen -> now many key # nb seen -> how many keys
for val in keys.values(): nbs_keys = Counter(val for val in keys.values())
nbs_keys.setdefault(val, 0)
nbs_keys[val] += 1
nbs_mod: dict = {} # nb seen -> how many key # nb seen -> how many keys
nbs_mod_to_key = {} # nb seen -> keys nbs_mod = Counter(len(kk) for kk in mods.values())
more_than_one = 0 # nb seen -> keys
for mod, kk in mods.items(): nbs_mod_to_key = {len(kk): kk for kk in mods.values()}
val = len(kk) more_than_one = sum(len(kk) > 1 for kk in mods.values())
nbs_mod.setdefault(val, 0)
nbs_mod[val] += 1
if val > 1:
more_than_one += 1
nbs_mod_to_key[val] = kk
if DISPLAY_MOST_FREQUENT_DUPLICATE_CCODE: if DISPLAY_MOST_FREQUENT_DUPLICATE_CCODE:
m = max(nbs_mod.keys()) m = max(nbs_mod)
print("The keys associated to the mod.{cpp,cu} with the most number of copy:") print("The keys associated to the mod.cpp with the most number of copy:")
for kk in nbs_mod_to_key[m]: for kk in nbs_mod_to_key[m]:
kk = pickle.loads(kk) kk = pickle.loads(kk)
print(kk) print(kk)
print("key.pkl histograph") print("key.pkl histograph")
l = list(nbs_keys.items()) print(sorted(nbs_keys.items()))
l.sort()
print(l)
print("mod.{cpp,cu} histogram") print("mod.cpp histogram")
l = list(nbs_mod.items()) print(sorted(nbs_mod.items()))
l.sort()
print(l)
total = sum(len(k) for k in list(mods.values())) total = sum(len(k) for k in mods.values())
uniq = len(mods) uniq = len(mods)
useless = total - uniq useless = total - uniq
print("mod.{cpp,cu} total:", total) print("mod.cpp total:", total)
print("mod.{cpp,cu} uniq:", uniq) print("mod.cpp uniq:", uniq)
print("mod.{cpp,cu} with more than 1 copy:", more_than_one) print("mod.cpp with more than 1 copy:", more_than_one)
print("mod.{cpp,cu} useless:", useless, float(useless) / total * 100, "%") print("mod.cpp useless:", useless, float(useless) / total * 100, "%")
print("nb directory", len(dirs)) print("nb directory", len(dirs))
Markdown 格式
0%
您即将添加 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
请 注册 或者 登录 后发表评论