提交 3a4474c8 authored 作者: Frederic Bastien's avatar Frederic Bastien

Sort the histograms of keys and code files. Add an option to print the keys that…

Sort the histograms of keys and code files. Add an option to print the keys that most frequently give the same C code.
上级 c3c5d8a9
import pickle import cPickle
import os, sys import os, sys
import theano import theano
DISPLAY_DUPLICATE_KEYS = False DISPLAY_DUPLICATE_KEYS = False
DISPLAY_MOST_FREQUENT_DUPLICATE_CCODE = False
dirs = [] dirs = []
if len(sys.argv)>1: if len(sys.argv)>1:
...@@ -16,13 +17,13 @@ keys = {} # key -> nb seen ...@@ -16,13 +17,13 @@ keys = {} # key -> nb seen
mods = {} mods = {}
for dir in dirs: for dir in dirs:
key = None
try: try:
f = open(os.path.join(dir, "key.pkl")) f = open(os.path.join(dir, "key.pkl"))
key = f.read() key = f.read()
f.close() f.close()
keys.setdefault(key, 0) keys.setdefault(key, 0)
keys[key]+=1 keys[key]+=1
del key
del f del f
except IOError: except IOError:
#print dir, "don't have a key.pkl file" #print dir, "don't have a key.pkl file"
...@@ -34,8 +35,8 @@ for dir in dirs: ...@@ -34,8 +35,8 @@ for dir in dirs:
f = open(path) f = open(path)
mod = f.read() mod = f.read()
f.close() f.close()
mods.setdefault(mod, 0) mods.setdefault(mod, ())
mods[mod]+=1 mods[mod]+=(key,)
del mod del mod
del f del f
del path del path
...@@ -46,27 +47,42 @@ for dir in dirs: ...@@ -46,27 +47,42 @@ for dir in dirs:
if DISPLAY_DUPLICATE_KEYS: if DISPLAY_DUPLICATE_KEYS:
for k, v in keys.iteritems(): for k, v in keys.iteritems():
if v > 1: if v > 1:
print "Duplicate key (%i copies): %s" % (v, pickle.loads(k)) print "Duplicate key (%i copies): %s" % (v, cPickle.loads(k))
nbs = {} # nb seen -> now many key nbs_keys = {} # nb seen -> now many key
for val in keys.values(): for val in keys.values():
nbs.setdefault(val, 0) nbs_keys.setdefault(val, 0)
nbs[val]+=1 nbs_keys[val]+=1
print "key.pkl histograph" nbs_mod = {} # nb seen -> how many key
print nbs nbs_mod_to_key = {} #nb seen -> keys
nbs = {} # nb seen -> now many key
more_then_one = 0 more_then_one = 0
for val in mods.values(): for mod,kk in mods.iteritems():
nbs.setdefault(val, 0) val = len(kk)
nbs[val]+=1 nbs_mod.setdefault(val, 0)
nbs_mod[val]+=1
if val>1: if val>1:
more_then_one += 1 more_then_one += 1
nbs_mod_to_key[val] = kk
if DISPLAY_MOST_FREQUENT_DUPLICATE_CCODE:
m = max(nbs_mod.keys())
print "The keys associated to the mod.{cpp,cu} with the most number of copy:"
for kk in nbs_mod_to_key[m]:
kk = cPickle.loads(kk)
print kk
print "key.pkl histograph"
l = nbs_keys.items()
l.sort()
print l
print "mod.{cpp,cu} histogram" print "mod.{cpp,cu} histogram"
print nbs l = nbs_mod.items()
total = sum(mods.values()) l.sort()
print l
total = sum([len(k) for k in mods.values()])
uniq = len(mods) uniq = len(mods)
useless = total - uniq useless = total - uniq
print "mod.{cpp,cu} total:", total print "mod.{cpp,cu} total:", total
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论