'analyze' also finds newBooks

This commit is contained in:
Dominik Moritz Roth 2022-02-11 17:37:23 +01:00
parent 08fb19c6b9
commit bcec24fbf7

View File

@@ -799,7 +799,7 @@ def addImageToNode(node, cache, shape='circularImage'):
     else:
         img = cache[name]
     if img:
-        node['imagePadding'] = '100px'
+        #node['imagePadding'] = '100px'
         node['image']=img
         node['shape']=shape
@@ -992,7 +992,7 @@ def progress(G, minimum=3.5):
         print('Progress: '+str(perc)+'%')

-def analyze(G, type_name, name, dist=2.1):
+def analyze(G, books, type_name, name, dist=2.1):
     from fuzzywuzzy import fuzz
     type_ident = type_name[0]
     full_name = type_ident + "/" + name
@@ -1009,6 +1009,8 @@ def analyze(G, books, type_name, name, dist=2.1):
     if bestRatio < 70:
         print("Best Match: "+match['label'])
+    findNewBooks(G, books, num=-1, minRecSco=1)
     menge = set()
     waveFlow(G, match, n, dist, menge)
     for n in list(G.nodes):
@@ -1093,6 +1095,7 @@ def shell(G, books, mu, std):

 def newBooks(G, books, num, mu, std):
     removeBad(G, mu-std*2)
+    removeThinRecs(G, 2)
     findNewBooks(G, books, num, minRecSco = mu-std)
     removeUnread(G)
     removeUselessReadBooks(G)
@@ -1107,9 +1110,7 @@ def newBooks(G, books, num, mu, std):
     addScoreToLabels(G)

-def findNewBooks(G, books, num, minRecSco=5):
-    removeBad(G, 0.1, groups=['recommender'])
-    removeThinRecs(G, 2)
+def findNewBooks(G, books, num=-1, minRecSco=5):
     mrbdf = pd.read_csv('mrb_db.csv')
     recs = []
     for n in list(G.nodes):
@@ -1140,13 +1141,13 @@ def findNewBooks(G, books, num=-1, minRecSco=5):
             scores = []
             for m in list(G.adj[n]):
                 adj = G.nodes[m]
-                if adj['t'] == 'recommender':
+                if adj['t'] == 'recommender' and adj['score']!=None:
                     scores.append(adj['score'])
                     ses.append(adj['se'])
-            ses.append(min(ses))
-            if False and len(scores) < 2:
+            if not len(scores):
                 G.remove_node(n)
             else:
+                ses.append(min(ses))
                 node['fake_se'] = sum(ses)/(len(ses)**1.2) + 0.5 + 0.5 * (len(scores)==1) # This is not how SE works. DILLIGAF?
                 node['score'] = sum(scores)/len(scores)*1.2 - node['fake_se']*2 + 0.5 - 0.1/math.sqrt(len(scores))
                 if len(scores)==1:
@@ -1154,7 +1155,8 @@ def findNewBooks(G, books, num=-1, minRecSco=5):
                 node['value'] = 20 + 5 * float(node['score'])
                 node['label'] += " ({:.2f}±{:.1f})".format(node['score'], node['fake_se'])
                 node['label'] += '\n ' + node['author']
-    removeKeepBest(G, num, 10, 'newBook')
+    if num!=-1:
+        removeKeepBest(G, num, 10, 'newBook')

 # while batchSize is implemented, we only get a good gonvergence when we disable it (batchSize=-1)
 # but might be necessary to enable later for a larger libary for better training performance...
@ -1354,7 +1356,7 @@ def cliInterface():
elif args.cmd=="read": elif args.cmd=="read":
readBooksAnalysis(G, args.min_rating, args.all_tags, args.only_connected, not args.keep_top_lists) readBooksAnalysis(G, args.min_rating, args.all_tags, args.only_connected, not args.keep_top_lists)
elif args.cmd=="analyze": elif args.cmd=="analyze":
analyze(G, args.type, args.name, args.d) analyze(G, books, args.type, args.name, args.d)
elif args.cmd=="full": elif args.cmd=="full":
fullGraph(G, not args.keep_top_lists) fullGraph(G, not args.keep_top_lists)
elif args.cmd=="competence": elif args.cmd=="competence":