diff --git a/caliGraph.py b/caliGraph.py index ccd4bff..a2de9b4 100755 --- a/caliGraph.py +++ b/caliGraph.py @@ -799,7 +799,7 @@ def addImageToNode(node, cache, shape='circularImage'): else: img = cache[name] if img: - node['imagePadding'] = '100px' + #node['imagePadding'] = '100px' node['image']=img node['shape']=shape @@ -992,7 +992,7 @@ def progress(G, minimum=3.5): print('Progress: '+str(perc)+'%') -def analyze(G, type_name, name, dist=2.1): +def analyze(G, books, type_name, name, dist=2.1): from fuzzywuzzy import fuzz type_ident = type_name[0] full_name = type_ident + "/" + name @@ -1009,6 +1009,8 @@ def analyze(G, type_name, name, dist=2.1): if bestRatio < 70: print("Best Match: "+match['label']) + findNewBooks(G, books, num=-1, minRecSco=1) + menge = set() waveFlow(G, match, n, dist, menge) for n in list(G.nodes): @@ -1093,6 +1095,7 @@ def shell(G, books, mu, std): def newBooks(G, books, num, mu, std): removeBad(G, mu-std*2) + removeThinRecs(G, 2) findNewBooks(G, books, num, minRecSco = mu-std) removeUnread(G) removeUselessReadBooks(G) @@ -1107,9 +1110,7 @@ def newBooks(G, books, num, mu, std): addScoreToLabels(G) -def findNewBooks(G, books, num, minRecSco=5): - removeBad(G, 0.1, groups=['recommender']) - removeThinRecs(G, 2) +def findNewBooks(G, books, num=-1, minRecSco=5): mrbdf = pd.read_csv('mrb_db.csv') recs = [] for n in list(G.nodes): @@ -1140,13 +1141,13 @@ def findNewBooks(G, books, num, minRecSco=5): scores = [] for m in list(G.adj[n]): adj = G.nodes[m] - if adj['t'] == 'recommender': + if adj['t'] == 'recommender' and adj['score']!=None: scores.append(adj['score']) ses.append(adj['se']) - ses.append(min(ses)) - if False and len(scores) < 2: + if not len(scores): G.remove_node(n) else: + ses.append(min(ses)) node['fake_se'] = sum(ses)/(len(ses)**1.2) + 0.5 + 0.5 * (len(scores)==1) # This is not how SE works. DILLIGAF? node['score'] = sum(scores)/len(scores)*1.2 - node['fake_se']*2 + 0.5 - 0.1/math.sqrt(len(scores)) if len(scores)==1: @@ -1154,7 +1155,8 @@ def findNewBooks(G, books, num, minRecSco=5): node['value'] = 20 + 5 * float(node['score']) node['label'] += " ({:.2f}±{:.1f})".format(node['score'], node['fake_se']) node['label'] += '\n ' + node['author'] - removeKeepBest(G, num, 10, 'newBook') + if num!=-1: + removeKeepBest(G, num, 10, 'newBook') # while batchSize is implemented, we only get a good gonvergence when we disable it (batchSize=-1) # but might be necessary to enable later for a larger libary for better training performance... @@ -1354,7 +1356,7 @@ def cliInterface(): elif args.cmd=="read": readBooksAnalysis(G, args.min_rating, args.all_tags, args.only_connected, not args.keep_top_lists) elif args.cmd=="analyze": - analyze(G, args.type, args.name, args.d) + analyze(G, books, args.type, args.name, args.d) elif args.cmd=="full": fullGraph(G, not args.keep_top_lists) elif args.cmd=="competence":