'analyze' also finds newBooks

2022-02-11 17:37:23 +01:00 · 2022-02-11 17:37:23 +01:00 · bcec24fbf7
commit bcec24fbf7
parent 08fb19c6b9
1 changed files with 12 additions and 10 deletions
--- a/caliGraph.py
+++ b/caliGraph.py
@ -799,7 +799,7 @@ def addImageToNode(node, cache, shape='circularImage'):
    else:
        img = cache[name]
    if img:
-        node['imagePadding'] = '100px'
+        #node['imagePadding'] = '100px'
        node['image']=img
        node['shape']=shape

@ -992,7 +992,7 @@ def progress(G, minimum=3.5):
    print('Progress: '+str(perc)+'%')


-def analyze(G, type_name, name, dist=2.1):
+def analyze(G, books, type_name, name, dist=2.1):
    from fuzzywuzzy import fuzz
    type_ident = type_name[0]
    full_name = type_ident + "/" + name
@ -1009,6 +1009,8 @@ def analyze(G, type_name, name, dist=2.1):
    if bestRatio < 70:
        print("Best Match: "+match['label'])

+    findNewBooks(G, books, num=-1, minRecSco=1)
+
    menge = set()
    waveFlow(G, match, n, dist, menge)
    for n in list(G.nodes):
@ -1093,6 +1095,7 @@ def shell(G, books, mu, std):

 def newBooks(G, books, num, mu, std):
    removeBad(G, mu-std*2)
+    removeThinRecs(G, 2)
    findNewBooks(G, books, num, minRecSco = mu-std)
    removeUnread(G)
    removeUselessReadBooks(G)
@ -1107,9 +1110,7 @@ def newBooks(G, books, num, mu, std):
    addScoreToLabels(G)


-def findNewBooks(G, books, num, minRecSco=5):
-    removeBad(G, 0.1, groups=['recommender'])
-    removeThinRecs(G, 2)
+def findNewBooks(G, books, num=-1, minRecSco=5):
    mrbdf = pd.read_csv('mrb_db.csv')
    recs = []
    for n in list(G.nodes):
@ -1140,13 +1141,13 @@ def findNewBooks(G, books, num, minRecSco=5):
            scores = []
            for m in list(G.adj[n]):
                adj = G.nodes[m]
-                if adj['t'] == 'recommender':
+                if adj['t'] == 'recommender' and adj['score']!=None:
                    scores.append(adj['score'])
                    ses.append(adj['se'])
-            ses.append(min(ses))
-            if False and len(scores) < 2:
+            if not len(scores):
                G.remove_node(n)
            else:
+                ses.append(min(ses))
                node['fake_se'] = sum(ses)/(len(ses)**1.2) + 0.5 + 0.5 * (len(scores)==1) # This is not how SE works. DILLIGAF?
                node['score'] = sum(scores)/len(scores)*1.2 - node['fake_se']*2 + 0.5 - 0.1/math.sqrt(len(scores))
                if len(scores)==1:
@ -1154,6 +1155,7 @@ def findNewBooks(G, books, num, minRecSco=5):
                node['value'] = 20 + 5 * float(node['score'])
                node['label'] += " ({:.2f}±{:.1f})".format(node['score'], node['fake_se'])
                node['label'] += '\n ' + node['author']
+    if num!=-1:
        removeKeepBest(G, num, 10, 'newBook')

 # while batchSize is implemented, we only get a good convergence when we disable it (batchSize=-1)
@ -1354,7 +1356,7 @@ def cliInterface():
    elif args.cmd=="read":
        readBooksAnalysis(G, args.min_rating, args.all_tags, args.only_connected, not args.keep_top_lists)
    elif args.cmd=="analyze":
-        analyze(G, args.type, args.name, args.d)
+        analyze(G, books, args.type, args.name, args.d)
    elif args.cmd=="full":
        fullGraph(G, not args.keep_top_lists)
    elif args.cmd=="competence":