Better Tag-Pruning (keep more good tags)

2021-06-23 15:45:32 +02:00 · 2021-06-23 15:45:32 +02:00 · cc6606f468
commit cc6606f468
parent a8d9f96e70
1 changed file with 16 additions and 8 deletions
--- a/caliGraph.py
+++ b/caliGraph.py
@@ -28,11 +28,13 @@ def getRecommenders(book):
    for tag in book['tags']:
        if tag.find(" Recommendation") != -1:
            yield tag.replace(" Recommendation", "")
+        elif tag.find("s Literature Club") != -1:
+            yield tag.replace("s Literature Club", "")
 def getTags(book):
    for tag in book['tags']:
-        if tag.find(" Recommendation") == -1 and tag.find(" Top ") == -1:
+        if tag.find(" Recommendation") == -1 and tag.find("s Literature Club") == -1 and tag.find(" Top ") == -1:
            yield tag
@@ -166,7 +168,13 @@ def pruneTags(G, minCons=2):
            foundCon = 0
            for book in G.adj[n]:
                for con in G.adj[book]:
-                    if G.nodes[con]['t'] not in ['tag', 'topList', 'series']:
+                    conType = G.nodes[con]['t']
+                    if conType not in ['topList']:
+                        if conType in ['recommender']:
+                            foundCon += 0.5
+                        elif conType in ['tag', 'series']:
+                            foundCon += 0.25
+                        else:
+                            foundCon += 1
            if foundCon > minCons:
                G.remove_node(n)
@@ -483,9 +491,9 @@ def recommendNBooks(G, mu, std, n):
    removeEdge(G)
    removeHighSpanTags(G, 9)
    removeDangling(G, alsoBooks=False)
-    pruneTags(G, 4)
+    pruneTags(G, 6)
    removeBad(G, mu, groups=['book'])
-    pruneTags(G, 3)
+    pruneTags(G, 4.25)
    pruneRecommenderCons(G, int(n/7)+1)
    pruneAuthorCons(G, int(n/15))
    removeTopLists(G)
@@ -544,7 +552,7 @@ def analyze(G, type_name, name, dist=2.7):
        print("Best Match: "+match['label'])
    menge = set()
-    pruneDist(G, match, n, dist, menge)
+    waveFlow(G, match, n, dist, menge)
    for n in list(G.nodes):
        if n not in menge:
            G.remove_node(n)
@@ -560,7 +568,7 @@ def analyze(G, type_name, name, dist=2.7):
    addScoreToLabels(G)
    match['label'] = "*"+match['label']+"*"
-def pruneDist(G, node, n, dist, menge, firstEdge=False):
+def waveFlow(G, node, n, dist, menge, firstEdge=False):
    if dist <= 0:
        return
    dist -= 1
@@ -600,7 +608,7 @@ def pruneDist(G, node, n, dist, menge, firstEdge=False):
    for m in list(G.adj[n]):
        node = G.nodes[m]
        if node in bestlist or node in keeplist:
-            pruneDist(G, node, m, dist, menge, firstEdge=firstEdge)
+            waveFlow(G, node, m, dist, menge, firstEdge=firstEdge)
 def cliInterface():
    import argparse