From cc6606f468201d1875c09ba20219e4022bd781af Mon Sep 17 00:00:00 2001 From: Dominik Roth Date: Wed, 23 Jun 2021 15:45:32 +0200 Subject: [PATCH] Better Tag-Pruning (keep more good tags) --- caliGraph.py | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/caliGraph.py b/caliGraph.py index 6a07968..74f4276 100755 --- a/caliGraph.py +++ b/caliGraph.py @@ -28,11 +28,13 @@ def getRecommenders(book): for tag in book['tags']: if tag.find(" Recommendation") != -1: yield tag.replace(" Recommendation", "") + elif tag.find("s Literature Club") != -1: + yield tag.replace("s Literature Club", "") def getTags(book): for tag in book['tags']: - if tag.find(" Recommendation") == -1 and tag.find(" Top ") == -1: + if tag.find(" Recommendation") == -1 and tag.find("s Literature Club") == -1 and tag.find(" Top ") == -1: yield tag @@ -166,8 +168,14 @@ def pruneTags(G, minCons=2): foundCon = 0 for book in G.adj[n]: for con in G.adj[book]: - if G.nodes[con]['t'] not in ['tag', 'topList', 'series']: - foundCon += 1 + conType = G.nodes[con]['t'] + if conType not in ['topList']: + if conType in ['recommender']: + foundCon += 0.5 + elif conType in ['tag', 'series']: + foundCon += 0.25 + else: + foundCon += 1 if foundCon > minCons: G.remove_node(n) @@ -483,9 +491,9 @@ def recommendNBooks(G, mu, std, n): removeEdge(G) removeHighSpanTags(G, 9) removeDangling(G, alsoBooks=False) - pruneTags(G, 4) + pruneTags(G, 6) removeBad(G, mu, groups=['book']) - pruneTags(G, 3) + pruneTags(G, 4.25) pruneRecommenderCons(G, int(n/7)+1) pruneAuthorCons(G, int(n/15)) removeTopLists(G) @@ -544,7 +552,7 @@ def analyze(G, type_name, name, dist=2.7): print("Best Match: "+match['label']) menge = set() - pruneDist(G, match, n, dist, menge) + waveFlow(G, match, n, dist, menge) for n in list(G.nodes): if n not in menge: G.remove_node(n) @@ -560,7 +568,7 @@ def analyze(G, type_name, name, dist=2.7): addScoreToLabels(G) match['label'] = "*"+match['label']+"*" -def pruneDist(G, node, n, dist, menge, firstEdge=False): +def waveFlow(G, node, n, dist, menge, firstEdge=False): if dist <= 0: return dist -= 1 @@ -600,7 +608,7 @@ def pruneDist(G, node, n, dist, menge, firstEdge=False): for m in list(G.adj[n]): node = G.nodes[m] if node in bestlist or node in keeplist: - pruneDist(G, node, m, dist, menge, firstEdge=firstEdge) + waveFlow(G, node, m, dist, menge, firstEdge=firstEdge) def cliInterface(): import argparse