# Provenance: function added to main.py by commit
# 612d8f296767531392740407f02c7704d2b4ab09 (Dominik Roth, 2021-06-15), which
# added pruning of connections from authors above a threshold and tweaked
# some parameters. The same commit changes the calls in recommendNBooks to:
#     pruneRecommenderCons(G, int(n/7)+1)
#     pruneAuthorCons(G, int(n/15))
# NOTE(review): the block nesting below was reconstructed from a
# whitespace-collapsed patch -- confirm against main.py.
def pruneAuthorCons(G, maxCons=3):
    """Prune weakly connected books of authors that have too many neighbours.

    For every node whose ``'t'`` attribute is ``'author'`` and that has more
    than ``maxCons`` neighbours, the ``maxCons`` best-scored neighbouring
    books are kept. Every other neighbour that is a book (or any neighbour
    whose ``'score'`` is ``None``) is removed from the graph, but only if it
    has no ``'rating'`` and fewer than two neighbours that are not of type
    ``'topList'``.

    Args:
        G: Graph object exposing ``nodes`` (mapping node -> attribute dict),
            ``adj`` (mapping node -> neighbours) and ``remove_node``.
            Modified in place.
        maxCons: Maximum number of scored books to protect per author.
            Negative values are treated as 0.

    Returns:
        None.
    """
    # A negative cap would slice from the end of the list ("all but the last
    # k"), which is never what a maximum means.
    keepCount = max(maxCons, 0)

    for n in list(G.nodes):
        # The snapshot may contain books already removed while pruning an
        # earlier author; looking them up would raise KeyError.
        if n not in G.nodes:
            continue
        if G.nodes[n]['t'] != 'author':
            continue

        neighbours = list(G.adj[n])
        if len(neighbours) <= maxCons:
            continue

        # Rank neighbouring books by score (highest first). The sort is
        # stable, so ties keep adjacency order.
        scored = [
            m for m in neighbours
            if G.nodes[m]['t'] == 'book'
            and G.nodes[m].get('score') is not None
        ]
        scored.sort(key=lambda m: G.nodes[m]['score'], reverse=True)
        # Track kept books by node key, not by attribute-dict equality, so
        # two books with identical attributes are not confused.
        keep = set(scored[:keepCount])

        for m in neighbours:
            book = G.nodes[m]
            isSurplusBook = book['t'] == 'book' and m not in keep
            hasNullScore = 'score' in book and book['score'] is None
            if not (isSurplusBook or hasNullScore):
                continue
            # Rated nodes are never removed here.
            if book.get('rating') is not None:
                continue
            # Links to top lists do not count as real connections.
            foundCon = sum(
                1 for con in G.adj[m] if G.nodes[con]['t'] != 'topList'
            )
            if foundCon < 2:
                G.remove_node(m)