From 33ba27e2d04922776bcdb25cfcb73856c1fdcbc5 Mon Sep 17 00:00:00 2001
From: Dominik Roth
Date: Mon, 31 Jan 2022 13:45:26 +0100
Subject: [PATCH] Implemented Pagerank

---
Review notes (everything between the "---" line above and the diff is
ignored by `git am`):
  * Adds runPagerank(G): stores the nx.pagerank score of every node in
    G.nodes[n]['pagerank_score']; nodes without a score get 0, which is
    every node when power iteration fails to converge.
  * Calls runPagerank(G) at the end of graph construction in
    buildFullGraph and evaluateFitness.
  * scoreUnread appends node['pagerank_score'] to `feedbacks` and
    ['pagerank'] to `ws`.
  * evaluateFitness: regressionLoss factor changed from 0.005 to 0.003
    (both in the debug print and in `fit`).
  * NOTE(review): line breaks and indentation were reconstructed from a
    whitespace-collapsed copy of this patch. Hunk sizes were checked
    against the @@ headers, but the indentation depth of context lines
    (notably in the scoreUnread and getKeywords hunks) is a best guess.
    If a hunk is rejected, retry with `git apply --ignore-whitespace`.

 caliGraph.py | 17 +++++++++++++++--
 1 file changed, 15 insertions(+), 2 deletions(-)

diff --git a/caliGraph.py b/caliGraph.py
index 4c2577b..51a68bf 100755
--- a/caliGraph.py
+++ b/caliGraph.py
@@ -407,6 +407,8 @@ def scoreUnread(G, globMu, globStd):
                 if len(feedbacks):
                     node['mean'], node['std'] = norm.fit(feedbacks)
                     node['se'] = globStd / math.sqrt(len(feedbacks))
+                    feedbacks.append(node['pagerank_score'])
+                    ws.append(['pagerank'])
                     feedbacks.append(node['std'])
                     ws.append(['sigma'])
                     feedbacks.append(node['se'])
@@ -477,6 +479,15 @@ def getKeywords(txt,rake):
             return k
     return []
 
+def runPagerank(G):
+    try:
+        scores = nx.pagerank(G=G)
+    except nx.exception.PowerIterationFailedConvergence:
+        print('[!] Could not calculate pagerank-scores: power iteration of the eigenvector calculation did not converge')
+        scores = {}
+    for n in list(G.nodes):
+        G.nodes[n]['pagerank_score'] = scores[n] if n in scores else 0
+
 def buildBookGraph(books, darkMode=False, extractKeywords=True, mergeTags=True):
     G = nx.Graph()
     if extractKeywords:
@@ -622,6 +633,7 @@ def buildFullGraph(darkMode=False):
     graphAddTopLists(G, books, darkMode=darkMode)
     graphAddSeries(G, books, darkMode=darkMode)
     graphAddTags(G, books, darkMode=darkMode)
+    runPagerank(G)
     return G, books
 
 
@@ -898,6 +910,7 @@ def evaluateFitness(books, debugPrint=False):
     graphAddTopLists(G, books)
     graphAddSeries(G, books)
     graphAddTags(G, books)
+    runPagerank(G)
 
     ratedBooks = [n for n in list(G.nodes) if 'rating' in G.nodes[n] and G.nodes[n]['rating'] != None]
     boundsLoss = 0
@@ -932,8 +945,8 @@ def evaluateFitness(books, debugPrint=False):
     for g in gradient:
         gradient[g] /= len(errSq)
     if debugPrint:
-        print(sum(errSq)/len(errSq), 0.005*regressionLoss, 0.2*boundsLoss/len(ratedBooks), 1.0*sum(linSepLoss)/len(linSepLoss))
-    fit = sum(errSq)/len(errSq) + 0.005*regressionLoss + 0.2*boundsLoss/len(ratedBooks) - 1.0*sum(linSepLoss)/len(linSepLoss)
+        print(sum(errSq)/len(errSq), 0.003*regressionLoss, 0.2*boundsLoss/len(ratedBooks), 1.0*sum(linSepLoss)/len(linSepLoss))
+    fit = sum(errSq)/len(errSq) + 0.003*regressionLoss + 0.2*boundsLoss/len(ratedBooks) - 1.0*sum(linSepLoss)/len(linSepLoss)
     return fit, gradient
 
 def train(initGamma, full=True):