From 9a02bdc2a86e6c99dacb814db4f9ec332772853f Mon Sep 17 00:00:00 2001
From: Dominik Roth
Date: Tue, 22 Feb 2022 15:22:36 +0100
Subject: [PATCH] Allow disabling pagerank

---
 caliGraph.py | 26 +++++++++++++++-----------
 1 file changed, 15 insertions(+), 11 deletions(-)

diff --git a/caliGraph.py b/caliGraph.py
index b1929af..97cde1f 100755
--- a/caliGraph.py
+++ b/caliGraph.py
@@ -427,7 +427,8 @@ def scoreUnread(G, globMu, globStd):
             neuralBins['sigma'] = [node['std']]
             neuralBins['median'] = [node['median']]
             neuralBins['se'] = [node['se']]
-            neuralBins['pagerank'] = [node['pagerank_score']]
+            if 'pagerank_score' in node:
+                neuralBins['pagerank'] = [node['pagerank_score']]
             if 'tgb_rank' in node:
                 neuralBins['tgbrank'] = [10/math.ln10(10+node['tgb_rank'])]
             neuralBins['bias'] = [globMu]
@@ -1199,7 +1200,7 @@ def findNewBooks(G, books, mu, num=-1, minRecSco=5):
 # while batchSize is implemented, we only get a good gonvergence when we disable it (batchSize=-1)
 # but might be necessary to enable later for a larger libary for better training performance...
 # maybe try again for 128 books?
-def evaluateFitness(books, batchSize=16, debugPrint=False):
+def evaluateFitness(books, batchSize=16, debugPrint=False, usePagerank=True):
     global weights
     G = buildBookGraph(books)
     graphAddAuthors(G, books)
@@ -1207,7 +1208,8 @@
     graphAddTopLists(G, books)
     graphAddSeries(G, books)
     graphAddTags(G, books)
-    runPagerank(G)
+    if usePagerank:
+        runPagerank(G)
 
     ratedBooks = [n for n in list(G.nodes) if 'rating' in G.nodes[n] and G.nodes[n]['rating'] != None]
     boundsLoss = 0
@@ -1216,7 +1218,7 @@
     gradient = {}
     for w in weights:
         gradient[w] = 0
-    mu, sigma = genScores(G, books)
+    mu, sigma = genScores(G, books, calcPagerank=usePagerank)
     batch = random.sample(ratedBooks, batchSize) if batchSize!=-1 and len(ratedBooks) > batchSize else ratedBooks
     for b in G.nodes:
         if b in ratedBooks:
@@ -1248,7 +1250,7 @@
     fit = sum(errSq)/len(errSq) + 0.001*regressionLoss
     return fit, gradient
 
-def train(initGamma, full=True):
+def train(initGamma, full=True, noPagerank=False):
     global weights
     if full:
         for wt in weights:
@@ -1257,7 +1259,7 @@
     gamma = initGamma
     books = loadBooksFromDB()
     bestWeights = copy.copy(weights)
-    mse, gradient = evaluateFitness(books)
+    mse, gradient = evaluateFitness(books, usePagerank=not noPagerank)
     delta = math.sqrt(sum(gradient[g]**2 for g in gradient)/len(gradient))
     best_mse = mse
     stagLen = 0
@@ -1275,7 +1277,7 @@
                 weights[wt] += gamma*gradient[wt]/math.sqrt(delta)
             #else:
             #    del weights[wt]
-        mse, gradient = evaluateFitness(books)
+        mse, gradient = evaluateFitness(books, usePagerank=not noPagerank)
         if mse < last_mse:
             gamma = gamma*1.25
         else:
@@ -1332,6 +1334,7 @@ def cliInterface():
     parser.add_argument('--v3d', action="store_true")
     parser.add_argument('--imgs', action="store_true")
     parser.add_argument('--perf-test', action="store_true")
+    parser.add_argument('--no-pagerank', action="store_true")
 
     cmds = parser.add_subparsers(required=True, dest='cmd')
     p_rec = cmds.add_parser('recommend', description="TODO", aliases=['rec'])
@@ -1371,12 +1374,13 @@
     args = parser.parse_args()
 
-    if args.perfTest:
+    if args.perf_test:
         perfTestCLI(args)
     else:
         mainCLI(args)
 
 
 def perfTestCLI(args):
+    import time
     from pycallgraph import PyCallGraph
     from pycallgraph import Config
     from pycallgraph import GlobbingFilter
@@ -1385,18 +1389,18 @@ def perfTestCLI(args):
     config.trace_filter = GlobbingFilter(exclude=[
         "pycallgraph.*",
     ])
-    with PyCallGraph(output=GraphvizOutput(output_file='perfTests/serve_httpd_' + str(int(time.time())) + '.png'), config=config):
+    with PyCallGraph(output=GraphvizOutput(output_file='perfTests/' + str(int(time.time())) + '.png'), config=config):
         mainCLI(args)
 
 
 def mainCLI(args):
     if args.cmd=="train":
-        train(args.g, args.full)
+        train(args.g, args.full, args.no_pagerank)
         exit()
 
     bestListT = 'book'
     G, books = buildFullGraph(darkMode=args.dark)
-    mu, std = genScores(G, books)
+    mu, std = genScores(G, books, calcPagerank=not args.no_pagerank)
 
     if not args.keep_whitepapers:
         removeWhitepapers(G)
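
A minimal usage sketch of the toggle this patch introduces, assuming caliGraph.py can be imported as a module and a Calibre library is configured; the function names and keyword arguments come from the patch, while the `darkMode=False` value is only an illustrative placeholder:

```python
# Illustrative sketch only: exercises the pagerank toggle added by this patch.
# Assumes caliGraph.py is importable as a module and a Calibre library is set up;
# the CLI equivalent is passing --no-pagerank before the chosen subcommand.
import caliGraph

books = caliGraph.loadBooksFromDB()

# Training path: evaluate fitness without running the pagerank pass.
fit, gradient = caliGraph.evaluateFitness(books, usePagerank=False)

# Recommendation path: score an already-built graph, again skipping pagerank.
G, books = caliGraph.buildFullGraph(darkMode=False)
mu, std = caliGraph.genScores(G, books, calcPagerank=False)
```

Because `--no-pagerank` is registered on the top-level parser, ahead of the subcommand parsers, it is passed before `recommend` or `train` on the command line.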