From 9a02bdc2a86e6c99dacb814db4f9ec332772853f Mon Sep 17 00:00:00 2001
From: Dominik Roth
Date: Tue, 22 Feb 2022 15:22:36 +0100
Subject: [PATCH] Allow disabling pagerank

---
 caliGraph.py | 26 +++++++++++++++-----------
 1 file changed, 15 insertions(+), 11 deletions(-)

diff --git a/caliGraph.py b/caliGraph.py
index b1929af..97cde1f 100755
--- a/caliGraph.py
+++ b/caliGraph.py
@@ -427,7 +427,8 @@ def scoreUnread(G, globMu, globStd):
             neuralBins['sigma'] = [node['std']]
             neuralBins['median'] = [node['median']]
             neuralBins['se'] = [node['se']]
-            neuralBins['pagerank'] = [node['pagerank_score']]
+            if 'pagerank_score' in node:
+                neuralBins['pagerank'] = [node['pagerank_score']]
             if 'tgb_rank' in node:
                 neuralBins['tgbrank'] = [10/math.ln10(10+node['tgb_rank'])]
             neuralBins['bias'] = [globMu]
@@ -1199,7 +1200,7 @@ def findNewBooks(G, books, mu, num=-1, minRecSco=5):
 # while batchSize is implemented, we only get a good gonvergence when we disable it (batchSize=-1)
 # but might be necessary to enable later for a larger libary for better training performance...
 # maybe try again for 128 books?
-def evaluateFitness(books, batchSize=16, debugPrint=False):
+def evaluateFitness(books, batchSize=16, debugPrint=False, usePagerank=True):
     global weights
     G = buildBookGraph(books)
     graphAddAuthors(G, books)
@@ -1207,7 +1208,8 @@
     graphAddTopLists(G, books)
     graphAddSeries(G, books)
     graphAddTags(G, books)
-    runPagerank(G)
+    if usePagerank:
+        runPagerank(G)
 
     ratedBooks = [n for n in list(G.nodes) if 'rating' in G.nodes[n] and G.nodes[n]['rating'] != None]
     boundsLoss = 0
@@ -1216,7 +1218,7 @@
     gradient = {}
     for w in weights:
         gradient[w] = 0
-    mu, sigma = genScores(G, books)
+    mu, sigma = genScores(G, books, calcPagerank=usePagerank)
     batch = random.sample(ratedBooks, batchSize) if batchSize!=-1 and len(ratedBooks) > batchSize else ratedBooks
     for b in G.nodes:
         if b in ratedBooks:
@@ -1248,7 +1250,7 @@
     fit = sum(errSq)/len(errSq) + 0.001*regressionLoss
     return fit, gradient
 
-def train(initGamma, full=True):
+def train(initGamma, full=True, noPagerank=False):
     global weights
     if full:
         for wt in weights:
@@ -1257,7 +1259,7 @@
     gamma = initGamma
     books = loadBooksFromDB()
     bestWeights = copy.copy(weights)
-    mse, gradient = evaluateFitness(books)
+    mse, gradient = evaluateFitness(books, usePagerank=not noPagerank)
     delta = math.sqrt(sum(gradient[g]**2 for g in gradient)/len(gradient))
     best_mse = mse
     stagLen = 0
@@ -1275,7 +1277,7 @@
                 weights[wt] += gamma*gradient[wt]/math.sqrt(delta)
             #else:
             #    del weights[wt]
-        mse, gradient = evaluateFitness(books)
+        mse, gradient = evaluateFitness(books, usePagerank=not noPagerank)
         if mse < last_mse:
             gamma = gamma*1.25
         else:
@@ -1332,6 +1334,7 @@ def cliInterface():
     parser.add_argument('--v3d', action="store_true")
     parser.add_argument('--imgs', action="store_true")
     parser.add_argument('--perf-test', action="store_true")
+    parser.add_argument('--no-pagerank', action="store_true")
 
     cmds = parser.add_subparsers(required=True, dest='cmd')
     p_rec = cmds.add_parser('recommend', description="TODO", aliases=['rec'])
@@ -1371,12 +1374,13 @@
     args = parser.parse_args()
 
-    if args.perfTest:
+    if args.perf_test:
         perfTestCLI(args)
     else:
         mainCLI(args)
 
 
 def perfTestCLI(args):
+    import time
     from pycallgraph import PyCallGraph
     from pycallgraph import Config
     from pycallgraph import GlobbingFilter
@@ -1385,18 +1389,18 @@ def perfTestCLI(args):
     config.trace_filter = GlobbingFilter(exclude=[
         "pycallgraph.*",
     ])
-    with PyCallGraph(output=GraphvizOutput(output_file='perfTests/serve_httpd_' + str(int(time.time())) + '.png'), config=config):
+    with PyCallGraph(output=GraphvizOutput(output_file='perfTests/' + str(int(time.time())) + '.png'), config=config):
         mainCLI(args)
 
 
 def mainCLI(args):
     if args.cmd=="train":
-        train(args.g, args.full)
+        train(args.g, args.full, args.no_pagerank)
         exit()
 
     bestListT = 'book'
     G, books = buildFullGraph(darkMode=args.dark)
-    mu, std = genScores(G, books)
+    mu, std = genScores(G, books, calcPagerank=not args.no_pagerank)
 
     if not args.keep_whitepapers:
         removeWhitepapers(G)
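
A minimal usage sketch of the toggle this patch introduces, assuming caliGraph.py can be imported as a module and a Calibre library is configured; the function names and keyword arguments come from the patch, while the `darkMode=False` value is only an illustrative placeholder:

```python
# Illustrative sketch only: exercises the pagerank toggle added by this patch.
# Assumes caliGraph.py is importable as a module and a Calibre library is set up;
# the CLI equivalent is passing --no-pagerank before the chosen subcommand.
import caliGraph

books = caliGraph.loadBooksFromDB()

# Training path: evaluate fitness without running the pagerank pass.
fit, gradient = caliGraph.evaluateFitness(books, usePagerank=False)

# Recommendation path: score an already-built graph, again skipping pagerank.
G, books = caliGraph.buildFullGraph(darkMode=False)
mu, std = caliGraph.genScores(G, books, calcPagerank=False)
```

Because `--no-pagerank` is registered on the top-level parser, ahead of the subcommand parsers, it is passed before `recommend` or `train` on the command line.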