Approximate Pagerank while training (+performance) and implemented

batch-training (disabled due to non-convergence)
2022-02-04 20:34:59 +01:00 · 2022-02-04 20:34:59 +01:00 · 5a229a4b2b
commit 5a229a4b2b
parent aa2e5a41bb
2 changed files with 12 additions and 6 deletions
--- a/caliGraph.py
+++ b/caliGraph.py
@ -640,9 +640,10 @@ def buildFullGraph(darkMode=False):
    return G, books


-def genScores(G, books):
+def genScores(G, books, calcPagerank=True):
    globMu, globStd = calcRecDist(G, books)
-    runPagerank(G)
+    if calcPagerank:
+        runPagerank(G)
    scoreOpinions(G, globMu, globStd)
    scoreUnread(G, globMu, globStd)
    return globMu, globStd
@ -906,7 +907,10 @@ def shell(G, books, mu, std):
    from ptpython.repl import embed
    embed(globals(), locals())

-def evaluateFitness(books, debugPrint=False):
+# while batchSize is implemented, we only get good convergence when we disable it (batchSize=-1)
+# but it might be necessary to enable it later for a larger library for better training performance...
+# maybe try again for 128 books?
+def evaluateFitness(books, batchSize=-1, debugPrint=False):
    global weights
    G = buildBookGraph(books)
    graphAddAuthors(G, books)
@ -914,6 +918,7 @@ def evaluateFitness(books, debugPrint=False):
    graphAddTopLists(G, books)
    graphAddSeries(G, books)
    graphAddTags(G, books)
+    runPagerank(G)

    ratedBooks = [n for n in list(G.nodes) if 'rating' in G.nodes[n] and G.nodes[n]['rating'] != None]
    boundsLoss = 0
@ -924,10 +929,11 @@ def evaluateFitness(books, debugPrint=False):
        gradient[wt] = 0
    mu, sigma = genScores(G, books)
    for b in G.nodes:
-        if b in ratedBooks:
+        batch = random.sample(ratedBooks, batchSize) if batchSize!=-1 and len(ratedBooks) > batchSize else ratedBooks
+        if b in batch:
            rating = G.nodes[b]['rating']
            G.nodes[b]['rating'] = None
-            _, _ = genScores(G, books)
+            _, _ = genScores(G, books, calcPagerank=False)
            if G.nodes[b]['score'] > rating: # over estimated
                errSq.append(((rating - G.nodes[b]['score'])**2)*2)
            else:
--- a/neuralWeights.json
+++ b/neuralWeights.json
@ -1 +1 @@
-{"topList": 0.6242390366079424, "recommender": 0.19944547091869327, "author": 0.5538234948661109, "series": 0.3459804141050738, "tag": 0.014690671910647879, "pagerank": 0.19122234015715153, "mu": 0.6727738107799146, "sigma": 0.6261661745577459, "bias": 0.10501847188587837}
+{"topList": 0.5830829472833505, "recommender": 0.1853397829569056, "author": 0.68263573164139, "series": 0.3041545091933662, "tag": 0.007576290358844896, "pagerank": 0.07216203473617633, "mu": 0.8203785554750931, "sigma": 0.5483459224046601, "bias": 0.1280901837325877}