From 9318811d8ae4ba0af3f58220b84a32919794304c Mon Sep 17 00:00:00 2001
From: Dominik Roth <dominik.roth.dev@gmail.com>
Date: Fri, 24 Sep 2021 17:13:36 +0200
Subject: [PATCH] Small tweaks to the scoring-algo and fewer calls to calibre
 when training

---
 caliGraph.py | 40 +++++++++++++++++++++++++---------------
 1 file changed, 25 insertions(+), 15 deletions(-)

diff --git a/caliGraph.py b/caliGraph.py
index bdc2df5..89719c1 100755
--- a/caliGraph.py
+++ b/caliGraph.py
@@ -316,7 +316,7 @@ def scoreOpinions(G, globMu, globStd, errorFac=0):
     for n in list(G.nodes):
         node = G.nodes[n]
         feedbacks = []
-        if node['t'] in ['topList', 'recommender', 'author', 'series', 'tag']:
+        if node['t'] not in ['book']:
             adjacens = list(G.adj[n].keys())
             for adj in adjacens:
                 adjNode = G.nodes[adj]
@@ -351,10 +351,12 @@ def scoreUnread(G, globMu, globStd, errorFac=-0.6):
                             weights.append(w)
                 if len(feedbacks):
                     node['meanUnweighted'], node['std'] = norm.fit(feedbacks)
+                    node['se'] = globStd / math.sqrt(len(feedbacks))
                     feedbacks.append(node['std'])
                     weights.append(getWeightForType('sigma'))
+                    feedbacks.append(1-1/len(feedbacks))
+                    weights.append(getWeightForType('stability'))
                     node['mean'] = sum([fb*w for fb, w in zip(feedbacks, weights)])/len(feedbacks)
-                    node['se'] = globStd / math.sqrt(len(feedbacks))
                     node['score'] = node['mean'] + errorFac*node['se']
                 else:
                     node['score'] = globMu + errorFac*globStd + len(feedbacks)*0.0000000001
@@ -507,9 +509,9 @@ def addScoreToLabels(G):
                 node['label'] += " ("+str(node['rating'])+")"
             else:
                 if 'score' in node and node['score'] != None:
-                    node['label'] += " (~{:.2f}".format(node['score'])+")"
+                    node['label'] += " (~{:.2f}±{:.2f})".format(node['score'], node['std'])
                 else:
-                    node['label'] += " (~0)"
+                    node['label'] += " (~0±∞)"
 
 
 def genAndShowHTML(G, showButtons=False):
@@ -674,8 +676,14 @@ def waveFlow(G, node, n, dist, menge, firstEdge=False):
         if node in bestlist or node in keeplist:
             waveFlow(G, node, m, dist, menge, firstEdge=firstEdge)
 
-def evaluateFitness():
-    G, books = buildFullGraph()
+def evaluateFitness(books):
+    G = buildBookGraph(books)
+    graphAddAuthors(G, books)
+    graphAddRecommenders(G, books)
+    graphAddTopLists(G, books)
+    graphAddSeries(G, books)
+    graphAddTags(G, books)
+
     ratedBooks = [n for n in list(G.nodes) if 'rating' in G.nodes[n] and G.nodes[n]['rating'] != None]
     errSq = []
     for m in ratedBooks:
@@ -691,26 +699,28 @@ def evaluateFitness():
 
 def train(gamma = 0.1):
     global weights
+    books = loadBooksFromDB()
     bestWeights = copy.copy(weights)
-    best_mse = evaluateFitness()
+    best_mse = evaluateFitness(books)
     w = list(weights.keys())
     attr = random.choice(w)
     delta = gamma * (-0.5 + (0.75 + 0.25*random.random()))
 
-    while True:
+    while gamma > 1.0e-08:
         print({'mse': best_mse, 'w': weights, 'gamma': gamma})
         weights = copy.copy(bestWeights)
-        if gamma < 0.01 and random.random() < 0.5:
-            gamma = 0.01
-            weights[attr] = -1+random.random()*2
+        if gamma < 0.01:
+            while random.random() < 0.5:
+                attr = random.choice(w)
+                weights[attr] = -0.1+random.random()*1.5
         else:
             weights[attr] += delta
-        if attr not in ['sigma, mu']:
-            weights[attr] = min(max(0, weight[attr]), 1.5)
-        mse = evaluateFitness()
+        if attr not in ['sigma', 'mu', 'stability']:
+            weights[attr] = min(max(0, weights[attr]), 3)
+        mse = evaluateFitness(books)
         if mse < best_mse: # got better
             saveWeights(weights)
-            gamma *= 1.75
+            gamma = max(gamma*1.75, 0.001)
             bestWeights = copy.copy(weights)
             best_mse = mse
             delta *= 2