From 9318811d8ae4ba0af3f58220b84a32919794304c Mon Sep 17 00:00:00 2001 From: Dominik Roth Date: Fri, 24 Sep 2021 17:13:36 +0200 Subject: [PATCH] Small tweaks to the scoring-algo and fewer calls to calibre when training --- caliGraph.py | 40 +++++++++++++++++++++++++--------------- 1 file changed, 25 insertions(+), 15 deletions(-) diff --git a/caliGraph.py b/caliGraph.py index bdc2df5..89719c1 100755 --- a/caliGraph.py +++ b/caliGraph.py @@ -316,7 +316,7 @@ def scoreOpinions(G, globMu, globStd, errorFac=0): for n in list(G.nodes): node = G.nodes[n] feedbacks = [] - if node['t'] in ['topList', 'recommender', 'author', 'series', 'tag']: + if node['t'] not in ['book']: adjacens = list(G.adj[n].keys()) for adj in adjacens: adjNode = G.nodes[adj] @@ -351,10 +351,12 @@ def scoreUnread(G, globMu, globStd, errorFac=-0.6): weights.append(w) if len(feedbacks): node['meanUnweighted'], node['std'] = norm.fit(feedbacks) + node['se'] = globStd / math.sqrt(len(feedbacks)) feedbacks.append(node['std']) weights.append(getWeightForType('sigma')) + feedbacks.append(1-1/len(feedbacks)) + weights.append(getWeightForType('stability')) node['mean'] = sum([fb*w for fb, w in zip(feedbacks, weights)])/len(feedbacks) - node['se'] = globStd / math.sqrt(len(feedbacks)) node['score'] = node['mean'] + errorFac*node['se'] else: node['score'] = globMu + errorFac*globStd + len(feedbacks)*0.0000000001 @@ -507,9 +509,9 @@ def addScoreToLabels(G): node['label'] += " ("+str(node['rating'])+")" else: if 'score' in node and node['score'] != None: - node['label'] += " (~{:.2f}".format(node['score'])+")" + node['label'] += " (~{:.2f}±{:.2f})".format(node['score'], node['std']) else: - node['label'] += " (~0)" + node['label'] += " (~0±∞)" def genAndShowHTML(G, showButtons=False): @@ -674,8 +676,14 @@ def waveFlow(G, node, n, dist, menge, firstEdge=False): if node in bestlist or node in keeplist: waveFlow(G, node, m, dist, menge, firstEdge=firstEdge) -def evaluateFitness(): - G, books = buildFullGraph() +def 
evaluateFitness(books): + G = buildBookGraph(books) + graphAddAuthors(G, books) + graphAddRecommenders(G, books) + graphAddTopLists(G, books) + graphAddSeries(G, books) + graphAddTags(G, books) + ratedBooks = [n for n in list(G.nodes) if 'rating' in G.nodes[n] and G.nodes[n]['rating'] != None] errSq = [] for m in ratedBooks: @@ -691,26 +699,28 @@ def evaluateFitness(): def train(gamma = 0.1): global weights + books = loadBooksFromDB() bestWeights = copy.copy(weights) - best_mse = evaluateFitness() + best_mse = evaluateFitness(books) w = list(weights.keys()) attr = random.choice(w) delta = gamma * (-0.5 + (0.75 + 0.25*random.random())) - while True: + while gamma > 1.0e-08: print({'mse': best_mse, 'w': weights, 'gamma': gamma}) weights = copy.copy(bestWeights) - if gamma < 0.01 and random.random() < 0.5: - gamma = 0.01 - weights[attr] = -1+random.random()*2 + if gamma < 0.01: + while random.random() < 0.5: + attr = random.choice(w) + weights[attr] = -0.1+random.random()*1.5 else: weights[attr] += delta - if attr not in ['sigma, mu']: - weights[attr] = min(max(0, weight[attr]), 1.5) - mse = evaluateFitness() + if attr not in ['sigma', 'mu', 'stability']: + weights[attr] = min(max(0, weights[attr]), 3) + mse = evaluateFitness(books) if mse < best_mse: # got better saveWeights(weights) - gamma *= 1.75 + gamma = max(gamma*1.75, 0.001) bestWeights = copy.copy(weights) best_mse = mse delta *= 2