From cb0ad906ebb9302b1138018598095abb51499432 Mon Sep 17 00:00:00 2001
From: Dominik Roth
Date: Fri, 24 Sep 2021 16:13:55 +0200
Subject: [PATCH] Implemented Neural Net + training

---
 caliGraph.py       | 126 +++++++++++++++++++++++++++++++++++++--------
 neuralWeights.json |   1 +
 2 files changed, 106 insertions(+), 21 deletions(-)
 create mode 100644 neuralWeights.json

diff --git a/caliGraph.py b/caliGraph.py
index 740ae05..6e08c59 100755
--- a/caliGraph.py
+++ b/caliGraph.py
@@ -2,6 +2,7 @@
 import os
 import json
 import math
+import copy
 import random
 
 import numpy as np
@@ -78,7 +79,7 @@ def getAllTags(books):
     return list(tags)
 
 
-def getTopListWheight(book, topList):
+def getTopListWeight(book, topList):
     minScope = 100000
     for tag in book['tags']:
         if tag.find(topList+" Top ") != -1:
@@ -294,7 +295,24 @@ def removeUselessReadBooks(G):
                 else: # No unrated book in cousins
                     G.remove_node(n)
 
-def scoreOpinions(G, globMu, globStd, errorFac=-0.5):
+def removeUselessTags(G):
+    for n in list(G.nodes):
+        node = G.nodes[n]
+        if node['t'] == 'tag':
+            for adj in G.adj[n]:
+                foundUnread = True
+                adjNode = G.nodes[adj]
+                if adjNode['t']=='book' and 'score' in adjNode:
+                    break
+                else: # No unrated book here
+                    foundUnread = False
+                if foundUnread:
+                    break
+            else: # No unrated book in cousins
+                G.remove_node(n)
+
+
+def scoreOpinions(G, globMu, globStd, errorFac=0):
     for n in list(G.nodes):
         node = G.nodes[n]
         feedbacks = []
@@ -319,7 +337,7 @@ def scoreOpinions(G, globMu, globStd, errorFac=-0.5):
 def scoreUnread(G, globMu, globStd, errorFac=-0.6):
     for n in list(G.nodes):
         feedbacks = [globMu]
-        wheights = [getWheightForType('mu')]
+        weights = [getWeightForType('mu')]
         node = G.nodes[n]
         if node['t'] == 'book':
             if node['rating'] == None:
@@ -327,13 +345,15 @@ def scoreUnread(G, globMu, globStd, errorFac=-0.6):
                 for adj in adjacens:
                     adjNode = G.nodes[adj]
                     if 'score' in adjNode and adjNode['score'] != None:
-                        w = getWheightForType(adjNode['t'], G[n][adj]['wheight'] if 'wheight' in G[n][adj] else None)
+                        w = getWeightForType(adjNode['t'], G[n][adj]['weight'] if 'weight' in G[n][adj] else 1)
                         for fb in adjNode['feedbacks']:
                             feedbacks.append(fb)
-                            wheights.append(w)
+                            weights.append(w)
                 if len(feedbacks):
                     node['meanUnweighted'], node['std'] = norm.fit(feedbacks)
-                    node['mean'] = sum([fb*w for fb, w in zip(feedbacks, wheights)])/len(feedbacks)
+                    feedbacks.append(node['std'])
+                    weights.append(getWeightForType('sigma'))
+                    node['mean'] = sum([fb*w for fb, w in zip(feedbacks, weights)])/len(feedbacks)
                     node['se'] = globStd / math.sqrt(len(feedbacks))
                     node['score'] = node['mean'] + errorFac*node['se']
                 else:
@@ -343,11 +363,13 @@ def scoreUnread(G, globMu, globStd, errorFac=-0.6):
                     node['score'] += 0.000000001
 
 # TODO: Make this neural and train it
-def getWheightForType(nodeType, edgeWheight=None):
+def getWeightForType(nodeType, edgeWeight=1):
+    global weights
+    w = weights[nodeType]
     if nodeType == 'topList':
-        return edgeWheight*0.5
+        return edgeWeight*w
     else:
-        return 1.0
+        return w
 
 def printBestList(G, num=-1):
     bestlist = []
@@ -423,7 +445,7 @@ def graphAddTopLists(G, books):
         G.add_node('t/'+tl, color='yellow', t='topList', label=tl)
     for book in books:
         for top in getTopLists(book):
-            G.add_edge('t/'+top, book['id'], wheight=getTopListWheight(
+            G.add_edge('t/'+top, book['id'], weight=getTopListWeight(
                 book, top), color=readColor(book))
     return G
 
@@ -520,25 +542,27 @@ def genScores(G, books):
 
 def recommendNBooks(G, mu, std, n, removeTopListsB=True, removeUselessRecommenders=True):
     removeRestOfSeries(G)
-    removeBad(G, mu-std-1.5)
-    removeKeepBest(G, int(n*2) + 5, maxDistForRead=1.5)
+    removeBad(G, mu-std*2-1)
+    removeKeepBest(G, int(n*2) + 5, maxDistForRead=1.5*2)
     removeEdge(G)
     removeHighSpanTags(G, 9)
     removeDangling(G, alsoBooks=False)
-    pruneTags(G, 6)
+    pruneTags(G, 6*2)
     removeBad(G, mu, groups=['book'])
     removeUselessReadBooks(G)
-    pruneTags(G, 4.25)
+    pruneTags(G, 4.25*2)
     pruneRecommenderCons(G, int(n/7)+1)
     pruneAuthorCons(G, int(n/15))
+    removeUselessTags(G)
     if removeTopListsB:
         removeTopLists(G)
     removeDangling(G, alsoBooks=True)
-    removeKeepBest(G, n, maxDistForRead=0.75)
+    removeKeepBest(G, n, maxDistForRead=0.75*2)
     removeEdge(G)
     removeDangling(G, alsoBooks=True)
     if removeUselessRecommenders:
         removeUnusedRecommenders(G)
+    removeDangling(G, alsoBooks=True)
 
     scaleBooksByRating(G)
     scaleOpinionsByRating(G)
@@ -650,6 +674,54 @@ def waveFlow(G, node, n, dist, menge, firstEdge=False):
         if node in bestlist or node in keeplist:
             waveFlow(G, node, m, dist, menge, firstEdge=firstEdge)
 
+def evaluateFitness():
+    G, books = buildFullGraph()
+    ratedBooks = [n for n in list(G.nodes) if 'rating' in G.nodes[n] and G.nodes[n]['rating'] != None]
+    errSq = []
+    for m in ratedBooks:
+        rating = G.nodes[m]['rating']
+        G.nodes[m]['rating'] = None
+        mu, std = genScores(G, books)
+        errSq.append((rating - G.nodes[m]['score'])**2)
+        G.nodes[m]['rating'] = rating
+    return sum(errSq) / len(errSq)
+
+def train(gamma = 0.1):
+    global weights
+    bestWeights = copy.copy(weights)
+    best_mse = evaluateFitness()
+    w = list(weights.keys())
+    attr = random.choice(w)
+    delta = gamma * (-0.5 + (0.75 + 0.25*random.random()))
+
+    while True:
+        print({'mse': best_mse, 'w': weights, 'gamma': gamma})
+        weights = copy.copy(bestWeights)
+        weights[attr] += delta
+        mse = evaluateFitness()
+        if mse < best_mse: # got better
+            saveWeights(weights)
+            gamma *= 1.1
+            bestWeights = copy.copy(weights)
+            best_mse = mse
+            delta *= 2
+            if random.random() < 0.10:
+                attr = random.choice(w)
+        else:
+            weights = copy.copy(bestWeights)
+            gamma *= 0.8
+            attr = random.choice(w)
+            delta = gamma * (-0.5 + (0.75 + 0.25*random.random()))
+
+def saveWeights(weights):
+    with open('neuralWeights.json', 'w') as f:
+        f.write(json.dumps(weights))
+
+def loadWeights():
+    with open('neuralWeights.json', 'r') as f:
+        weights = json.loads(f.read())
+    return weights
+
 def cliInterface():
     import argparse
 
@@ -677,18 +749,19 @@ def cliInterface():
     p_show.add_argument('name', type=str)
     p_show.add_argument('-d', type=float, default=2.7, help='depth of expansion')
 
+    p_train = cmds.add_parser('train', description="TODO", aliases=[])
+    p_train.add_argument('-g', type=float, default=0.1, help='learning rate gamma')
+
     p_full = cmds.add_parser('full', description="TODO", aliases=[])
 
     args = parser.parse_args()
 
+    if args.cmd=="train":
+        train(args.g)
+        exit()
+
     G, books = buildFullGraph()
     mu, std = genScores(G, books)
-    if not args.keep_priv:
-        removePriv(G)
-    if args.remove_read:
-        removeRead(G)
-    elif args.remove_unread:
-        removeUnread(G)
 
     if args.cmd=="recommend":
         recommendNBooks(G, mu, std, args.n, not args.keep_top_lists, not args.keep_useless_recommenders)
@@ -701,6 +774,15 @@ def cliInterface():
     else:
         raise Exception("Bad")
 
+    if not args.keep_priv:
+        removePriv(G)
+    if args.remove_read:
+        removeRead(G)
+    elif args.remove_unread:
+        removeUnread(G)
+
+    removeDangling(G, alsoBooks=True)
+
     if args.remove_edge:
         removeEdge(G)
 
@@ -709,5 +791,7 @@ def cliInterface():
     if not args.no_web:
         genAndShowHTML(G)
 
+
+weights = loadWeights()
 if __name__ == "__main__":
     cliInterface()
diff --git a/neuralWeights.json b/neuralWeights.json
new file mode 100644
index 0000000..0637a91
--- /dev/null
+++ b/neuralWeights.json
@@ -0,0 +1 @@
+{"topList": 2.2007417599641768, "recommender": 1.338876321897804, "author": 2.1427159240782587, "series": 1.0, "tag": 1.0, "mu": 1.3256374080828757, "sigma": -0.9188502564501485}