Implemented Neural Net + training

This commit is contained in:
Dominik Moritz Roth 2021-09-24 16:13:55 +02:00
parent 0231d97a42
commit cb0ad906eb
2 changed files with 106 additions and 21 deletions

View File

@ -2,6 +2,7 @@
import os import os
import json import json
import math import math
import copy
import random import random
import numpy as np import numpy as np
@ -78,7 +79,7 @@ def getAllTags(books):
return list(tags) return list(tags)
def getTopListWheight(book, topList): def getTopListWeight(book, topList):
minScope = 100000 minScope = 100000
for tag in book['tags']: for tag in book['tags']:
if tag.find(topList+" Top ") != -1: if tag.find(topList+" Top ") != -1:
@ -294,7 +295,24 @@ def removeUselessReadBooks(G):
else: # No unrated book in cousins else: # No unrated book in cousins
G.remove_node(n) G.remove_node(n)
def scoreOpinions(G, globMu, globStd, errorFac=-0.5): def removeUselessTags(G):
for n in list(G.nodes):
node = G.nodes[n]
if node['t'] == 'tag':
for adj in G.adj[n]:
foundUnread = True
adjNode = G.nodes[adj]
if adjNode['t']=='book' and 'score' in adjNode:
break
else: # No unrated book here
foundUnread = False
if foundUnread:
break
else: # No unrated book in cousins
G.remove_node(n)
def scoreOpinions(G, globMu, globStd, errorFac=0):
for n in list(G.nodes): for n in list(G.nodes):
node = G.nodes[n] node = G.nodes[n]
feedbacks = [] feedbacks = []
@ -319,7 +337,7 @@ def scoreOpinions(G, globMu, globStd, errorFac=-0.5):
def scoreUnread(G, globMu, globStd, errorFac=-0.6): def scoreUnread(G, globMu, globStd, errorFac=-0.6):
for n in list(G.nodes): for n in list(G.nodes):
feedbacks = [globMu] feedbacks = [globMu]
wheights = [getWheightForType('mu')] weights = [getWeightForType('mu')]
node = G.nodes[n] node = G.nodes[n]
if node['t'] == 'book': if node['t'] == 'book':
if node['rating'] == None: if node['rating'] == None:
@ -327,13 +345,15 @@ def scoreUnread(G, globMu, globStd, errorFac=-0.6):
for adj in adjacens: for adj in adjacens:
adjNode = G.nodes[adj] adjNode = G.nodes[adj]
if 'score' in adjNode and adjNode['score'] != None: if 'score' in adjNode and adjNode['score'] != None:
w = getWheightForType(adjNode['t'], G[n][adj]['wheight'] if 'wheight' in G[n][adj] else None) w = getWeightForType(adjNode['t'], G[n][adj]['weight'] if 'weight' in G[n][adj] else 1)
for fb in adjNode['feedbacks']: for fb in adjNode['feedbacks']:
feedbacks.append(fb) feedbacks.append(fb)
wheights.append(w) weights.append(w)
if len(feedbacks): if len(feedbacks):
node['meanUnweighted'], node['std'] = norm.fit(feedbacks) node['meanUnweighted'], node['std'] = norm.fit(feedbacks)
node['mean'] = sum([fb*w for fb, w in zip(feedbacks, wheights)])/len(feedbacks) feedbacks.append(node['std'])
weights.append(getWeightForType('sigma'))
node['mean'] = sum([fb*w for fb, w in zip(feedbacks, weights)])/len(feedbacks)
node['se'] = globStd / math.sqrt(len(feedbacks)) node['se'] = globStd / math.sqrt(len(feedbacks))
node['score'] = node['mean'] + errorFac*node['se'] node['score'] = node['mean'] + errorFac*node['se']
else: else:
@ -343,11 +363,13 @@ def scoreUnread(G, globMu, globStd, errorFac=-0.6):
node['score'] += 0.000000001 node['score'] += 0.000000001
# TODO: Make this neural and train it # TODO: Make this neural and train it
def getWheightForType(nodeType, edgeWheight=None): def getWeightForType(nodeType, edgeWeight=1):
global weights
w = weights[nodeType]
if nodeType == 'topList': if nodeType == 'topList':
return edgeWheight*0.5 return edgeWeight*w
else: else:
return 1.0 return w
def printBestList(G, num=-1): def printBestList(G, num=-1):
bestlist = [] bestlist = []
@ -423,7 +445,7 @@ def graphAddTopLists(G, books):
G.add_node('t/'+tl, color='yellow', t='topList', label=tl) G.add_node('t/'+tl, color='yellow', t='topList', label=tl)
for book in books: for book in books:
for top in getTopLists(book): for top in getTopLists(book):
G.add_edge('t/'+top, book['id'], wheight=getTopListWheight( G.add_edge('t/'+top, book['id'], weight=getTopListWeight(
book, top), color=readColor(book)) book, top), color=readColor(book))
return G return G
@ -520,25 +542,27 @@ def genScores(G, books):
def recommendNBooks(G, mu, std, n, removeTopListsB=True, removeUselessRecommenders=True): def recommendNBooks(G, mu, std, n, removeTopListsB=True, removeUselessRecommenders=True):
removeRestOfSeries(G) removeRestOfSeries(G)
removeBad(G, mu-std-1.5) removeBad(G, mu-std*2-1)
removeKeepBest(G, int(n*2) + 5, maxDistForRead=1.5) removeKeepBest(G, int(n*2) + 5, maxDistForRead=1.5*2)
removeEdge(G) removeEdge(G)
removeHighSpanTags(G, 9) removeHighSpanTags(G, 9)
removeDangling(G, alsoBooks=False) removeDangling(G, alsoBooks=False)
pruneTags(G, 6) pruneTags(G, 6*2)
removeBad(G, mu, groups=['book']) removeBad(G, mu, groups=['book'])
removeUselessReadBooks(G) removeUselessReadBooks(G)
pruneTags(G, 4.25) pruneTags(G, 4.25*2)
pruneRecommenderCons(G, int(n/7)+1) pruneRecommenderCons(G, int(n/7)+1)
pruneAuthorCons(G, int(n/15)) pruneAuthorCons(G, int(n/15))
removeUselessTags(G)
if removeTopListsB: if removeTopListsB:
removeTopLists(G) removeTopLists(G)
removeDangling(G, alsoBooks=True) removeDangling(G, alsoBooks=True)
removeKeepBest(G, n, maxDistForRead=0.75) removeKeepBest(G, n, maxDistForRead=0.75*2)
removeEdge(G) removeEdge(G)
removeDangling(G, alsoBooks=True) removeDangling(G, alsoBooks=True)
if removeUselessRecommenders: if removeUselessRecommenders:
removeUnusedRecommenders(G) removeUnusedRecommenders(G)
removeDangling(G, alsoBooks=True)
scaleBooksByRating(G) scaleBooksByRating(G)
scaleOpinionsByRating(G) scaleOpinionsByRating(G)
@ -650,6 +674,54 @@ def waveFlow(G, node, n, dist, menge, firstEdge=False):
if node in bestlist or node in keeplist: if node in bestlist or node in keeplist:
waveFlow(G, node, m, dist, menge, firstEdge=firstEdge) waveFlow(G, node, m, dist, menge, firstEdge=firstEdge)
def evaluateFitness():
    """Return the leave-one-out mean squared error of the current scoring.

    Builds the full graph, then for every node that carries a non-None
    'rating': hides that rating, re-runs genScores on the graph, and records
    the squared difference between the hidden rating and the node's
    resulting 'score'.

    Returns:
        The mean of the squared errors over all rated nodes.

    Raises:
        ZeroDivisionError: if the graph contains no rated node.
    """
    G, books = buildFullGraph()
    ratedBooks = [
        n for n in G.nodes
        if 'rating' in G.nodes[n] and G.nodes[n]['rating'] is not None
    ]
    errSq = []
    for m in ratedBooks:
        node = G.nodes[m]
        rating = node['rating']
        # Hide the known rating so the node gets scored like an unrated one.
        node['rating'] = None
        try:
            genScores(G, books)
            errSq.append((rating - node['score'])**2)
        finally:
            # Always put the rating back, even if scoring failed, so the
            # graph is never left with a silently erased rating.
            node['rating'] = rating
    return sum(errSq) / len(errSq)
def train(gamma = 0.1):
    """Tune the module-level `weights` by randomized hill climbing.

    Repeatedly adds a step `delta` to one entry of `weights` and re-evaluates
    evaluateFitness(). An improvement (lower MSE) is saved via saveWeights,
    becomes the new best, grows gamma by 10 % and doubles the step; with
    10 % probability a different entry is picked next. Otherwise the best
    weights are restored, gamma shrinks by 20 %, and a new entry and a new
    step are drawn.

    The loop has no exit condition: this function only ends through an
    exception or by interrupting the process.

    Args:
        gamma: scale of the randomly drawn step size.
    """
    global weights

    def drawDelta(scale):
        # random.random() is in [0, 1), so the factor lies in [0.25, 0.5).
        # NOTE(review): the step therefore always has the sign of `scale`;
        # confirm that never stepping in the other direction is intended.
        return scale * (-0.5 + (0.75 + 0.25*random.random()))

    bestWeights = copy.copy(weights)
    best_mse = evaluateFitness()
    attrNames = list(weights.keys())
    attr = random.choice(attrNames)
    delta = drawDelta(gamma)
    while True:
        print({'mse': best_mse, 'w': weights, 'gamma': gamma})
        # Start every trial from the best known weights.
        weights = copy.copy(bestWeights)
        weights[attr] += delta
        mse = evaluateFitness()

        if not mse < best_mse:
            # No improvement: roll back, cool down, try another direction.
            weights = copy.copy(bestWeights)
            gamma *= 0.8
            attr = random.choice(attrNames)
            delta = drawDelta(gamma)
            continue

        # Got better: persist and keep pushing in the same direction.
        saveWeights(weights)
        gamma *= 1.1
        bestWeights = copy.copy(weights)
        best_mse = mse
        delta *= 2
        if random.random() < 0.10:
            attr = random.choice(attrNames)
def saveWeights(weights):
    """Write `weights` as JSON to 'neuralWeights.json' in the working directory.

    The JSON text is built before the file is opened. Opening with mode 'w'
    truncates the file, so serializing first ensures that a value json.dumps
    cannot encode does not wipe out the previously saved weights.

    Args:
        weights: the JSON-serializable object to store.

    Raises:
        TypeError, ValueError: if json.dumps cannot serialize `weights`.
        OSError: if the file cannot be opened or written.
    """
    serialized = json.dumps(weights)
    with open('neuralWeights.json', 'w') as f:
        f.write(serialized)
def loadWeights():
    """Return the parsed JSON content of 'neuralWeights.json'.

    The file is looked up relative to the current working directory.
    """
    with open('neuralWeights.json', 'r') as weightsFile:
        return json.load(weightsFile)
def cliInterface(): def cliInterface():
import argparse import argparse
@ -677,18 +749,19 @@ def cliInterface():
p_show.add_argument('name', type=str) p_show.add_argument('name', type=str)
p_show.add_argument('-d', type=float, default=2.7, help='depth of expansion') p_show.add_argument('-d', type=float, default=2.7, help='depth of expansion')
p_train = cmds.add_parser('train', description="TODO", aliases=[])
p_train.add_argument('-g', type=float, default=0.1, help='learning rate gamma')
p_full = cmds.add_parser('full', description="TODO", aliases=[]) p_full = cmds.add_parser('full', description="TODO", aliases=[])
args = parser.parse_args() args = parser.parse_args()
if args.cmd=="train":
train(args.g)
exit()
G, books = buildFullGraph() G, books = buildFullGraph()
mu, std = genScores(G, books) mu, std = genScores(G, books)
if not args.keep_priv:
removePriv(G)
if args.remove_read:
removeRead(G)
elif args.remove_unread:
removeUnread(G)
if args.cmd=="recommend": if args.cmd=="recommend":
recommendNBooks(G, mu, std, args.n, not args.keep_top_lists, not args.keep_useless_recommenders) recommendNBooks(G, mu, std, args.n, not args.keep_top_lists, not args.keep_useless_recommenders)
@ -701,6 +774,15 @@ def cliInterface():
else: else:
raise Exception("Bad") raise Exception("Bad")
if not args.keep_priv:
removePriv(G)
if args.remove_read:
removeRead(G)
elif args.remove_unread:
removeUnread(G)
removeDangling(G, alsoBooks=True)
if args.remove_edge: if args.remove_edge:
removeEdge(G) removeEdge(G)
@ -709,5 +791,7 @@ def cliInterface():
if not args.no_web: if not args.no_web:
genAndShowHTML(G) genAndShowHTML(G)
# Module-level weight table, read from 'neuralWeights.json' when the module
# is executed; getWeightForType and train access it via `global weights`.
weights = loadWeights()
if __name__ == "__main__": if __name__ == "__main__":
cliInterface() cliInterface()

1
neuralWeights.json Normal file
View File

@ -0,0 +1 @@
{"topList": 2.2007417599641768, "recommender": 1.338876321897804, "author": 2.1427159240782587, "series": 1.0, "tag": 1.0, "mu": 1.3256374080828757, "sigma": -0.9188502564501485}