Implemented Neural Net + training

2021-09-24 16:13:55 +02:00 · 2021-09-24 16:13:55 +02:00 · cb0ad906eb
commit cb0ad906eb
parent 0231d97a42
2 changed files with 106 additions and 21 deletions
--- a/caliGraph.py
+++ b/caliGraph.py
@ -2,6 +2,7 @@
 import os
 import json
 import math
 import copy
 import random
 import numpy as np
@ -78,7 +79,7 @@ def getAllTags(books):
    return list(tags)
-def getTopListWheight(book, topList):
+def getTopListWeight(book, topList):
    minScope = 100000
    for tag in book['tags']:
        if tag.find(topList+" Top ") != -1:
@ -294,7 +295,24 @@ def removeUselessReadBooks(G):
            else: # No unrated book in cousins
                G.remove_node(n)
-def scoreOpinions(G, globMu, globStd, errorFac=-0.5):
+def removeUselessTags(G):
    # Remove every node of type 'tag' that has no adjacent node of type
    # 'book' carrying a 'score' key. G is used through .nodes, .adj and
    # .remove_node(); the graph is modified in place and nothing is returned.
    for n in list(G.nodes):  # iterate over a snapshot: nodes are removed inside the loop
        node = G.nodes[n]
        if node['t'] == 'tag':
            for adj in G.adj[n]:
                foundUnread = True
                adjNode = G.nodes[adj]
                # Only the presence of the 'score' key is tested, not its value.
                if adjNode['t']=='book' and 'score' in adjNode:
                    break  # a scored book is attached: keep this tag
                else: # this neighbour is not a book with a 'score' key
                    foundUnread = False
                # NOTE(review): this point is only reached through the else
                # branch above, so foundUnread is always False here and this
                # break never fires; the flag has no effect on the result.
                if foundUnread:
                    break
            else: # loop ended without break: no neighbouring book has a 'score' key
                G.remove_node(n)
 def scoreOpinions(G, globMu, globStd, errorFac=0):
    for n in list(G.nodes):
        node = G.nodes[n]
        feedbacks = []
@ -319,7 +337,7 @@ def scoreOpinions(G, globMu, globStd, errorFac=-0.5):
 def scoreUnread(G, globMu, globStd, errorFac=-0.6):
    for n in list(G.nodes):
        feedbacks = [globMu]
-        wheights = [getWheightForType('mu')]
+        weights = [getWeightForType('mu')]
        node = G.nodes[n]
        if node['t'] == 'book':
            if node['rating'] == None:
@ -327,13 +345,15 @@ def scoreUnread(G, globMu, globStd, errorFac=-0.6):
                for adj in adjacens:
                    adjNode = G.nodes[adj]
                    if 'score' in adjNode and adjNode['score'] != None:
-                        w = getWheightForType(adjNode['t'], G[n][adj]['wheight'] if 'wheight' in G[n][adj] else None)
+                        w = getWeightForType(adjNode['t'], G[n][adj]['weight'] if 'weight' in G[n][adj] else 1)
                        for fb in adjNode['feedbacks']:
                            feedbacks.append(fb)
-                            wheights.append(w)
+                            weights.append(w)
                if len(feedbacks):
                    node['meanUnweighted'], node['std'] = norm.fit(feedbacks)
-                    node['mean'] = sum([fb*w for fb, w in zip(feedbacks, wheights)])/len(feedbacks)
+                    feedbacks.append(node['std'])
                    weights.append(getWeightForType('sigma'))
                    node['mean'] = sum([fb*w for fb, w in zip(feedbacks, weights)])/len(feedbacks)
                    node['se'] = globStd / math.sqrt(len(feedbacks))
                    node['score'] = node['mean'] + errorFac*node['se']
                else:
@ -343,11 +363,13 @@ def scoreUnread(G, globMu, globStd, errorFac=-0.6):
                        node['score'] += 0.000000001
 # TODO: Make this neural and train it
-def getWheightForType(nodeType, edgeWheight=None):
+def getWeightForType(nodeType, edgeWeight=1):
    global weights
    w = weights[nodeType]
    if nodeType == 'topList':
-        return edgeWheight*0.5
+        return edgeWeight*w
    else:
-        return 1.0
+        return w
 def printBestList(G, num=-1):
    bestlist = []
@ -423,7 +445,7 @@ def graphAddTopLists(G, books):
        G.add_node('t/'+tl, color='yellow', t='topList', label=tl)
    for book in books:
        for top in getTopLists(book):
-            G.add_edge('t/'+top, book['id'], wheight=getTopListWheight(
+            G.add_edge('t/'+top, book['id'], weight=getTopListWeight(
                book, top), color=readColor(book))
    return G
@ -520,25 +542,27 @@ def genScores(G, books):
 def recommendNBooks(G, mu, std, n, removeTopListsB=True, removeUselessRecommenders=True):
    removeRestOfSeries(G)
-    removeBad(G, mu-std-1.5)
+    removeBad(G, mu-std*2-1)
-    removeKeepBest(G, int(n*2) + 5, maxDistForRead=1.5)
+    removeKeepBest(G, int(n*2) + 5, maxDistForRead=1.5*2)
    removeEdge(G)
    removeHighSpanTags(G, 9)
    removeDangling(G, alsoBooks=False)
-    pruneTags(G, 6)
+    pruneTags(G, 6*2)
    removeBad(G, mu, groups=['book'])
    removeUselessReadBooks(G)
-    pruneTags(G, 4.25)
+    pruneTags(G, 4.25*2)
    pruneRecommenderCons(G, int(n/7)+1)
    pruneAuthorCons(G, int(n/15))
    removeUselessTags(G)
    if removeTopListsB:
        removeTopLists(G)
    removeDangling(G, alsoBooks=True)
-    removeKeepBest(G, n, maxDistForRead=0.75)
+    removeKeepBest(G, n, maxDistForRead=0.75*2)
    removeEdge(G)
    removeDangling(G, alsoBooks=True)
    if removeUselessRecommenders:
        removeUnusedRecommenders(G)
        removeDangling(G, alsoBooks=True)
    scaleBooksByRating(G)
    scaleOpinionsByRating(G)
@ -650,6 +674,54 @@ def waveFlow(G, node, n, dist, menge, firstEdge=False):
        if node in bestlist or node in keeplist:
            waveFlow(G, node, m, dist, menge, firstEdge=firstEdge)
 def evaluateFitness():
    """Return the mean squared error between held-out ratings and scores.

    Builds the full graph, then for every node whose 'rating' entry exists
    and is not None: temporarily sets that rating to None, re-runs
    genScores(), and compares the node's resulting 'score' with the hidden
    rating. The rating is put back before the next node is processed.

    Raises ZeroDivisionError when the graph contains no rated node.
    """
    G, books = buildFullGraph()

    # Collect the names of all rated nodes up front.
    ratedNames = []
    for name in list(G.nodes):
        if 'rating' in G.nodes[name] and G.nodes[name]['rating'] != None:
            ratedNames.append(name)

    squaredErrors = []
    for name in ratedNames:
        hiddenRating = G.nodes[name]['rating']
        # Hide the rating so the scoring pass has to predict it.
        G.nodes[name]['rating'] = None
        _mu, _std = genScores(G, books)
        difference = hiddenRating - G.nodes[name]['score']
        squaredErrors.append(difference**2)
        # Restore the real rating for the following iterations.
        G.nodes[name]['rating'] = hiddenRating

    return sum(squaredErrors) / len(squaredErrors)
 def train(gamma = 0.1):
    # Tune the module-level `weights` dict by random search: change one entry
    # at a time and keep the change only if evaluateFitness() returns a lower
    # value. gamma scales the size of newly drawn steps.
    # The loop below has no exit condition, so this function never returns;
    # it runs until the process is interrupted. Every improvement is written
    # out with saveWeights().
    # presumably evaluateFitness() reads the global `weights` through the
    # scoring functions - verify against genScores.
    global weights
    # copy.copy is a shallow copy; assumes weights is a flat dict of numbers - TODO confirm
    bestWeights = copy.copy(weights)
    best_mse = evaluateFitness()
    w = list(weights.keys())
    attr = random.choice(w)
    # -0.5 + (0.75 + 0.25*r) equals 0.25 + 0.25*r with r in [0, 1), so the
    # factor lies in [0.25, 0.5).
    # NOTE(review): delta therefore always has the sign of gamma; with a
    # positive gamma a weight can only be increased, never decreased -
    # confirm this is intended.
    delta = gamma * (-0.5 + (0.75 + 0.25*random.random()))
    while True:
        print({'mse': best_mse, 'w': weights, 'gamma': gamma})
        # Start each trial from the best known weights and change one entry.
        weights = copy.copy(bestWeights)
        weights[attr] += delta
        mse = evaluateFitness()
        if mse < best_mse: # got better
            saveWeights(weights)
            gamma *= 1.1  # only affects steps drawn later; delta is scaled separately below
            bestWeights = copy.copy(weights)
            best_mse = mse
            delta *= 2  # keep going in the same direction with a doubled step
            if random.random() < 0.10:  # 10% chance to switch to another entry
                attr = random.choice(w)
        else:
            # Discard the failed trial, shrink gamma, and draw a new entry and step.
            weights = copy.copy(bestWeights)
            gamma *= 0.8
            attr = random.choice(w)
            delta = gamma * (-0.5 + (0.75 + 0.25*random.random()))
 def saveWeights(weights):
    """Write *weights* as JSON text to 'neuralWeights.json'.

    The file is opened in 'w' mode, so any previous content is replaced.
    """
    with open('neuralWeights.json', 'w') as outFile:
        serialized = json.dumps(weights)
        outFile.write(serialized)
 def loadWeights():
    """Read 'neuralWeights.json' and return its parsed JSON content."""
    with open('neuralWeights.json', 'r') as inFile:
        return json.loads(inFile.read())
 def cliInterface():
    import argparse
@ -677,18 +749,19 @@ def cliInterface():
    p_show.add_argument('name', type=str)
    p_show.add_argument('-d', type=float, default=2.7, help='depth of expansion')
    p_train = cmds.add_parser('train', description="TODO", aliases=[])
    p_train.add_argument('-g', type=float, default=0.1, help='learning rate gamma')
    p_full = cmds.add_parser('full', description="TODO", aliases=[])
    args = parser.parse_args()
    if args.cmd=="train":
        train(args.g)
        exit()
    G, books = buildFullGraph()
    mu, std = genScores(G, books)
    if not args.keep_priv:
        removePriv(G)
    if args.remove_read:
        removeRead(G)
    elif args.remove_unread:
        removeUnread(G)
    if args.cmd=="recommend":
        recommendNBooks(G, mu, std, args.n, not args.keep_top_lists, not args.keep_useless_recommenders)
@ -701,6 +774,15 @@ def cliInterface():
    else:
        raise Exception("Bad")
    if not args.keep_priv:
        removePriv(G)
    if args.remove_read:
        removeRead(G)
    elif args.remove_unread:
        removeUnread(G)
    removeDangling(G, alsoBooks=True)
    if args.remove_edge:
        removeEdge(G)
@ -709,5 +791,7 @@ def cliInterface():
    if not args.no_web:
        genAndShowHTML(G)
 weights = loadWeights()
 if __name__ == "__main__":
    cliInterface()
--- a/neuralWeights.json
+++ b/neuralWeights.json
@ -0,0 +1 @@
 {"topList": 2.2007417599641768, "recommender": 1.338876321897804, "author": 2.1427159240782587, "series": 1.0, "tag": 1.0, "mu": 1.3256374080828757, "sigma": -0.9188502564501485}
		`@ -0,0 +1 @@`
							`{"topList": 2.2007417599641768, "recommender": 1.338876321897804, "author": 2.1427159240782587, "series": 1.0, "tag": 1.0, "mu": 1.3256374080828757, "sigma": -0.9188502564501485}`