Implemented Neural Net + training

2021-09-24 16:13:55 +02:00 · 2021-09-24 16:13:55 +02:00 · cb0ad906eb
commit cb0ad906eb
parent 0231d97a42
2 changed files with 106 additions and 21 deletions
--- a/caliGraph.py
+++ b/caliGraph.py
@ -2,6 +2,7 @@
 import os
 import json
 import math
+import copy
 import random

 import numpy as np
@ -78,7 +79,7 @@ def getAllTags(books):
    return list(tags)


-def getTopListWheight(book, topList):
+def getTopListWeight(book, topList):
    minScope = 100000
    for tag in book['tags']:
        if tag.find(topList+" Top ") != -1:
@ -294,7 +295,24 @@ def removeUselessReadBooks(G):
            else: # No unrated book in cousins
                G.remove_node(n)

-def scoreOpinions(G, globMu, globStd, errorFac=-0.5):
+def removeUselessTags(G):
+    """Remove every tag node that has no neighbouring book with a 'score'.
+
+    A node counts as a tag when its 't' attribute is 'tag'. It is kept as
+    soon as one adjacent node has t == 'book' and carries a 'score' key;
+    otherwise (including a tag without any neighbour) it is removed from
+    G in place.
+
+    NOTE(review): the criterion is the presence of the 'score' key, not the
+    book's rating - confirm that this is the intended notion of "useless".
+    """
+    # Snapshot the node list because nodes are removed while iterating.
+    for n in list(G.nodes):
+        if G.nodes[n]['t'] != 'tag':
+            continue
+        hasScoredBook = any(
+            G.nodes[adj]['t'] == 'book' and 'score' in G.nodes[adj]
+            for adj in G.adj[n]
+        )
+        if not hasScoredBook:
+            G.remove_node(n)
+
+
+def scoreOpinions(G, globMu, globStd, errorFac=0):
    for n in list(G.nodes):
        node = G.nodes[n]
        feedbacks = []
@ -319,7 +337,7 @@ def scoreOpinions(G, globMu, globStd, errorFac=-0.5):
 def scoreUnread(G, globMu, globStd, errorFac=-0.6):
    for n in list(G.nodes):
        feedbacks = [globMu]
-        wheights = [getWheightForType('mu')]
+        weights = [getWeightForType('mu')]
        node = G.nodes[n]
        if node['t'] == 'book':
            if node['rating'] == None:
@ -327,13 +345,15 @@ def scoreUnread(G, globMu, globStd, errorFac=-0.6):
                for adj in adjacens:
                    adjNode = G.nodes[adj]
                    if 'score' in adjNode and adjNode['score'] != None:
-                        w = getWheightForType(adjNode['t'], G[n][adj]['wheight'] if 'wheight' in G[n][adj] else None)
+                        w = getWeightForType(adjNode['t'], G[n][adj]['weight'] if 'weight' in G[n][adj] else 1)
                        for fb in adjNode['feedbacks']:
                            feedbacks.append(fb)
-                            wheights.append(w)
+                            weights.append(w)
                if len(feedbacks):
                    node['meanUnweighted'], node['std'] = norm.fit(feedbacks)
-                    node['mean'] = sum([fb*w for fb, w in zip(feedbacks, wheights)])/len(feedbacks)
+                    feedbacks.append(node['std'])
+                    weights.append(getWeightForType('sigma'))
+                    node['mean'] = sum([fb*w for fb, w in zip(feedbacks, weights)])/len(feedbacks)
                    node['se'] = globStd / math.sqrt(len(feedbacks))
                    node['score'] = node['mean'] + errorFac*node['se']
                else:
@ -343,11 +363,13 @@ def scoreUnread(G, globMu, globStd, errorFac=-0.6):
                        node['score'] += 0.000000001

 # TODO: Make this neural and train it
-def getWheightForType(nodeType, edgeWheight=None):
+def getWeightForType(nodeType, edgeWeight=1):  # weight for one feedback source; edgeWeight only matters for 'topList'
+    global weights  # module-level weight table (only read here)
+    w = weights[nodeType]  # raises KeyError when nodeType has no entry in the table
    if nodeType == 'topList':
-        return edgeWheight*0.5
+        return edgeWeight*w  # top lists scale their table weight by the edge's weight
    else:
-        return 1.0
+        return w  # every other type uses its table weight unchanged

 def printBestList(G, num=-1):
    bestlist = []
@ -423,7 +445,7 @@ def graphAddTopLists(G, books):
        G.add_node('t/'+tl, color='yellow', t='topList', label=tl)
    for book in books:
        for top in getTopLists(book):
-            G.add_edge('t/'+top, book['id'], wheight=getTopListWheight(
+            G.add_edge('t/'+top, book['id'], weight=getTopListWeight(
                book, top), color=readColor(book))
    return G

@ -520,25 +542,27 @@ def genScores(G, books):

 def recommendNBooks(G, mu, std, n, removeTopListsB=True, removeUselessRecommenders=True):
    removeRestOfSeries(G)
-    removeBad(G, mu-std-1.5)
-    removeKeepBest(G, int(n*2) + 5, maxDistForRead=1.5)
+    removeBad(G, mu-std*2-1)
+    removeKeepBest(G, int(n*2) + 5, maxDistForRead=1.5*2)
    removeEdge(G)
    removeHighSpanTags(G, 9)
    removeDangling(G, alsoBooks=False)
-    pruneTags(G, 6)
+    pruneTags(G, 6*2)
    removeBad(G, mu, groups=['book'])
    removeUselessReadBooks(G)
-    pruneTags(G, 4.25)
+    pruneTags(G, 4.25*2)
    pruneRecommenderCons(G, int(n/7)+1)
    pruneAuthorCons(G, int(n/15))
+    removeUselessTags(G)
    if removeTopListsB:
        removeTopLists(G)
    removeDangling(G, alsoBooks=True)
-    removeKeepBest(G, n, maxDistForRead=0.75)
+    removeKeepBest(G, n, maxDistForRead=0.75*2)
    removeEdge(G)
    removeDangling(G, alsoBooks=True)
    if removeUselessRecommenders:
        removeUnusedRecommenders(G)
+        removeDangling(G, alsoBooks=True)

    scaleBooksByRating(G)
    scaleOpinionsByRating(G)
@ -650,6 +674,54 @@ def waveFlow(G, node, n, dist, menge, firstEdge=False):
        if node in bestlist or node in keeplist:
            waveFlow(G, node, m, dist, menge, firstEdge=firstEdge)

+def evaluateFitness():
+    """Return the leave-one-out mean squared error of the predicted scores.
+
+    Builds the full graph, then for each node with a non-None 'rating'
+    temporarily sets that rating to None, re-runs genScores and compares
+    the node's resulting 'score' with the hidden rating.
+
+    Raises:
+        ValueError: if the graph contains no rated node (the mean would
+            otherwise end in a division by zero).
+    """
+    G, books = buildFullGraph()
+    ratedBooks = [n for n in G.nodes if G.nodes[n].get('rating') is not None]
+    if not ratedBooks:
+        raise ValueError("evaluateFitness needs at least one rated book")
+    errSq = []
+    for m in ratedBooks:
+        rating = G.nodes[m]['rating']
+        G.nodes[m]['rating'] = None  # hide the known rating from the scorer
+        try:
+            genScores(G, books)  # return value (mu, std) is not needed here
+            errSq.append((rating - G.nodes[m]['score'])**2)
+        finally:
+            # Restore the rating even if scoring failed, so the graph is
+            # never left with a hidden rating.
+            G.nodes[m]['rating'] = rating
+    return sum(errSq) / len(errSq)
+
+def train(gamma = 0.1):
+    """Tune the global ``weights`` by randomised hill climbing.
+
+    Repeatedly changes one randomly chosen weight by ``delta``, re-evaluates
+    the error with evaluateFitness() and keeps the change only if the error
+    dropped. Every improvement is written out with saveWeights(). The loop
+    never terminates by itself; stop it by interrupting the process.
+
+    Args:
+        gamma: step-size scale. Grows by 10% after an improvement and
+            shrinks by 20% after a failed attempt.
+    """
+    global weights
+
+    def newDelta():
+        # The magnitude lies in [0.25*gamma, 0.5*gamma). The sign is drawn at
+        # random so that a weight can be lowered as well as raised; without
+        # it every step would be positive.
+        magnitude = gamma * (-0.5 + (0.75 + 0.25*random.random()))
+        return random.choice((-1, 1)) * magnitude
+
+    bestWeights = copy.copy(weights)
+    best_mse = evaluateFitness()
+    w = list(weights.keys())
+    attr = random.choice(w)
+    delta = newDelta()
+
+    while True:
+        print({'mse': best_mse, 'w': weights, 'gamma': gamma})
+        # Every attempt starts from the best weights found so far.
+        weights = copy.copy(bestWeights)
+        weights[attr] += delta
+        mse = evaluateFitness()
+        if mse < best_mse: # got better
+            saveWeights(weights)
+            gamma *= 1.1
+            bestWeights = copy.copy(weights)
+            best_mse = mse
+            delta *= 2  # keep moving in the same direction, twice as far
+            if random.random() < 0.10:
+                attr = random.choice(w)
+        else:
+            # Discard the failed attempt and try a fresh weight and step.
+            weights = copy.copy(bestWeights)
+            gamma *= 0.8
+            attr = random.choice(w)
+            delta = newDelta()
+
+def saveWeights(weights):
+    """Write ``weights`` as JSON to 'neuralWeights.json', replacing the file."""
+    with open('neuralWeights.json', 'w') as handle:
+        payload = json.dumps(weights)
+        handle.write(payload)
+
+def loadWeights():
+    """Read 'neuralWeights.json' and return its parsed JSON content."""
+    with open('neuralWeights.json', 'r') as handle:
+        raw = handle.read()
+        return json.loads(raw)
+
 def cliInterface():
    import argparse

@ -677,18 +749,19 @@ def cliInterface():
    p_show.add_argument('name', type=str)
    p_show.add_argument('-d', type=float, default=2.7, help='depth of expansion')

+    p_train = cmds.add_parser('train', description="TODO", aliases=[])
+    p_train.add_argument('-g', type=float, default=0.1, help='learning rate gamma')
+
    p_full = cmds.add_parser('full', description="TODO", aliases=[])

    args = parser.parse_args()

+    if args.cmd=="train":
+        train(args.g)
+        exit()
+
    G, books = buildFullGraph()
    mu, std = genScores(G, books)
-    if not args.keep_priv:
-        removePriv(G)
-    if args.remove_read:
-        removeRead(G)
-    elif args.remove_unread:
-        removeUnread(G)

    if args.cmd=="recommend":
        recommendNBooks(G, mu, std, args.n, not args.keep_top_lists, not args.keep_useless_recommenders)
@ -701,6 +774,15 @@ def cliInterface():
    else:
        raise Exception("Bad")

+    if not args.keep_priv:
+        removePriv(G)
+    if args.remove_read:
+        removeRead(G)
+    elif args.remove_unread:
+        removeUnread(G)
+
+    removeDangling(G, alsoBooks=True)
+
    if args.remove_edge:
        removeEdge(G)

@ -709,5 +791,7 @@ def cliInterface():
    if not args.no_web:
        genAndShowHTML(G)

+
+weights = loadWeights()  # module-level weight table, loaded on import; accessed via `global weights` in getWeightForType and train
 if __name__ == "__main__":
    cliInterface()
--- a/neuralWeights.json
+++ b/neuralWeights.json
@ -0,0 +1 @@
+{"topList": 2.2007417599641768, "recommender": 1.338876321897804, "author": 2.1427159240782587, "series": 1.0, "tag": 1.0, "mu": 1.3256374080828757, "sigma": -0.9188502564501485}
				`@ -0,0 +1 @@`
				`{"topList": 2.2007417599641768, "recommender": 1.338876321897804, "author": 2.1427159240782587, "series": 1.0, "tag": 1.0, "mu": 1.3256374080828757, "sigma": -0.9188502564501485}`