Switched to a simpler Bayesian model for score generation

Dominik Moritz Roth 2021-09-24 14:49:59 +02:00
parent 4fa3a57cc7
commit 0231d97a42
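The change below replaces the hand-tuned per-type weights and the duplicated "deep feedback" samples with a single weighted mean that is seeded with the global mean as a prior observation and then shifted by a negative multiple of the standard error. A minimal sketch of that idea, with invented function and variable names rather than anything from the repository:

```python
import math

def sketch_score(feedbacks, weights, glob_mu, glob_std, error_fac=-0.6):
    """Illustrative only: weighted mean of neighbour feedback, seeded with the
    global mean as a prior pseudo-observation, plus an uncertainty penalty."""
    # Prior pseudo-observation: the global mean with unit weight.
    fbs = [glob_mu] + list(feedbacks)
    ws = [1.0] + list(weights)
    # Weighted mean; like the diff, divide by the number of feedbacks,
    # treating the weights as roughly normalised around 1.0.
    mean = sum(fb * w for fb, w in zip(fbs, ws)) / len(fbs)
    # The standard error shrinks as more feedback accumulates.
    se = glob_std / math.sqrt(len(fbs))
    # error_fac is negative, so scores backed by little data are pulled down.
    return mean + error_fac * se

# Example: three neighbour ratings around 7 against a global mean of 6.5.
print(sketch_score([7.0, 7.5, 6.8], [1.0, 1.0, 1.0], glob_mu=6.5, glob_std=1.8))
```

The prior observation keeps books with little feedback close to the global mean, while the standard-error term penalises scores that rest on only a few data points.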


@@ -294,7 +294,7 @@ def removeUselessReadBooks(G):
         else: # No unrated book in cousins
             G.remove_node(n)
 
-def scoreOpinions(G, globMu, globStd, errorFac=0.5):
+def scoreOpinions(G, globMu, globStd, errorFac=-0.5):
     for n in list(G.nodes):
         node = G.nodes[n]
         feedbacks = []
@@ -308,7 +308,7 @@ def scoreOpinions(G, globMu, globStd, errorFac=0.5):
             node['mean'], node['std'] = norm.fit(feedbacks)
             node['se'] = globStd / math.sqrt(len(feedbacks))
             ratio = len(feedbacks) / len(adjacens)
-            node['score'] = node['mean'] - errorFac * \
+            node['score'] = node['mean'] + errorFac * \
                 node['se']*(6/7 + (1-ratio)/7) + 0.01 * \
                 (node['t'] == 'recommender') \
                 - 0.5 / len(feedbacks)**2
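In `scoreOpinions` only the sign convention changes: `errorFac` now defaults to -0.5 and is added instead of subtracted, so the resulting score is unchanged. A rough numeric walk-through of the formula above, using invented values:

```python
import math

# Invented example values, only to make the formula above concrete.
glob_std = 1.8
feedbacks = [6.0, 7.0, 8.0]     # ratings attached to this opinion node
num_adjacent = 4                # neighbours the node could have drawn feedback from
is_recommender = True           # node['t'] == 'recommender'
error_fac = -0.5

mean = sum(feedbacks) / len(feedbacks)       # 7.0
se = glob_std / math.sqrt(len(feedbacks))    # ~1.04
ratio = len(feedbacks) / num_adjacent        # 0.75
score = (mean
         + error_fac * se * (6/7 + (1 - ratio)/7)  # uncertainty penalty
         + 0.01 * is_recommender                   # tiny bonus for recommender nodes
         - 0.5 / len(feedbacks)**2)                # extra penalty for sparse feedback
print(round(score, 2))                             # ~6.49
```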
@@ -316,12 +316,10 @@ def scoreOpinions(G, globMu, globStd, errorFac=0.5):
         else:
             node['score'] = None
 
-def scoreUnread(G, globMu, globStd, errorFac=0.6):
+def scoreUnread(G, globMu, globStd, errorFac=-0.6):
     for n in list(G.nodes):
-        feedbacks = []
-        deepFeedbacks = [globMu - globStd*0.5]
-        deepLen = 1
+        feedbacks = [globMu]
+        wheights = [getWheightForType('mu')]
         node = G.nodes[n]
         if node['t'] == 'book':
             if node['rating'] == None:
@@ -329,33 +327,27 @@ def scoreUnread(G, globMu, globStd, errorFac=0.6):
                 for adj in adjacens:
                     adjNode = G.nodes[adj]
                     if 'score' in adjNode and adjNode['score'] != None:
-                        if adjNode['t'] == 'tag':
-                            w = int(10/(len(G.adj[adj])))
-                        elif adjNode['t'] == 'topList':
-                            w = int(G[n][adj]['wheight']*5)
-                        else:
-                            w = 10
-                        feedbacks.append(adjNode['score'])
+                        w = getWheightForType(adjNode['t'], G[n][adj]['wheight'] if 'wheight' in G[n][adj] else None)
                         for fb in adjNode['feedbacks']:
-                            for i in range(w):
-                                deepFeedbacks.append(fb)
-                                deepLen += w
+                            feedbacks.append(fb)
+                            wheights.append(w)
                 if len(feedbacks):
-                    node['mean'], node['std'] = norm.fit(deepFeedbacks)
-                    node['mean2'], node['std2'] = norm.fit(feedbacks)
-                    if deepLen:
-                        node['se'] = globStd / math.sqrt(deepLen)
-                    # - errorFac*node['se']
-                    node['score'] = (
-                        (node['mean'] - errorFac*node['se'])*3 + node['mean2']*2)/5
+                    node['meanUnweighted'], node['std'] = norm.fit(feedbacks)
+                    node['mean'] = sum([fb*w for fb, w in zip(feedbacks, wheights)])/len(feedbacks)
+                    node['se'] = globStd / math.sqrt(len(feedbacks))
+                    node['score'] = node['mean'] + errorFac*node['se']
                 else:
-                    node['score'] = globMu - errorFac*globStd
+                    node['score'] = globMu + errorFac*globStd + len(feedbacks)*0.0000000001
                 if 'series' in node:
                     if node['series_index'] == 1.0:
                         node['score'] += 0.000000001
             else:
                 node['score'] = None
 
+# TODO: Make this neural and train it
+def getWheightForType(nodeType, edgeWheight=None):
+    if nodeType == 'topList':
+        return edgeWheight*0.5
+    else:
+        return 1.0
+
 def printBestList(G, num=-1):
     bestlist = []
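Taken together, the new `scoreUnread` path weights each neighbour's feedback by `getWheightForType`, folds in the global mean as a prior, and scores a book as the weighted mean plus `errorFac` times the standard error, with tiny tie-breakers for feedback count and series openers. A self-contained sketch of that flow; the `Neighbour` record and the example data are invented for illustration:

```python
import math
from dataclasses import dataclass, field

@dataclass
class Neighbour:
    # Invented stand-in for an adjacent graph node that already has feedback.
    kind: str                      # e.g. 'tag', 'recommender', 'topList'
    feedbacks: list = field(default_factory=list)
    edge_wheight: float = None     # only used for 'topList' edges

def weight_for_type(kind, edge_wheight=None):
    # Mirrors getWheightForType: topList edges are down-weighted, the rest get 1.0.
    if kind == 'topList':
        return edge_wheight * 0.5
    return 1.0

def score_unread_book(neighbours, glob_mu, glob_std, error_fac=-0.6,
                      is_series_opener=False):
    feedbacks = [glob_mu]                   # global mean as a prior observation
    wheights = [weight_for_type('mu')]
    for nb in neighbours:
        w = weight_for_type(nb.kind, nb.edge_wheight)
        for fb in nb.feedbacks:
            feedbacks.append(fb)
            wheights.append(w)
    mean = sum(fb * w for fb, w in zip(feedbacks, wheights)) / len(feedbacks)
    se = glob_std / math.sqrt(len(feedbacks))
    score = mean + error_fac * se           # penalise uncertain estimates
    if is_series_opener:
        score += 0.000000001                # tie-breaker: prefer first books of a series
    return score

neighbours = [Neighbour('tag', [7.0, 8.0]),
              Neighbour('topList', [9.0], edge_wheight=0.8)]
print(score_unread_book(neighbours, glob_mu=6.5, glob_std=1.8))
```

As in the diff, the weighted sum is divided by the number of feedbacks rather than by the sum of the weights, so a weight below 1.0 both down-weights a neighbour and slightly deflates the overall mean.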