WIP

2022-02-24 20:12:08 +01:00 · 2022-02-24 20:12:08 +01:00 · b10fcac016
commit b10fcac016
parent 53a7b07c06
2 changed files with 69 additions and 2 deletions
--- a/caliGraph.py
+++ b/caliGraph.py
@ -20,6 +20,8 @@ import plotly.graph_objects as go

 import wikipedia

+from py import *
+
 def getAllAuthors(books):
    authors = set()
    for book in books:
@ -390,7 +392,7 @@ def scoreOpinions(G, globMu, globStd):
    for n in list(G.nodes):
        node = G.nodes[n]
        feedbacks = []
-        if node['t'] not in ['book']:
+        if node['t'] not in ['book', 'newBooks']:
            adjacens = list(G.adj[n].keys())
            for adj in adjacens:
                adjNode = G.nodes[adj]
@ -403,7 +405,8 @@ def scoreOpinions(G, globMu, globStd):
                node['score'] = node['mean']
                node['feedbacks'] = feedbacks
            else:
-                node['score'] = None
+                node['score'] = globMu - globStd
+            node['score'] = node['score'] / 2 + node['gpr_score']

 def scoreUnread(G, globMu, globStd):
    neuralBins = defaultdict(list)
@ -428,6 +431,7 @@ def scoreUnread(G, globMu, globStd):
                neuralBins['median'] = [node['median']]
                neuralBins['se'] = [node['se']]
                neuralBins['pagerank'] = [node['pagerank_score']]
+                neuralBins['gpr_score'] = [node['gpr_score']]
                if 'tgb_rank' in node:
                    neuralBins['tgbrank'] = [10/math.ln10(10+node['tgb_rank'])]
                neuralBins['bias'] = [globMu]
@ -815,6 +819,7 @@ def genScores(G, books, calcPagerank=True):
    globMu, globStd = calcRecDist(G, books)
    if calcPagerank:
        runPagerank(G)
+    genGprScores(G, globMu, globStd)
    scoreOpinions(G, globMu, globStd)
    scoreUnread(G, globMu, globStd)
    return globMu, globStd
--- a/py/gp.py
+++ b/py/gp.py
@ -0,0 +1,62 @@
+import numpy as np
+from sklearn.gaussian_process.kernels import Kernel, Hyperparameter
+from sklearn.gaussian_process.kernels import GenericKernelMixin
+from sklearn.gaussian_process import GaussianProcessRegressor
+#from sklearn.gaussian_process import GaussianProcessClassifier
+from sklearn.base import clone
+
+class BookKernel(GenericKernelMixin, Kernel):
+    def __init__(self, G):
+        self.baseline_similarity = 0.5
+        self.baseline_similarity_bounds = (1e-5, 1)
+
+        self.G = G
+        self.node2vec = Node2Vec(self.G, dimensions=32, walk_length=16, num_walks=256, workers=8)
+        self.model = node2vec.fit(window=10, min_count=1, batch_words=4)
+        self.wv = self.model.wv
+
+    def _f(self, s1, s2):
+        return self.wv.similarity(s1, s2)
+
+    def __call__(self, X, Y=None, eval_gradient=False):
+        if Y is None:
+            Y = X
+
+        if eval_gradient:
+            return (
+                np.array([[self._f(x, y) for y in Y] for x in X]),
+            )
+        else:
+            return np.array([[self._f(x, y) for y in Y] for x in X])
+
+    def diag(self, X):
+        return self(X)
+
+    def is_stationary(self):
+        return False
+
+    def clone_with_theta(self, theta):
+        cloned = clone(self)
+        cloned.theta = theta
+        return cloned
+
+def genGprScores(G, globMu, globStd, scoreName='gpr_score', stdName='gpr_std'):
+    gpr = GaussianProcessRegressor(kernel=BookKernel(G), random_state=3141)
+    X, y = [], []
+    for n in G.nodes:
+        node = G.nodes[n]
+        if node['rating']!=None:
+            X.append(n)
+            y.append(node['rating'])
+    gpr.fit(X, y)
+    X = []
+    for n in G.nodes:
+        node = G.nodes[n]
+        if node['rating']==None:
+            X.append(n)
+    y,stds = gpr.predict(X, return_std=True)
+    for n in G.nodes:
+        node = G.nodes[n]
+        if node['rating']==None:
+            y, std = y.pop(0), stds.pop(0)
+            node[scoreName], node[stdName] = y, std