WIP

2022-02-24 20:12:08 +01:00 · 2022-02-24 20:12:08 +01:00 · b10fcac016
commit b10fcac016
parent 53a7b07c06
2 changed files with 69 additions and 2 deletions
--- a/caliGraph.py
+++ b/caliGraph.py
@ -20,6 +20,8 @@ import plotly.graph_objects as go
 import wikipedia
 from py import *
 def getAllAuthors(books):
    authors = set()
    for book in books:
@ -390,7 +392,7 @@ def scoreOpinions(G, globMu, globStd):
    for n in list(G.nodes):
        node = G.nodes[n]
        feedbacks = []
-        if node['t'] not in ['book']:
+        if node['t'] not in ['book', 'newBooks']:
            adjacens = list(G.adj[n].keys())
            for adj in adjacens:
                adjNode = G.nodes[adj]
@ -403,7 +405,8 @@ def scoreOpinions(G, globMu, globStd):
                node['score'] = node['mean']
                node['feedbacks'] = feedbacks
            else:
-                node['score'] = None
+                node['score'] = globMu - globStd
            node['score'] = node['score'] / 2 + node['gpr_score']
 def scoreUnread(G, globMu, globStd):
    neuralBins = defaultdict(list)
@ -428,6 +431,7 @@ def scoreUnread(G, globMu, globStd):
                neuralBins['median'] = [node['median']]
                neuralBins['se'] = [node['se']]
                neuralBins['pagerank'] = [node['pagerank_score']]
                neuralBins['gpr_score'] = [node['gpr_score']]
                if 'tgb_rank' in node:
                    neuralBins['tgbrank'] = [10/math.ln10(10+node['tgb_rank'])]
                neuralBins['bias'] = [globMu]
@ -815,6 +819,7 @@ def genScores(G, books, calcPagerank=True):
    globMu, globStd = calcRecDist(G, books)
    if calcPagerank:
        runPagerank(G)
    genGprScores(G, globMu, globStd)
    scoreOpinions(G, globMu, globStd)
    scoreUnread(G, globMu, globStd)
    return globMu, globStd
--- a/py/gp.py
+++ b/py/gp.py
@ -0,0 +1,62 @@
 import numpy as np
 from sklearn.gaussian_process.kernels import Kernel, Hyperparameter
 from sklearn.gaussian_process.kernels import GenericKernelMixin
 from sklearn.gaussian_process import GaussianProcessRegressor
 #from sklearn.gaussian_process import GaussianProcessClassifier
 from sklearn.base import clone
 class BookKernel(GenericKernelMixin, Kernel):
    """Kernel over graph nodes backed by a Node2Vec embedding of ``G``.
    The kernel value of two nodes is the similarity that the trained
    embedding's ``wv.similarity`` reports for them. Inputs are sequences of
    node identifiers rather than numeric feature vectors.
    """
    def __init__(self, G):
        """Train a Node2Vec embedding on ``G`` and keep its word vectors.
        G: the graph whose nodes will later be passed to the kernel.
        """
        self.baseline_similarity = 0.5
        self.baseline_similarity_bounds = (1e-5, 1)
        self.G = G
        # NOTE(review): Node2Vec is not imported in the visible part of this
        # module - presumably `from node2vec import Node2Vec` is required;
        # confirm and add it to the module imports.
        # NOTE(review): clone() (see clone_with_theta) presumably re-invokes
        # this constructor and therefore retrains the embedding - confirm cost.
        self.node2vec = Node2Vec(self.G, dimensions=32, walk_length=16, num_walks=256, workers=8)
        # Fit through the instance attribute; the bare name `node2vec` used
        # previously is not defined in this scope.
        self.model = self.node2vec.fit(window=10, min_count=1, batch_words=4)
        self.wv = self.model.wv
    def _f(self, s1, s2):
        """Return the embedding similarity of the two nodes ``s1`` and ``s2``."""
        return self.wv.similarity(s1, s2)
    def __call__(self, X, Y=None, eval_gradient=False):
        """Return the kernel matrix k(X, Y); Y defaults to X.
        When ``eval_gradient`` is true, return ``(K, K_gradient)``. This class
        defines no ``hyperparameter_*`` attribute, so the gradient carries a
        zero-length last axis.
        """
        if Y is None:
            Y = X
        K = np.array([[self._f(x, y) for y in Y] for x in X])
        if eval_gradient:
            # Always hand back a (K, gradient) pair so callers can unpack it.
            return K, np.empty((len(X), len(Y), 0))
        return K
    def diag(self, X):
        """Return the 1-D diagonal of k(X, X) without building the full matrix."""
        return np.array([self._f(x, x) for x in X])
    def is_stationary(self):
        """Report the kernel as non-stationary."""
        return False
    def clone_with_theta(self, theta):
        """Return a clone of this kernel with its ``theta`` set to ``theta``."""
        cloned = clone(self)
        cloned.theta = theta
        return cloned
 def genGprScores(G, globMu, globStd, scoreName='gpr_score', stdName='gpr_std'):
    """Predict a rating for every unrated node with a Gaussian process.
    A GaussianProcessRegressor using BookKernel(G) is fitted on the nodes
    whose 'rating' is not None; the predicted mean and standard deviation
    for each remaining node are stored on that node under ``scoreName`` and
    ``stdName``. Rated nodes are left untouched.
    G: graph whose node attribute dicts each carry a 'rating' entry.
    globMu, globStd: accepted for interface compatibility; not used here.
    scoreName, stdName: node attribute keys that receive the prediction.
    Returns None; the graph is modified in place.
    """
    rated, ratings, unrated = [], [], []
    # Single pass: split the nodes into training and prediction sets.
    for n in G.nodes:
        rating = G.nodes[n]['rating']
        if rating is None:
            unrated.append(n)
        else:
            rated.append(n)
            ratings.append(rating)
    # Nothing to learn from, or nothing to predict: skip building the kernel.
    if not rated or not unrated:
        return
    gpr = GaussianProcessRegressor(kernel=BookKernel(G), random_state=3141)
    gpr.fit(rated, ratings)
    means, stds = gpr.predict(unrated, return_std=True)
    # predict() returns arrays ordered like `unrated`; pair them up directly
    # instead of popping from the results (arrays have no pop()).
    for n, mean, std in zip(unrated, means, stds):
        node = G.nodes[n]
        node[scoreName], node[stdName] = float(mean), float(std)