WIP
This commit is contained in:
parent
53a7b07c06
commit
b10fcac016
@ -20,6 +20,8 @@ import plotly.graph_objects as go
|
|||||||
|
|
||||||
import wikipedia
|
import wikipedia
|
||||||
|
|
||||||
|
from py import *
|
||||||
|
|
||||||
def getAllAuthors(books):
|
def getAllAuthors(books):
|
||||||
authors = set()
|
authors = set()
|
||||||
for book in books:
|
for book in books:
|
||||||
@ -390,7 +392,7 @@ def scoreOpinions(G, globMu, globStd):
|
|||||||
for n in list(G.nodes):
|
for n in list(G.nodes):
|
||||||
node = G.nodes[n]
|
node = G.nodes[n]
|
||||||
feedbacks = []
|
feedbacks = []
|
||||||
if node['t'] not in ['book']:
|
if node['t'] not in ['book', 'newBooks']:
|
||||||
adjacens = list(G.adj[n].keys())
|
adjacens = list(G.adj[n].keys())
|
||||||
for adj in adjacens:
|
for adj in adjacens:
|
||||||
adjNode = G.nodes[adj]
|
adjNode = G.nodes[adj]
|
||||||
@ -403,7 +405,8 @@ def scoreOpinions(G, globMu, globStd):
|
|||||||
node['score'] = node['mean']
|
node['score'] = node['mean']
|
||||||
node['feedbacks'] = feedbacks
|
node['feedbacks'] = feedbacks
|
||||||
else:
|
else:
|
||||||
node['score'] = None
|
node['score'] = globMu - globStd
|
||||||
|
node['score'] = node['score'] / 2 + node['gpr_score']
|
||||||
|
|
||||||
def scoreUnread(G, globMu, globStd):
|
def scoreUnread(G, globMu, globStd):
|
||||||
neuralBins = defaultdict(list)
|
neuralBins = defaultdict(list)
|
||||||
@ -428,6 +431,7 @@ def scoreUnread(G, globMu, globStd):
|
|||||||
neuralBins['median'] = [node['median']]
|
neuralBins['median'] = [node['median']]
|
||||||
neuralBins['se'] = [node['se']]
|
neuralBins['se'] = [node['se']]
|
||||||
neuralBins['pagerank'] = [node['pagerank_score']]
|
neuralBins['pagerank'] = [node['pagerank_score']]
|
||||||
|
neuralBins['gpr_score'] = [node['gpr_score']]
|
||||||
if 'tgb_rank' in node:
|
if 'tgb_rank' in node:
|
||||||
neuralBins['tgbrank'] = [10/math.ln10(10+node['tgb_rank'])]
|
neuralBins['tgbrank'] = [10/math.ln10(10+node['tgb_rank'])]
|
||||||
neuralBins['bias'] = [globMu]
|
neuralBins['bias'] = [globMu]
|
||||||
@ -815,6 +819,7 @@ def genScores(G, books, calcPagerank=True):
|
|||||||
globMu, globStd = calcRecDist(G, books)
|
globMu, globStd = calcRecDist(G, books)
|
||||||
if calcPagerank:
|
if calcPagerank:
|
||||||
runPagerank(G)
|
runPagerank(G)
|
||||||
|
genGprScores(G, globMu, globStd)
|
||||||
scoreOpinions(G, globMu, globStd)
|
scoreOpinions(G, globMu, globStd)
|
||||||
scoreUnread(G, globMu, globStd)
|
scoreUnread(G, globMu, globStd)
|
||||||
return globMu, globStd
|
return globMu, globStd
|
||||||
|
62
py/gp.py
Normal file
62
py/gp.py
Normal file
@ -0,0 +1,62 @@
|
|||||||
|
import numpy as np
|
||||||
|
from sklearn.gaussian_process.kernels import Kernel, Hyperparameter
|
||||||
|
from sklearn.gaussian_process.kernels import GenericKernelMixin
|
||||||
|
from sklearn.gaussian_process import GaussianProcessRegressor
|
||||||
|
#from sklearn.gaussian_process import GaussianProcessClassifier
|
||||||
|
from sklearn.base import clone
|
||||||
|
|
||||||
|
class BookKernel(GenericKernelMixin, Kernel):
    """Gaussian-process kernel over graph nodes, backed by node2vec embeddings.

    The kernel value for two node identifiers is the similarity reported by
    the word-vector model that node2vec trains on the graph ``G``.  Inputs
    are node identifiers rather than numeric feature vectors, which is why
    the class mixes in ``GenericKernelMixin``.

    Parameters
    ----------
    G : graph
        Graph handed to ``Node2Vec``; stored unchanged as ``self.G`` so that
        ``get_params`` / ``clone`` can rebuild the kernel.

    Notes
    -----
    * No ``hyperparameter_*`` property is defined, so the kernel exposes no
      tunable hyperparameters to scikit-learn; ``baseline_similarity`` and
      its bounds are stored but not used by any method of this class.
    * ``sklearn.base.clone`` builds a new instance through ``__init__``, so
      every clone re-runs the node2vec training.
    """

    def __init__(self, G):
        self.baseline_similarity = 0.5
        self.baseline_similarity_bounds = (1e-5, 1)

        self.G = G
        # NOTE(review): ``Node2Vec`` is not imported in the visible part of
        # this module -- confirm it is brought into scope elsewhere.
        self.node2vec = Node2Vec(self.G, dimensions=32, walk_length=16, num_walks=256, workers=8)
        # Fixed: the original called ``node2vec.fit`` on an undefined bare
        # name; the walker object lives on ``self``.
        self.model = self.node2vec.fit(window=10, min_count=1, batch_words=4)
        self.wv = self.model.wv

    def _f(self, s1, s2):
        """Return the embedding similarity between nodes ``s1`` and ``s2``."""
        return self.wv.similarity(s1, s2)

    def __call__(self, X, Y=None, eval_gradient=False):
        """Evaluate the kernel matrix ``k(X, Y)``.

        Parameters
        ----------
        X : sequence of node identifiers
        Y : sequence of node identifiers, optional
            Defaults to ``X``.
        eval_gradient : bool
            When true, also return the gradient with respect to the kernel
            hyperparameters.

        Returns
        -------
        K : ndarray of shape (len(X), len(Y))
        K_gradient : ndarray of shape (len(X), len(Y), 0)
            Only returned when ``eval_gradient`` is true.  The last axis is
            empty because this kernel defines no hyperparameters.
        """
        if Y is None:
            Y = X

        K = np.array([[self._f(x, y) for y in Y] for x in X])
        if eval_gradient:
            # Fixed: the original returned a 1-tuple; callers unpack
            # ``(K, K_gradient)``.
            return K, np.empty((len(X), len(Y), 0))
        return K

    def diag(self, X):
        """Return the diagonal of ``k(X, X)`` as a 1-D array.

        Fixed: the original returned the full square matrix, whereas the
        kernel API expects one value per sample.
        """
        return np.array([self._f(x, x) for x in X])

    def is_stationary(self):
        """Report that the kernel is not stationary."""
        return False

    def clone_with_theta(self, theta):
        """Return a clone of this kernel with hyperparameters set to ``theta``."""
        cloned = clone(self)
        cloned.theta = theta
        return cloned
|
||||||
|
|
||||||
|
def genGprScores(G, globMu, globStd, scoreName='gpr_score', stdName='gpr_std'):
    """Predict ratings for unrated nodes with a Gaussian-process regressor.

    A ``GaussianProcessRegressor`` using ``BookKernel(G)`` is fitted on every
    node whose ``'rating'`` attribute is not ``None``.  Each node whose
    ``'rating'`` is ``None`` then receives the predicted mean under
    ``scoreName`` and the predictive standard deviation under ``stdName``.
    Rated nodes are left untouched.

    Parameters
    ----------
    G : graph
        Graph whose node attribute dicts are read (``'rating'``) and updated
        in place.
    globMu, globStd :
        Accepted for signature parity with the other scoring functions;
        not used by this function.
    scoreName : str
        Node attribute key that receives the predicted mean.
    stdName : str
        Node attribute key that receives the predicted standard deviation.
    """
    gpr = GaussianProcessRegressor(kernel=BookKernel(G), random_state=3141)

    # Partition the nodes once instead of walking the graph three times.
    rated, ratings, unrated = [], [], []
    for n in G.nodes:
        rating = G.nodes[n]['rating']
        if rating is None:
            unrated.append(n)
        else:
            rated.append(n)
            ratings.append(rating)

    gpr.fit(rated, ratings)

    if not unrated:
        # Nothing to score; avoid calling predict on an empty input.
        return

    means, stds = gpr.predict(unrated, return_std=True)
    # Fixed: the original called ``.pop(0)`` on the NumPy arrays returned by
    # ``predict`` (no such method) and rebound ``y`` to a scalar on the first
    # iteration.  ``predict`` returns rows in input order, so zip lines them
    # up with the nodes.
    for n, mean, std in zip(unrated, means, stds):
        node = G.nodes[n]
        node[scoreName], node[stdName] = mean, std
|
Loading…
Reference in New Issue
Block a user