CaliGraph/py/gp.py
2022-02-24 20:18:31 +01:00

65 lines
2.0 KiB
Python

import numpy as np
from node2vec import Node2Vec
from sklearn.gaussian_process.kernels import Kernel, Hyperparameter
from sklearn.gaussian_process.kernels import GenericKernelMixin
from sklearn.gaussian_process import GaussianProcessRegressor
#from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.base import clone
class BookKernel(GenericKernelMixin, Kernel):
    """Gaussian-process kernel over graph nodes backed by Node2Vec similarity.

    The kernel value for two nodes is the similarity reported by the word
    vectors of a Node2Vec model trained on the graph ``G``.  Inputs ``X`` and
    ``Y`` are sequences of node identifiers rather than numeric feature
    vectors, which is why ``GenericKernelMixin`` is used.

    The class defines no ``hyperparameter_*`` properties, so it exposes no
    tunable hyperparameters to scikit-learn.
    """

    def __init__(self, G):
        """Train a Node2Vec embedding on ``G`` and keep its word vectors.

        Parameters
        ----------
        G
            Graph handed to ``Node2Vec``.  It is stored unmodified as
            ``self.G`` so that scikit-learn's ``get_params``/``clone`` can
            round-trip the constructor argument.
        """
        # Not read by any method of this class.
        self.baseline_similarity = 0.5
        self.baseline_similarity_bounds = (1e-5, 1)
        self.G = G
        self.node2vec = Node2Vec(self.G, dimensions=32, walk_length=16, num_walks=256, workers=8)
        self.model = self.node2vec.fit(window=10, min_count=1, batch_words=4)
        self.wv = self.model.wv

    def _f(self, s1, s2):
        """Return the embedding similarity between nodes ``s1`` and ``s2``."""
        return self.wv.similarity(s1, s2)

    def _gram(self, X, Y):
        """Return the matrix of pairwise similarities, shape (len(X), len(Y))."""
        return np.array([[self._f(x, y) for y in Y] for x in X])

    def __call__(self, X, Y=None, eval_gradient=False):
        """Evaluate the kernel ``k(X, Y)``.

        Parameters
        ----------
        X
            Sequence of node identifiers.
        Y
            Second sequence of node identifiers; defaults to ``X``.
        eval_gradient
            When true, also return the gradient with respect to the kernel
            hyperparameters.

        Returns
        -------
        K
            Array of pairwise similarities, shape ``(len(X), len(Y))``.
        K_gradient
            Only returned when ``eval_gradient`` is true.  Since the kernel
            has no hyperparameters, this is an empty array whose last axis
            has length zero.
        """
        if Y is None:
            Y = X
        K = self._gram(X, Y)
        if eval_gradient:
            # scikit-learn expects a (K, K_gradient) pair; with no
            # hyperparameters the gradient has a zero-length trailing axis.
            return K, np.empty((len(X), len(Y), 0))
        return K

    def diag(self, X):
        """Return the diagonal of ``k(X, X)`` as a 1-D array.

        Only the self-similarities are evaluated, so the full matrix is
        never built.
        """
        return np.array([self._f(x, x) for x in X])

    def is_stationary(self):
        """Report that the kernel is not stationary."""
        return False

    def clone_with_theta(self, theta):
        """Return a clone of this kernel with hyperparameters set to ``theta``.

        NOTE(review): ``clone`` rebuilds the kernel through ``__init__``,
        which presumably retrains the Node2Vec model -- confirm the cost is
        acceptable wherever this is called.
        """
        cloned = clone(self)
        cloned.theta = theta
        return cloned
def genGprScores(G, globMu, globStd, scoreName='gpr_score', stdName='gpr_std'):
    """Annotate unrated nodes of ``G`` with Gaussian-process rating estimates.

    A ``GaussianProcessRegressor`` using ``BookKernel(G)`` is fitted on every
    node whose ``'rating'`` attribute is not ``None``.  For every node whose
    ``'rating'`` is ``None`` the predicted mean and standard deviation are
    then stored on the node in place.

    Parameters
    ----------
    G
        Graph whose ``G.nodes`` can be iterated for node identifiers and
        indexed to obtain a mutable attribute mapping with a ``'rating'`` key.
    globMu, globStd
        Currently unused; kept so existing callers keep working.
    scoreName
        Attribute name under which the predicted mean is stored.
    stdName
        Attribute name under which the predicted standard deviation is stored.

    Returns
    -------
    None
        The graph is modified in place.  If no node is unrated, nothing is
        trained and the graph is left untouched.
    """
    rated, ratings, unrated = [], [], []
    for n in G.nodes:
        rating = G.nodes[n]['rating']
        if rating is None:
            unrated.append(n)
        else:
            rated.append(n)
            ratings.append(rating)

    # Nothing to predict: skip the expensive kernel construction and avoid
    # calling predict() on an empty input.
    if not unrated:
        return

    gpr = GaussianProcessRegressor(kernel=BookKernel(G), random_state=3141)
    gpr.fit(rated, ratings)

    # predict() returns arrays aligned with `unrated`, so pair them up
    # directly instead of popping from them.
    means, stds = gpr.predict(unrated, return_std=True)
    for n, mean, std in zip(unrated, means, stds):
        attrs = G.nodes[n]
        attrs[scoreName], attrs[stdName] = mean, std