import numpy as np
from node2vec import Node2Vec
from sklearn.gaussian_process.kernels import GenericKernelMixin, Hyperparameter, Kernel
from sklearn.gaussian_process import GaussianProcessRegressor
#from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.base import clone


class BookKernel(GenericKernelMixin, Kernel):
    """Gaussian-process kernel over graph nodes backed by a node2vec embedding.

    The kernel value for two nodes is the similarity reported by the trained
    embedding's ``wv.similarity`` (cosine similarity of the node vectors).
    Inputs are sequences of node identifiers rather than numeric feature
    vectors, which is why the class mixes in ``GenericKernelMixin``.

    NOTE: the embedding is trained inside ``__init__``. ``sklearn.base.clone``
    re-invokes the constructor, so every clone of this kernel (including the
    one in ``clone_with_theta``) trains a fresh embedding.
    """

    def __init__(self, G):
        """Train a node2vec embedding on graph ``G`` and keep its vectors.

        Parameters
        ----------
        G : graph accepted by ``Node2Vec``
            Graph whose nodes will be compared by this kernel.
        """
        # These two attributes are stored but are not exposed as sklearn
        # hyperparameters (there is no ``hyperparameter_*`` property), so the
        # kernel has nothing to optimise.
        self.baseline_similarity = 0.5
        self.baseline_similarity_bounds = (1e-5, 1)
        self.G = G
        self.node2vec = Node2Vec(
            self.G, dimensions=32, walk_length=16, num_walks=256, workers=8
        )
        self.model = self.node2vec.fit(window=10, min_count=1, batch_words=4)
        self.wv = self.model.wv

    def _f(self, s1, s2):
        """Return the embedding similarity between nodes ``s1`` and ``s2``."""
        # node2vec stores node names as strings; casting also unwraps numpy
        # scalar ids produced by sklearn's input validation, and prevents
        # integer ids from being interpreted as vector indices.
        return self.wv.similarity(str(s1), str(s2))

    def __call__(self, X, Y=None, eval_gradient=False):
        """Evaluate the kernel matrix ``K[i, j] = similarity(X[i], Y[j])``.

        Parameters
        ----------
        X : sequence of node ids
        Y : sequence of node ids, optional
            Defaults to ``X``.
        eval_gradient : bool, default False
            When true, also return the gradient with respect to the kernel
            hyperparameters.

        Returns
        -------
        K : ndarray of shape (len(X), len(Y))
        K_gradient : ndarray of shape (len(X), len(Y), 0)
            Only returned when ``eval_gradient`` is true. It is empty because
            the kernel has no tunable hyperparameters.
        """
        if Y is None:
            Y = X
        # float64 so that the tiny jitter sklearn adds to the diagonal is not
        # rounded away; the explicit reshape keeps the result 2-D even when
        # X is empty.
        K = np.array(
            [[self._f(x, y) for y in Y] for x in X], dtype=float
        ).reshape(len(X), len(Y))
        if eval_gradient:
            # sklearn unpacks ``K, K_gradient``; a 1-tuple would raise there.
            return K, np.empty(K.shape + (0,))
        return K

    def diag(self, X):
        """Return the kernel diagonal ``k(x, x)`` as a 1-D array of len(X)."""
        # Computed element-wise instead of building the full matrix; the
        # previous implementation returned the whole matrix here.
        return np.array([self._f(x, x) for x in X], dtype=float)

    def is_stationary(self):
        """Report that the kernel is not stationary."""
        return False

    def clone_with_theta(self, theta):
        """Return a clone of this kernel with hyperparameters set to ``theta``."""
        cloned = clone(self)
        cloned.theta = theta
        return cloned


def genGprScores(G, globMu, globStd, scoreName='gpr_score', stdName='gpr_std'):
    """Predict ratings for unrated nodes of ``G`` with a Gaussian process.

    Nodes whose ``'rating'`` attribute is not ``None`` are used as training
    data for a ``GaussianProcessRegressor`` with a ``BookKernel``. For every
    node whose rating is ``None`` the predicted mean and standard deviation
    are written back onto the node's attribute dict.

    Parameters
    ----------
    G : graph
        Graph whose node attribute dicts contain a ``'rating'`` key.
    globMu, globStd
        Not used by this function; kept so existing callers keep working.
    scoreName : str
        Node attribute that receives the predicted mean.
    stdName : str
        Node attribute that receives the predicted standard deviation.

    Returns
    -------
    None
        Results are stored on the nodes of ``G``. If there are no rated or no
        unrated nodes, the graph is left untouched.
    """
    rated, unrated = [], []
    for n in G.nodes:
        if G.nodes[n]['rating'] is None:
            unrated.append(n)
        else:
            rated.append(n)

    # Nothing to learn from or nothing to predict: skip the (expensive)
    # embedding training and avoid fitting/predicting on empty input.
    if not rated or not unrated:
        return

    gpr = GaussianProcessRegressor(kernel=BookKernel(G), random_state=3141)
    gpr.fit(rated, [G.nodes[n]['rating'] for n in rated])

    # predict() returns numpy arrays (no ``.pop``); pair them with the nodes
    # in the same order they were collected.
    means, stds = gpr.predict(unrated, return_std=True)
    for n, mean, std in zip(unrated, means, stds):
        node = G.nodes[n]
        node[scoreName], node[stdName] = float(mean), float(std)