GNTM gucken

This commit is contained in:
Dominik Moritz Roth 2022-02-24 21:53:54 +01:00
parent 85c800d39e
commit e87288a927
2 changed files with 32 additions and 21 deletions

View File

@ -817,9 +817,9 @@ def genScores(G, books, calcPagerank=True):
globMu, globStd = calcRecDist(G, books) globMu, globStd = calcRecDist(G, books)
if calcPagerank: if calcPagerank:
runPagerank(G) runPagerank(G)
genGprScores(G, globMu, globStd) genGprScores(G, globMu, globStd, 'score', 'std')
scoreOpinions(G, globMu, globStd) #scoreOpinions(G, globMu, globStd)
scoreUnread(G, globMu, globStd) #scoreUnread(G, globMu, globStd)
return globMu, globStd return globMu, globStd
def addImageToNode(node, cache, shape='circularImage'): def addImageToNode(node, cache, shape='circularImage'):

View File

@ -8,17 +8,17 @@ from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.base import clone from sklearn.base import clone
class BookKernel(GenericKernelMixin, Kernel): class BookKernel(GenericKernelMixin, Kernel):
def __init__(self, G): def __init__(self, wv):
self.baseline_similarity = 0.5 self.wv = wv
self.baseline_similarity_bounds = (1e-5, 1)
self.G = G
self.node2vec = Node2Vec(self.G, dimensions=32, walk_length=16, num_walks=256, workers=8)
self.model = self.node2vec.fit(window=10, min_count=1, batch_words=4)
self.wv = self.model.wv
def _f(self, s1, s2): def _f(self, s1, s2):
return self.wv.similarity(s1, s2) """
kernel value between a pair of sequences
"""
s = self.wv.similarity(s1, s2)**2*0.99 + 0.01
if s <= 0:
print('bad!')
return s
def __call__(self, X, Y=None, eval_gradient=False): def __call__(self, X, Y=None, eval_gradient=False):
if Y is None: if Y is None:
@ -26,10 +26,11 @@ class BookKernel(GenericKernelMixin, Kernel):
if eval_gradient: if eval_gradient:
return ( return (
np.array([[self._f(x, y) for y in Y] for x in X]), np.array([[self._f(x, y) for y in Y] for x in X])
) )
else: else:
return np.array([[self._f(x, y) for y in Y] for x in X]) return np.array([[self._f(x, y) for y in Y] for x in X])
#return np.array(self.wv.n_similarity(X, Y))
def diag(self, X): def diag(self, X):
return self(X) return self(X)
@ -43,22 +44,32 @@ class BookKernel(GenericKernelMixin, Kernel):
return cloned return cloned
def genGprScores(G, globMu, globStd, scoreName='gpr_score', stdName='gpr_std'): def genGprScores(G, globMu, globStd, scoreName='gpr_score', stdName='gpr_std'):
gpr = GaussianProcessRegressor(kernel=BookKernel(G), random_state=3141) print('[\] Constructing Vectorizer')
node2vec = Node2Vec(G, dimensions=32, walk_length=16, num_walks=128, workers=8)
print('[\] Fitting Embeddings for Kernel')
model = node2vec.fit(window=8, min_count=1, batch_words=4)
wv = model.wv
print('[\] Constructing Kernel')
kernel = BookKernel(wv)
X, y = [], [] X, y = [], []
for n in G.nodes: for n in G.nodes:
node = G.nodes[n] node = G.nodes[n]
if node['rating']!=None: if 'rating' in node and node['rating']!=None:
X.append(n) X.append(n)
y.append(node['rating']) y.append(node['rating'])
gpr.fit(X, y) print('[\] Fitting GP')
gpr = GaussianProcessRegressor(kernel=kernel, random_state=3141, alpha=1e-8).fit(X, y)
X = [] X = []
for n in G.nodes: for n in G.nodes:
node = G.nodes[n] node = G.nodes[n]
if node['rating']==None: if 'rating' in node and node['rating']==None:
X.append(n) X.append(n)
print('[\] Inferencing GP')
y, stds = gpr.predict(X, return_std=True) y, stds = gpr.predict(X, return_std=True)
i=0
for n in G.nodes: for n in G.nodes:
node = G.nodes[n] node = G.nodes[n]
if node['rating']==None: if 'rating' in node and node['rating']==None:
y, std = y.pop(0), stds.pop(0) s, std = y[i], stds[i]
node[scoreName], node[stdName] = y, std i+=1
node[scoreName], node[stdName] = s, std