GNTM gucken

2022-02-24 21:53:54 +01:00 · 2022-02-24 21:53:54 +01:00 · e87288a927
commit e87288a927
parent 85c800d39e
2 changed files with 32 additions and 21 deletions
--- a/caliGraph.py
+++ b/caliGraph.py
@@ -817,9 +817,9 @@ def genScores(G, books, calcPagerank=True):
    globMu, globStd = calcRecDist(G, books)
    if calcPagerank:
        runPagerank(G)
-    genGprScores(G, globMu, globStd)
-    scoreOpinions(G, globMu, globStd)
-    scoreUnread(G, globMu, globStd)
+    genGprScores(G, globMu, globStd, 'score', 'std')
+    #scoreOpinions(G, globMu, globStd)
+    #scoreUnread(G, globMu, globStd)
    return globMu, globStd

 def addImageToNode(node, cache, shape='circularImage'):
--- a/py/gp.py
+++ b/py/gp.py
@@ -8,17 +8,17 @@ from sklearn.gaussian_process import GaussianProcessRegressor
 from sklearn.base import clone

 class BookKernel(GenericKernelMixin, Kernel):
-    def __init__(self, G):
-        self.baseline_similarity = 0.5
-        self.baseline_similarity_bounds = (1e-5, 1)
-
-        self.G = G
-        self.node2vec = Node2Vec(self.G, dimensions=32, walk_length=16, num_walks=256, workers=8)
-        self.model = self.node2vec.fit(window=10, min_count=1, batch_words=4)
-        self.wv = self.model.wv
+    def __init__(self, wv):
+        self.wv = wv

    def _f(self, s1, s2):
-        return self.wv.similarity(s1, s2)
+        """
+        kernel value between a pair of sequences
+        """
+        s = self.wv.similarity(s1, s2)**2*0.99 + 0.01
+        if s <= 0:
+            print('bad!')
+        return s

    def __call__(self, X, Y=None, eval_gradient=False):
        if Y is None:
@@ -26,10 +26,11 @@ class BookKernel(GenericKernelMixin, Kernel):

        if eval_gradient:
            return (
-                np.array([[self._f(x, y) for y in Y] for x in X]),
+                np.array([[self._f(x, y) for y in Y] for x in X])
            )
        else:
            return np.array([[self._f(x, y) for y in Y] for x in X])
+            #return np.array(self.wv.n_similarity(X, Y))

    def diag(self, X):
        return self(X)
@@ -43,22 +44,32 @@ class BookKernel(GenericKernelMixin, Kernel):
        return cloned

 def genGprScores(G, globMu, globStd, scoreName='gpr_score', stdName='gpr_std'):
-    gpr = GaussianProcessRegressor(kernel=BookKernel(G), random_state=3141)
+    print('[\] Constructing Vectorizer')
+    node2vec = Node2Vec(G, dimensions=32, walk_length=16, num_walks=128, workers=8)
+    print('[\] Fitting Embeddings for Kernel')
+    model = node2vec.fit(window=8, min_count=1, batch_words=4)
+    wv = model.wv
+    print('[\] Constructing Kernel')
+    kernel = BookKernel(wv)
    X, y = [], []
    for n in G.nodes:
        node = G.nodes[n]
-        if node['rating']!=None:
+        if 'rating' in node and node['rating']!=None:
            X.append(n)
            y.append(node['rating'])
-    gpr.fit(X, y)
+    print('[\] Fitting GP')
+    gpr = GaussianProcessRegressor(kernel=kernel, random_state=3141, alpha=1e-8).fit(X, y)
    X = []
    for n in G.nodes:
        node = G.nodes[n]
-        if node['rating']==None:
+        if 'rating' in node and node['rating']==None:
            X.append(n)
-    y,stds = gpr.predict(X, return_std=True)
+    print('[\] Inferencing GP')
+    y, stds = gpr.predict(X, return_std=True)
+    i=0
    for n in G.nodes:
        node = G.nodes[n]
-        if node['rating']==None:
-            y, std = y.pop(0), stds.pop(0)
-            node[scoreName], node[stdName] = y, std
+        if 'rating' in node and node['rating']==None:
+            s, std = y[i], stds[i]
+            i+=1
+            node[scoreName], node[stdName] = s, std