GPR using Node2Vec embedding cosine similarity kernels done

More GNTM
GNTM gucken
2022-02-25 00:44:58 +01:00 · 2022-02-24 22:29:52 +01:00 · 2022-02-24 21:53:54 +01:00 · 2022-02-24 20:18:31 +01:00 · 2022-02-24 20:15:13 +01:00 · 2022-02-24 20:14:13 +01:00
5 changed files with 279 additions and 522 deletions
--- a/.gitignore
+++ b/.gitignore
@ -2,6 +2,5 @@ __pycache__
 *.html
 .venv
 neuralWeights.json
-neuralWeights.json.*
+neuralWeights.json.bak
 .imgLinkCache.json
-.idea
--- a/README.md
+++ b/README.md
@ -1,4 +1,4 @@
 # CaliGraph
 CaliGraph connects to the database of your local Calibre-Instance in order to recommend which unread books are likely to be enjoyed. The recommendations are displayed in a graph which explains the recommendation by showing correlations to previously read books, authors, recommendations by same individuals, tags...

-![Screenshot](./Screenshot_README.png)
+![Screenshot](https://gitea.dominik.roth.ml/dodox/CaliGraph/raw/branch/master/Screenshot_README.png)
--- a/caliGraph.py
+++ b/caliGraph.py
--- a/py/gp.py
+++ b/py/gp.py
@ -0,0 +1,75 @@
+import numpy as np
+
+from node2vec import Node2Vec
+from sklearn.gaussian_process.kernels import Kernel, Hyperparameter
+from sklearn.gaussian_process.kernels import GenericKernelMixin
+from sklearn.gaussian_process import GaussianProcessRegressor
+#from sklearn.gaussian_process import GaussianProcessClassifier
+from sklearn.base import clone
+
+class BookKernel(GenericKernelMixin, Kernel):
+    """Gaussian-process kernel over graph node keys, backed by embedding similarity.
+
+    Inputs are sequences of node keys rather than numeric feature vectors.
+    The kernel has no tunable hyperparameters.
+
+    Parameters:
+        wv: object exposing ``similarity(a, b)`` for two node keys; in
+            ``genGprScores`` this is the ``wv`` attribute of the fitted
+            Node2Vec model.
+    """
+
+    def __init__(self, wv):
+        self.wv = wv
+
+    def _f(self, s1, s2):
+        """
+        Kernel value between a pair of node keys.
+
+        The similarity reported by ``self.wv`` is squared and rescaled to
+        ``sim**2 * 0.99 + 0.01``, so the value never drops below 0.01.
+        """
+        return self.wv.similarity(s1, s2)**2*0.99 + 0.01
+
+    def __call__(self, X, Y=None, eval_gradient=False):
+        """Return the kernel matrix K with ``K[i, j] = _f(X[i], Y[j])``.
+
+        Parameters:
+            X: sequence of node keys.
+            Y: optional second sequence of node keys; defaults to ``X``.
+            eval_gradient: when true, also return the gradient of K with
+                respect to the kernel hyperparameters.
+
+        Returns:
+            ``K``, or ``(K, K_gradient)`` when ``eval_gradient`` is true.
+        """
+        if Y is None:
+            Y = X
+
+        K = np.array([[self._f(x, y) for y in Y] for x in X])
+        if eval_gradient:
+            # There are no hyperparameters, so the gradient has a
+            # zero-length last axis; callers still expect a (K, gradient) pair.
+            n = len(X)
+            return K, np.empty((n, n, 0))
+        return K
+
+    def diag(self, X):
+        """Return the 1-D array of ``_f(x, x)`` for every x in X.
+
+        Computed directly instead of building the full kernel matrix, so it
+        needs len(X) kernel evaluations rather than len(X)**2.
+        """
+        return np.array([self._f(x, x) for x in X])
+
+    def is_stationary(self):
+        """Report that the kernel is not stationary."""
+        return False
+
+    def clone_with_theta(self, theta):
+        """Return a clone of this kernel with ``theta`` assigned to it."""
+        cloned = clone(self)
+        cloned.theta = theta
+        return cloned
+
+def genGprScores(G, scoreName='gpr_score', stdName='gpr_std'):
+    """Predict ratings for the unrated nodes of G with a Gaussian process.
+
+    Node2Vec embeddings of G provide the similarity behind ``BookKernel``.
+    The process is fitted on every node whose ``'rating'`` attribute is set
+    (present and not None) and evaluated on all remaining nodes.
+
+    Parameters:
+        G: graph whose ``G.nodes[n]`` yields a mutable attribute mapping.
+        scoreName: attribute that receives the predicted mean (as float).
+        stdName: attribute that receives the predictive standard deviation
+            (as float).
+
+    Returns:
+        None. The unrated nodes of G are updated in place. Nothing is done
+        when every node already has a rating.
+
+    Raises:
+        ValueError: if there are unrated nodes but no rated node to fit on.
+    """
+    # Split the nodes once; both lists keep the iteration order of G.nodes.
+    rated, unrated = [], []
+    for n in G.nodes:
+        node = G.nodes[n]
+        if 'rating' in node and node['rating'] is not None:
+            rated.append(n)
+        else:
+            unrated.append(n)
+    if not unrated:
+        # Nothing to predict; skip the expensive embedding step entirely.
+        return
+    if not rated:
+        raise ValueError('cannot fit a Gaussian process without any rated node')
+
+    # '\\]' spells the same backslash + ']' text the messages always had,
+    # without relying on an invalid escape sequence.
+    print('[\\] Constructing Feature-Space-Projector')
+    node2vec = Node2Vec(G, dimensions=32, walk_length=16, num_walks=128, workers=8)
+    print('[\\] Fitting Embeddings for Kernel')
+    model = node2vec.fit(window=8, min_count=1, batch_words=4)
+    wv = model.wv
+    print('[\\] Constructing Kernel')
+    kernel = BookKernel(wv)
+    print('[\\] Fitting GP')
+    y = [G.nodes[n]['rating'] for n in rated]
+    gpr = GaussianProcessRegressor(kernel=kernel, random_state=3141, alpha=1e-8).fit(rated, y)
+    print('[\\] Inferencing GP')
+    # BookKernel.diag is 1-D, so stds holds one standard deviation per node.
+    means, stds = gpr.predict(unrated, return_std=True)
+    for n, s, std in zip(unrated, means, stds):
+        node = G.nodes[n]
+        node[scoreName], node[stdName] = float(s), float(std)
--- a/requirements.txt
+++ b/requirements.txt
@ -6,9 +6,3 @@ pyvis
 fuzzywuzzy
 rake_nltk
 ptpython
-requests
-pandas
-plotly
-wikipedia
-argcomplete
-pyzshcomplete
Author	SHA1	Message	Date
Dominik Roth	06712ee027	GPR using Node2Vec embedding cosine similarity kernels done	2022-02-25 00:44:58 +01:00
Dominik Roth	02e912d4ff	More GNTM	2022-02-24 22:29:52 +01:00
Dominik Roth	e87288a927	GNTM gucken	2022-02-24 21:53:54 +01:00
Dominik Roth	85c800d39e	fixes	2022-02-24 20:18:31 +01:00
Dominik Roth	0dc40c5635	lol	2022-02-24 20:15:13 +01:00
Dominik Roth	7c16b8044e	Revert "implemented neuralBins (performance is bad...)" This reverts commit `bd53a83058`.	2022-02-24 20:14:13 +01:00
Dominik Roth	b10fcac016	WIP	2022-02-24 20:12:08 +01:00