Compare commits

..

7 Commits
master ... gp

Author SHA1 Message Date
06712ee027 GPR using Node2Vec embedding cosine similarity kernels done 2022-02-25 00:44:58 +01:00
02e912d4ff More GNTM 2022-02-24 22:29:52 +01:00
e87288a927 GNTM gucken 2022-02-24 21:53:54 +01:00
85c800d39e fixes 2022-02-24 20:18:31 +01:00
0dc40c5635 lol 2022-02-24 20:15:13 +01:00
7c16b8044e Revert "implemented neuralBins (performance is bad...)"
This reverts commit bd53a83058.
2022-02-24 20:14:13 +01:00
b10fcac016 WIP 2022-02-24 20:12:08 +01:00
5 changed files with 279 additions and 522 deletions

3
.gitignore vendored
View File

@ -2,6 +2,5 @@ __pycache__
*.html
.venv
neuralWeights.json
neuralWeights.json.*
neuralWeights.json.bak
.imgLinkCache.json
.idea

View File

@ -1,4 +1,4 @@
# CaliGraph
CaliGraph connects to the database of your local Calibre instance in order to recommend which unread books you are likely to enjoy. The recommendations are displayed in a graph which explains each recommendation by showing correlations to previously read books, authors, recommendations by the same individuals, tags...
![Screenshot](./Screenshot_README.png)
![Screenshot](https://gitea.dominik.roth.ml/dodox/CaliGraph/raw/branch/master/Screenshot_README.png)

File diff suppressed because it is too large Load Diff

75
py/gp.py Normal file
View File

@ -0,0 +1,75 @@
import numpy as np
from node2vec import Node2Vec
from sklearn.gaussian_process.kernels import Kernel, Hyperparameter
from sklearn.gaussian_process.kernels import GenericKernelMixin
from sklearn.gaussian_process import GaussianProcessRegressor
#from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.base import clone
class BookKernel(GenericKernelMixin, Kernel):
    """Gaussian-process kernel over node keys, built on embedding similarity.

    The kernel value for a pair of keys is derived from ``wv.similarity``.
    The class declares no hyperparameters, so there is nothing for an
    optimizer to tune and the kernel gradient is always empty.
    """

    def __init__(self, wv):
        # Object exposing ``similarity(a, b)``; presumably the keyed vectors
        # of a fitted Node2Vec model -- confirm against the caller.
        self.wv = wv

    def _f(self, s1, s2):
        """Return the kernel value for the pair of keys ``s1`` and ``s2``.

        The similarity is squared and rescaled by ``* 0.99 + 0.01``, so the
        result is at least 0.01 for any real-valued similarity.
        """
        return self.wv.similarity(s1, s2) ** 2 * 0.99 + 0.01

    def __call__(self, X, Y=None, eval_gradient=False):
        """Evaluate the kernel matrix between the keys in ``X`` and ``Y``.

        Args:
            X: Iterable of keys accepted by ``wv.similarity``.
            Y: Second iterable of keys; defaults to ``X`` when ``None``.
            eval_gradient: When true, also return the gradient with respect
                to the kernel hyperparameters.

        Returns:
            The matrix ``K`` with ``K[i, j] = _f(X[i], Y[j])``; when
            ``eval_gradient`` is true, the tuple ``(K, gradient)`` where
            ``gradient`` has shape ``(len(X), len(X), 0)``.
        """
        if Y is None:
            Y = X
        K = np.array([[self._f(x, y) for y in Y] for x in X])
        if eval_gradient:
            # No hyperparameters are declared, hence a zero-width last axis.
            return K, np.empty((len(X), len(X), 0))
        return K

    def diag(self, X):
        """Return the kernel evaluated on ``X`` against itself.

        NOTE(review): scikit-learn's ``Kernel.diag`` contract is to return
        only the diagonal (shape ``(n,)``), but this returns the full
        ``(n, n)`` matrix. Code consuming ``predict(..., return_std=True)``
        may depend on the resulting matrix-shaped output -- verify every
        consumer before changing this.
        """
        return self(X)

    def is_stationary(self):
        """Report the kernel as non-stationary."""
        return False

    def clone_with_theta(self, theta):
        """Return a clone of this kernel with ``theta`` assigned to it."""
        cloned = clone(self)
        cloned.theta = theta
        return cloned
def _per_sample_std(stds, n):
    """Reduce the std output of ``predict`` to one value per predicted sample.

    Accepts a flat ``(n,)`` array as well as the matrix-shaped ``(n, n)`` or
    ``(n, n, 1)`` output that appears when the kernel's ``diag`` returns a
    full matrix; in the matrix case the diagonal entries are used.
    """
    stds = np.asarray(stds, dtype=float)
    if stds.ndim >= 2 and stds.shape[:2] == (n, n):
        idx = np.arange(n)
        stds = stds[idx, idx]
    return stds.reshape(n, -1)[:, 0]


def genGprScores(G, scoreName='gpr_score', stdName='gpr_std'):
    """Annotate every unrated node of ``G`` with a GP-predicted score.

    Node2Vec embeddings are fitted on ``G``, a ``BookKernel`` is built from
    them, and a ``GaussianProcessRegressor`` is fitted on all nodes whose
    ``'rating'`` attribute is present and not ``None``. The remaining nodes
    receive the predicted mean and standard deviation as floats.

    Args:
        G: Graph whose ``G.nodes`` is iterable and maps each node to a
            mutable attribute mapping (presumably a networkx graph).
        scoreName: Attribute key under which the predicted mean is stored.
        stdName: Attribute key under which the predicted std is stored.

    Returns:
        None. Results are written onto the node attribute mappings.
    """
    print('[\\] Constructing Feature-Space-Projector')
    node2vec = Node2Vec(G, dimensions=32, walk_length=16, num_walks=128, workers=8)
    print('[\\] Fitting Embeddings for Kernel')
    model = node2vec.fit(window=8, min_count=1, batch_words=4)
    print('[\\] Constructing Kernel')
    kernel = BookKernel(model.wv)
    print('[\\] Fitting GP')
    # Single pass: rated nodes are training data, all others get predictions.
    rated, ratings, unrated = [], [], []
    for n in G.nodes:
        rating = G.nodes[n].get('rating')
        if rating is not None:
            rated.append(n)
            ratings.append(rating)
        else:
            unrated.append(n)
    if not unrated:
        # Nothing to annotate; avoids calling predict on an empty list.
        return
    gpr = GaussianProcessRegressor(kernel=kernel, random_state=3141, alpha=1e-8).fit(rated, ratings)
    print('[\\] Inferencing GP')
    means, stds = gpr.predict(unrated, return_std=True)
    stds = _per_sample_std(stds, len(unrated))
    for n, mean, std in zip(unrated, means, stds):
        node = G.nodes[n]
        node[scoreName], node[stdName] = float(mean), float(std)

View File

@ -6,9 +6,3 @@ pyvis
fuzzywuzzy
rake_nltk
ptpython
requests
pandas
plotly
wikipedia
argcomplete
pyzshcomplete