From 880cb6ba7e9803716e93a1f9b0112c858c4c5b78 Mon Sep 17 00:00:00 2001
From: Dominik Roth <dominik.roth.dev@gmail.com>
Date: Tue, 15 Feb 2022 19:54:14 +0100
Subject: [PATCH] Added tgb (but disabled, because it adds no accuracy)

---
 caliGraph.py      | 38 ++++++++++++++++++++++++++++++++------
 rec_dbs/tgb_1.csv | 18 +-----------------
 rec_dbs/tgb_2.csv |  4 +---
 3 files changed, 34 insertions(+), 26 deletions(-)

diff --git a/caliGraph.py b/caliGraph.py
index 79f20fd..2dc974a 100755
--- a/caliGraph.py
+++ b/caliGraph.py
@@ -424,6 +424,8 @@ def scoreUnread(G, globMu, globStd):
                     node['se'] = globStd / math.sqrt(len(feedbacks))
                     feedbacks.append(node['pagerank_score'])
                     ws.append(['pagerank'])
+                    #feedbacks.append(10/math.log10(10+node['tgb_rank']) if 'tgb_rank' in node else 0)
+                    #ws.append(['tgb_rank'])
                     feedbacks.append(node['std'])
                     ws.append(['sigma'])
                     #feedbacks.append(node['median'])
@@ -476,16 +478,32 @@ def readColor(book):
 def loadBooksFromDB():
     books = loadBooksFromCalibreDB()
     infuseDataFromMRB(books)
+    #infuseDataFromTGB(books)
     return books
 
 def mrbGetBook(mrbdf, title, authors):
     title = title.split('(')[0]
     title = title.replace('*','')
-    for author in authors:
-        pot = mrbdf[mrbdf['title'].str.contains(title)]
-        parts = author.split(" ")
-        dic = pot.to_dict(orient='records')
-        for d in dic:
+    pot = mrbdf[mrbdf['title'].str.contains(title)]
+    dic = pot.to_dict(orient='records')
+    for d in dic:
+        for author in authors:
+            parts = author.split(" ")
+            for part in [parts[0], parts[-1]]:
+                if d['author'].find(part)==-1:
+                    break
+            else:
+                return d
+    return False
+
+def tgbGetBook(df, title, authors):
+    title = title.split('(')[0]
+    title = title.replace('*','')
+    pot = df[df['title'].str.contains(title)]
+    dic = pot.to_dict(orient='records')
+    for d in dic:
+        for author in authors:
+            parts = author.split(" ")
             for part in [parts[0], parts[-1]]:
                 if d['author'].find(part)==-1:
                     break
@@ -501,6 +519,14 @@ def infuseDataFromMRB(books):
             for rec in str(mrb['recommender']).split('|'):
                 book['tags'] += [rec + ':MRB']
 
+def infuseDataFromTGB(books):
+    for i in range(1,3):
+        df = pd.read_csv('rec_dbs/tgb_'+str(i)+'.csv')
+        for book in books:
+            tgb = tgbGetBook(df, book['title'], book['authors'])
+            if tgb:
+                book['tgb_rank'] = int(tgb['id'])
+
 def loadBooksFromCalibreDB():
     return json.loads(os.popen("calibredb list --for-machine -f all").read())
 
@@ -1278,7 +1304,7 @@ def loadWeights():
         with open('neuralWeights.json', 'r') as f:
             weights = json.loads(f.read())
     except IOError:
-        weights = {"topList": 0.15, "recommender": 0.30, "author": 0.70, "series": 0.05, "tag": 0.05, "pagerank": 0.05, "mu": 0.50, "sigma": 0.30, "bias": 0.25}
+        weights = {"topList": 0.15, "recommender": 0.30, "author": 0.70, "series": 0.05, "tag": 0.05, "pagerank": 0.05, "mu": 0.50, "sigma": 0.30, "bias": 0.25} #, "tgb_rank": 0.10}
     return weights
 
 def cliInterface():
diff --git a/rec_dbs/tgb_1.csv b/rec_dbs/tgb_1.csv
index 5cb8bbf..69e3ed6 100644
--- a/rec_dbs/tgb_1.csv
+++ b/rec_dbs/tgb_1.csv
@@ -1,4 +1,4 @@
-ID,Title,Author,kp,
+id,title,author,kp,
 1,In Search of Lost Time ,Marcel Proust,1913,
 2,Ulysses,James Joyce,1922,
 3,Don Quixote,Miguel de Cervantes,1605,
@@ -2689,19 +2689,3 @@ ID,Title,Author,kp,
 2686,Advertisements for Myself,Norman Mailer,1959,
 2687,Pure,Andrew Miller,2011,
 2688,The Shawl,Cynthia Ozick,1989,
-,Meghadūta,Kālidāsa,350,
-,The Human Comedy,Honoré de Balzac,1845,
-,The Chronicles of Narnia,C. S. Lewis,1950,
-,Titus Groan,Mervyn Peake,1946,
-,Plays,Pierre Corneille,1684,
-,Selected Plays of Henrick Ibsen,Henrik Ibsen,1906,
-,The Deptford Trilogy,Robertson Davies,1970,
-,Your Face Tomorrow: Dance and dream,Javier Marías,2004,
-,Your Face Tomorrow: Fever and Spear,Javier Marías,2002,
-,Selected Plays of George Bernard Shaw,George Bernard Shaw,1950,
-,"Your Face Tomorrow: Poison, Shadow and Farewell",Javier Marías,2007,
-,The Works of Moliere,Molière,1673,
-,The Complete Plays of Jean Racine,Jean Racine,1699,
-,The Forsyte Saga,John Galsworthy,1922,
-,Gormenghast,Mervyn Peake,1950,
-,Titus Alone,Mervyn Peake,1959,
diff --git a/rec_dbs/tgb_2.csv b/rec_dbs/tgb_2.csv
index 3790d9e..f124011 100644
--- a/rec_dbs/tgb_2.csv
+++ b/rec_dbs/tgb_2.csv
@@ -1,4 +1,4 @@
-ID,title,author,kp
+id,title,author,kp
 1,Essays,Michel de Montaigne,1580
 2,Walden,Henry David Thoreau,1854
 3,Confessions ,Augustine,398
@@ -1316,5 +1316,3 @@ ID,title,author,kp
 1315,On the Fabric of the Human Body,Andreas Vesalius,1543
 1316,The Crack-Up,F. Scott Fitzgerald,1945
 1317,The Power Elite,C. Wright Mills,1956
-,Mr. Wilson's Cabinet of Wonder,Lawrence Weschler,1995
-,Into the Wild,Jon Krakauer,1996