From 880cb6ba7e9803716e93a1f9b0112c858c4c5b78 Mon Sep 17 00:00:00 2001 From: Dominik Roth Date: Tue, 15 Feb 2022 19:54:14 +0100 Subject: [PATCH] Added tgb (but disabled, because adds no accuracy) --- caliGraph.py | 38 ++++++++++++++++++++++++++++++++------ rec_dbs/tgb_1.csv | 18 +----------------- rec_dbs/tgb_2.csv | 4 +--- 3 files changed, 34 insertions(+), 26 deletions(-) diff --git a/caliGraph.py b/caliGraph.py index 79f20fd..2dc974a 100755 --- a/caliGraph.py +++ b/caliGraph.py @@ -424,6 +424,8 @@ def scoreUnread(G, globMu, globStd): node['se'] = globStd / math.sqrt(len(feedbacks)) feedbacks.append(node['pagerank_score']) ws.append(['pagerank']) + #feedbacks.append(10/math.ln10(10+node['tgb_rank']) if 'tgb_rank' in node else 0) + #ws.append(['tgb_rank']) feedbacks.append(node['std']) ws.append(['sigma']) #feedbacks.append(node['median']) @@ -476,16 +478,32 @@ def readColor(book): def loadBooksFromDB(): books = loadBooksFromCalibreDB() infuseDataFromMRB(books) + #infuseDataFromTGB(books) return books def mrbGetBook(mrbdf, title, authors): title = title.split('(')[0] title = title.replace('*','') - for author in authors: - pot = mrbdf[mrbdf['title'].str.contains(title)] - parts = author.split(" ") - dic = pot.to_dict(orient='records') - for d in dic: + pot = mrbdf[mrbdf['title'].str.contains(title)] + dic = pot.to_dict(orient='records') + for d in dic: + for author in authors: + parts = author.split(" ") + for part in [parts[0], parts[-1]]: + if d['author'].find(part)==-1: + break + else: + return d + return False + +def tgbGetBook(df, title, authors): + title = title.split('(')[0] + title = title.replace('*','') + pot = df[df['title'].str.contains(title)] + dic = pot.to_dict(orient='records') + for d in dic: + for author in authors: + parts = author.split(" ") for part in [parts[0], parts[-1]]: if d['author'].find(part)==-1: break @@ -501,6 +519,14 @@ def infuseDataFromMRB(books): for rec in str(mrb['recommender']).split('|'): book['tags'] += [rec + ':MRB'] +def infuseDataFromTGB(books): + for i in range(1,3): + df = pd.read_csv('rec_dbs/tgb_'+str(i)+'.csv') + for book in books: + tgb = tgbGetBook(df, book['title'], book['authors']) + if tgb: + book['tgb_rank'] = int(tgb['id']) + def loadBooksFromCalibreDB(): return json.loads(os.popen("calibredb list --for-machine -f all").read()) @@ -1278,7 +1304,7 @@ def loadWeights(): with open('neuralWeights.json', 'r') as f: weights = json.loads(f.read()) except IOError: - weights = {"topList": 0.15, "recommender": 0.30, "author": 0.70, "series": 0.05, "tag": 0.05, "pagerank": 0.05, "mu": 0.50, "sigma": 0.30, "bias": 0.25} + weights = {"topList": 0.15, "recommender": 0.30, "author": 0.70, "series": 0.05, "tag": 0.05, "pagerank": 0.05, "mu": 0.50, "sigma": 0.30, "bias": 0.25} #, "tgb_rank": 0.10} return weights def cliInterface(): diff --git a/rec_dbs/tgb_1.csv b/rec_dbs/tgb_1.csv index 5cb8bbf..69e3ed6 100644 --- a/rec_dbs/tgb_1.csv +++ b/rec_dbs/tgb_1.csv @@ -1,4 +1,4 @@ -ID,Title,Author,kp, +id,title,author,kp, 1,In Search of Lost Time ,Marcel Proust,1913, 2,Ulysses,James Joyce,1922, 3,Don Quixote,Miguel de Cervantes,1605, @@ -2689,19 +2689,3 @@ ID,Title,Author,kp, 2686,Advertisements for Myself,Norman Mailer,1959, 2687,Pure,Andrew Miller,2011, 2688,The Shawl,Cynthia Ozick,1989, -,Meghadūta,Kālidāsa,350, -,The Human Comedy,Honoré de Balzac,1845, -,The Chronicles of Narnia,C. S. Lewis,1950, -,Titus Groan,Mervyn Peake,1946, -,Plays,Pierre Corneille,1684, -,Selected Plays of Henrick Ibsen,Henrik Ibsen,1906, -,The Deptford Trilogy,Robertson Davies,1970, -,Your Face Tomorrow: Dance and dream,Javier Marías,2004, -,Your Face Tomorrow: Fever and Spear,Javier Marías,2002, -,Selected Plays of George Bernard Shaw,George Bernard Shaw,1950, -,"Your Face Tomorrow: Poison, Shadow and Farewell",Javier Marías,2007, -,The Works of Moliere,Molière,1673, -,The Complete Plays of Jean Racine,Jean Racine,1699, -,The Forsyte Saga,John Galsworthy,1922, -,Gormenghast,Mervyn Peake,1950, -,Titus Alone,Mervyn Peake,1959, diff --git a/rec_dbs/tgb_2.csv b/rec_dbs/tgb_2.csv index 3790d9e..f124011 100644 --- a/rec_dbs/tgb_2.csv +++ b/rec_dbs/tgb_2.csv @@ -1,4 +1,4 @@ -ID,title,author,kp +id,title,author,kp 1,Essays,Michel de Montaigne,1580 2,Walden,Henry David Thoreau,1854 3,Confessions ,Augustine,398 @@ -1316,5 +1316,3 @@ ID,title,author,kp 1315,On the Fabric of the Human Body,Andreas Vesalius,1543 1316,The Crack-Up,F. Scott Fitzgerald,1945 1317,The Power Elite,C. Wright Mills,1956 -,Mr. Wilson's Cabinet of Wonder,Lawrence Weschler,1995 -,Into the Wild,Jon Krakauer,1996