Fixed MRB bug

This commit is contained in:
Dominik Moritz Roth 2022-02-15 19:35:03 +01:00
parent 3a14e32e58
commit 0529172af2
4 changed files with 4050 additions and 13 deletions

View File

@ -481,15 +481,20 @@ def loadBooksFromDB():
def mrbGetBook(mrbdf, title, authors): def mrbGetBook(mrbdf, title, authors):
title = title.split('(')[0] title = title.split('(')[0]
title = title.replace('*','') title = title.replace('*','')
pot = mrbdf[mrbdf['title'].str.contains(title)]
for author in authors: for author in authors:
for part in author.split(" "): pot = mrbdf[mrbdf['title'].str.contains(title)]
if len(part)>=3: parts = author.split(" ")
pot = mrbdf[mrbdf['author'].str.contains(part)] dic = pot.to_dict(orient='records')
return pot.to_dict(orient='records')[0] if len(pot) else False for d in dic:
for part in [parts[0], parts[-1]]:
if d['author'].find(part)==-1:
break
else:
return d
return False
def infuseDataFromMRB(books): def infuseDataFromMRB(books):
mrbdf = pd.read_csv('mrb_db.csv') mrbdf = pd.read_csv('rec_dbs/mrb_db.csv')
for book in books: for book in books:
mrb = mrbGetBook(mrbdf, book['title'], book['authors']) mrb = mrbGetBook(mrbdf, book['title'], book['authors'])
if mrb: if mrb:
@ -972,20 +977,25 @@ def readBooksAnalysis(G, minRating=0, showAllTags=True, removeUnconnected=False,
scaleOpinionsByRating(G) scaleOpinionsByRating(G)
addScoreToLabels(G) addScoreToLabels(G)
def progress(G, minimum=3.5): def progress(G, books, mu, minimum=3.5):
findNewBooks(G, books, mu, -1, minRecSco = minimum)
bookCount = 0 bookCount = 0
libCount = 0
readCount = 0 readCount = 0
toReadCount = 0 toReadCount = 0
for n in list(G.nodes): for n in list(G.nodes):
node = G.nodes[n] node = G.nodes[n]
if node['t'] == 'book': if node['t'] in ['book','newBook']:
if node['t'] == 'book':
libCount +=1
bookCount += 1 bookCount += 1
if node['rating'] != None: if 'rating' in node and node['rating'] != None:
readCount += 1 readCount += 1
elif 'score' in node and (node['score'] >= minimum or node['std']==0.0): elif 'score' in node and (node['score'] >= minimum or 'std' in node and node['std']==0.0):
toReadCount += 1 toReadCount += 1
perc = round(readCount / (toReadCount+readCount) * 100, 2) perc = round(readCount / (toReadCount+readCount) * 100, 2)
print('Books in libary: '+str(bookCount)) print('Books in library: '+str(libCount))
print('Books in CaliGraph: '+str(bookCount))
print('Read Books: '+str(readCount)) print('Read Books: '+str(readCount))
print('Unread Books: '+str(bookCount-readCount)) print('Unread Books: '+str(bookCount-readCount))
print('Recommended Books (score > '+str(round(minimum, 2))+'): '+str(toReadCount)) print('Recommended Books (score > '+str(round(minimum, 2))+'): '+str(toReadCount))
@ -1111,7 +1121,7 @@ def newBooks(G, books, num, mu, std):
def findNewBooks(G, books, mu, num=-1, minRecSco=5): def findNewBooks(G, books, mu, num=-1, minRecSco=5):
mrbdf = pd.read_csv('mrb_db.csv') mrbdf = pd.read_csv('rec_dbs/mrb_db.csv')
recs = [] recs = []
for n in list(G.nodes): for n in list(G.nodes):
node = G.nodes[n] node = G.nodes[n]
@ -1366,7 +1376,7 @@ def cliInterface():
elif args.cmd=="shell": elif args.cmd=="shell":
shell(G, books, mu, std) shell(G, books, mu, std)
elif args.cmd=="progress": elif args.cmd=="progress":
progress(G, args.m) progress(G, books, mu, args.m)
return return
elif args.cmd=="newBooks": elif args.cmd=="newBooks":
bestListT = 'newBook' bestListT = 'newBook'

2707
rec_dbs/tgb_1.csv Normal file

File diff suppressed because it is too large Load Diff

1320
rec_dbs/tgb_2.csv Normal file

File diff suppressed because it is too large Load Diff