New feature: Dissonance

This commit is contained in:
Dominik Moritz Roth 2022-09-11 18:56:47 +02:00
parent 6af38c686f
commit 1c34d2876f

View File

@ -18,9 +18,11 @@ import plotly.graph_objects as go
import wikipedia
class Error(Exception):
pass
def getAllAuthors(books):
authors = set()
for book in books:
@ -131,6 +133,7 @@ def removePriv(G):
if 'priv' in node['tags']:
G.remove_node(n)
def removeWhitepapers(G):
for n in list(G.nodes):
node = G.nodes[n]
@ -146,6 +149,7 @@ def removeDangling(G, alsoBooks=False):
if not len(G.adj[n]):
G.remove_node(n)
def removeThinRecs(G, minCons=3):
for n in list(G.nodes):
node = G.nodes[n]
@ -153,6 +157,7 @@ def removeThinRecs(G, minCons=3):
if not len(G.adj[n]) >= minCons:
G.remove_node(n)
def removeEdge(G):
for n in list(G.nodes):
node = G.nodes[n]
@ -256,6 +261,7 @@ def pruneRecommenderCons(G, maxCons=5):
if foundCon < 2:
G.remove_node(m)
def pruneAuthorCons(G, maxCons=3):
for n in list(G.nodes):
node = G.nodes[n]
@ -281,6 +287,7 @@ def pruneAuthorCons(G, maxCons=3):
if foundCon < 2:
G.remove_node(m)
def removeHighSpanTags(G, maxCons=5):
for n in list(G.nodes):
node = G.nodes[n]
@ -303,24 +310,28 @@ def removeTopLists(G):
if node['t'] == 'topList':
G.remove_node(n)
def removeRecommenders(G):
    """Delete every node of type 'recommender' from the graph in place."""
    # Snapshot the node list so deletion during iteration is safe.
    for key in list(G.nodes):
        if G.nodes[key]['t'] == 'recommender':
            G.remove_node(key)
def removeAuthors(G):
    """Delete every node of type 'author' from the graph in place."""
    # Snapshot the node list so deletion during iteration is safe.
    for key in list(G.nodes):
        if G.nodes[key]['t'] == 'author':
            G.remove_node(key)
def removeSeries(G):
    """Delete every node of type 'series' from the graph in place."""
    # Snapshot the node list so deletion during iteration is safe.
    for key in list(G.nodes):
        if G.nodes[key]['t'] == 'series':
            G.remove_node(key)
def removeRestOfSeries(G):
for n in list(G.nodes):
node = G.nodes[n]
@ -336,6 +347,7 @@ def removeRestOfSeries(G):
if adjNode['series_index'] > seriesState + 1.0001:
G.remove_node(adj)
def removeUnusedRecommenders(G):
for n in list(G.nodes):
node = G.nodes[n]
@ -347,6 +359,7 @@ def removeUnusedRecommenders(G):
else: # No unrated recommendation
G.remove_node(n)
def removeUselessReadBooks(G):
minForce = 1.5
minContact = 2
@ -368,6 +381,7 @@ def removeUselessReadBooks(G):
if force < minForce or contacts < minContact:
G.remove_node(n)
def removeUselessTags(G, minUnread=1):
for n in list(G.nodes):
node = G.nodes[n]
@ -380,6 +394,7 @@ def removeUselessTags(G, minUnread=1):
if foundUnread < minUnread:
G.remove_node(n)
def removeUselessSeries(G, minSco=0):
for n in list(G.nodes):
node = G.nodes[n]
@ -387,6 +402,7 @@ def removeUselessSeries(G, minSco=0):
if len(G.adj[n]) < 2 or node['score'] < minSco:
G.remove_node(n)
def scoreOpinions(G, globMu, globStd):
for n in list(G.nodes):
node = G.nodes[n]
@ -406,6 +422,7 @@ def scoreOpinions(G, globMu, globStd):
else:
node['score'] = None
def scoreUnread(G, globMu, globStd):
for n in list(G.nodes):
feedbacks = [globMu]
@ -417,13 +434,15 @@ def scoreUnread(G, globMu, globStd):
for adj in adjacens:
adjNode = G.nodes[adj]
if 'score' in adjNode and adjNode['score'] != None:
w = [adjNode['t'], G[n][adj]['weight'] if 'weight' in G[n][adj] else 1]
w = [adjNode['t'], G[n][adj]['weight']
if 'weight' in G[n][adj] else 1]
for fb in adjNode['feedbacks']:
feedbacks.append(fb)
ws.append(w)
if len(feedbacks):
node['mean'], node['std'] = norm.fit(feedbacks)
node['median'] = np.percentile(feedbacks, [50], method='linear')[0]
node['median'] = np.percentile(
feedbacks, [50], method='linear')[0]
node['se'] = globStd / math.sqrt(len(feedbacks))
feedbacks.append(node['pagerank_score'])
ws.append(['pagerank'])
@ -437,15 +456,18 @@ def scoreUnread(G, globMu, globStd):
# ws.append(['se'])
feedbacks.append(globMu)
ws.append(['bias'])
node['score'] = sum([fb*getWeightForType(w[0], w[1] if len(w)>1 else 1) for fb, w in zip(feedbacks, ws)])/sum([getWeightForType(w[0], w[1] if len(w)>1 else 1) for w in ws])
node['score'] = sum([fb*getWeightForType(w[0], w[1] if len(w) > 1 else 1) for fb, w in zip(
feedbacks, ws)])/sum([getWeightForType(w[0], w[1] if len(w) > 1 else 1) for w in ws])
node['_act'] = feedbacks
node['_wgh'] = ws
else:
node['score'] = globMu + errorFac*globStd + len(feedbacks)*0.0000000001
node['score'] = globMu + errorFac * \
globStd + len(feedbacks)*0.0000000001
if 'series' in node:
if node['series_index'] == 1.0:
node['score'] += 0.000000001
def getWeightForType(nodeType, edgeWeight=1):
global weights
w = weights[nodeType]
@ -454,6 +476,7 @@ def getWeightForType(nodeType, edgeWeight=1):
else:
return w
def printBestList(G, t='book', num=-1):
bestlist = []
for n in list(G.nodes):
@ -461,10 +484,12 @@ def printBestList(G, t='book', num=-1):
if node['t'] == t:
if 'score' in node and node['score'] != None:
bestlist.append(node)
bestlist.sort(key=lambda node: node['score'] + 0.00001*(node['se'] if 'se' in node else 0), reverse=True)
bestlist.sort(key=lambda node: node['score'] + 0.00001 *
(node['se'] if 'se' in node else 0), reverse=True)
for i, book in enumerate(bestlist):
if t == 'book':
line = book['title'] + " ("+" & ".join(book['authors'])+")"+": {:.5f}".format(book['score'])
line = book['title'] + " ("+" & ".join(book['authors'])+")" + \
": {:.5f}".format(book['score'])
else:
line = book['label']
print("["+str(i+1).zfill(int((math.log10(num) if num != -1 else 3)+1))+"] "+line)
@ -478,12 +503,14 @@ def readColor(book):
else:
return 'gray'
def loadBooksFromDB():
    """Fetch the library from Calibre and enrich it with MRB recommender data."""
    library = calibreDB.getBooks()
    infuseDataFromMRB(library)
    # infuseDataFromTGB(library)
    return library
def mrbGetBook(mrbdf, title, authors):
title = title.split('(')[0]
title = title.replace('*', '')
@ -499,6 +526,7 @@ def mrbGetBook(mrbdf, title, authors):
return d
return False
def tgbGetBook(df, title, authors):
title = title.split('(')[0]
title = title.replace('*', '')
@ -514,6 +542,7 @@ def tgbGetBook(df, title, authors):
return d
return False
def infuseDataFromMRB(books):
mrbdf = pd.read_csv('rec_dbs/mrb_db.csv')
for book in books:
@ -522,6 +551,7 @@ def infuseDataFromMRB(books):
for rec in str(mrb['recommender']).split('|'):
book['tags'] += [rec + ':MRB']
def infuseDataFromTGB(books):
for i in range(1, 3):
df = pd.read_csv('rec_dbs/tgb_'+str(i)+'.csv')
@ -530,12 +560,14 @@ def infuseDataFromTGB(books):
if tgb:
book['tgb_rank'] = int(tgb['id'])
class calibreDB():
@classmethod
def _getTxt(cls, request):
ret = os.popen("calibredb "+request).read()
if not ret:
raise Error('Unable to connect to CalibreDB. Please close all open instances of Calibre.')
raise Error(
'Unable to connect to CalibreDB. Please close all open instances of Calibre.')
return ret
@classmethod
@ -557,7 +589,8 @@ class calibreDB():
cols = cls.getCustomColumns()
avai = ['calice_score' in cols, 'calice_rating' in cols]
if not any(avai):
raise Error('Custom Columns missing from CalibreDB. Create columns for "Calice Score" and/or "Calice Rating" using the "createCaliceColumn" command.')
raise Error(
'Custom Columns missing from CalibreDB. Create columns for "Calice Score" and/or "Calice Rating" using the "createCaliceColumn" command.')
return avai
@classmethod
@ -586,9 +619,12 @@ class calibreDB():
cls._getTxt('set_custom calice_score '+str(bookId)+' ""')
else:
if sco:
cls._getTxt('set_custom calice_score '+str(bookId)+' '+str(round(score,5)))
cls._getTxt('set_custom calice_score ' +
str(bookId)+' '+str(round(score, 5)))
if rat:
cls._getTxt('set_custom calice_rating '+str(bookId)+' '+str(int(round(score))))
cls._getTxt('set_custom calice_rating ' +
str(bookId)+' '+str(int(round(score))))
def calice(G):
scores = {}
@ -602,10 +638,12 @@ def calice(G):
calibreDB.writeCaliceColumnMultiple(scores)
print('Done.')
def remove_html_tags(text):
    """Return *text* with every HTML-tag-like span (`<...>`, non-greedy) stripped."""
    return re.sub('<.*?>', '', text)
def getKeywords(txt, rake):
txt = remove_html_tags(txt)
k = []
@ -624,6 +662,7 @@ def getKeywords(txt,rake):
return k
return []
def runPagerank(G):
try:
scores = nx.pagerank(G=G)
@ -634,6 +673,7 @@ def runPagerank(G):
for n in list(G.nodes):
G.nodes[n]['pagerank_score'] = scores[n] if n in scores else 0
def buildBookGraph(books, darkMode=False, extractKeywords=True, mergeTags=True):
G = nx.Graph()
if extractKeywords:
@ -652,7 +692,8 @@ def buildBookGraph(books, darkMode=False, extractKeywords=True, mergeTags=True):
else:
desc = ''
if 'comments' in book and extractKeywords:
sanitized = re.sub(r'[^a-zA-Z0-9\s\.äöü]+', '', book['comments']).replace('\n',' ')
sanitized = re.sub(r'[^a-zA-Z0-9\s\.äöü]+',
'', book['comments']).replace('\n', ' ')
keywords = getKeywords(sanitized, rake)
else:
keywords = []
@ -664,10 +705,12 @@ def buildBookGraph(books, darkMode=False, extractKeywords=True, mergeTags=True):
else:
series = None
series_index = None
G.add_node(book['id'], t='book', label=book['title'], title=book['title'], shape='image', image=book['cover'], rating=rating, tags=tags, keywords=keywords, desc=desc, isbn=book['isbn'], files=book['formats'], authors=getAuthors(book), series=series, series_index=series_index, calibreID=book['id'])
G.add_node(book['id'], t='book', label=book['title'], title=book['title'], shape='image', image=book['cover'], rating=rating, tags=tags, keywords=keywords,
desc=desc, isbn=book['isbn'], files=book['formats'], authors=getAuthors(book), series=series, series_index=series_index, calibreID=book['id'])
return G
def getWikiImage(search_term):
from fuzzywuzzy import fuzz
WIKI_REQUEST = 'http://en.wikipedia.org/w/api.php?action=query&prop=pageimages&format=json&piprop=original&titles='
@ -681,12 +724,14 @@ def getWikiImage(search_term):
title = wkpage.title
response = requests.get(WIKI_REQUEST+title)
json_data = json.loads(response.text)
img_link = list(json_data['query']['pages'].values())[0]['original']['source']
img_link = list(json_data['query']['pages'].values())[
0]['original']['source']
return img_link
except:
print('[!] No match for '+search_term+' on WikiPedia...')
return None
def graphAddAuthors(G, books, darkMode=False):
for author in getAllAuthors(books):
G.add_node('a/'+author, color='green', t='author', label=author)
@ -695,6 +740,7 @@ def graphAddAuthors(G, books, darkMode=False):
G.add_edge('a/'+author, book['id'], color=readColor(book))
return G
def graphAddRecommenders(G, books, darkMode=False):
for rec in getAllRecommenders(books):
G.add_node('r/'+rec, color='orange', t='recommender', label=rec)
@ -703,6 +749,7 @@ def graphAddRecommenders(G, books, darkMode=False):
G.add_edge('r/'+rec, book['id'], color=readColor(book))
return G
def graphAddTopLists(G, books, darkMode=False):
for tl in getAllTopLists(books):
G.add_node('t/'+tl, color='yellow', t='topList', label=tl)
@ -715,7 +762,8 @@ def graphAddTopLists(G, books, darkMode=False):
def graphAddSeries(G, books, darkMode=False):
for series in getAllSeries(books):
G.add_node('s/'+series, color='red', t='series', label=series, shape='triangle')
G.add_node('s/'+series, color='red', t='series',
label=series, shape='triangle')
for book in books:
if 'series' in book:
G.add_edge('s/'+book['series'], book['id'], color=readColor(book))
@ -724,7 +772,8 @@ def graphAddSeries(G, books, darkMode=False):
def graphAddTags(G, books, darkMode=False):
for tag in getAllTags(books):
G.add_node('t/'+tag, color=['lightGray','darkgray'][darkMode], t='tag', label=tag, shape='box')
G.add_node('t/'+tag, color=['lightGray', 'darkgray']
[darkMode], t='tag', label=tag, shape='box')
for book in books:
for tag in getTags(book):
G.add_edge('t/'+tag, book['id'], color=readColor(book))
@ -770,7 +819,8 @@ def addScoreToLabels(G):
node['label'] += " ("+str(node['rating'])+")"
else:
if 'score' in node and node['score'] != None and 'se' in node:
node['label'] += " ({:.2f}±{:.1f})".format(node['score'], node['se'])
node['label'] += " ({:.2f}±{:.1f})".format(
node['score'], node['se'])
else:
node['label'] += " (0±∞)"
@ -870,6 +920,7 @@ def genAndShow3D(G, darkMode=False):
fig.show()
def buildFullGraph(darkMode=False):
books = loadBooksFromDB()
G = buildBookGraph(books, darkMode=darkMode)
@ -890,6 +941,7 @@ def genScores(G, books, calcPagerank=True):
scoreUnread(G, globMu, globStd)
return globMu, globStd
def addImageToNode(node, cache, shape='circularImage'):
name = node['label'].split(' (')[0].replace('*', '')
if not name in cache or (cache[name] == False and random.random() < 0.05):
@ -906,6 +958,7 @@ def addImageToNode(node, cache, shape='circularImage'):
node['image'] = img
node['shape'] = shape
def addImagesToNodes(G):
try:
with open('.imgLinkCache.json', 'r') as cf:
@ -915,10 +968,12 @@ def addImagesToNodes(G):
for n in list(G.nodes):
node = G.nodes[n]
if node['t'] in ['recommender', 'author']:
addImageToNode(node, cache, ['circularImage','image'][node['t']=='author'])
addImageToNode(
node, cache, ['circularImage', 'image'][node['t'] == 'author'])
with open('.imgLinkCache.json', 'w') as cf:
cf.write(json.dumps(cache))
def recommendNBooksRecommenderBased(G, mu, std, n, removeTopListsB=True, removeUselessRecommenders=True):
removeRestOfSeries(G)
removeBad(G, mu-std*2-1)
@ -976,6 +1031,7 @@ def recommendNBooksTagBased(G, mu, std, n, removeTopListsB=True):
scaleOpinionsByRating(G)
addScoreToLabels(G)
def recommendNBooks(G, mu, std, n, removeTopListsB=True, removeUselessRecommenders=True, v3d=False):
removeRestOfSeries(G)
removeBad(G, mu-std-0.5)
@ -1035,6 +1091,7 @@ def fullGraph(G, removeTopListsB=True):
scaleOpinionsByRating(G)
addScoreToLabels(G)
def recommenderCompetence(G):
# removeRead(G)
removeUnread(G)
@ -1060,6 +1117,7 @@ def recommenderCompetence(G):
node['score'] = 0
node['score'] /= 2
def readBooksAnalysis(G, minRating=0, showAllTags=True, removeUnconnected=False, removeTopListsB=True):
removeUnread(G)
removeBad(G, minRating)
@ -1075,6 +1133,7 @@ def readBooksAnalysis(G, minRating=0, showAllTags=True, removeUnconnected=False,
scaleOpinionsByRating(G)
addScoreToLabels(G)
def progress(G, books, mu, minimum=3.5):
findNewBooks(G, books, mu, -1, minRecSco=minimum)
bookCount = 0
@ -1138,6 +1197,7 @@ def analyze(G, books, mu, type_name, name, dist=2.1):
addScoreToLabels(G)
match['label'] = "*"+match['label']+"*"
def waveFlow(G, node, n, dist, menge, firstEdge=False):
if dist <= 0:
return
@ -1167,7 +1227,8 @@ def waveFlow(G, node, n, dist, menge, firstEdge=False):
book['score'] = 0
bestlist.append(book)
bestlist.sort(key=lambda node: node['score'], reverse=True)
toKeep = min(int(dist*10), math.ceil(len(bestlist) * dist - len(keeplist)*0.5))
toKeep = min(int(dist*10), math.ceil(len(bestlist)
* dist - len(keeplist)*0.5))
if toKeep <= 0:
keeplist.sort(key=lambda node: node['rating'], reverse=True)
keeplist = keeplist[:min(int(dist*10), int(len(keeplist) * dist))]
@ -1180,6 +1241,7 @@ def waveFlow(G, node, n, dist, menge, firstEdge=False):
if node in bestlist or node in keeplist:
waveFlow(G, node, m, dist, menge, firstEdge=firstEdge)
def gensimTokensForLines(lines):
for i, line in enumerate(lines):
tokens = gensim.utils.simple_preprocess(line)
@ -1189,6 +1251,7 @@ def gensimTokensForLines(lines):
# For training data, add tags
yield gensim.models.doc2vec.TaggedDocument(tokens, [i])
def buildDoc2Vec(books):
import gensim
for n in list(G.nodes):
@ -1197,10 +1260,12 @@ def buildDoc2Vec(books):
pass
gensimTokensForLines(lines)
def shell(G, books, mu, std):
    """Drop into an interactive ptpython REPL with the graph, book list and
    score statistics available in scope for ad-hoc exploration."""
    # Local import: ptpython is only needed for this interactive command.
    from ptpython.repl import embed
    # Expose this function's globals and locals (G, books, mu, std) to the REPL.
    embed(globals(), locals())
def newBooks(G, books, num, mu, std):
removeBad(G, mu-std*2)
findNewBooks(G, books, mu, num, minRecSco=mu-std)
@ -1226,21 +1291,29 @@ def findNewBooks(G, books, mu, num=-1, minRecSco=5):
if node['t'] == 'recommender' and 'score' in node:
oldBooks = []
newBooks = []
recBooks = mrbdf[mrbdf['recommender'].str.contains(node['label'])].to_dict(orient='records')
recBooks = mrbdf[mrbdf['recommender'].str.contains(
node['label'])].to_dict(orient='records')
for book in recBooks:
if book['title'] in [b['title'] for b in books]:
oldBooks.append({'title': book['title'], 'author': book['author']})
oldBooks.append(
{'title': book['title'], 'author': book['author']})
else:
newBooks.append({'title': book['title'], 'author': book['author']})
recs.append({'name': node['label'], 'rec': node, 'newBooks': newBooks, 'oldBooks': oldBooks})
newBooks.append(
{'title': book['title'], 'author': book['author']})
recs.append({'name': node['label'], 'rec': node,
'newBooks': newBooks, 'oldBooks': oldBooks})
for rec in recs:
for book in rec['newBooks']:
G.add_node('n/'+book['title'], color='blue', t='newBook', label=book['title'], author=book['author'])
G.add_node('n/'+book['title'], color='blue', t='newBook',
label=book['title'], author=book['author'])
G.add_node('r/'+rec['rec']['label'], color='orange', t='recommender', label=rec['rec']['label'], score=rec['rec']['score'])
G.add_edge('r/'+rec['rec']['label'], 'n/'+book['title'], color='blue')
G.add_node('r/'+rec['rec']['label'], color='orange', t='recommender',
label=rec['rec']['label'], score=rec['rec']['score'])
G.add_edge('r/'+rec['rec']['label'], 'n/' +
book['title'], color='blue')
G.add_node('a/'+book['author'], color='green', t='author', label=book['author'])
G.add_node('a/'+book['author'], color='green',
t='author', label=book['author'])
G.add_edge('a/'+book['author'], 'n/'+book['title'], color='blue')
for n in list(G.nodes):
node = G.nodes[n]
@ -1257,12 +1330,16 @@ def findNewBooks(G, books, mu, num=-1, minRecSco=5):
else:
ses.append(min(ses))
scores.append(mu)
node['fake_se'] = sum(ses)/(len(ses)**1.2) + 0.5 + 0.5 * (len(scores)==2) # This is not how SE works. DILLIGAF?
node['score'] = sum(scores)/len(scores)*1.2 - node['fake_se']*1.6 + 0.5 - 0.1/math.sqrt(len(scores))
# This is not how SE works. DILLIGAF?
node['fake_se'] = sum(ses)/(len(ses)**1.2) + \
0.5 + 0.5 * (len(scores) == 2)
node['score'] = sum(
scores)/len(scores)*1.2 - node['fake_se']*1.6 + 0.5 - 0.1/math.sqrt(len(scores))
if len(scores) == 2:
node['score'] *= 0.80
node['value'] = 20 + 5 * float(node['score'])
node['label'] += " ({:.2f}±{:.1f})".format(node['score'], node['fake_se'])
node['label'] += " ({:.2f}±{:.1f})".format(node['score'],
node['fake_se'])
node['label'] += '\n ' + node['author']
if num != -1:
removeKeepBest(G, num, 10, 'newBook')
@ -1270,6 +1347,8 @@ def findNewBooks(G, books, mu, num=-1, minRecSco=5):
# while batchSize is implemented, we only get a good gonvergence when we disable it (batchSize=-1)
# but might be necessary to enable later for a larger libary for better training performance...
# maybe try again for 128 books?
def evaluateFitness(books, batchSize=-1, debugPrint=False):
global weights
G = buildBookGraph(books)
@ -1280,7 +1359,8 @@ def evaluateFitness(books, batchSize=-1, debugPrint=False):
graphAddTags(G, books)
runPagerank(G)
ratedBooks = [n for n in list(G.nodes) if 'rating' in G.nodes[n] and G.nodes[n]['rating'] != None]
ratedBooks = [n for n in list(
G.nodes) if 'rating' in G.nodes[n] and G.nodes[n]['rating'] != None]
boundsLoss = 0
linSepLoss = []
errSq = []
@ -1289,7 +1369,8 @@ def evaluateFitness(books, batchSize=-1, debugPrint=False):
gradient[wt] = 0
mu, sigma = genScores(G, books)
for b in G.nodes:
batch = random.sample(ratedBooks, batchSize) if batchSize!=-1 and len(ratedBooks) > batchSize else ratedBooks
batch = random.sample(ratedBooks, batchSize) if batchSize != - \
1 and len(ratedBooks) > batchSize else ratedBooks
if b in batch:
rating = G.nodes[b]['rating']
G.nodes[b]['rating'] = None
@ -1300,9 +1381,12 @@ def evaluateFitness(books, batchSize=-1, debugPrint=False):
errSq.append((rating - G.nodes[b]['score'])**2)
G.nodes[b]['rating'] = rating
for wt in weights:
scoreB = sum([a*(1.001 if wt==w[0] else 1)*weights[w[0]]*(w[1] if len(w)>1 else 1) for a,w in zip(G.nodes[b]['_act'], G.nodes[b]['_wgh'])])/sum([(1.001 if wt==w[0] else 1)*weights[w[0]]*(w[1] if len(w)>1 else 1) for w in G.nodes[b]['_wgh']])
gradient[wt] += ((rating - G.nodes[b]['score'])**2 - (rating - scoreB)**2)*1000
regressionLoss = sum([max(0,abs(w)-1)**2 for w in weights.values()]) # no punishment if w within -1 and 1
scoreB = sum([a*(1.001 if wt == w[0] else 1)*weights[w[0]]*(w[1] if len(w) > 1 else 1) for a, w in zip(G.nodes[b]['_act'],
G.nodes[b]['_wgh'])])/sum([(1.001 if wt == w[0] else 1)*weights[w[0]]*(w[1] if len(w) > 1 else 1) for w in G.nodes[b]['_wgh']])
gradient[wt] += ((rating - G.nodes[b]['score'])
** 2 - (rating - scoreB)**2)*1000
# no punishment if w within -1 and 1
regressionLoss = sum([max(0, abs(w)-1)**2 for w in weights.values()])
for wt in weights:
if abs(weights[wt]) > 1.0:
gradient[wt] -= weights[wt]*10
@ -1315,6 +1399,55 @@ def evaluateFitness(books, batchSize=-1, debugPrint=False):
fit = sum(errSq)/len(errSq) + 0.001*regressionLoss
return fit, gradient
def calcDissonance(books):
    """Measure how far the model's predictions deviate from the user's ratings.

    Builds the full graph, then for each rated book performs a leave-one-out
    evaluation: the rating is temporarily hidden, scores are regenerated, and
    the predicted score is compared against the real rating. Each rated book
    node gets:
      - '_test_score':     score predicted without knowing the rating
      - 'dissonance_off':  signed error (rating - prediction)
      - 'dissonance_abs':  absolute error

    Returns the annotated graph.
    """
    G = buildBookGraph(books)
    graphAddAuthors(G, books)
    graphAddRecommenders(G, books)
    graphAddTopLists(G, books)
    graphAddSeries(G, books)
    graphAddTags(G, books)
    runPagerank(G)
    # Set for O(1) membership tests in the loop below.
    ratedBooks = {n for n in G.nodes
                  if 'rating' in G.nodes[n] and G.nodes[n]['rating'] is not None}
    # Initial full scoring pass over the complete graph.
    genScores(G, books)
    for b in G.nodes:
        if b in ratedBooks:
            rating = G.nodes[b]['rating']
            # Leave-one-out: hide the rating so the model must predict it.
            G.nodes[b]['rating'] = None
            genScores(G, books, calcPagerank=False)
            G.nodes[b]['_test_score'] = G.nodes[b]['score']
            G.nodes[b]['rating'] = rating
            G.nodes[b]['dissonance_off'] = rating - G.nodes[b]['score']
            G.nodes[b]['dissonance_abs'] = abs(rating - G.nodes[b]['score'])
    return G
def describeDissonance(books, num=-1, sortKey='dissonance_abs', sortDir=True):
    """Print rated books ranked by how strongly the model disagrees with the
    user's rating, stopping after *num* entries (num == -1 prints all)."""
    G = calcDissonance(books)
    ranked = [G.nodes[n] for n in list(G.nodes) if 'dissonance_abs' in G.nodes[n]]
    ranked.sort(key=lambda node: node[sortKey], reverse=sortDir)
    # Zero-pad the index to the width implied by num (or 4 digits by default).
    width = int((math.log10(num) if num != -1 else 3)+1)
    for i, book in enumerate(ranked):
        line = book['title'] + " ("+" & ".join(book['authors'])+")" + \
            ": You: {:.5f}, AI: {:.5f}, Delta: {:.5f}".format(
                book['rating'], book['_test_score'], book['dissonance_off'])
        print("["+str(i+1).zfill(width)+"] "+line)
        if num != -1 and i == num-1:
            break
def train(initGamma, full=True):
global weights
if full:
@ -1367,18 +1500,22 @@ def train(initGamma, full=True):
break
print('Done.')
def saveWeights(weights):
    """Serialize the weight dict to neuralWeights.json in the working directory."""
    with open('neuralWeights.json', 'w') as handle:
        handle.write(json.dumps(weights))
def loadWeights():
    """Load model weights from neuralWeights.json, falling back to defaults.

    Returns a dict mapping feature/node type to its weight. The diff-rendered
    source assigned the default dict twice (old and new wrapped form of the
    same line); a single assignment is restored here.
    """
    try:
        with open('neuralWeights.json', 'r') as f:
            weights = json.loads(f.read())
    except IOError:
        # Defaults used until a trained weight file exists on disk.
        weights = {"topList": 0.15, "recommender": 0.30, "author": 0.70, "series": 0.05, "tag": 0.05,
                   "pagerank": 0.05, "mu": 0.50, "sigma": 0.30, "bias": 0.25, "median": 0.10}  # , "tgb_rank": 0.10}
    return weights
def cliInterface(imgDef=False):
import argparse
@ -1403,13 +1540,16 @@ def cliInterface(imgDef=False):
cmds = parser.add_subparsers(required=True, dest='cmd')
p_rec = cmds.add_parser('recommend', description="TODO", aliases=['rec'])
p_rec.add_argument('-n', type=int, default=20, help='number of books to recommend')
p_rec.add_argument('-n', type=int, default=20,
help='number of books to recommend')
p_rec.add_argument('--tag-based', action="store_true")
p_rec.add_argument('--recommender-based', action="store_true")
p_rec.add_argument('--new', type=int, default=-1, help='number of new books to recommend')
p_rec.add_argument('--new', type=int, default=-1,
help='number of new books to recommend')
p_rec = cmds.add_parser('listScores', description="TODO", aliases=['ls'])
p_rec.add_argument('-n', type=int, default=50, help='number of books to recommend')
p_rec.add_argument('-n', type=int, default=50,
help='number of books to recommend')
p_read = cmds.add_parser('read', description="TODO", aliases=[])
p_read.add_argument('--min-rating', type=int, default=0)
@ -1417,27 +1557,40 @@ def cliInterface(imgDef=False):
p_read.add_argument('--only-connected', action="store_true")
p_show = cmds.add_parser('analyze', description="TODO", aliases=[])
p_show.add_argument('type', choices=['any', 'book', 'recommender', 'author', 'series', 'tag'])
p_show.add_argument(
'type', choices=['any', 'book', 'recommender', 'author', 'series', 'tag'])
p_show.add_argument('name', type=str)
p_show.add_argument('-d', type=float, default=2.1, help='depth of expansion')
p_show.add_argument('-d', type=float, default=2.1,
help='depth of expansion')
p_train = cmds.add_parser('train', description="TODO", aliases=[])
p_train.add_argument('-g', type=float, default=0.2, help='learning rate gamma')
p_train.add_argument('-g', type=float, default=0.2,
help='learning rate gamma')
p_train.add_argument('--full', action="store_true")
p_prog = cmds.add_parser('progress', description="TODO", aliases=[])
p_prog.add_argument('-m', type=float, default=7, help='Mimimum Score to read')
p_prog.add_argument('-m', type=float, default=7,
help='Mimimum Score to read')
p_comp = cmds.add_parser('competence', description="TODO", aliases=[])
p_shell = cmds.add_parser('shell', description="TODO", aliases=[])
p_new = cmds.add_parser('newBooks', description="TODO", aliases=[])
p_new.add_argument('-n', type=int, default=10, help='number of books to recommend')
p_new.add_argument('-n', type=int, default=10,
help='number of books to recommend')
p_col = cmds.add_parser('calice', description="TODO", aliases=[])
p_cal = cmds.add_parser('calice', description="TODO", aliases=[])
p_createCol = cmds.add_parser('createCaliceColumn', description="TODO", aliases=[])
p_dis = cmds.add_parser('dissonance', description="TODO", aliases=['dis'])
p_dis.add_argument('-n', type=int, default=-1,
help='Maximum number of books to lost')
p_dis.add_argument(
'--sort', choices=['dissonance_abs', 'dissonance_off', 'score'], default='dissonance_abs', const='dissonance_abs', nargs='?')
p_dis.add_argument('--reversed', action="store_true")
p_createCol = cmds.add_parser(
'createCaliceColumn', description="TODO", aliases=[])
p_createCol.add_argument('type', choices=['score', 'rating', 'both'])
p_full = cmds.add_parser('full', description="TODO", aliases=[])
@ -1452,6 +1605,7 @@ def cliInterface(imgDef=False):
else:
mainCLI(args)
def perfTestCLI(args):
import time
from pycallgraph import PyCallGraph
@ -1466,6 +1620,7 @@ def perfTestCLI(args):
with PyCallGraph(output=GraphvizOutput(output_file='perfTests/' + str(int(time.time())) + '.png'), config=config):
mainCLI(args)
def mainCLI(args):
if args.cmd == "train":
train(args.g, args.full)
@ -1482,7 +1637,6 @@ def mainCLI(args):
if not args.keep_whitepapers:
removeWhitepapers(G)
if args.cmd == "recommend":
if args.new == -1:
args.new = int(args.n / 5)
@ -1490,16 +1644,21 @@ def mainCLI(args):
findNewBooks(G, books, mu, args.new, minRecSco=mu-std)
if args.tag_based:
if args.recommender_based:
raise Exception('tag-based and recommender-based can not be be combined')
recommendNBooksTagBased(G, mu, std, args.n, not args.keep_top_lists)
raise Exception(
'tag-based and recommender-based can not be be combined')
recommendNBooksTagBased(
G, mu, std, args.n, not args.keep_top_lists)
elif args.recommender_based:
recommendNBooksRecommenderBased(G, mu, std, args.n, not args.keep_top_lists, not args.keep_useless_recommenders)
recommendNBooksRecommenderBased(
G, mu, std, args.n, not args.keep_top_lists, not args.keep_useless_recommenders)
else:
recommendNBooks(G, mu, std, args.n, not args.keep_top_lists, not args.keep_useless_recommenders, args.v3d)
recommendNBooks(G, mu, std, args.n, not args.keep_top_lists,
not args.keep_useless_recommenders, args.v3d)
elif args.cmd == "listScores":
listScores(G, mu, std, args.n)
elif args.cmd == "read":
readBooksAnalysis(G, args.min_rating, args.all_tags, args.only_connected, not args.keep_top_lists)
readBooksAnalysis(G, args.min_rating, args.all_tags,
args.only_connected, not args.keep_top_lists)
elif args.cmd == "analyze":
analyze(G, books, mu, args.type, args.name, args.d)
elif args.cmd == "full":
@ -1518,6 +1677,9 @@ def mainCLI(args):
elif args.cmd == "calice":
calice(G)
exit()
elif args.cmd == "dissonance":
describeDissonance(books, args.n, args.sort, not args.reversed)
exit()
elif args.cmd == "createCaliceColumn":
if args.type in ['score', 'both']:
calibreDB.createCaliceScoreColumn()
@ -1525,12 +1687,12 @@ def mainCLI(args):
if args.type in ['rating', 'both']:
calibreDB.createCaliceRatingColumn()
print('[*] Column "Calice Rating" was created.')
print('[i] To allow displaying half-stars, please active them manually in the calibre-settings.')
print(
'[i] To allow displaying half-stars, please active them manually in the calibre-settings.')
exit()
else:
raise Exception("Bad")
if not args.keep_priv:
removePriv(G)
if args.remove_read: