New feature: Dissonance
This commit is contained in:
parent
6af38c686f
commit
1c34d2876f
268
caliGraph.py
268
caliGraph.py
@ -18,9 +18,11 @@ import plotly.graph_objects as go
|
||||
|
||||
import wikipedia
|
||||
|
||||
|
||||
class Error(Exception):
    """Exception type raised by this module for its own failure conditions
    (e.g. CalibreDB being unreachable or required custom columns missing)."""
|
||||
|
||||
|
||||
def getAllAuthors(books):
|
||||
authors = set()
|
||||
for book in books:
|
||||
@ -131,6 +133,7 @@ def removePriv(G):
|
||||
if 'priv' in node['tags']:
|
||||
G.remove_node(n)
|
||||
|
||||
|
||||
def removeWhitepapers(G):
|
||||
for n in list(G.nodes):
|
||||
node = G.nodes[n]
|
||||
@ -146,6 +149,7 @@ def removeDangling(G, alsoBooks=False):
|
||||
if not len(G.adj[n]):
|
||||
G.remove_node(n)
|
||||
|
||||
|
||||
def removeThinRecs(G, minCons=3):
|
||||
for n in list(G.nodes):
|
||||
node = G.nodes[n]
|
||||
@ -153,6 +157,7 @@ def removeThinRecs(G, minCons=3):
|
||||
if not len(G.adj[n]) >= minCons:
|
||||
G.remove_node(n)
|
||||
|
||||
|
||||
def removeEdge(G):
|
||||
for n in list(G.nodes):
|
||||
node = G.nodes[n]
|
||||
@ -256,6 +261,7 @@ def pruneRecommenderCons(G, maxCons=5):
|
||||
if foundCon < 2:
|
||||
G.remove_node(m)
|
||||
|
||||
|
||||
def pruneAuthorCons(G, maxCons=3):
|
||||
for n in list(G.nodes):
|
||||
node = G.nodes[n]
|
||||
@ -281,6 +287,7 @@ def pruneAuthorCons(G, maxCons=3):
|
||||
if foundCon < 2:
|
||||
G.remove_node(m)
|
||||
|
||||
|
||||
def removeHighSpanTags(G, maxCons=5):
|
||||
for n in list(G.nodes):
|
||||
node = G.nodes[n]
|
||||
@ -303,24 +310,28 @@ def removeTopLists(G):
|
||||
if node['t'] == 'topList':
|
||||
G.remove_node(n)
|
||||
|
||||
|
||||
def removeRecommenders(G):
    """Remove, in place, every node of G whose type tag ``t`` is 'recommender'."""
    # Iterate over a snapshot of the node keys: the graph is mutated in the loop.
    for key in tuple(G.nodes):
        if G.nodes[key]['t'] != 'recommender':
            continue
        G.remove_node(key)
|
||||
|
||||
|
||||
def removeAuthors(G):
    """Remove, in place, every node of G whose type tag ``t`` is 'author'."""
    # Iterate over a snapshot of the node keys: the graph is mutated in the loop.
    for key in tuple(G.nodes):
        if G.nodes[key]['t'] != 'author':
            continue
        G.remove_node(key)
|
||||
|
||||
|
||||
def removeSeries(G):
    """Remove, in place, every node of G whose type tag ``t`` is 'series'."""
    # Iterate over a snapshot of the node keys: the graph is mutated in the loop.
    for key in tuple(G.nodes):
        if G.nodes[key]['t'] != 'series':
            continue
        G.remove_node(key)
|
||||
|
||||
|
||||
def removeRestOfSeries(G):
|
||||
for n in list(G.nodes):
|
||||
node = G.nodes[n]
|
||||
@ -336,6 +347,7 @@ def removeRestOfSeries(G):
|
||||
if adjNode['series_index'] > seriesState + 1.0001:
|
||||
G.remove_node(adj)
|
||||
|
||||
|
||||
def removeUnusedRecommenders(G):
|
||||
for n in list(G.nodes):
|
||||
node = G.nodes[n]
|
||||
@ -347,6 +359,7 @@ def removeUnusedRecommenders(G):
|
||||
else: # No unrated recommendation
|
||||
G.remove_node(n)
|
||||
|
||||
|
||||
def removeUselessReadBooks(G):
|
||||
minForce = 1.5
|
||||
minContact = 2
|
||||
@ -368,6 +381,7 @@ def removeUselessReadBooks(G):
|
||||
if force < minForce or contacts < minContact:
|
||||
G.remove_node(n)
|
||||
|
||||
|
||||
def removeUselessTags(G, minUnread=1):
|
||||
for n in list(G.nodes):
|
||||
node = G.nodes[n]
|
||||
@ -380,6 +394,7 @@ def removeUselessTags(G, minUnread=1):
|
||||
if foundUnread < minUnread:
|
||||
G.remove_node(n)
|
||||
|
||||
|
||||
def removeUselessSeries(G, minSco=0):
|
||||
for n in list(G.nodes):
|
||||
node = G.nodes[n]
|
||||
@ -387,6 +402,7 @@ def removeUselessSeries(G, minSco=0):
|
||||
if len(G.adj[n]) < 2 or node['score'] < minSco:
|
||||
G.remove_node(n)
|
||||
|
||||
|
||||
def scoreOpinions(G, globMu, globStd):
|
||||
for n in list(G.nodes):
|
||||
node = G.nodes[n]
|
||||
@ -406,6 +422,7 @@ def scoreOpinions(G, globMu, globStd):
|
||||
else:
|
||||
node['score'] = None
|
||||
|
||||
|
||||
def scoreUnread(G, globMu, globStd):
|
||||
for n in list(G.nodes):
|
||||
feedbacks = [globMu]
|
||||
@ -417,13 +434,15 @@ def scoreUnread(G, globMu, globStd):
|
||||
for adj in adjacens:
|
||||
adjNode = G.nodes[adj]
|
||||
if 'score' in adjNode and adjNode['score'] != None:
|
||||
w = [adjNode['t'], G[n][adj]['weight'] if 'weight' in G[n][adj] else 1]
|
||||
w = [adjNode['t'], G[n][adj]['weight']
|
||||
if 'weight' in G[n][adj] else 1]
|
||||
for fb in adjNode['feedbacks']:
|
||||
feedbacks.append(fb)
|
||||
ws.append(w)
|
||||
if len(feedbacks):
|
||||
node['mean'], node['std'] = norm.fit(feedbacks)
|
||||
node['median'] = np.percentile(feedbacks, [50], method='linear')[0]
|
||||
node['median'] = np.percentile(
|
||||
feedbacks, [50], method='linear')[0]
|
||||
node['se'] = globStd / math.sqrt(len(feedbacks))
|
||||
feedbacks.append(node['pagerank_score'])
|
||||
ws.append(['pagerank'])
|
||||
@ -437,15 +456,18 @@ def scoreUnread(G, globMu, globStd):
|
||||
# ws.append(['se'])
|
||||
feedbacks.append(globMu)
|
||||
ws.append(['bias'])
|
||||
node['score'] = sum([fb*getWeightForType(w[0], w[1] if len(w)>1 else 1) for fb, w in zip(feedbacks, ws)])/sum([getWeightForType(w[0], w[1] if len(w)>1 else 1) for w in ws])
|
||||
node['score'] = sum([fb*getWeightForType(w[0], w[1] if len(w) > 1 else 1) for fb, w in zip(
|
||||
feedbacks, ws)])/sum([getWeightForType(w[0], w[1] if len(w) > 1 else 1) for w in ws])
|
||||
node['_act'] = feedbacks
|
||||
node['_wgh'] = ws
|
||||
else:
|
||||
node['score'] = globMu + errorFac*globStd + len(feedbacks)*0.0000000001
|
||||
node['score'] = globMu + errorFac * \
|
||||
globStd + len(feedbacks)*0.0000000001
|
||||
if 'series' in node:
|
||||
if node['series_index'] == 1.0:
|
||||
node['score'] += 0.000000001
|
||||
|
||||
|
||||
def getWeightForType(nodeType, edgeWeight=1):
|
||||
global weights
|
||||
w = weights[nodeType]
|
||||
@ -454,6 +476,7 @@ def getWeightForType(nodeType, edgeWeight=1):
|
||||
else:
|
||||
return w
|
||||
|
||||
|
||||
def printBestList(G, t='book', num=-1):
|
||||
bestlist = []
|
||||
for n in list(G.nodes):
|
||||
@ -461,10 +484,12 @@ def printBestList(G, t='book', num=-1):
|
||||
if node['t'] == t:
|
||||
if 'score' in node and node['score'] != None:
|
||||
bestlist.append(node)
|
||||
bestlist.sort(key=lambda node: node['score'] + 0.00001*(node['se'] if 'se' in node else 0), reverse=True)
|
||||
bestlist.sort(key=lambda node: node['score'] + 0.00001 *
|
||||
(node['se'] if 'se' in node else 0), reverse=True)
|
||||
for i, book in enumerate(bestlist):
|
||||
if t == 'book':
|
||||
line = book['title'] + " ("+" & ".join(book['authors'])+")"+": {:.5f}".format(book['score'])
|
||||
line = book['title'] + " ("+" & ".join(book['authors'])+")" + \
|
||||
": {:.5f}".format(book['score'])
|
||||
else:
|
||||
line = book['label']
|
||||
print("["+str(i+1).zfill(int((math.log10(num) if num != -1 else 3)+1))+"] "+line)
|
||||
@ -478,12 +503,14 @@ def readColor(book):
|
||||
else:
|
||||
return 'gray'
|
||||
|
||||
|
||||
def loadBooksFromDB():
    """Fetch the books from CalibreDB and enrich them with MRB recommender data.

    Returns the book list obtained from ``calibreDB.getBooks()`` after
    ``infuseDataFromMRB`` has been applied to it.
    """
    library = calibreDB.getBooks()
    infuseDataFromMRB(library)
    # TGB enrichment is currently switched off:
    # infuseDataFromTGB(library)
    return library
|
||||
|
||||
|
||||
def mrbGetBook(mrbdf, title, authors):
|
||||
title = title.split('(')[0]
|
||||
title = title.replace('*', '')
|
||||
@ -499,6 +526,7 @@ def mrbGetBook(mrbdf, title, authors):
|
||||
return d
|
||||
return False
|
||||
|
||||
|
||||
def tgbGetBook(df, title, authors):
|
||||
title = title.split('(')[0]
|
||||
title = title.replace('*', '')
|
||||
@ -514,6 +542,7 @@ def tgbGetBook(df, title, authors):
|
||||
return d
|
||||
return False
|
||||
|
||||
|
||||
def infuseDataFromMRB(books):
|
||||
mrbdf = pd.read_csv('rec_dbs/mrb_db.csv')
|
||||
for book in books:
|
||||
@ -522,6 +551,7 @@ def infuseDataFromMRB(books):
|
||||
for rec in str(mrb['recommender']).split('|'):
|
||||
book['tags'] += [rec + ':MRB']
|
||||
|
||||
|
||||
def infuseDataFromTGB(books):
|
||||
for i in range(1, 3):
|
||||
df = pd.read_csv('rec_dbs/tgb_'+str(i)+'.csv')
|
||||
@ -530,12 +560,14 @@ def infuseDataFromTGB(books):
|
||||
if tgb:
|
||||
book['tgb_rank'] = int(tgb['id'])
|
||||
|
||||
|
||||
class calibreDB():
|
||||
@classmethod
|
||||
def _getTxt(cls, request):
|
||||
ret = os.popen("calibredb "+request).read()
|
||||
if not ret:
|
||||
raise Error('Unable to connect to CalibreDB. Please close all open instances of Calibre.')
|
||||
raise Error(
|
||||
'Unable to connect to CalibreDB. Please close all open instances of Calibre.')
|
||||
return ret
|
||||
|
||||
@classmethod
|
||||
@ -557,7 +589,8 @@ class calibreDB():
|
||||
cols = cls.getCustomColumns()
|
||||
avai = ['calice_score' in cols, 'calice_rating' in cols]
|
||||
if not any(avai):
|
||||
raise Error('Custom Columns missing from CalibreDB. Create columns for "Calice Score" and/or "Calice Rating" using the "createCaliceColumn" command.')
|
||||
raise Error(
|
||||
'Custom Columns missing from CalibreDB. Create columns for "Calice Score" and/or "Calice Rating" using the "createCaliceColumn" command.')
|
||||
return avai
|
||||
|
||||
@classmethod
|
||||
@ -586,9 +619,12 @@ class calibreDB():
|
||||
cls._getTxt('set_custom calice_score '+str(bookId)+' ""')
|
||||
else:
|
||||
if sco:
|
||||
cls._getTxt('set_custom calice_score '+str(bookId)+' '+str(round(score,5)))
|
||||
cls._getTxt('set_custom calice_score ' +
|
||||
str(bookId)+' '+str(round(score, 5)))
|
||||
if rat:
|
||||
cls._getTxt('set_custom calice_rating '+str(bookId)+' '+str(int(round(score))))
|
||||
cls._getTxt('set_custom calice_rating ' +
|
||||
str(bookId)+' '+str(int(round(score))))
|
||||
|
||||
|
||||
def calice(G):
|
||||
scores = {}
|
||||
@ -602,10 +638,12 @@ def calice(G):
|
||||
calibreDB.writeCaliceColumnMultiple(scores)
|
||||
print('Done.')
|
||||
|
||||
|
||||
def remove_html_tags(text):
    """Return *text* with everything between a '<' and the next '>' removed.

    The pattern uses a negated character class instead of ``.*?`` so that a
    tag which spans several lines (e.g. an ``<a`` with its attributes on the
    following line) is stripped as well; ``.`` does not match newlines by
    default, which previously left such tags in place.
    """
    return re.sub(r'<[^>]*>', '', text)
|
||||
|
||||
|
||||
def getKeywords(txt, rake):
|
||||
txt = remove_html_tags(txt)
|
||||
k = []
|
||||
@ -624,6 +662,7 @@ def getKeywords(txt,rake):
|
||||
return k
|
||||
return []
|
||||
|
||||
|
||||
def runPagerank(G):
|
||||
try:
|
||||
scores = nx.pagerank(G=G)
|
||||
@ -634,6 +673,7 @@ def runPagerank(G):
|
||||
for n in list(G.nodes):
|
||||
G.nodes[n]['pagerank_score'] = scores[n] if n in scores else 0
|
||||
|
||||
|
||||
def buildBookGraph(books, darkMode=False, extractKeywords=True, mergeTags=True):
|
||||
G = nx.Graph()
|
||||
if extractKeywords:
|
||||
@ -652,7 +692,8 @@ def buildBookGraph(books, darkMode=False, extractKeywords=True, mergeTags=True):
|
||||
else:
|
||||
desc = ''
|
||||
if 'comments' in book and extractKeywords:
|
||||
sanitized = re.sub(r'[^a-zA-Z0-9\s\.äöü]+', '', book['comments']).replace('\n',' ')
|
||||
sanitized = re.sub(r'[^a-zA-Z0-9\s\.äöü]+',
|
||||
'', book['comments']).replace('\n', ' ')
|
||||
keywords = getKeywords(sanitized, rake)
|
||||
else:
|
||||
keywords = []
|
||||
@ -664,10 +705,12 @@ def buildBookGraph(books, darkMode=False, extractKeywords=True, mergeTags=True):
|
||||
else:
|
||||
series = None
|
||||
series_index = None
|
||||
G.add_node(book['id'], t='book', label=book['title'], title=book['title'], shape='image', image=book['cover'], rating=rating, tags=tags, keywords=keywords, desc=desc, isbn=book['isbn'], files=book['formats'], authors=getAuthors(book), series=series, series_index=series_index, calibreID=book['id'])
|
||||
G.add_node(book['id'], t='book', label=book['title'], title=book['title'], shape='image', image=book['cover'], rating=rating, tags=tags, keywords=keywords,
|
||||
desc=desc, isbn=book['isbn'], files=book['formats'], authors=getAuthors(book), series=series, series_index=series_index, calibreID=book['id'])
|
||||
|
||||
return G
|
||||
|
||||
|
||||
def getWikiImage(search_term):
|
||||
from fuzzywuzzy import fuzz
|
||||
WIKI_REQUEST = 'http://en.wikipedia.org/w/api.php?action=query&prop=pageimages&format=json&piprop=original&titles='
|
||||
@ -681,12 +724,14 @@ def getWikiImage(search_term):
|
||||
title = wkpage.title
|
||||
response = requests.get(WIKI_REQUEST+title)
|
||||
json_data = json.loads(response.text)
|
||||
img_link = list(json_data['query']['pages'].values())[0]['original']['source']
|
||||
img_link = list(json_data['query']['pages'].values())[
|
||||
0]['original']['source']
|
||||
return img_link
|
||||
except:
|
||||
print('[!] No match for '+search_term+' on WikiPedia...')
|
||||
return None
|
||||
|
||||
|
||||
def graphAddAuthors(G, books, darkMode=False):
|
||||
for author in getAllAuthors(books):
|
||||
G.add_node('a/'+author, color='green', t='author', label=author)
|
||||
@ -695,6 +740,7 @@ def graphAddAuthors(G, books, darkMode=False):
|
||||
G.add_edge('a/'+author, book['id'], color=readColor(book))
|
||||
return G
|
||||
|
||||
|
||||
def graphAddRecommenders(G, books, darkMode=False):
|
||||
for rec in getAllRecommenders(books):
|
||||
G.add_node('r/'+rec, color='orange', t='recommender', label=rec)
|
||||
@ -703,6 +749,7 @@ def graphAddRecommenders(G, books, darkMode=False):
|
||||
G.add_edge('r/'+rec, book['id'], color=readColor(book))
|
||||
return G
|
||||
|
||||
|
||||
def graphAddTopLists(G, books, darkMode=False):
|
||||
for tl in getAllTopLists(books):
|
||||
G.add_node('t/'+tl, color='yellow', t='topList', label=tl)
|
||||
@ -715,7 +762,8 @@ def graphAddTopLists(G, books, darkMode=False):
|
||||
|
||||
def graphAddSeries(G, books, darkMode=False):
|
||||
for series in getAllSeries(books):
|
||||
G.add_node('s/'+series, color='red', t='series', label=series, shape='triangle')
|
||||
G.add_node('s/'+series, color='red', t='series',
|
||||
label=series, shape='triangle')
|
||||
for book in books:
|
||||
if 'series' in book:
|
||||
G.add_edge('s/'+book['series'], book['id'], color=readColor(book))
|
||||
@ -724,7 +772,8 @@ def graphAddSeries(G, books, darkMode=False):
|
||||
|
||||
def graphAddTags(G, books, darkMode=False):
|
||||
for tag in getAllTags(books):
|
||||
G.add_node('t/'+tag, color=['lightGray','darkgray'][darkMode], t='tag', label=tag, shape='box')
|
||||
G.add_node('t/'+tag, color=['lightGray', 'darkgray']
|
||||
[darkMode], t='tag', label=tag, shape='box')
|
||||
for book in books:
|
||||
for tag in getTags(book):
|
||||
G.add_edge('t/'+tag, book['id'], color=readColor(book))
|
||||
@ -770,7 +819,8 @@ def addScoreToLabels(G):
|
||||
node['label'] += " ("+str(node['rating'])+")"
|
||||
else:
|
||||
if 'score' in node and node['score'] != None and 'se' in node:
|
||||
node['label'] += " ({:.2f}±{:.1f})".format(node['score'], node['se'])
|
||||
node['label'] += " ({:.2f}±{:.1f})".format(
|
||||
node['score'], node['se'])
|
||||
else:
|
||||
node['label'] += " (0±∞)"
|
||||
|
||||
@ -870,6 +920,7 @@ def genAndShow3D(G, darkMode=False):
|
||||
|
||||
fig.show()
|
||||
|
||||
|
||||
def buildFullGraph(darkMode=False):
|
||||
books = loadBooksFromDB()
|
||||
G = buildBookGraph(books, darkMode=darkMode)
|
||||
@ -890,6 +941,7 @@ def genScores(G, books, calcPagerank=True):
|
||||
scoreUnread(G, globMu, globStd)
|
||||
return globMu, globStd
|
||||
|
||||
|
||||
def addImageToNode(node, cache, shape='circularImage'):
|
||||
name = node['label'].split(' (')[0].replace('*', '')
|
||||
if not name in cache or (cache[name] == False and random.random() < 0.05):
|
||||
@ -906,6 +958,7 @@ def addImageToNode(node, cache, shape='circularImage'):
|
||||
node['image'] = img
|
||||
node['shape'] = shape
|
||||
|
||||
|
||||
def addImagesToNodes(G):
|
||||
try:
|
||||
with open('.imgLinkCache.json', 'r') as cf:
|
||||
@ -915,10 +968,12 @@ def addImagesToNodes(G):
|
||||
for n in list(G.nodes):
|
||||
node = G.nodes[n]
|
||||
if node['t'] in ['recommender', 'author']:
|
||||
addImageToNode(node, cache, ['circularImage','image'][node['t']=='author'])
|
||||
addImageToNode(
|
||||
node, cache, ['circularImage', 'image'][node['t'] == 'author'])
|
||||
with open('.imgLinkCache.json', 'w') as cf:
|
||||
cf.write(json.dumps(cache))
|
||||
|
||||
|
||||
def recommendNBooksRecommenderBased(G, mu, std, n, removeTopListsB=True, removeUselessRecommenders=True):
|
||||
removeRestOfSeries(G)
|
||||
removeBad(G, mu-std*2-1)
|
||||
@ -976,6 +1031,7 @@ def recommendNBooksTagBased(G, mu, std, n, removeTopListsB=True):
|
||||
scaleOpinionsByRating(G)
|
||||
addScoreToLabels(G)
|
||||
|
||||
|
||||
def recommendNBooks(G, mu, std, n, removeTopListsB=True, removeUselessRecommenders=True, v3d=False):
|
||||
removeRestOfSeries(G)
|
||||
removeBad(G, mu-std-0.5)
|
||||
@ -1035,6 +1091,7 @@ def fullGraph(G, removeTopListsB=True):
|
||||
scaleOpinionsByRating(G)
|
||||
addScoreToLabels(G)
|
||||
|
||||
|
||||
def recommenderCompetence(G):
|
||||
# removeRead(G)
|
||||
removeUnread(G)
|
||||
@ -1060,6 +1117,7 @@ def recommenderCompetence(G):
|
||||
node['score'] = 0
|
||||
node['score'] /= 2
|
||||
|
||||
|
||||
def readBooksAnalysis(G, minRating=0, showAllTags=True, removeUnconnected=False, removeTopListsB=True):
|
||||
removeUnread(G)
|
||||
removeBad(G, minRating)
|
||||
@ -1075,6 +1133,7 @@ def readBooksAnalysis(G, minRating=0, showAllTags=True, removeUnconnected=False,
|
||||
scaleOpinionsByRating(G)
|
||||
addScoreToLabels(G)
|
||||
|
||||
|
||||
def progress(G, books, mu, minimum=3.5):
|
||||
findNewBooks(G, books, mu, -1, minRecSco=minimum)
|
||||
bookCount = 0
|
||||
@ -1138,6 +1197,7 @@ def analyze(G, books, mu, type_name, name, dist=2.1):
|
||||
addScoreToLabels(G)
|
||||
match['label'] = "*"+match['label']+"*"
|
||||
|
||||
|
||||
def waveFlow(G, node, n, dist, menge, firstEdge=False):
|
||||
if dist <= 0:
|
||||
return
|
||||
@ -1167,7 +1227,8 @@ def waveFlow(G, node, n, dist, menge, firstEdge=False):
|
||||
book['score'] = 0
|
||||
bestlist.append(book)
|
||||
bestlist.sort(key=lambda node: node['score'], reverse=True)
|
||||
toKeep = min(int(dist*10), math.ceil(len(bestlist) * dist - len(keeplist)*0.5))
|
||||
toKeep = min(int(dist*10), math.ceil(len(bestlist)
|
||||
* dist - len(keeplist)*0.5))
|
||||
if toKeep <= 0:
|
||||
keeplist.sort(key=lambda node: node['rating'], reverse=True)
|
||||
keeplist = keeplist[:min(int(dist*10), int(len(keeplist) * dist))]
|
||||
@ -1180,6 +1241,7 @@ def waveFlow(G, node, n, dist, menge, firstEdge=False):
|
||||
if node in bestlist or node in keeplist:
|
||||
waveFlow(G, node, m, dist, menge, firstEdge=firstEdge)
|
||||
|
||||
|
||||
def gensimTokensForLines(lines):
|
||||
for i, line in enumerate(lines):
|
||||
tokens = gensim.utils.simple_preprocess(line)
|
||||
@ -1189,6 +1251,7 @@ def gensimTokensForLines(lines):
|
||||
# For training data, add tags
|
||||
yield gensim.models.doc2vec.TaggedDocument(tokens, [i])
|
||||
|
||||
|
||||
def buildDoc2Vec(books):
|
||||
import gensim
|
||||
for n in list(G.nodes):
|
||||
@ -1197,10 +1260,12 @@ def buildDoc2Vec(books):
|
||||
pass
|
||||
gensimTokensForLines(lines)
|
||||
|
||||
|
||||
def shell(G, books, mu, std):
    """Open an interactive ptpython session via ``ptpython.repl.embed``.

    The parameters are not used by the function body itself; they exist so
    that they show up in ``locals()`` and are therefore reachable from the
    embedded session together with the module globals.
    """
    # Imported at call time, presumably so ptpython is only needed when this
    # command is actually used.
    from ptpython.repl import embed
    # Pass the module namespace and this function's local namespace
    # (G, books, mu, std and embed) to the embedded session.
    embed(globals(), locals())
|
||||
|
||||
|
||||
def newBooks(G, books, num, mu, std):
|
||||
removeBad(G, mu-std*2)
|
||||
findNewBooks(G, books, mu, num, minRecSco=mu-std)
|
||||
@ -1226,21 +1291,29 @@ def findNewBooks(G, books, mu, num=-1, minRecSco=5):
|
||||
if node['t'] == 'recommender' and 'score' in node:
|
||||
oldBooks = []
|
||||
newBooks = []
|
||||
recBooks = mrbdf[mrbdf['recommender'].str.contains(node['label'])].to_dict(orient='records')
|
||||
recBooks = mrbdf[mrbdf['recommender'].str.contains(
|
||||
node['label'])].to_dict(orient='records')
|
||||
for book in recBooks:
|
||||
if book['title'] in [b['title'] for b in books]:
|
||||
oldBooks.append({'title': book['title'], 'author': book['author']})
|
||||
oldBooks.append(
|
||||
{'title': book['title'], 'author': book['author']})
|
||||
else:
|
||||
newBooks.append({'title': book['title'], 'author': book['author']})
|
||||
recs.append({'name': node['label'], 'rec': node, 'newBooks': newBooks, 'oldBooks': oldBooks})
|
||||
newBooks.append(
|
||||
{'title': book['title'], 'author': book['author']})
|
||||
recs.append({'name': node['label'], 'rec': node,
|
||||
'newBooks': newBooks, 'oldBooks': oldBooks})
|
||||
for rec in recs:
|
||||
for book in rec['newBooks']:
|
||||
G.add_node('n/'+book['title'], color='blue', t='newBook', label=book['title'], author=book['author'])
|
||||
G.add_node('n/'+book['title'], color='blue', t='newBook',
|
||||
label=book['title'], author=book['author'])
|
||||
|
||||
G.add_node('r/'+rec['rec']['label'], color='orange', t='recommender', label=rec['rec']['label'], score=rec['rec']['score'])
|
||||
G.add_edge('r/'+rec['rec']['label'], 'n/'+book['title'], color='blue')
|
||||
G.add_node('r/'+rec['rec']['label'], color='orange', t='recommender',
|
||||
label=rec['rec']['label'], score=rec['rec']['score'])
|
||||
G.add_edge('r/'+rec['rec']['label'], 'n/' +
|
||||
book['title'], color='blue')
|
||||
|
||||
G.add_node('a/'+book['author'], color='green', t='author', label=book['author'])
|
||||
G.add_node('a/'+book['author'], color='green',
|
||||
t='author', label=book['author'])
|
||||
G.add_edge('a/'+book['author'], 'n/'+book['title'], color='blue')
|
||||
for n in list(G.nodes):
|
||||
node = G.nodes[n]
|
||||
@ -1257,12 +1330,16 @@ def findNewBooks(G, books, mu, num=-1, minRecSco=5):
|
||||
else:
|
||||
ses.append(min(ses))
|
||||
scores.append(mu)
|
||||
node['fake_se'] = sum(ses)/(len(ses)**1.2) + 0.5 + 0.5 * (len(scores)==2) # This is not how SE works. DILLIGAF?
|
||||
node['score'] = sum(scores)/len(scores)*1.2 - node['fake_se']*1.6 + 0.5 - 0.1/math.sqrt(len(scores))
|
||||
# This is not how SE works. DILLIGAF?
|
||||
node['fake_se'] = sum(ses)/(len(ses)**1.2) + \
|
||||
0.5 + 0.5 * (len(scores) == 2)
|
||||
node['score'] = sum(
|
||||
scores)/len(scores)*1.2 - node['fake_se']*1.6 + 0.5 - 0.1/math.sqrt(len(scores))
|
||||
if len(scores) == 2:
|
||||
node['score'] *= 0.80
|
||||
node['value'] = 20 + 5 * float(node['score'])
|
||||
node['label'] += " ({:.2f}±{:.1f})".format(node['score'], node['fake_se'])
|
||||
node['label'] += " ({:.2f}±{:.1f})".format(node['score'],
|
||||
node['fake_se'])
|
||||
node['label'] += '\n ' + node['author']
|
||||
if num != -1:
|
||||
removeKeepBest(G, num, 10, 'newBook')
|
||||
@ -1270,6 +1347,8 @@ def findNewBooks(G, books, mu, num=-1, minRecSco=5):
|
||||
# while batchSize is implemented, we only get good convergence when we disable it (batchSize=-1)
|
||||
# but might be necessary to enable later for a larger library for better training performance...
|
||||
# maybe try again for 128 books?
|
||||
|
||||
|
||||
def evaluateFitness(books, batchSize=-1, debugPrint=False):
|
||||
global weights
|
||||
G = buildBookGraph(books)
|
||||
@ -1280,7 +1359,8 @@ def evaluateFitness(books, batchSize=-1, debugPrint=False):
|
||||
graphAddTags(G, books)
|
||||
runPagerank(G)
|
||||
|
||||
ratedBooks = [n for n in list(G.nodes) if 'rating' in G.nodes[n] and G.nodes[n]['rating'] != None]
|
||||
ratedBooks = [n for n in list(
|
||||
G.nodes) if 'rating' in G.nodes[n] and G.nodes[n]['rating'] != None]
|
||||
boundsLoss = 0
|
||||
linSepLoss = []
|
||||
errSq = []
|
||||
@ -1289,7 +1369,8 @@ def evaluateFitness(books, batchSize=-1, debugPrint=False):
|
||||
gradient[wt] = 0
|
||||
mu, sigma = genScores(G, books)
|
||||
for b in G.nodes:
|
||||
batch = random.sample(ratedBooks, batchSize) if batchSize!=-1 and len(ratedBooks) > batchSize else ratedBooks
|
||||
batch = random.sample(ratedBooks, batchSize) if batchSize != - \
|
||||
1 and len(ratedBooks) > batchSize else ratedBooks
|
||||
if b in batch:
|
||||
rating = G.nodes[b]['rating']
|
||||
G.nodes[b]['rating'] = None
|
||||
@ -1300,9 +1381,12 @@ def evaluateFitness(books, batchSize=-1, debugPrint=False):
|
||||
errSq.append((rating - G.nodes[b]['score'])**2)
|
||||
G.nodes[b]['rating'] = rating
|
||||
for wt in weights:
|
||||
scoreB = sum([a*(1.001 if wt==w[0] else 1)*weights[w[0]]*(w[1] if len(w)>1 else 1) for a,w in zip(G.nodes[b]['_act'], G.nodes[b]['_wgh'])])/sum([(1.001 if wt==w[0] else 1)*weights[w[0]]*(w[1] if len(w)>1 else 1) for w in G.nodes[b]['_wgh']])
|
||||
gradient[wt] += ((rating - G.nodes[b]['score'])**2 - (rating - scoreB)**2)*1000
|
||||
regressionLoss = sum([max(0,abs(w)-1)**2 for w in weights.values()]) # no punishment if w within -1 and 1
|
||||
scoreB = sum([a*(1.001 if wt == w[0] else 1)*weights[w[0]]*(w[1] if len(w) > 1 else 1) for a, w in zip(G.nodes[b]['_act'],
|
||||
G.nodes[b]['_wgh'])])/sum([(1.001 if wt == w[0] else 1)*weights[w[0]]*(w[1] if len(w) > 1 else 1) for w in G.nodes[b]['_wgh']])
|
||||
gradient[wt] += ((rating - G.nodes[b]['score'])
|
||||
** 2 - (rating - scoreB)**2)*1000
|
||||
# no punishment if w within -1 and 1
|
||||
regressionLoss = sum([max(0, abs(w)-1)**2 for w in weights.values()])
|
||||
for wt in weights:
|
||||
if abs(weights[wt]) > 1.0:
|
||||
gradient[wt] -= weights[wt]*10
|
||||
@ -1315,6 +1399,55 @@ def evaluateFitness(books, batchSize=-1, debugPrint=False):
|
||||
fit = sum(errSq)/len(errSq) + 0.001*regressionLoss
|
||||
return fit, gradient
|
||||
|
||||
|
||||
def calcDissonance(books):
    """Build the full graph and annotate each rated book with its prediction error.

    For every node that carries a non-None ``rating`` the rating is hidden,
    all scores are recomputed with ``genScores`` and the score the node gets
    without its own rating is recorded. The following attributes are written
    to each such node:

    * ``_test_score``    - score computed while the rating was hidden
    * ``dissonance_off`` - ``rating - score`` (signed)
    * ``dissonance_abs`` - ``abs(rating - score)``

    The original rating is restored afterwards.

    Returns the annotated graph.
    """
    G = buildBookGraph(books)
    graphAddAuthors(G, books)
    graphAddRecommenders(G, books)
    graphAddTopLists(G, books)
    graphAddSeries(G, books)
    graphAddTags(G, books)
    runPagerank(G)

    # Same order as iterating G.nodes, but without a list membership test
    # per node.
    ratedBooks = [n for n in list(G.nodes)
                  if G.nodes[n].get('rating') is not None]

    # Initial scoring pass over the complete graph (return values not needed).
    genScores(G, books)
    for b in ratedBooks:
        node = G.nodes[b]
        rating = node['rating']
        # Hide the rating so the score is derived from the neighbourhood only.
        node['rating'] = None
        genScores(G, books, calcPagerank=False)
        node['_test_score'] = node['score']
        node['rating'] = rating
        node['dissonance_off'] = rating - node['score']
        node['dissonance_abs'] = abs(rating - node['score'])

    return G
|
||||
|
||||
|
||||
def describeDissonance(books, num=-1, sortKey='dissonance_abs', sortDir=True):
    """Print the rated books ordered by a dissonance-related node attribute.

    Parameters:
        books:   book list handed to ``calcDissonance``.
        num:     maximum number of lines to print; -1 (or any negative value)
                 prints all entries, 0 prints nothing.
        sortKey: node attribute used as sort key.
        sortDir: passed as ``reverse`` to the sort, i.e. True lists the
                 largest values first.

    Each line shows the title, the authors, the user's rating ("You"), the
    score computed with that rating hidden ("AI") and their signed
    difference ("Delta").
    """
    G = calcDissonance(books)
    # Only nodes annotated by calcDissonance take part in the listing.
    ranked = [G.nodes[n] for n in list(G.nodes)
              if 'dissonance_abs' in G.nodes[n]]
    ranked.sort(key=lambda node: node[sortKey], reverse=sortDir)
    if num >= 0:
        ranked = ranked[:num]
    # Zero-padded index width: the digit count of the limit, or 4 when
    # unlimited. Using the digit count instead of math.log10 avoids the
    # domain error the logarithm raises for num <= 0.
    width = len(str(num)) if num > 0 else 4
    for i, book in enumerate(ranked):
        line = book['title'] + " ("+" & ".join(book['authors'])+")" + \
            ": You: {:.5f}, AI: {:.5f}, Delta: {:.5f}".format(
                book['rating'], book['_test_score'], book['dissonance_off'])
        print("["+str(i+1).zfill(width)+"] "+line)
|
||||
|
||||
|
||||
def train(initGamma, full=True):
|
||||
global weights
|
||||
if full:
|
||||
@ -1367,18 +1500,22 @@ def train(initGamma, full=True):
|
||||
break
|
||||
print('Done.')
|
||||
|
||||
|
||||
def saveWeights(weights):
    """Write *weights* as JSON to 'neuralWeights.json' in the working directory,
    replacing any previous content of that file."""
    with open('neuralWeights.json', 'w') as handle:
        payload = json.dumps(weights)
        handle.write(payload)
|
||||
|
||||
|
||||
def loadWeights():
    """Load the scoring weights from 'neuralWeights.json'.

    Returns the stored mapping when the file can be opened; any key of the
    built-in defaults that the stored mapping lacks is filled in with its
    default value, so a file written with a smaller key set does not lead to
    a KeyError when a weight is looked up by node type later on. When the
    file cannot be opened the defaults are returned unchanged.

    Malformed JSON is not swallowed: the decoder's exception propagates.
    """
    defaults = {"topList": 0.15, "recommender": 0.30, "author": 0.70, "series": 0.05, "tag": 0.05,
                "pagerank": 0.05, "mu": 0.50, "sigma": 0.30, "bias": 0.25, "median": 0.10}  # , "tgb_rank": 0.10}
    try:
        with open('neuralWeights.json', 'r') as f:
            weights = json.loads(f.read())
    except IOError:
        return defaults
    # Keep every stored value and the stored key order; only add what is missing.
    for name, value in defaults.items():
        weights.setdefault(name, value)
    return weights
|
||||
|
||||
|
||||
def cliInterface(imgDef=False):
|
||||
import argparse
|
||||
|
||||
@ -1403,13 +1540,16 @@ def cliInterface(imgDef=False):
|
||||
cmds = parser.add_subparsers(required=True, dest='cmd')
|
||||
|
||||
p_rec = cmds.add_parser('recommend', description="TODO", aliases=['rec'])
|
||||
p_rec.add_argument('-n', type=int, default=20, help='number of books to recommend')
|
||||
p_rec.add_argument('-n', type=int, default=20,
|
||||
help='number of books to recommend')
|
||||
p_rec.add_argument('--tag-based', action="store_true")
|
||||
p_rec.add_argument('--recommender-based', action="store_true")
|
||||
p_rec.add_argument('--new', type=int, default=-1, help='number of new books to recommend')
|
||||
p_rec.add_argument('--new', type=int, default=-1,
|
||||
help='number of new books to recommend')
|
||||
|
||||
p_rec = cmds.add_parser('listScores', description="TODO", aliases=['ls'])
|
||||
p_rec.add_argument('-n', type=int, default=50, help='number of books to recommend')
|
||||
p_rec.add_argument('-n', type=int, default=50,
|
||||
help='number of books to recommend')
|
||||
|
||||
p_read = cmds.add_parser('read', description="TODO", aliases=[])
|
||||
p_read.add_argument('--min-rating', type=int, default=0)
|
||||
@ -1417,27 +1557,40 @@ def cliInterface(imgDef=False):
|
||||
p_read.add_argument('--only-connected', action="store_true")
|
||||
|
||||
p_show = cmds.add_parser('analyze', description="TODO", aliases=[])
|
||||
p_show.add_argument('type', choices=['any', 'book', 'recommender', 'author', 'series', 'tag'])
|
||||
p_show.add_argument(
|
||||
'type', choices=['any', 'book', 'recommender', 'author', 'series', 'tag'])
|
||||
p_show.add_argument('name', type=str)
|
||||
p_show.add_argument('-d', type=float, default=2.1, help='depth of expansion')
|
||||
p_show.add_argument('-d', type=float, default=2.1,
|
||||
help='depth of expansion')
|
||||
|
||||
p_train = cmds.add_parser('train', description="TODO", aliases=[])
|
||||
p_train.add_argument('-g', type=float, default=0.2, help='learning rate gamma')
|
||||
p_train.add_argument('-g', type=float, default=0.2,
|
||||
help='learning rate gamma')
|
||||
p_train.add_argument('--full', action="store_true")
|
||||
|
||||
p_prog = cmds.add_parser('progress', description="TODO", aliases=[])
|
||||
p_prog.add_argument('-m', type=float, default=7, help='Mimimum Score to read')
|
||||
p_prog.add_argument('-m', type=float, default=7,
|
||||
help='Mimimum Score to read')
|
||||
|
||||
p_comp = cmds.add_parser('competence', description="TODO", aliases=[])
|
||||
|
||||
p_shell = cmds.add_parser('shell', description="TODO", aliases=[])
|
||||
|
||||
p_new = cmds.add_parser('newBooks', description="TODO", aliases=[])
|
||||
p_new.add_argument('-n', type=int, default=10, help='number of books to recommend')
|
||||
p_new.add_argument('-n', type=int, default=10,
|
||||
help='number of books to recommend')
|
||||
|
||||
p_col = cmds.add_parser('calice', description="TODO", aliases=[])
|
||||
p_cal = cmds.add_parser('calice', description="TODO", aliases=[])
|
||||
|
||||
p_createCol = cmds.add_parser('createCaliceColumn', description="TODO", aliases=[])
|
||||
p_dis = cmds.add_parser('dissonance', description="TODO", aliases=['dis'])
|
||||
p_dis.add_argument('-n', type=int, default=-1,
|
||||
help='Maximum number of books to lost')
|
||||
p_dis.add_argument(
|
||||
'--sort', choices=['dissonance_abs', 'dissonance_off', 'score'], default='dissonance_abs', const='dissonance_abs', nargs='?')
|
||||
p_dis.add_argument('--reversed', action="store_true")
|
||||
|
||||
p_createCol = cmds.add_parser(
|
||||
'createCaliceColumn', description="TODO", aliases=[])
|
||||
p_createCol.add_argument('type', choices=['score', 'rating', 'both'])
|
||||
|
||||
p_full = cmds.add_parser('full', description="TODO", aliases=[])
|
||||
@ -1452,6 +1605,7 @@ def cliInterface(imgDef=False):
|
||||
else:
|
||||
mainCLI(args)
|
||||
|
||||
|
||||
def perfTestCLI(args):
|
||||
import time
|
||||
from pycallgraph import PyCallGraph
|
||||
@ -1466,6 +1620,7 @@ def perfTestCLI(args):
|
||||
with PyCallGraph(output=GraphvizOutput(output_file='perfTests/' + str(int(time.time())) + '.png'), config=config):
|
||||
mainCLI(args)
|
||||
|
||||
|
||||
def mainCLI(args):
|
||||
if args.cmd == "train":
|
||||
train(args.g, args.full)
|
||||
@ -1482,7 +1637,6 @@ def mainCLI(args):
|
||||
if not args.keep_whitepapers:
|
||||
removeWhitepapers(G)
|
||||
|
||||
|
||||
if args.cmd == "recommend":
|
||||
if args.new == -1:
|
||||
args.new = int(args.n / 5)
|
||||
@ -1490,16 +1644,21 @@ def mainCLI(args):
|
||||
findNewBooks(G, books, mu, args.new, minRecSco=mu-std)
|
||||
if args.tag_based:
|
||||
if args.recommender_based:
|
||||
raise Exception('tag-based and recommender-based can not be be combined')
|
||||
recommendNBooksTagBased(G, mu, std, args.n, not args.keep_top_lists)
|
||||
raise Exception(
|
||||
'tag-based and recommender-based can not be be combined')
|
||||
recommendNBooksTagBased(
|
||||
G, mu, std, args.n, not args.keep_top_lists)
|
||||
elif args.recommender_based:
|
||||
recommendNBooksRecommenderBased(G, mu, std, args.n, not args.keep_top_lists, not args.keep_useless_recommenders)
|
||||
recommendNBooksRecommenderBased(
|
||||
G, mu, std, args.n, not args.keep_top_lists, not args.keep_useless_recommenders)
|
||||
else:
|
||||
recommendNBooks(G, mu, std, args.n, not args.keep_top_lists, not args.keep_useless_recommenders, args.v3d)
|
||||
recommendNBooks(G, mu, std, args.n, not args.keep_top_lists,
|
||||
not args.keep_useless_recommenders, args.v3d)
|
||||
elif args.cmd == "listScores":
|
||||
listScores(G, mu, std, args.n)
|
||||
elif args.cmd == "read":
|
||||
readBooksAnalysis(G, args.min_rating, args.all_tags, args.only_connected, not args.keep_top_lists)
|
||||
readBooksAnalysis(G, args.min_rating, args.all_tags,
|
||||
args.only_connected, not args.keep_top_lists)
|
||||
elif args.cmd == "analyze":
|
||||
analyze(G, books, mu, args.type, args.name, args.d)
|
||||
elif args.cmd == "full":
|
||||
@ -1518,6 +1677,9 @@ def mainCLI(args):
|
||||
elif args.cmd == "calice":
|
||||
calice(G)
|
||||
exit()
|
||||
elif args.cmd == "dissonance":
|
||||
describeDissonance(books, args.n, args.sort, not args.reversed)
|
||||
exit()
|
||||
elif args.cmd == "createCaliceColumn":
|
||||
if args.type in ['score', 'both']:
|
||||
calibreDB.createCaliceScoreColumn()
|
||||
@ -1525,12 +1687,12 @@ def mainCLI(args):
|
||||
if args.type in ['rating', 'both']:
|
||||
calibreDB.createCaliceRatingColumn()
|
||||
print('[*] Column "Calice Rating" was created.')
|
||||
print('[i] To allow displaying half-stars, please active them manually in the calibre-settings.')
|
||||
print(
|
||||
'[i] To allow displaying half-stars, please active them manually in the calibre-settings.')
|
||||
exit()
|
||||
else:
|
||||
raise Exception("Bad")
|
||||
|
||||
|
||||
if not args.keep_priv:
|
||||
removePriv(G)
|
||||
if args.remove_read:
|
||||
|
Loading…
Reference in New Issue
Block a user