New feature: Dissonance
This commit is contained in:
parent
6af38c686f
commit
1c34d2876f
268
caliGraph.py
268
caliGraph.py
@ -18,9 +18,11 @@ import plotly.graph_objects as go
|
|||||||
|
|
||||||
import wikipedia
|
import wikipedia
|
||||||
|
|
||||||
|
|
||||||
class Error(Exception):
|
class Error(Exception):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
def getAllAuthors(books):
|
def getAllAuthors(books):
|
||||||
authors = set()
|
authors = set()
|
||||||
for book in books:
|
for book in books:
|
||||||
@ -131,6 +133,7 @@ def removePriv(G):
|
|||||||
if 'priv' in node['tags']:
|
if 'priv' in node['tags']:
|
||||||
G.remove_node(n)
|
G.remove_node(n)
|
||||||
|
|
||||||
|
|
||||||
def removeWhitepapers(G):
|
def removeWhitepapers(G):
|
||||||
for n in list(G.nodes):
|
for n in list(G.nodes):
|
||||||
node = G.nodes[n]
|
node = G.nodes[n]
|
||||||
@ -146,6 +149,7 @@ def removeDangling(G, alsoBooks=False):
|
|||||||
if not len(G.adj[n]):
|
if not len(G.adj[n]):
|
||||||
G.remove_node(n)
|
G.remove_node(n)
|
||||||
|
|
||||||
|
|
||||||
def removeThinRecs(G, minCons=3):
|
def removeThinRecs(G, minCons=3):
|
||||||
for n in list(G.nodes):
|
for n in list(G.nodes):
|
||||||
node = G.nodes[n]
|
node = G.nodes[n]
|
||||||
@ -153,6 +157,7 @@ def removeThinRecs(G, minCons=3):
|
|||||||
if not len(G.adj[n]) >= minCons:
|
if not len(G.adj[n]) >= minCons:
|
||||||
G.remove_node(n)
|
G.remove_node(n)
|
||||||
|
|
||||||
|
|
||||||
def removeEdge(G):
|
def removeEdge(G):
|
||||||
for n in list(G.nodes):
|
for n in list(G.nodes):
|
||||||
node = G.nodes[n]
|
node = G.nodes[n]
|
||||||
@ -256,6 +261,7 @@ def pruneRecommenderCons(G, maxCons=5):
|
|||||||
if foundCon < 2:
|
if foundCon < 2:
|
||||||
G.remove_node(m)
|
G.remove_node(m)
|
||||||
|
|
||||||
|
|
||||||
def pruneAuthorCons(G, maxCons=3):
|
def pruneAuthorCons(G, maxCons=3):
|
||||||
for n in list(G.nodes):
|
for n in list(G.nodes):
|
||||||
node = G.nodes[n]
|
node = G.nodes[n]
|
||||||
@ -281,6 +287,7 @@ def pruneAuthorCons(G, maxCons=3):
|
|||||||
if foundCon < 2:
|
if foundCon < 2:
|
||||||
G.remove_node(m)
|
G.remove_node(m)
|
||||||
|
|
||||||
|
|
||||||
def removeHighSpanTags(G, maxCons=5):
|
def removeHighSpanTags(G, maxCons=5):
|
||||||
for n in list(G.nodes):
|
for n in list(G.nodes):
|
||||||
node = G.nodes[n]
|
node = G.nodes[n]
|
||||||
@ -303,24 +310,28 @@ def removeTopLists(G):
|
|||||||
if node['t'] == 'topList':
|
if node['t'] == 'topList':
|
||||||
G.remove_node(n)
|
G.remove_node(n)
|
||||||
|
|
||||||
|
|
||||||
def removeRecommenders(G):
|
def removeRecommenders(G):
|
||||||
for n in list(G.nodes):
|
for n in list(G.nodes):
|
||||||
node = G.nodes[n]
|
node = G.nodes[n]
|
||||||
if node['t'] == 'recommender':
|
if node['t'] == 'recommender':
|
||||||
G.remove_node(n)
|
G.remove_node(n)
|
||||||
|
|
||||||
|
|
||||||
def removeAuthors(G):
|
def removeAuthors(G):
|
||||||
for n in list(G.nodes):
|
for n in list(G.nodes):
|
||||||
node = G.nodes[n]
|
node = G.nodes[n]
|
||||||
if node['t'] == 'author':
|
if node['t'] == 'author':
|
||||||
G.remove_node(n)
|
G.remove_node(n)
|
||||||
|
|
||||||
|
|
||||||
def removeSeries(G):
|
def removeSeries(G):
|
||||||
for n in list(G.nodes):
|
for n in list(G.nodes):
|
||||||
node = G.nodes[n]
|
node = G.nodes[n]
|
||||||
if node['t'] == 'series':
|
if node['t'] == 'series':
|
||||||
G.remove_node(n)
|
G.remove_node(n)
|
||||||
|
|
||||||
|
|
||||||
def removeRestOfSeries(G):
|
def removeRestOfSeries(G):
|
||||||
for n in list(G.nodes):
|
for n in list(G.nodes):
|
||||||
node = G.nodes[n]
|
node = G.nodes[n]
|
||||||
@ -336,6 +347,7 @@ def removeRestOfSeries(G):
|
|||||||
if adjNode['series_index'] > seriesState + 1.0001:
|
if adjNode['series_index'] > seriesState + 1.0001:
|
||||||
G.remove_node(adj)
|
G.remove_node(adj)
|
||||||
|
|
||||||
|
|
||||||
def removeUnusedRecommenders(G):
|
def removeUnusedRecommenders(G):
|
||||||
for n in list(G.nodes):
|
for n in list(G.nodes):
|
||||||
node = G.nodes[n]
|
node = G.nodes[n]
|
||||||
@ -347,6 +359,7 @@ def removeUnusedRecommenders(G):
|
|||||||
else: # No unrated recommendation
|
else: # No unrated recommendation
|
||||||
G.remove_node(n)
|
G.remove_node(n)
|
||||||
|
|
||||||
|
|
||||||
def removeUselessReadBooks(G):
|
def removeUselessReadBooks(G):
|
||||||
minForce = 1.5
|
minForce = 1.5
|
||||||
minContact = 2
|
minContact = 2
|
||||||
@ -368,6 +381,7 @@ def removeUselessReadBooks(G):
|
|||||||
if force < minForce or contacts < minContact:
|
if force < minForce or contacts < minContact:
|
||||||
G.remove_node(n)
|
G.remove_node(n)
|
||||||
|
|
||||||
|
|
||||||
def removeUselessTags(G, minUnread=1):
|
def removeUselessTags(G, minUnread=1):
|
||||||
for n in list(G.nodes):
|
for n in list(G.nodes):
|
||||||
node = G.nodes[n]
|
node = G.nodes[n]
|
||||||
@ -380,6 +394,7 @@ def removeUselessTags(G, minUnread=1):
|
|||||||
if foundUnread < minUnread:
|
if foundUnread < minUnread:
|
||||||
G.remove_node(n)
|
G.remove_node(n)
|
||||||
|
|
||||||
|
|
||||||
def removeUselessSeries(G, minSco=0):
|
def removeUselessSeries(G, minSco=0):
|
||||||
for n in list(G.nodes):
|
for n in list(G.nodes):
|
||||||
node = G.nodes[n]
|
node = G.nodes[n]
|
||||||
@ -387,6 +402,7 @@ def removeUselessSeries(G, minSco=0):
|
|||||||
if len(G.adj[n]) < 2 or node['score'] < minSco:
|
if len(G.adj[n]) < 2 or node['score'] < minSco:
|
||||||
G.remove_node(n)
|
G.remove_node(n)
|
||||||
|
|
||||||
|
|
||||||
def scoreOpinions(G, globMu, globStd):
|
def scoreOpinions(G, globMu, globStd):
|
||||||
for n in list(G.nodes):
|
for n in list(G.nodes):
|
||||||
node = G.nodes[n]
|
node = G.nodes[n]
|
||||||
@ -406,6 +422,7 @@ def scoreOpinions(G, globMu, globStd):
|
|||||||
else:
|
else:
|
||||||
node['score'] = None
|
node['score'] = None
|
||||||
|
|
||||||
|
|
||||||
def scoreUnread(G, globMu, globStd):
|
def scoreUnread(G, globMu, globStd):
|
||||||
for n in list(G.nodes):
|
for n in list(G.nodes):
|
||||||
feedbacks = [globMu]
|
feedbacks = [globMu]
|
||||||
@ -417,13 +434,15 @@ def scoreUnread(G, globMu, globStd):
|
|||||||
for adj in adjacens:
|
for adj in adjacens:
|
||||||
adjNode = G.nodes[adj]
|
adjNode = G.nodes[adj]
|
||||||
if 'score' in adjNode and adjNode['score'] != None:
|
if 'score' in adjNode and adjNode['score'] != None:
|
||||||
w = [adjNode['t'], G[n][adj]['weight'] if 'weight' in G[n][adj] else 1]
|
w = [adjNode['t'], G[n][adj]['weight']
|
||||||
|
if 'weight' in G[n][adj] else 1]
|
||||||
for fb in adjNode['feedbacks']:
|
for fb in adjNode['feedbacks']:
|
||||||
feedbacks.append(fb)
|
feedbacks.append(fb)
|
||||||
ws.append(w)
|
ws.append(w)
|
||||||
if len(feedbacks):
|
if len(feedbacks):
|
||||||
node['mean'], node['std'] = norm.fit(feedbacks)
|
node['mean'], node['std'] = norm.fit(feedbacks)
|
||||||
node['median'] = np.percentile(feedbacks, [50], method='linear')[0]
|
node['median'] = np.percentile(
|
||||||
|
feedbacks, [50], method='linear')[0]
|
||||||
node['se'] = globStd / math.sqrt(len(feedbacks))
|
node['se'] = globStd / math.sqrt(len(feedbacks))
|
||||||
feedbacks.append(node['pagerank_score'])
|
feedbacks.append(node['pagerank_score'])
|
||||||
ws.append(['pagerank'])
|
ws.append(['pagerank'])
|
||||||
@ -437,15 +456,18 @@ def scoreUnread(G, globMu, globStd):
|
|||||||
# ws.append(['se'])
|
# ws.append(['se'])
|
||||||
feedbacks.append(globMu)
|
feedbacks.append(globMu)
|
||||||
ws.append(['bias'])
|
ws.append(['bias'])
|
||||||
node['score'] = sum([fb*getWeightForType(w[0], w[1] if len(w)>1 else 1) for fb, w in zip(feedbacks, ws)])/sum([getWeightForType(w[0], w[1] if len(w)>1 else 1) for w in ws])
|
node['score'] = sum([fb*getWeightForType(w[0], w[1] if len(w) > 1 else 1) for fb, w in zip(
|
||||||
|
feedbacks, ws)])/sum([getWeightForType(w[0], w[1] if len(w) > 1 else 1) for w in ws])
|
||||||
node['_act'] = feedbacks
|
node['_act'] = feedbacks
|
||||||
node['_wgh'] = ws
|
node['_wgh'] = ws
|
||||||
else:
|
else:
|
||||||
node['score'] = globMu + errorFac*globStd + len(feedbacks)*0.0000000001
|
node['score'] = globMu + errorFac * \
|
||||||
|
globStd + len(feedbacks)*0.0000000001
|
||||||
if 'series' in node:
|
if 'series' in node:
|
||||||
if node['series_index'] == 1.0:
|
if node['series_index'] == 1.0:
|
||||||
node['score'] += 0.000000001
|
node['score'] += 0.000000001
|
||||||
|
|
||||||
|
|
||||||
def getWeightForType(nodeType, edgeWeight=1):
|
def getWeightForType(nodeType, edgeWeight=1):
|
||||||
global weights
|
global weights
|
||||||
w = weights[nodeType]
|
w = weights[nodeType]
|
||||||
@ -454,6 +476,7 @@ def getWeightForType(nodeType, edgeWeight=1):
|
|||||||
else:
|
else:
|
||||||
return w
|
return w
|
||||||
|
|
||||||
|
|
||||||
def printBestList(G, t='book', num=-1):
|
def printBestList(G, t='book', num=-1):
|
||||||
bestlist = []
|
bestlist = []
|
||||||
for n in list(G.nodes):
|
for n in list(G.nodes):
|
||||||
@ -461,10 +484,12 @@ def printBestList(G, t='book', num=-1):
|
|||||||
if node['t'] == t:
|
if node['t'] == t:
|
||||||
if 'score' in node and node['score'] != None:
|
if 'score' in node and node['score'] != None:
|
||||||
bestlist.append(node)
|
bestlist.append(node)
|
||||||
bestlist.sort(key=lambda node: node['score'] + 0.00001*(node['se'] if 'se' in node else 0), reverse=True)
|
bestlist.sort(key=lambda node: node['score'] + 0.00001 *
|
||||||
|
(node['se'] if 'se' in node else 0), reverse=True)
|
||||||
for i, book in enumerate(bestlist):
|
for i, book in enumerate(bestlist):
|
||||||
if t == 'book':
|
if t == 'book':
|
||||||
line = book['title'] + " ("+" & ".join(book['authors'])+")"+": {:.5f}".format(book['score'])
|
line = book['title'] + " ("+" & ".join(book['authors'])+")" + \
|
||||||
|
": {:.5f}".format(book['score'])
|
||||||
else:
|
else:
|
||||||
line = book['label']
|
line = book['label']
|
||||||
print("["+str(i+1).zfill(int((math.log10(num) if num != -1 else 3)+1))+"] "+line)
|
print("["+str(i+1).zfill(int((math.log10(num) if num != -1 else 3)+1))+"] "+line)
|
||||||
@ -478,12 +503,14 @@ def readColor(book):
|
|||||||
else:
|
else:
|
||||||
return 'gray'
|
return 'gray'
|
||||||
|
|
||||||
|
|
||||||
def loadBooksFromDB():
|
def loadBooksFromDB():
|
||||||
books = calibreDB.getBooks()
|
books = calibreDB.getBooks()
|
||||||
infuseDataFromMRB(books)
|
infuseDataFromMRB(books)
|
||||||
# infuseDataFromTGB(books)
|
# infuseDataFromTGB(books)
|
||||||
return books
|
return books
|
||||||
|
|
||||||
|
|
||||||
def mrbGetBook(mrbdf, title, authors):
|
def mrbGetBook(mrbdf, title, authors):
|
||||||
title = title.split('(')[0]
|
title = title.split('(')[0]
|
||||||
title = title.replace('*', '')
|
title = title.replace('*', '')
|
||||||
@ -499,6 +526,7 @@ def mrbGetBook(mrbdf, title, authors):
|
|||||||
return d
|
return d
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
|
||||||
def tgbGetBook(df, title, authors):
|
def tgbGetBook(df, title, authors):
|
||||||
title = title.split('(')[0]
|
title = title.split('(')[0]
|
||||||
title = title.replace('*', '')
|
title = title.replace('*', '')
|
||||||
@ -514,6 +542,7 @@ def tgbGetBook(df, title, authors):
|
|||||||
return d
|
return d
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
|
||||||
def infuseDataFromMRB(books):
|
def infuseDataFromMRB(books):
|
||||||
mrbdf = pd.read_csv('rec_dbs/mrb_db.csv')
|
mrbdf = pd.read_csv('rec_dbs/mrb_db.csv')
|
||||||
for book in books:
|
for book in books:
|
||||||
@ -522,6 +551,7 @@ def infuseDataFromMRB(books):
|
|||||||
for rec in str(mrb['recommender']).split('|'):
|
for rec in str(mrb['recommender']).split('|'):
|
||||||
book['tags'] += [rec + ':MRB']
|
book['tags'] += [rec + ':MRB']
|
||||||
|
|
||||||
|
|
||||||
def infuseDataFromTGB(books):
|
def infuseDataFromTGB(books):
|
||||||
for i in range(1, 3):
|
for i in range(1, 3):
|
||||||
df = pd.read_csv('rec_dbs/tgb_'+str(i)+'.csv')
|
df = pd.read_csv('rec_dbs/tgb_'+str(i)+'.csv')
|
||||||
@ -530,12 +560,14 @@ def infuseDataFromTGB(books):
|
|||||||
if tgb:
|
if tgb:
|
||||||
book['tgb_rank'] = int(tgb['id'])
|
book['tgb_rank'] = int(tgb['id'])
|
||||||
|
|
||||||
|
|
||||||
class calibreDB():
|
class calibreDB():
|
||||||
@classmethod
|
@classmethod
|
||||||
def _getTxt(cls, request):
|
def _getTxt(cls, request):
|
||||||
ret = os.popen("calibredb "+request).read()
|
ret = os.popen("calibredb "+request).read()
|
||||||
if not ret:
|
if not ret:
|
||||||
raise Error('Unable to connect to CalibreDB. Please close all open instances of Calibre.')
|
raise Error(
|
||||||
|
'Unable to connect to CalibreDB. Please close all open instances of Calibre.')
|
||||||
return ret
|
return ret
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
@ -557,7 +589,8 @@ class calibreDB():
|
|||||||
cols = cls.getCustomColumns()
|
cols = cls.getCustomColumns()
|
||||||
avai = ['calice_score' in cols, 'calice_rating' in cols]
|
avai = ['calice_score' in cols, 'calice_rating' in cols]
|
||||||
if not any(avai):
|
if not any(avai):
|
||||||
raise Error('Custom Columns missing from CalibreDB. Create columns for "Calice Score" and/or "Calice Rating" using the "createCaliceColumn" command.')
|
raise Error(
|
||||||
|
'Custom Columns missing from CalibreDB. Create columns for "Calice Score" and/or "Calice Rating" using the "createCaliceColumn" command.')
|
||||||
return avai
|
return avai
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
@ -586,9 +619,12 @@ class calibreDB():
|
|||||||
cls._getTxt('set_custom calice_score '+str(bookId)+' ""')
|
cls._getTxt('set_custom calice_score '+str(bookId)+' ""')
|
||||||
else:
|
else:
|
||||||
if sco:
|
if sco:
|
||||||
cls._getTxt('set_custom calice_score '+str(bookId)+' '+str(round(score,5)))
|
cls._getTxt('set_custom calice_score ' +
|
||||||
|
str(bookId)+' '+str(round(score, 5)))
|
||||||
if rat:
|
if rat:
|
||||||
cls._getTxt('set_custom calice_rating '+str(bookId)+' '+str(int(round(score))))
|
cls._getTxt('set_custom calice_rating ' +
|
||||||
|
str(bookId)+' '+str(int(round(score))))
|
||||||
|
|
||||||
|
|
||||||
def calice(G):
|
def calice(G):
|
||||||
scores = {}
|
scores = {}
|
||||||
@ -602,10 +638,12 @@ def calice(G):
|
|||||||
calibreDB.writeCaliceColumnMultiple(scores)
|
calibreDB.writeCaliceColumnMultiple(scores)
|
||||||
print('Done.')
|
print('Done.')
|
||||||
|
|
||||||
|
|
||||||
def remove_html_tags(text):
|
def remove_html_tags(text):
|
||||||
clean = re.compile('<.*?>')
|
clean = re.compile('<.*?>')
|
||||||
return re.sub(clean, '', text)
|
return re.sub(clean, '', text)
|
||||||
|
|
||||||
|
|
||||||
def getKeywords(txt, rake):
|
def getKeywords(txt, rake):
|
||||||
txt = remove_html_tags(txt)
|
txt = remove_html_tags(txt)
|
||||||
k = []
|
k = []
|
||||||
@ -624,6 +662,7 @@ def getKeywords(txt,rake):
|
|||||||
return k
|
return k
|
||||||
return []
|
return []
|
||||||
|
|
||||||
|
|
||||||
def runPagerank(G):
|
def runPagerank(G):
|
||||||
try:
|
try:
|
||||||
scores = nx.pagerank(G=G)
|
scores = nx.pagerank(G=G)
|
||||||
@ -634,6 +673,7 @@ def runPagerank(G):
|
|||||||
for n in list(G.nodes):
|
for n in list(G.nodes):
|
||||||
G.nodes[n]['pagerank_score'] = scores[n] if n in scores else 0
|
G.nodes[n]['pagerank_score'] = scores[n] if n in scores else 0
|
||||||
|
|
||||||
|
|
||||||
def buildBookGraph(books, darkMode=False, extractKeywords=True, mergeTags=True):
|
def buildBookGraph(books, darkMode=False, extractKeywords=True, mergeTags=True):
|
||||||
G = nx.Graph()
|
G = nx.Graph()
|
||||||
if extractKeywords:
|
if extractKeywords:
|
||||||
@ -652,7 +692,8 @@ def buildBookGraph(books, darkMode=False, extractKeywords=True, mergeTags=True):
|
|||||||
else:
|
else:
|
||||||
desc = ''
|
desc = ''
|
||||||
if 'comments' in book and extractKeywords:
|
if 'comments' in book and extractKeywords:
|
||||||
sanitized = re.sub(r'[^a-zA-Z0-9\s\.äöü]+', '', book['comments']).replace('\n',' ')
|
sanitized = re.sub(r'[^a-zA-Z0-9\s\.äöü]+',
|
||||||
|
'', book['comments']).replace('\n', ' ')
|
||||||
keywords = getKeywords(sanitized, rake)
|
keywords = getKeywords(sanitized, rake)
|
||||||
else:
|
else:
|
||||||
keywords = []
|
keywords = []
|
||||||
@ -664,10 +705,12 @@ def buildBookGraph(books, darkMode=False, extractKeywords=True, mergeTags=True):
|
|||||||
else:
|
else:
|
||||||
series = None
|
series = None
|
||||||
series_index = None
|
series_index = None
|
||||||
G.add_node(book['id'], t='book', label=book['title'], title=book['title'], shape='image', image=book['cover'], rating=rating, tags=tags, keywords=keywords, desc=desc, isbn=book['isbn'], files=book['formats'], authors=getAuthors(book), series=series, series_index=series_index, calibreID=book['id'])
|
G.add_node(book['id'], t='book', label=book['title'], title=book['title'], shape='image', image=book['cover'], rating=rating, tags=tags, keywords=keywords,
|
||||||
|
desc=desc, isbn=book['isbn'], files=book['formats'], authors=getAuthors(book), series=series, series_index=series_index, calibreID=book['id'])
|
||||||
|
|
||||||
return G
|
return G
|
||||||
|
|
||||||
|
|
||||||
def getWikiImage(search_term):
|
def getWikiImage(search_term):
|
||||||
from fuzzywuzzy import fuzz
|
from fuzzywuzzy import fuzz
|
||||||
WIKI_REQUEST = 'http://en.wikipedia.org/w/api.php?action=query&prop=pageimages&format=json&piprop=original&titles='
|
WIKI_REQUEST = 'http://en.wikipedia.org/w/api.php?action=query&prop=pageimages&format=json&piprop=original&titles='
|
||||||
@ -681,12 +724,14 @@ def getWikiImage(search_term):
|
|||||||
title = wkpage.title
|
title = wkpage.title
|
||||||
response = requests.get(WIKI_REQUEST+title)
|
response = requests.get(WIKI_REQUEST+title)
|
||||||
json_data = json.loads(response.text)
|
json_data = json.loads(response.text)
|
||||||
img_link = list(json_data['query']['pages'].values())[0]['original']['source']
|
img_link = list(json_data['query']['pages'].values())[
|
||||||
|
0]['original']['source']
|
||||||
return img_link
|
return img_link
|
||||||
except:
|
except:
|
||||||
print('[!] No match for '+search_term+' on WikiPedia...')
|
print('[!] No match for '+search_term+' on WikiPedia...')
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
def graphAddAuthors(G, books, darkMode=False):
|
def graphAddAuthors(G, books, darkMode=False):
|
||||||
for author in getAllAuthors(books):
|
for author in getAllAuthors(books):
|
||||||
G.add_node('a/'+author, color='green', t='author', label=author)
|
G.add_node('a/'+author, color='green', t='author', label=author)
|
||||||
@ -695,6 +740,7 @@ def graphAddAuthors(G, books, darkMode=False):
|
|||||||
G.add_edge('a/'+author, book['id'], color=readColor(book))
|
G.add_edge('a/'+author, book['id'], color=readColor(book))
|
||||||
return G
|
return G
|
||||||
|
|
||||||
|
|
||||||
def graphAddRecommenders(G, books, darkMode=False):
|
def graphAddRecommenders(G, books, darkMode=False):
|
||||||
for rec in getAllRecommenders(books):
|
for rec in getAllRecommenders(books):
|
||||||
G.add_node('r/'+rec, color='orange', t='recommender', label=rec)
|
G.add_node('r/'+rec, color='orange', t='recommender', label=rec)
|
||||||
@ -703,6 +749,7 @@ def graphAddRecommenders(G, books, darkMode=False):
|
|||||||
G.add_edge('r/'+rec, book['id'], color=readColor(book))
|
G.add_edge('r/'+rec, book['id'], color=readColor(book))
|
||||||
return G
|
return G
|
||||||
|
|
||||||
|
|
||||||
def graphAddTopLists(G, books, darkMode=False):
|
def graphAddTopLists(G, books, darkMode=False):
|
||||||
for tl in getAllTopLists(books):
|
for tl in getAllTopLists(books):
|
||||||
G.add_node('t/'+tl, color='yellow', t='topList', label=tl)
|
G.add_node('t/'+tl, color='yellow', t='topList', label=tl)
|
||||||
@ -715,7 +762,8 @@ def graphAddTopLists(G, books, darkMode=False):
|
|||||||
|
|
||||||
def graphAddSeries(G, books, darkMode=False):
|
def graphAddSeries(G, books, darkMode=False):
|
||||||
for series in getAllSeries(books):
|
for series in getAllSeries(books):
|
||||||
G.add_node('s/'+series, color='red', t='series', label=series, shape='triangle')
|
G.add_node('s/'+series, color='red', t='series',
|
||||||
|
label=series, shape='triangle')
|
||||||
for book in books:
|
for book in books:
|
||||||
if 'series' in book:
|
if 'series' in book:
|
||||||
G.add_edge('s/'+book['series'], book['id'], color=readColor(book))
|
G.add_edge('s/'+book['series'], book['id'], color=readColor(book))
|
||||||
@ -724,7 +772,8 @@ def graphAddSeries(G, books, darkMode=False):
|
|||||||
|
|
||||||
def graphAddTags(G, books, darkMode=False):
|
def graphAddTags(G, books, darkMode=False):
|
||||||
for tag in getAllTags(books):
|
for tag in getAllTags(books):
|
||||||
G.add_node('t/'+tag, color=['lightGray','darkgray'][darkMode], t='tag', label=tag, shape='box')
|
G.add_node('t/'+tag, color=['lightGray', 'darkgray']
|
||||||
|
[darkMode], t='tag', label=tag, shape='box')
|
||||||
for book in books:
|
for book in books:
|
||||||
for tag in getTags(book):
|
for tag in getTags(book):
|
||||||
G.add_edge('t/'+tag, book['id'], color=readColor(book))
|
G.add_edge('t/'+tag, book['id'], color=readColor(book))
|
||||||
@ -770,7 +819,8 @@ def addScoreToLabels(G):
|
|||||||
node['label'] += " ("+str(node['rating'])+")"
|
node['label'] += " ("+str(node['rating'])+")"
|
||||||
else:
|
else:
|
||||||
if 'score' in node and node['score'] != None and 'se' in node:
|
if 'score' in node and node['score'] != None and 'se' in node:
|
||||||
node['label'] += " ({:.2f}±{:.1f})".format(node['score'], node['se'])
|
node['label'] += " ({:.2f}±{:.1f})".format(
|
||||||
|
node['score'], node['se'])
|
||||||
else:
|
else:
|
||||||
node['label'] += " (0±∞)"
|
node['label'] += " (0±∞)"
|
||||||
|
|
||||||
@ -870,6 +920,7 @@ def genAndShow3D(G, darkMode=False):
|
|||||||
|
|
||||||
fig.show()
|
fig.show()
|
||||||
|
|
||||||
|
|
||||||
def buildFullGraph(darkMode=False):
|
def buildFullGraph(darkMode=False):
|
||||||
books = loadBooksFromDB()
|
books = loadBooksFromDB()
|
||||||
G = buildBookGraph(books, darkMode=darkMode)
|
G = buildBookGraph(books, darkMode=darkMode)
|
||||||
@ -890,6 +941,7 @@ def genScores(G, books, calcPagerank=True):
|
|||||||
scoreUnread(G, globMu, globStd)
|
scoreUnread(G, globMu, globStd)
|
||||||
return globMu, globStd
|
return globMu, globStd
|
||||||
|
|
||||||
|
|
||||||
def addImageToNode(node, cache, shape='circularImage'):
|
def addImageToNode(node, cache, shape='circularImage'):
|
||||||
name = node['label'].split(' (')[0].replace('*', '')
|
name = node['label'].split(' (')[0].replace('*', '')
|
||||||
if not name in cache or (cache[name] == False and random.random() < 0.05):
|
if not name in cache or (cache[name] == False and random.random() < 0.05):
|
||||||
@ -906,6 +958,7 @@ def addImageToNode(node, cache, shape='circularImage'):
|
|||||||
node['image'] = img
|
node['image'] = img
|
||||||
node['shape'] = shape
|
node['shape'] = shape
|
||||||
|
|
||||||
|
|
||||||
def addImagesToNodes(G):
|
def addImagesToNodes(G):
|
||||||
try:
|
try:
|
||||||
with open('.imgLinkCache.json', 'r') as cf:
|
with open('.imgLinkCache.json', 'r') as cf:
|
||||||
@ -915,10 +968,12 @@ def addImagesToNodes(G):
|
|||||||
for n in list(G.nodes):
|
for n in list(G.nodes):
|
||||||
node = G.nodes[n]
|
node = G.nodes[n]
|
||||||
if node['t'] in ['recommender', 'author']:
|
if node['t'] in ['recommender', 'author']:
|
||||||
addImageToNode(node, cache, ['circularImage','image'][node['t']=='author'])
|
addImageToNode(
|
||||||
|
node, cache, ['circularImage', 'image'][node['t'] == 'author'])
|
||||||
with open('.imgLinkCache.json', 'w') as cf:
|
with open('.imgLinkCache.json', 'w') as cf:
|
||||||
cf.write(json.dumps(cache))
|
cf.write(json.dumps(cache))
|
||||||
|
|
||||||
|
|
||||||
def recommendNBooksRecommenderBased(G, mu, std, n, removeTopListsB=True, removeUselessRecommenders=True):
|
def recommendNBooksRecommenderBased(G, mu, std, n, removeTopListsB=True, removeUselessRecommenders=True):
|
||||||
removeRestOfSeries(G)
|
removeRestOfSeries(G)
|
||||||
removeBad(G, mu-std*2-1)
|
removeBad(G, mu-std*2-1)
|
||||||
@ -976,6 +1031,7 @@ def recommendNBooksTagBased(G, mu, std, n, removeTopListsB=True):
|
|||||||
scaleOpinionsByRating(G)
|
scaleOpinionsByRating(G)
|
||||||
addScoreToLabels(G)
|
addScoreToLabels(G)
|
||||||
|
|
||||||
|
|
||||||
def recommendNBooks(G, mu, std, n, removeTopListsB=True, removeUselessRecommenders=True, v3d=False):
|
def recommendNBooks(G, mu, std, n, removeTopListsB=True, removeUselessRecommenders=True, v3d=False):
|
||||||
removeRestOfSeries(G)
|
removeRestOfSeries(G)
|
||||||
removeBad(G, mu-std-0.5)
|
removeBad(G, mu-std-0.5)
|
||||||
@ -1035,6 +1091,7 @@ def fullGraph(G, removeTopListsB=True):
|
|||||||
scaleOpinionsByRating(G)
|
scaleOpinionsByRating(G)
|
||||||
addScoreToLabels(G)
|
addScoreToLabels(G)
|
||||||
|
|
||||||
|
|
||||||
def recommenderCompetence(G):
|
def recommenderCompetence(G):
|
||||||
# removeRead(G)
|
# removeRead(G)
|
||||||
removeUnread(G)
|
removeUnread(G)
|
||||||
@ -1060,6 +1117,7 @@ def recommenderCompetence(G):
|
|||||||
node['score'] = 0
|
node['score'] = 0
|
||||||
node['score'] /= 2
|
node['score'] /= 2
|
||||||
|
|
||||||
|
|
||||||
def readBooksAnalysis(G, minRating=0, showAllTags=True, removeUnconnected=False, removeTopListsB=True):
|
def readBooksAnalysis(G, minRating=0, showAllTags=True, removeUnconnected=False, removeTopListsB=True):
|
||||||
removeUnread(G)
|
removeUnread(G)
|
||||||
removeBad(G, minRating)
|
removeBad(G, minRating)
|
||||||
@ -1075,6 +1133,7 @@ def readBooksAnalysis(G, minRating=0, showAllTags=True, removeUnconnected=False,
|
|||||||
scaleOpinionsByRating(G)
|
scaleOpinionsByRating(G)
|
||||||
addScoreToLabels(G)
|
addScoreToLabels(G)
|
||||||
|
|
||||||
|
|
||||||
def progress(G, books, mu, minimum=3.5):
|
def progress(G, books, mu, minimum=3.5):
|
||||||
findNewBooks(G, books, mu, -1, minRecSco=minimum)
|
findNewBooks(G, books, mu, -1, minRecSco=minimum)
|
||||||
bookCount = 0
|
bookCount = 0
|
||||||
@ -1138,6 +1197,7 @@ def analyze(G, books, mu, type_name, name, dist=2.1):
|
|||||||
addScoreToLabels(G)
|
addScoreToLabels(G)
|
||||||
match['label'] = "*"+match['label']+"*"
|
match['label'] = "*"+match['label']+"*"
|
||||||
|
|
||||||
|
|
||||||
def waveFlow(G, node, n, dist, menge, firstEdge=False):
|
def waveFlow(G, node, n, dist, menge, firstEdge=False):
|
||||||
if dist <= 0:
|
if dist <= 0:
|
||||||
return
|
return
|
||||||
@ -1167,7 +1227,8 @@ def waveFlow(G, node, n, dist, menge, firstEdge=False):
|
|||||||
book['score'] = 0
|
book['score'] = 0
|
||||||
bestlist.append(book)
|
bestlist.append(book)
|
||||||
bestlist.sort(key=lambda node: node['score'], reverse=True)
|
bestlist.sort(key=lambda node: node['score'], reverse=True)
|
||||||
toKeep = min(int(dist*10), math.ceil(len(bestlist) * dist - len(keeplist)*0.5))
|
toKeep = min(int(dist*10), math.ceil(len(bestlist)
|
||||||
|
* dist - len(keeplist)*0.5))
|
||||||
if toKeep <= 0:
|
if toKeep <= 0:
|
||||||
keeplist.sort(key=lambda node: node['rating'], reverse=True)
|
keeplist.sort(key=lambda node: node['rating'], reverse=True)
|
||||||
keeplist = keeplist[:min(int(dist*10), int(len(keeplist) * dist))]
|
keeplist = keeplist[:min(int(dist*10), int(len(keeplist) * dist))]
|
||||||
@ -1180,6 +1241,7 @@ def waveFlow(G, node, n, dist, menge, firstEdge=False):
|
|||||||
if node in bestlist or node in keeplist:
|
if node in bestlist or node in keeplist:
|
||||||
waveFlow(G, node, m, dist, menge, firstEdge=firstEdge)
|
waveFlow(G, node, m, dist, menge, firstEdge=firstEdge)
|
||||||
|
|
||||||
|
|
||||||
def gensimTokensForLines(lines):
|
def gensimTokensForLines(lines):
|
||||||
for i, line in enumerate(lines):
|
for i, line in enumerate(lines):
|
||||||
tokens = gensim.utils.simple_preprocess(line)
|
tokens = gensim.utils.simple_preprocess(line)
|
||||||
@ -1189,6 +1251,7 @@ def gensimTokensForLines(lines):
|
|||||||
# For training data, add tags
|
# For training data, add tags
|
||||||
yield gensim.models.doc2vec.TaggedDocument(tokens, [i])
|
yield gensim.models.doc2vec.TaggedDocument(tokens, [i])
|
||||||
|
|
||||||
|
|
||||||
def buildDoc2Vec(books):
|
def buildDoc2Vec(books):
|
||||||
import gensim
|
import gensim
|
||||||
for n in list(G.nodes):
|
for n in list(G.nodes):
|
||||||
@ -1197,10 +1260,12 @@ def buildDoc2Vec(books):
|
|||||||
pass
|
pass
|
||||||
gensimTokensForLines(lines)
|
gensimTokensForLines(lines)
|
||||||
|
|
||||||
|
|
||||||
def shell(G, books, mu, std):
|
def shell(G, books, mu, std):
|
||||||
from ptpython.repl import embed
|
from ptpython.repl import embed
|
||||||
embed(globals(), locals())
|
embed(globals(), locals())
|
||||||
|
|
||||||
|
|
||||||
def newBooks(G, books, num, mu, std):
|
def newBooks(G, books, num, mu, std):
|
||||||
removeBad(G, mu-std*2)
|
removeBad(G, mu-std*2)
|
||||||
findNewBooks(G, books, mu, num, minRecSco=mu-std)
|
findNewBooks(G, books, mu, num, minRecSco=mu-std)
|
||||||
@ -1226,21 +1291,29 @@ def findNewBooks(G, books, mu, num=-1, minRecSco=5):
|
|||||||
if node['t'] == 'recommender' and 'score' in node:
|
if node['t'] == 'recommender' and 'score' in node:
|
||||||
oldBooks = []
|
oldBooks = []
|
||||||
newBooks = []
|
newBooks = []
|
||||||
recBooks = mrbdf[mrbdf['recommender'].str.contains(node['label'])].to_dict(orient='records')
|
recBooks = mrbdf[mrbdf['recommender'].str.contains(
|
||||||
|
node['label'])].to_dict(orient='records')
|
||||||
for book in recBooks:
|
for book in recBooks:
|
||||||
if book['title'] in [b['title'] for b in books]:
|
if book['title'] in [b['title'] for b in books]:
|
||||||
oldBooks.append({'title': book['title'], 'author': book['author']})
|
oldBooks.append(
|
||||||
|
{'title': book['title'], 'author': book['author']})
|
||||||
else:
|
else:
|
||||||
newBooks.append({'title': book['title'], 'author': book['author']})
|
newBooks.append(
|
||||||
recs.append({'name': node['label'], 'rec': node, 'newBooks': newBooks, 'oldBooks': oldBooks})
|
{'title': book['title'], 'author': book['author']})
|
||||||
|
recs.append({'name': node['label'], 'rec': node,
|
||||||
|
'newBooks': newBooks, 'oldBooks': oldBooks})
|
||||||
for rec in recs:
|
for rec in recs:
|
||||||
for book in rec['newBooks']:
|
for book in rec['newBooks']:
|
||||||
G.add_node('n/'+book['title'], color='blue', t='newBook', label=book['title'], author=book['author'])
|
G.add_node('n/'+book['title'], color='blue', t='newBook',
|
||||||
|
label=book['title'], author=book['author'])
|
||||||
|
|
||||||
G.add_node('r/'+rec['rec']['label'], color='orange', t='recommender', label=rec['rec']['label'], score=rec['rec']['score'])
|
G.add_node('r/'+rec['rec']['label'], color='orange', t='recommender',
|
||||||
G.add_edge('r/'+rec['rec']['label'], 'n/'+book['title'], color='blue')
|
label=rec['rec']['label'], score=rec['rec']['score'])
|
||||||
|
G.add_edge('r/'+rec['rec']['label'], 'n/' +
|
||||||
|
book['title'], color='blue')
|
||||||
|
|
||||||
G.add_node('a/'+book['author'], color='green', t='author', label=book['author'])
|
G.add_node('a/'+book['author'], color='green',
|
||||||
|
t='author', label=book['author'])
|
||||||
G.add_edge('a/'+book['author'], 'n/'+book['title'], color='blue')
|
G.add_edge('a/'+book['author'], 'n/'+book['title'], color='blue')
|
||||||
for n in list(G.nodes):
|
for n in list(G.nodes):
|
||||||
node = G.nodes[n]
|
node = G.nodes[n]
|
||||||
@ -1257,12 +1330,16 @@ def findNewBooks(G, books, mu, num=-1, minRecSco=5):
|
|||||||
else:
|
else:
|
||||||
ses.append(min(ses))
|
ses.append(min(ses))
|
||||||
scores.append(mu)
|
scores.append(mu)
|
||||||
node['fake_se'] = sum(ses)/(len(ses)**1.2) + 0.5 + 0.5 * (len(scores)==2) # This is not how SE works. DILLIGAF?
|
# This is not how SE works. DILLIGAF?
|
||||||
node['score'] = sum(scores)/len(scores)*1.2 - node['fake_se']*1.6 + 0.5 - 0.1/math.sqrt(len(scores))
|
node['fake_se'] = sum(ses)/(len(ses)**1.2) + \
|
||||||
|
0.5 + 0.5 * (len(scores) == 2)
|
||||||
|
node['score'] = sum(
|
||||||
|
scores)/len(scores)*1.2 - node['fake_se']*1.6 + 0.5 - 0.1/math.sqrt(len(scores))
|
||||||
if len(scores) == 2:
|
if len(scores) == 2:
|
||||||
node['score'] *= 0.80
|
node['score'] *= 0.80
|
||||||
node['value'] = 20 + 5 * float(node['score'])
|
node['value'] = 20 + 5 * float(node['score'])
|
||||||
node['label'] += " ({:.2f}±{:.1f})".format(node['score'], node['fake_se'])
|
node['label'] += " ({:.2f}±{:.1f})".format(node['score'],
|
||||||
|
node['fake_se'])
|
||||||
node['label'] += '\n ' + node['author']
|
node['label'] += '\n ' + node['author']
|
||||||
if num != -1:
|
if num != -1:
|
||||||
removeKeepBest(G, num, 10, 'newBook')
|
removeKeepBest(G, num, 10, 'newBook')
|
||||||
@ -1270,6 +1347,8 @@ def findNewBooks(G, books, mu, num=-1, minRecSco=5):
|
|||||||
# while batchSize is implemented, we only get a good convergence when we disable it (batchSize=-1)
|
# while batchSize is implemented, we only get a good convergence when we disable it (batchSize=-1)
|
||||||
# but might be necessary to enable later for a larger library for better training performance...
|
# but might be necessary to enable later for a larger library for better training performance...
|
||||||
# maybe try again for 128 books?
|
# maybe try again for 128 books?
|
||||||
|
|
||||||
|
|
||||||
def evaluateFitness(books, batchSize=-1, debugPrint=False):
|
def evaluateFitness(books, batchSize=-1, debugPrint=False):
|
||||||
global weights
|
global weights
|
||||||
G = buildBookGraph(books)
|
G = buildBookGraph(books)
|
||||||
@ -1280,7 +1359,8 @@ def evaluateFitness(books, batchSize=-1, debugPrint=False):
|
|||||||
graphAddTags(G, books)
|
graphAddTags(G, books)
|
||||||
runPagerank(G)
|
runPagerank(G)
|
||||||
|
|
||||||
ratedBooks = [n for n in list(G.nodes) if 'rating' in G.nodes[n] and G.nodes[n]['rating'] != None]
|
ratedBooks = [n for n in list(
|
||||||
|
G.nodes) if 'rating' in G.nodes[n] and G.nodes[n]['rating'] != None]
|
||||||
boundsLoss = 0
|
boundsLoss = 0
|
||||||
linSepLoss = []
|
linSepLoss = []
|
||||||
errSq = []
|
errSq = []
|
||||||
@ -1289,7 +1369,8 @@ def evaluateFitness(books, batchSize=-1, debugPrint=False):
|
|||||||
gradient[wt] = 0
|
gradient[wt] = 0
|
||||||
mu, sigma = genScores(G, books)
|
mu, sigma = genScores(G, books)
|
||||||
for b in G.nodes:
|
for b in G.nodes:
|
||||||
batch = random.sample(ratedBooks, batchSize) if batchSize!=-1 and len(ratedBooks) > batchSize else ratedBooks
|
batch = random.sample(ratedBooks, batchSize) if batchSize != - \
|
||||||
|
1 and len(ratedBooks) > batchSize else ratedBooks
|
||||||
if b in batch:
|
if b in batch:
|
||||||
rating = G.nodes[b]['rating']
|
rating = G.nodes[b]['rating']
|
||||||
G.nodes[b]['rating'] = None
|
G.nodes[b]['rating'] = None
|
||||||
@ -1300,9 +1381,12 @@ def evaluateFitness(books, batchSize=-1, debugPrint=False):
|
|||||||
errSq.append((rating - G.nodes[b]['score'])**2)
|
errSq.append((rating - G.nodes[b]['score'])**2)
|
||||||
G.nodes[b]['rating'] = rating
|
G.nodes[b]['rating'] = rating
|
||||||
for wt in weights:
|
for wt in weights:
|
||||||
scoreB = sum([a*(1.001 if wt==w[0] else 1)*weights[w[0]]*(w[1] if len(w)>1 else 1) for a,w in zip(G.nodes[b]['_act'], G.nodes[b]['_wgh'])])/sum([(1.001 if wt==w[0] else 1)*weights[w[0]]*(w[1] if len(w)>1 else 1) for w in G.nodes[b]['_wgh']])
|
scoreB = sum([a*(1.001 if wt == w[0] else 1)*weights[w[0]]*(w[1] if len(w) > 1 else 1) for a, w in zip(G.nodes[b]['_act'],
|
||||||
gradient[wt] += ((rating - G.nodes[b]['score'])**2 - (rating - scoreB)**2)*1000
|
G.nodes[b]['_wgh'])])/sum([(1.001 if wt == w[0] else 1)*weights[w[0]]*(w[1] if len(w) > 1 else 1) for w in G.nodes[b]['_wgh']])
|
||||||
regressionLoss = sum([max(0,abs(w)-1)**2 for w in weights.values()]) # no punishment if w within -1 and 1
|
gradient[wt] += ((rating - G.nodes[b]['score'])
|
||||||
|
** 2 - (rating - scoreB)**2)*1000
|
||||||
|
# no punishment if w within -1 and 1
|
||||||
|
regressionLoss = sum([max(0, abs(w)-1)**2 for w in weights.values()])
|
||||||
for wt in weights:
|
for wt in weights:
|
||||||
if abs(weights[wt]) > 1.0:
|
if abs(weights[wt]) > 1.0:
|
||||||
gradient[wt] -= weights[wt]*10
|
gradient[wt] -= weights[wt]*10
|
||||||
@ -1315,6 +1399,55 @@ def evaluateFitness(books, batchSize=-1, debugPrint=False):
|
|||||||
fit = sum(errSq)/len(errSq) + 0.001*regressionLoss
|
fit = sum(errSq)/len(errSq) + 0.001*regressionLoss
|
||||||
return fit, gradient
|
return fit, gradient
|
||||||
|
|
||||||
|
|
||||||
|
def calcDissonance(books):
    """Build the book graph and annotate every rated node with its dissonance.

    For each node that carries a non-None ``rating`` the rating is
    temporarily set to ``None``, ``genScores`` is re-run (without
    recomputing pagerank) and the ``score`` the node has afterwards is
    compared with the rating that was hidden.

    The following attributes are written onto each rated node:

    * ``_test_score``    -- the node's ``score`` while its rating was hidden
    * ``dissonance_off`` -- ``rating - _test_score`` (signed)
    * ``dissonance_abs`` -- ``abs(rating - _test_score)``

    :param books: book collection; handed unchanged to the graph builders
        and to ``genScores``
    :return: the annotated graph
    """
    G = buildBookGraph(books)
    graphAddAuthors(G, books)
    graphAddRecommenders(G, books)
    graphAddTopLists(G, books)
    graphAddSeries(G, books)
    graphAddTags(G, books)
    runPagerank(G)

    ratedBooks = [n for n in G.nodes
                  if G.nodes[n].get('rating') is not None]

    # Full scoring pass before the leave-one-out loop; the returned
    # (mu, sigma) pair is not needed here.
    genScores(G, books)

    # ratedBooks was built in G.nodes order, so walking it directly visits
    # the same nodes in the same order as filtering G.nodes would.
    for b in ratedBooks:
        node = G.nodes[b]
        rating = node['rating']
        # Hide the rating so the score is produced without it.
        node['rating'] = None
        genScores(G, books, calcPagerank=False)
        testScore = node['score']
        node['rating'] = rating
        node['_test_score'] = testScore
        node['dissonance_off'] = rating - testScore
        node['dissonance_abs'] = abs(rating - testScore)

    return G
|
||||||
|
|
||||||
|
|
||||||
|
def describeDissonance(books, num=-1, sortKey='dissonance_abs', sortDir=True):
    """Print the rated books ordered by a dissonance-related attribute.

    The graph is obtained from ``calcDissonance``; every node that has a
    ``dissonance_abs`` attribute is listed on one line showing its title,
    authors, rating, ``_test_score`` and ``dissonance_off``.

    :param books: book collection, passed on to ``calcDissonance``
    :param num: maximum number of lines to print. ``-1`` (or any other
        negative value) prints all entries, ``0`` prints nothing.
    :param sortKey: node attribute used as the sort key
    :param sortDir: passed as ``reverse=`` to the sort, so ``True`` puts the
        largest values first
    """
    G = calcDissonance(books)
    bestlist = [G.nodes[n] for n in G.nodes if 'dissonance_abs' in G.nodes[n]]
    bestlist.sort(key=lambda node: node[sortKey], reverse=sortDir)

    if num >= 0:
        # Limit the output and pad the index to the digit count of num.
        # (The digit count is taken from the string form rather than from
        # math.log10, which raises ValueError for num == 0.)
        bestlist = bestlist[:num]
        width = len(str(num))
    else:
        # Unlimited output uses a fixed four-digit index.
        width = 4

    for pos, book in enumerate(bestlist, start=1):
        stats = ": You: {:.5f}, AI: {:.5f}, Delta: {:.5f}".format(
            book['rating'], book['_test_score'], book['dissonance_off'])
        line = book['title'] + " (" + " & ".join(book['authors']) + ")" + stats
        print("[" + str(pos).zfill(width) + "] " + line)
|
||||||
|
|
||||||
|
|
||||||
def train(initGamma, full=True):
|
def train(initGamma, full=True):
|
||||||
global weights
|
global weights
|
||||||
if full:
|
if full:
|
||||||
@ -1367,18 +1500,22 @@ def train(initGamma, full=True):
|
|||||||
break
|
break
|
||||||
print('Done.')
|
print('Done.')
|
||||||
|
|
||||||
|
|
||||||
def saveWeights(weights):
    """Serialize ``weights`` as JSON into ``neuralWeights.json``.

    The file is opened in write mode relative to the current working
    directory, so any previous content is replaced.

    :param weights: JSON-serializable object to store
    """
    with open('neuralWeights.json', 'w') as handle:
        serialized = json.dumps(weights)
        handle.write(serialized)
|
||||||
|
|
||||||
|
|
||||||
def loadWeights():
    """Return the weights stored in ``neuralWeights.json``.

    The file is read relative to the current working directory and parsed
    as JSON. If opening or reading it raises ``IOError``, a built-in set of
    default weights is returned instead. JSON parse errors are not caught.

    :return: the parsed file content, or the default weights dict
    """
    try:
        with open('neuralWeights.json', 'r') as handle:
            return json.loads(handle.read())
    except IOError:
        # Defaults used when no weights file can be read.
        # ("tgb_rank" is deliberately not part of the defaults.)
        return {
            "topList": 0.15,
            "recommender": 0.30,
            "author": 0.70,
            "series": 0.05,
            "tag": 0.05,
            "pagerank": 0.05,
            "mu": 0.50,
            "sigma": 0.30,
            "bias": 0.25,
            "median": 0.10,
        }
|
||||||
|
|
||||||
|
|
||||||
def cliInterface(imgDef=False):
|
def cliInterface(imgDef=False):
|
||||||
import argparse
|
import argparse
|
||||||
|
|
||||||
@ -1403,13 +1540,16 @@ def cliInterface(imgDef=False):
|
|||||||
cmds = parser.add_subparsers(required=True, dest='cmd')
|
cmds = parser.add_subparsers(required=True, dest='cmd')
|
||||||
|
|
||||||
p_rec = cmds.add_parser('recommend', description="TODO", aliases=['rec'])
|
p_rec = cmds.add_parser('recommend', description="TODO", aliases=['rec'])
|
||||||
p_rec.add_argument('-n', type=int, default=20, help='number of books to recommend')
|
p_rec.add_argument('-n', type=int, default=20,
|
||||||
|
help='number of books to recommend')
|
||||||
p_rec.add_argument('--tag-based', action="store_true")
|
p_rec.add_argument('--tag-based', action="store_true")
|
||||||
p_rec.add_argument('--recommender-based', action="store_true")
|
p_rec.add_argument('--recommender-based', action="store_true")
|
||||||
p_rec.add_argument('--new', type=int, default=-1, help='number of new books to recommend')
|
p_rec.add_argument('--new', type=int, default=-1,
|
||||||
|
help='number of new books to recommend')
|
||||||
|
|
||||||
p_rec = cmds.add_parser('listScores', description="TODO", aliases=['ls'])
|
p_rec = cmds.add_parser('listScores', description="TODO", aliases=['ls'])
|
||||||
p_rec.add_argument('-n', type=int, default=50, help='number of books to recommend')
|
p_rec.add_argument('-n', type=int, default=50,
|
||||||
|
help='number of books to recommend')
|
||||||
|
|
||||||
p_read = cmds.add_parser('read', description="TODO", aliases=[])
|
p_read = cmds.add_parser('read', description="TODO", aliases=[])
|
||||||
p_read.add_argument('--min-rating', type=int, default=0)
|
p_read.add_argument('--min-rating', type=int, default=0)
|
||||||
@ -1417,27 +1557,40 @@ def cliInterface(imgDef=False):
|
|||||||
p_read.add_argument('--only-connected', action="store_true")
|
p_read.add_argument('--only-connected', action="store_true")
|
||||||
|
|
||||||
p_show = cmds.add_parser('analyze', description="TODO", aliases=[])
|
p_show = cmds.add_parser('analyze', description="TODO", aliases=[])
|
||||||
p_show.add_argument('type', choices=['any', 'book', 'recommender', 'author', 'series', 'tag'])
|
p_show.add_argument(
|
||||||
|
'type', choices=['any', 'book', 'recommender', 'author', 'series', 'tag'])
|
||||||
p_show.add_argument('name', type=str)
|
p_show.add_argument('name', type=str)
|
||||||
p_show.add_argument('-d', type=float, default=2.1, help='depth of expansion')
|
p_show.add_argument('-d', type=float, default=2.1,
|
||||||
|
help='depth of expansion')
|
||||||
|
|
||||||
p_train = cmds.add_parser('train', description="TODO", aliases=[])
|
p_train = cmds.add_parser('train', description="TODO", aliases=[])
|
||||||
p_train.add_argument('-g', type=float, default=0.2, help='learning rate gamma')
|
p_train.add_argument('-g', type=float, default=0.2,
|
||||||
|
help='learning rate gamma')
|
||||||
p_train.add_argument('--full', action="store_true")
|
p_train.add_argument('--full', action="store_true")
|
||||||
|
|
||||||
p_prog = cmds.add_parser('progress', description="TODO", aliases=[])
|
p_prog = cmds.add_parser('progress', description="TODO", aliases=[])
|
||||||
p_prog.add_argument('-m', type=float, default=7, help='Mimimum Score to read')
|
p_prog.add_argument('-m', type=float, default=7,
|
||||||
|
help='Mimimum Score to read')
|
||||||
|
|
||||||
p_comp = cmds.add_parser('competence', description="TODO", aliases=[])
|
p_comp = cmds.add_parser('competence', description="TODO", aliases=[])
|
||||||
|
|
||||||
p_shell = cmds.add_parser('shell', description="TODO", aliases=[])
|
p_shell = cmds.add_parser('shell', description="TODO", aliases=[])
|
||||||
|
|
||||||
p_new = cmds.add_parser('newBooks', description="TODO", aliases=[])
|
p_new = cmds.add_parser('newBooks', description="TODO", aliases=[])
|
||||||
p_new.add_argument('-n', type=int, default=10, help='number of books to recommend')
|
p_new.add_argument('-n', type=int, default=10,
|
||||||
|
help='number of books to recommend')
|
||||||
|
|
||||||
p_col = cmds.add_parser('calice', description="TODO", aliases=[])
|
p_cal = cmds.add_parser('calice', description="TODO", aliases=[])
|
||||||
|
|
||||||
p_createCol = cmds.add_parser('createCaliceColumn', description="TODO", aliases=[])
|
p_dis = cmds.add_parser('dissonance', description="TODO", aliases=['dis'])
|
||||||
|
p_dis.add_argument('-n', type=int, default=-1,
|
||||||
|
help='Maximum number of books to lost')
|
||||||
|
p_dis.add_argument(
|
||||||
|
'--sort', choices=['dissonance_abs', 'dissonance_off', 'score'], default='dissonance_abs', const='dissonance_abs', nargs='?')
|
||||||
|
p_dis.add_argument('--reversed', action="store_true")
|
||||||
|
|
||||||
|
p_createCol = cmds.add_parser(
|
||||||
|
'createCaliceColumn', description="TODO", aliases=[])
|
||||||
p_createCol.add_argument('type', choices=['score', 'rating', 'both'])
|
p_createCol.add_argument('type', choices=['score', 'rating', 'both'])
|
||||||
|
|
||||||
p_full = cmds.add_parser('full', description="TODO", aliases=[])
|
p_full = cmds.add_parser('full', description="TODO", aliases=[])
|
||||||
@ -1452,6 +1605,7 @@ def cliInterface(imgDef=False):
|
|||||||
else:
|
else:
|
||||||
mainCLI(args)
|
mainCLI(args)
|
||||||
|
|
||||||
|
|
||||||
def perfTestCLI(args):
|
def perfTestCLI(args):
|
||||||
import time
|
import time
|
||||||
from pycallgraph import PyCallGraph
|
from pycallgraph import PyCallGraph
|
||||||
@ -1466,6 +1620,7 @@ def perfTestCLI(args):
|
|||||||
with PyCallGraph(output=GraphvizOutput(output_file='perfTests/' + str(int(time.time())) + '.png'), config=config):
|
with PyCallGraph(output=GraphvizOutput(output_file='perfTests/' + str(int(time.time())) + '.png'), config=config):
|
||||||
mainCLI(args)
|
mainCLI(args)
|
||||||
|
|
||||||
|
|
||||||
def mainCLI(args):
|
def mainCLI(args):
|
||||||
if args.cmd == "train":
|
if args.cmd == "train":
|
||||||
train(args.g, args.full)
|
train(args.g, args.full)
|
||||||
@ -1482,7 +1637,6 @@ def mainCLI(args):
|
|||||||
if not args.keep_whitepapers:
|
if not args.keep_whitepapers:
|
||||||
removeWhitepapers(G)
|
removeWhitepapers(G)
|
||||||
|
|
||||||
|
|
||||||
if args.cmd == "recommend":
|
if args.cmd == "recommend":
|
||||||
if args.new == -1:
|
if args.new == -1:
|
||||||
args.new = int(args.n / 5)
|
args.new = int(args.n / 5)
|
||||||
@ -1490,16 +1644,21 @@ def mainCLI(args):
|
|||||||
findNewBooks(G, books, mu, args.new, minRecSco=mu-std)
|
findNewBooks(G, books, mu, args.new, minRecSco=mu-std)
|
||||||
if args.tag_based:
|
if args.tag_based:
|
||||||
if args.recommender_based:
|
if args.recommender_based:
|
||||||
raise Exception('tag-based and recommender-based can not be be combined')
|
raise Exception(
|
||||||
recommendNBooksTagBased(G, mu, std, args.n, not args.keep_top_lists)
|
'tag-based and recommender-based can not be be combined')
|
||||||
|
recommendNBooksTagBased(
|
||||||
|
G, mu, std, args.n, not args.keep_top_lists)
|
||||||
elif args.recommender_based:
|
elif args.recommender_based:
|
||||||
recommendNBooksRecommenderBased(G, mu, std, args.n, not args.keep_top_lists, not args.keep_useless_recommenders)
|
recommendNBooksRecommenderBased(
|
||||||
|
G, mu, std, args.n, not args.keep_top_lists, not args.keep_useless_recommenders)
|
||||||
else:
|
else:
|
||||||
recommendNBooks(G, mu, std, args.n, not args.keep_top_lists, not args.keep_useless_recommenders, args.v3d)
|
recommendNBooks(G, mu, std, args.n, not args.keep_top_lists,
|
||||||
|
not args.keep_useless_recommenders, args.v3d)
|
||||||
elif args.cmd == "listScores":
|
elif args.cmd == "listScores":
|
||||||
listScores(G, mu, std, args.n)
|
listScores(G, mu, std, args.n)
|
||||||
elif args.cmd == "read":
|
elif args.cmd == "read":
|
||||||
readBooksAnalysis(G, args.min_rating, args.all_tags, args.only_connected, not args.keep_top_lists)
|
readBooksAnalysis(G, args.min_rating, args.all_tags,
|
||||||
|
args.only_connected, not args.keep_top_lists)
|
||||||
elif args.cmd == "analyze":
|
elif args.cmd == "analyze":
|
||||||
analyze(G, books, mu, args.type, args.name, args.d)
|
analyze(G, books, mu, args.type, args.name, args.d)
|
||||||
elif args.cmd == "full":
|
elif args.cmd == "full":
|
||||||
@ -1518,6 +1677,9 @@ def mainCLI(args):
|
|||||||
elif args.cmd == "calice":
|
elif args.cmd == "calice":
|
||||||
calice(G)
|
calice(G)
|
||||||
exit()
|
exit()
|
||||||
|
elif args.cmd == "dissonance":
|
||||||
|
describeDissonance(books, args.n, args.sort, not args.reversed)
|
||||||
|
exit()
|
||||||
elif args.cmd == "createCaliceColumn":
|
elif args.cmd == "createCaliceColumn":
|
||||||
if args.type in ['score', 'both']:
|
if args.type in ['score', 'both']:
|
||||||
calibreDB.createCaliceScoreColumn()
|
calibreDB.createCaliceScoreColumn()
|
||||||
@ -1525,12 +1687,12 @@ def mainCLI(args):
|
|||||||
if args.type in ['rating', 'both']:
|
if args.type in ['rating', 'both']:
|
||||||
calibreDB.createCaliceRatingColumn()
|
calibreDB.createCaliceRatingColumn()
|
||||||
print('[*] Column "Calice Rating" was created.')
|
print('[*] Column "Calice Rating" was created.')
|
||||||
print('[i] To allow displaying half-stars, please active them manually in the calibre-settings.')
|
print(
|
||||||
|
'[i] To allow displaying half-stars, please active them manually in the calibre-settings.')
|
||||||
exit()
|
exit()
|
||||||
else:
|
else:
|
||||||
raise Exception("Bad")
|
raise Exception("Bad")
|
||||||
|
|
||||||
|
|
||||||
if not args.keep_priv:
|
if not args.keep_priv:
|
||||||
removePriv(G)
|
removePriv(G)
|
||||||
if args.remove_read:
|
if args.remove_read:
|
||||||
|
Loading…
Reference in New Issue
Block a user