initial commit
This commit is contained in:
commit
6dabc68f3f
3
.gitignore
vendored
Normal file
3
.gitignore
vendored
Normal file
@ -0,0 +1,3 @@
|
||||
__pycache__
|
||||
*.html
|
||||
.venv
|
498
main.py
Normal file
498
main.py
Normal file
@ -0,0 +1,498 @@
|
||||
import os
|
||||
import json
|
||||
import math
|
||||
import random
|
||||
|
||||
import numpy as np
|
||||
from scipy.stats import norm
|
||||
|
||||
import matplotlib.pyplot as plt
|
||||
import networkx as nx
|
||||
from pyvis.network import Network
|
||||
|
||||
|
||||
def getAllAuthors(books):
    """Return a list of every distinct author found across ``books``.

    Each book is expanded with ``getAuthors``; duplicates are collapsed
    through a set, so the order of the returned list is arbitrary.
    """
    return list({name for entry in books for name in getAuthors(entry)})
|
||||
|
||||
|
||||
def getAuthors(book):
    """Split the book's ``'authors'`` string on ``' & '`` into a list of names."""
    joined_names = book['authors']
    return joined_names.split(' & ')
|
||||
|
||||
|
||||
def getRecommenders(book):
    """Yield the recommender named by each tag containing ``" Recommendation"``.

    The marker text is stripped from the tag; whatever remains is yielded.
    """
    marker = " Recommendation"
    for label in book['tags']:
        if marker in label:
            yield label.replace(marker, "")
|
||||
|
||||
|
||||
def getTags(book):
    """Yield the book's plain tags.

    Tags containing ``" Recommendation"`` or ``" Top "`` are skipped.
    """
    for label in book['tags']:
        if " Recommendation" in label or " Top " in label:
            continue
        yield label
|
||||
|
||||
|
||||
def getAllRecommenders(books):
    """Return a list of every distinct recommender found across ``books``.

    Recommenders come from ``getRecommenders``; the list order is arbitrary
    because the names are de-duplicated through a set.
    """
    return list({name for entry in books for name in getRecommenders(entry)})
|
||||
|
||||
|
||||
def getTopLists(book):
    """Return the distinct top-list names the book is tagged with.

    A tag containing ``" Top "`` contributes the text before its first
    ``" Top "``. The order of the returned list is arbitrary.
    """
    marker = " Top "
    names = {label.split(marker)[0] for label in book['tags'] if marker in label}
    return list(names)
|
||||
|
||||
|
||||
def getAllTopLists(books):
    """Return a list of every distinct top-list name found across ``books``.

    Names come from ``getTopLists``; the list order is arbitrary because the
    names are de-duplicated through a set.
    """
    return list({name for entry in books for name in getTopLists(entry)})
|
||||
|
||||
|
||||
def getAllSeries(books):
    """Return a list of the distinct ``'series'`` values of ``books``.

    Books without a ``'series'`` key are ignored. The list order is arbitrary.
    """
    return list({entry['series'] for entry in books if 'series' in entry})
|
||||
|
||||
|
||||
def getAllTags(books):
    """Return a list of every distinct plain tag found across ``books``.

    Tags come from ``getTags``; the list order is arbitrary because the tags
    are de-duplicated through a set.
    """
    return list({label for entry in books for label in getTags(entry)})
|
||||
|
||||
|
||||
def getTopListWheight(book, topList):
    """Return the weight of ``book`` within the top list ``topList``.

    A tag of the form ``"<name> Top <N>"`` carries the scope ``N``. The
    weight is ``100 / N`` for the smallest ``N`` among the book's tags whose
    ``<name>`` equals ``topList``.

    Raises:
        ValueError: if the book has no tag for ``topList``.
    """
    scopes = []
    for tag in book['tags']:
        parts = tag.split(" Top ")
        # Compare the whole list name (the text before the first " Top "),
        # so that e.g. "Times" does not pick up a "NY Times Top 10" tag.
        if len(parts) > 1 and parts[0] == topList:
            scopes.append(int(parts[1]))
    if not scopes:
        raise ValueError(
            "book has no '{} Top <N>' tag".format(topList))
    return 100 / min(scopes)
|
||||
|
||||
|
||||
def removeRead(G):
    """Remove every book node whose ``'rating'`` is set."""
    rated_books = [
        key for key in G.nodes
        if G.nodes[key]['t'] == 'book' and G.nodes[key]['rating'] is not None
    ]
    G.remove_nodes_from(rated_books)
|
||||
|
||||
|
||||
def removeUnread(G):
    """Remove every book node whose ``'rating'`` is ``None``."""
    unrated_books = [
        key for key in G.nodes
        if G.nodes[key]['t'] == 'book' and G.nodes[key]['rating'] is None
    ]
    G.remove_nodes_from(unrated_books)
|
||||
|
||||
|
||||
def removePriv(G):
    """Remove every book node that carries the ``'priv'`` tag."""
    private_books = [
        key for key in G.nodes
        if G.nodes[key]['t'] == 'book' and 'priv' in G.nodes[key]['tags']
    ]
    G.remove_nodes_from(private_books)
|
||||
|
||||
|
||||
def removeDangling(G, alsoBooks=False):
    """Remove nodes that have no neighbours.

    Book nodes are only considered when ``alsoBooks`` is true.
    """
    for key in list(G.nodes):
        if G.nodes[key]['t'] == 'book' and not alsoBooks:
            continue
        if len(G.adj[key]) == 0:
            G.remove_node(key)
|
||||
|
||||
|
||||
def removeEdge(G):
    """Remove every non-book node that has fewer than two neighbours."""
    for key in list(G.nodes):
        if G.nodes[key]['t'] == 'book':
            continue
        neighbour_count = len(G.adj[key])
        if neighbour_count < 2:
            G.remove_node(key)
|
||||
|
||||
|
||||
def removeBad(G, threshold, groups=['book', 'topList', 'recommender', 'author', 'series', 'tag']):
    """Remove nodes of the given ``groups`` whose score is missing or too low.

    A node is removed when its type ``'t'`` is in ``groups``, it has a
    ``'score'`` key, and that score is ``None`` or below ``threshold``.
    Nodes without a ``'score'`` key are left alone.
    """
    def scored_too_low(attrs):
        if attrs['t'] not in groups or 'score' not in attrs:
            return False
        value = attrs['score']
        return value is None or value < threshold

    doomed = [key for key in G.nodes if scored_too_low(G.nodes[key])]
    G.remove_nodes_from(doomed)
|
||||
|
||||
|
||||
def removeKeepBest(G, num, maxDistForRead=1):
    """Keep the ``num`` best-scored books and drop the weaker candidates.

    Books with a non-``None`` ``'score'`` are ranked by score; the top
    ``num`` are kept. Among the remaining candidates (books outside that top
    list, plus any node whose ``'score'`` is ``None``) a node survives only
    if it has a ``'rating'`` that is not more than ``maxDistForRead`` below
    the lowest score in the top list.

    When no book made the top list (nothing is scored, or ``num`` is 0)
    there is no cut-off, so rated nodes are kept instead of raising
    ``IndexError``.
    """
    ranked = [
        (key, G.nodes[key]) for key in G.nodes
        if G.nodes[key]['t'] == 'book' and G.nodes[key].get('score') is not None
    ]
    ranked.sort(key=lambda item: item[1]['score'], reverse=True)
    best = ranked[:num]
    # Track winners by node key: cheaper than comparing attribute dicts and
    # immune to two books that happen to have equal attributes.
    best_keys = {key for key, _ in best}
    cutoff = best[-1][1]['score'] - maxDistForRead if best else None

    for key in list(G.nodes):
        node = G.nodes[key]
        losing_book = node['t'] == 'book' and key not in best_keys
        unscored = 'score' in node and node['score'] is None
        if not (losing_book or unscored):
            continue
        rating = node.get('rating')
        if rating is None or (cutoff is not None and rating < cutoff):
            G.remove_node(key)
|
||||
|
||||
|
||||
def removeTags(G):
    """Remove every node of type ``'tag'``."""
    tag_keys = [key for key in G.nodes if G.nodes[key]['t'] == 'tag']
    G.remove_nodes_from(tag_keys)
|
||||
|
||||
|
||||
def pruneTags(G, minCons=2):
    """Remove tag nodes whose neighbours are already well connected.

    For each tag, every neighbour of each of the tag's neighbours is counted
    unless it is itself a ``'tag'`` or ``'topList'`` node. The tag is removed
    when that count exceeds ``minCons``.
    """
    uncounted_types = ['tag', 'topList']
    for key in list(G.nodes):
        if G.nodes[key]['t'] != 'tag':
            continue
        other_links = sum(
            1
            for member in G.adj[key]
            for link in G.adj[member]
            if G.nodes[link]['t'] not in uncounted_types
        )
        if other_links > minCons:
            G.remove_node(key)
|
||||
|
||||
|
||||
def removeHighSpanTags(G, maxCons=5):
    """Remove every tag node that has more than ``maxCons`` neighbours."""
    for key in list(G.nodes):
        if G.nodes[key]['t'] != 'tag':
            continue
        span = len(G.adj[key])
        if span > maxCons:
            G.remove_node(key)
|
||||
|
||||
|
||||
def removeTopLists(G):
    """Remove every node of type ``'topList'``."""
    list_keys = [key for key in G.nodes if G.nodes[key]['t'] == 'topList']
    G.remove_nodes_from(list_keys)
|
||||
|
||||
|
||||
def removeRestOfSeries(G):
    """Within each series keep only the books up to the next unread volume.

    For every ``'series'`` node the highest ``series_index`` (truncated to
    an int) among its rated neighbours is taken as the reading progress,
    starting from 0. Neighbours whose ``series_index`` exceeds
    ``progress + 1.0001`` are removed from the graph.
    """
    for key in list(G.nodes):
        # The snapshot may list a node that an earlier iteration of this
        # loop has already removed; looking it up would raise KeyError.
        if key not in G:
            continue
        if G.nodes[key]['t'] != 'series':
            continue

        progress = 0
        for member in G.adj[key]:
            attrs = G.nodes[member]
            if attrs['rating'] is not None:
                progress = max(progress, int(attrs['series_index']))

        limit = progress + 1.0001
        # Copy the neighbour view: nodes are removed while looping.
        for member in list(G.adj[key]):
            if G.nodes[member]['series_index'] > limit:
                G.remove_node(member)
|
||||
|
||||
|
||||
def scoreOpinions(G, globMu, globStd, errorFac=1.2):
    """Score every opinion node from the ratings of its neighbours.

    Opinion nodes are those of type topList, recommender, author, series or
    tag. With at least one rated neighbour the node receives ``'mean'`` and
    ``'std'`` (``norm.fit`` of the ratings), ``'se'`` (``globStd`` divided by
    the square root of the rating count), ``'score'`` and ``'feedbacks'``
    (the ratings themselves). Without any rated neighbour ``'score'`` is set
    to ``None``. ``globMu`` is accepted but not used here.
    """
    opinion_types = ['topList', 'recommender', 'author', 'series', 'tag']
    for key in list(G.nodes):
        node = G.nodes[key]
        if node['t'] not in opinion_types:
            continue

        neighbours = list(G.adj[key].keys())
        ratings = [
            G.nodes[other]['rating'] for other in neighbours
            if G.nodes[other]['rating'] is not None
        ]
        if not ratings:
            node['score'] = None
            continue

        node['mean'], node['std'] = norm.fit(ratings)
        node['se'] = globStd / math.sqrt(len(ratings))
        # Share of neighbours that are rated; a lower share widens the penalty.
        rated_share = len(ratings) / len(neighbours)
        penalty = errorFac * node['se'] * (9/10 + (1 - rated_share)/10)
        # Tiny constant bonus for recommender nodes.
        bonus = 0.001 * (node['t'] == 'recommender')
        node['score'] = node['mean'] - penalty + bonus
        node['feedbacks'] = ratings
|
||||
|
||||
|
||||
def scoreUnread(G, globMu, globStd, errorFac=1):
    """Predict a score for every unrated book from its scored neighbours.

    Only non-tag neighbours with a non-``None`` ``'score'`` contribute: their
    scores, and the raw ratings stored in their ``'feedbacks'``. A book with
    at least one such neighbour receives ``'mean'``/``'std'`` (fit of the raw
    ratings), ``'mean2'``/``'std2'`` (fit of the neighbour scores), ``'se'``
    and a weighted ``'score'``; otherwise its ``'score'`` is set to ``None``.
    Tag scores are not used. ``globMu`` is accepted but not used here.
    """
    for key in list(G.nodes):
        node = G.nodes[key]
        if node['t'] != 'book' or node['rating'] is not None:
            continue

        opinion_scores = []
        raw_ratings = []
        for other in list(G.adj[key].keys()):
            opinion = G.nodes[other]
            if opinion.get('score') is None or opinion['t'] == 'tag':
                continue
            opinion_scores.append(opinion['score'])
            raw_ratings.extend(opinion['feedbacks'])

        if not opinion_scores:
            node['score'] = None
            continue

        node['mean'], node['std'] = norm.fit(raw_ratings)
        node['mean2'], node['std2'] = norm.fit(opinion_scores)
        score_mean, score_std = node['mean2'], node['std2']
        node['se'] = globStd / math.sqrt(len(raw_ratings))
        # Weights 4 : 2 : 1 over the three estimates.
        cautious_mean = node['mean'] - errorFac*node['se']
        node['score'] = (
            cautious_mean*4 + node['mean2']*2 + (score_mean - score_std*0.25)*1)/7
        # Minimal bump for books whose series_index is 1.0.
        if 'series' in node and node['series_index'] == 1.0:
            node['score'] += 0.000000001
|
||||
|
||||
|
||||
def printBestList(G, num=25):
    """Print the ``num`` best-scored books, highest score first.

    Each line has the form ``[rank] title (author & author): score``. The
    rank is zero-padded to the number of digits in ``num`` and the score is
    shown with five decimals. Nothing is printed when ``num`` is below 1
    (previously ``math.log10`` raised for such values).
    """
    if num < 1:
        return
    ranked = [
        G.nodes[key] for key in G.nodes
        if G.nodes[key]['t'] == 'book' and G.nodes[key].get('score') is not None
    ]
    ranked.sort(key=lambda attrs: attrs['score'], reverse=True)
    # The pad width depends only on num, so compute it once.
    width = int(math.log10(num)+1)
    for rank, book in enumerate(ranked[:num], start=1):
        print("["+str(rank).zfill(width)+"] "+book['title'] +
              " ("+" & ".join(book['authors'])+"): {:.5f}".format(book['score']))
|
||||
|
||||
|
||||
def readColor(book):
    """Return ``'green'`` if the book has a ``'rating'`` key, else ``'gray'``."""
    return 'green' if 'rating' in book else 'gray'
|
||||
|
||||
|
||||
def loadBooksFromDB():
    """Return the parsed JSON output of ``calibredb list --for-machine -f all``.

    Raises:
        FileNotFoundError: if the ``calibredb`` executable cannot be found.
        subprocess.CalledProcessError: if ``calibredb`` exits with a
            non-zero status.
    """
    import subprocess  # local import keeps this block self-contained

    # check=True surfaces a failing calibredb directly instead of as a
    # confusing JSON parse error on empty output; the argument list avoids
    # going through a shell. stderr is left attached to the terminal.
    result = subprocess.run(
        ["calibredb", "list", "--for-machine", "-f", "all"],
        stdout=subprocess.PIPE,
        universal_newlines=True,
        check=True,
    )
    return json.loads(result.stdout)
|
||||
|
||||
|
||||
def buildBookGraph(books):
    """Create a graph containing one ``'book'`` node per entry of ``books``.

    Nodes are keyed by the book's ``'id'``. ``rating`` is ``None`` when the
    book has no ``'rating'`` key; ``series`` and ``series_index`` are
    ``None`` when it has no ``'series'`` key; ``desc`` is always empty.
    """
    G = nx.Graph()

    for book in books:
        in_series = 'series' in book
        G.add_node(
            book['id'],
            t='book',
            label=book['title'],
            title=book['title'],
            shape='image',
            image=book['cover'],
            rating=book.get('rating'),
            tags=book['tags'],
            desc='',
            isbn=book['isbn'],
            files=book['formats'],
            authors=getAuthors(book),
            series=book['series'] if in_series else None,
            series_index=book['series_index'] if in_series else None,
        )

    return G
|
||||
|
||||
|
||||
def graphAddAuthors(G, books):
    """Add a green ``'author'`` node per author and link it to their books.

    Author nodes are keyed ``'a/<name>'``; each edge is coloured by
    ``readColor`` of the book. Returns ``G``.
    """
    for name in getAllAuthors(books):
        G.add_node('a/'+name, color='green', t='author', label=name)
    for entry in books:
        edge_color = readColor(entry)
        for name in getAuthors(entry):
            G.add_edge('a/'+name, entry['id'], color=edge_color)
    return G
|
||||
|
||||
|
||||
def graphAddRecommenders(G, books):
    """Add an orange ``'recommender'`` node per recommender and link its books.

    Recommender nodes are keyed ``'r/<name>'``; each edge is coloured by
    ``readColor`` of the book. Returns ``G``.
    """
    for name in getAllRecommenders(books):
        G.add_node('r/'+name, color='orange', t='recommender', label=name)
    for entry in books:
        edge_color = readColor(entry)
        for name in getRecommenders(entry):
            G.add_edge('r/'+name, entry['id'], color=edge_color)
    return G
|
||||
|
||||
|
||||
def graphAddTopLists(G, books):
    """Add a yellow ``'topList'`` node per top list and link its books.

    Top-list nodes are keyed ``'t/<name>'``. Each edge stores the value of
    ``getTopListWheight`` under ``'wheight'`` and is coloured by
    ``readColor`` of the book. Returns ``G``.
    """
    for name in getAllTopLists(books):
        G.add_node('t/'+name, color='yellow', t='topList', label=name)
    for entry in books:
        for name in getTopLists(entry):
            weight = getTopListWheight(entry, name)
            G.add_edge('t/'+name, entry['id'], wheight=weight,
                       color=readColor(entry))
    return G
|
||||
|
||||
|
||||
def graphAddSeries(G, books):
    """Add a red ``'series'`` node per series and link its books.

    Series nodes are keyed ``'s/<name>'``; books without a ``'series'`` key
    get no edge. Each edge is coloured by ``readColor`` of the book.
    Returns ``G``.
    """
    for name in getAllSeries(books):
        G.add_node('s/'+name, color='red', t='series', label=name)
    for entry in books:
        if 'series' not in entry:
            continue
        G.add_edge('s/'+entry['series'], entry['id'], color=readColor(entry))
    return G
|
||||
|
||||
|
||||
def graphAddTags(G, books):
    """Add a gray ``'tag'`` node per plain tag and link it to its books.

    Tag nodes are keyed ``'tag/<name>'``. They previously used the ``'t/'``
    prefix, which is also the key prefix of top-list nodes: a plain tag named
    like a top list (e.g. ``"NYT"`` next to ``"NYT Top 10"``) overwrote that
    top-list node's attributes and merged the two nodes. Each edge is
    coloured by ``readColor`` of the book. Returns ``G``.
    """
    prefix = 'tag/'
    for tag in getAllTags(books):
        G.add_node(prefix+tag, color='gray', t='tag', label=tag)
    for book in books:
        edge_color = readColor(book)
        for tag in getTags(book):
            G.add_edge(prefix+tag, book['id'], color=edge_color)
    return G
|
||||
|
||||
|
||||
def calcRecDist(G, books):
    """Fit a normal distribution to all non-``None`` book ratings in ``G``.

    Ratings are read from the graph node of each entry in ``books``. Returns
    the result of ``norm.fit``, which the callers unpack as (mean, std).
    """
    all_ratings = (G.nodes[entry['id']]['rating'] for entry in books)
    return norm.fit([value for value in all_ratings if value is not None])
|
||||
|
||||
|
||||
def scaleBooksByRating(G):
    """Set each node's ``'value'`` from its rating or, failing that, its score.

    ``value`` is ``20 + 5 * int(rating)`` when a rating is set, otherwise
    ``20 + 5 * int(score)`` when a score is set, otherwise 15. The exclusion
    list is empty, so every node type is processed.
    """
    excluded_types = []
    for key in list(G.nodes):
        node = G.nodes[key]
        if node['t'] in excluded_types:
            continue
        if node.get('rating') is not None:
            size = 20 + 5 * int(node['rating'])
        elif node.get('score') is not None:
            size = 20 + 5 * int(node['score'])
        else:
            size = 15
        node['value'] = size
|
||||
|
||||
|
||||
def scaleOpinionsByRating(G):
    """Set ``'value'`` on topList, recommender, author and series nodes.

    ``value`` is ``20 + 5 * int(score)`` when the node has a non-``None``
    score and 20 otherwise. Other node types are left untouched.
    """
    sized_types = ['topList', 'recommender', 'author', 'series']
    for key in list(G.nodes):
        node = G.nodes[key]
        if node['t'] not in sized_types:
            continue
        score = node.get('score')
        node['value'] = 20 if score is None else 20 + 5 * int(score)
|
||||
|
||||
|
||||
def addScoreToLabels(G):
    """Append the rating or estimated score to every non-tag node's label.

    A set rating is appended as ``" (<rating>)"``; otherwise a set score as
    ``" (~<score with two decimals>)"``; otherwise ``" (~0)"``.
    """
    for key in list(G.nodes):
        node = G.nodes[key]
        if node['t'] in ['tag']:
            continue
        if node.get('rating') is not None:
            suffix = " ("+str(node['rating'])+")"
        elif node.get('score') is not None:
            suffix = " (~{:.2f}".format(node['score'])+")"
        else:
            suffix = " (~0)"
        node['label'] += suffix
|
||||
|
||||
|
||||
def genAndShowHTML(G, showButtons=False):
    """Load ``G`` into a pyvis ``Network`` and show it as ``nx.html``.

    With ``showButtons`` set, the configure, layout, interaction, physics
    and edges button groups are requested before the graph is loaded.
    """
    net = Network('1080px', '1920px')
    if showButtons:
        button_groups = ['configure', 'layout',
                         'interaction', 'physics', 'edges']
        net.show_buttons(filter_=button_groups)
    net.from_nx(G)
    net.show('nx.html')
|
||||
|
||||
|
||||
def buildFullGraph():
    """Load the books and build the complete graph around them.

    Book nodes are created first; authors, recommenders, top lists, series
    and tags are then attached in that order. Returns ``(G, books)``.
    """
    books = loadBooksFromDB()
    G = buildBookGraph(books)

    attach_steps = (
        graphAddAuthors,
        graphAddRecommenders,
        graphAddTopLists,
        graphAddSeries,
        graphAddTags,
    )
    for attach in attach_steps:
        attach(G, books)
    return G, books
|
||||
|
||||
|
||||
def genScores(G, books):
    """Score opinion nodes, then unread books; return the global rating fit.

    The fit from ``calcRecDist`` is passed to both scoring steps and returned
    as ``(globMu, globStd)``.
    """
    fit = calcRecDist(G, books)
    globMu, globStd = fit
    # Opinion scores must exist before unread books can be scored from them.
    scoreOpinions(G, globMu, globStd)
    scoreUnread(G, globMu, globStd)
    return globMu, globStd
|
||||
|
||||
|
||||
def recommendNBooks(n):
    """Build, score and prune the graph down to ``n`` recommendations.

    The surviving graph is sized and labelled, the best list is printed, and
    the HTML view is generated. The pruning steps run in a fixed order.
    """
    graph, library = buildFullGraph()
    mean, spread = genScores(graph, library)

    # Coarse pass: keep a generous candidate pool of 2n + 5 books.
    removeRestOfSeries(graph)
    removePriv(graph)
    removeBad(graph, mean-spread-1.5)
    removeKeepBest(graph, int(n*2) + 5, maxDistForRead=1.5)
    removeEdge(graph)
    removeHighSpanTags(graph, 9)
    removeDangling(graph, alsoBooks=False)
    pruneTags(graph, 4)
    # Fine pass: books must now reach the global mean; keep the best n.
    removeBad(graph, mean, groups=['book'])
    pruneTags(graph, 3)
    removeTopLists(graph)
    removeDangling(graph, alsoBooks=True)
    removeKeepBest(graph, n, maxDistForRead=0.75)
    removeEdge(graph)
    removeDangling(graph, alsoBooks=True)

    # Presentation: node sizes and labels.
    scaleBooksByRating(graph)
    scaleOpinionsByRating(graph)
    addScoreToLabels(graph)

    printBestList(graph, num=n)
    genAndShowHTML(graph)
|
||||
|
||||
|
||||
def fullGraph():
    """Render the scored graph with only light pruning.

    Private books, weakly connected opinion nodes, wide tags, top lists and
    dangling nodes are removed; the top 100 books are printed and the HTML
    view is generated.
    """
    graph, library = buildFullGraph()
    # Scores are written onto the nodes; the returned fit is not needed here.
    mean, spread = genScores(graph, library)

    removePriv(graph)
    removeEdge(graph)
    removeHighSpanTags(graph, 7)
    removeDangling(graph, alsoBooks=False)
    removeTopLists(graph)
    pruneTags(graph, 3)
    removeDangling(graph, alsoBooks=True)

    # Presentation: node sizes and labels.
    scaleBooksByRating(graph)
    scaleOpinionsByRating(graph)
    addScoreToLabels(graph)

    printBestList(graph, num=100)
    genAndShowHTML(graph)
|
||||
|
||||
|
||||
def readBooksAnalysis():
    """Render the graph restricted to books that have a rating.

    Unrated and private books are removed before the structural pruning; the
    best list is printed (up to 100 entries) and the HTML view is generated.
    """
    graph, library = buildFullGraph()
    # Scores are written onto the nodes; the returned fit is not needed here.
    mean, spread = genScores(graph, library)

    removePriv(graph)
    removeUnread(graph)
    removeEdge(graph)
    removeHighSpanTags(graph, 15)
    removeDangling(graph, alsoBooks=False)
    removeTopLists(graph)
    pruneTags(graph, 8)

    # Presentation: node sizes and labels.
    scaleBooksByRating(graph)
    scaleOpinionsByRating(graph)
    addScoreToLabels(graph)

    printBestList(graph, num=100)
    genAndShowHTML(graph)
|
||||
|
||||
|
||||
# Script entry point: produce 30 recommendations when run directly.
if __name__ == "__main__":
    recommendNBooks(30)
|
Loading…
Reference in New Issue
Block a user