Look up images for authors and recommenders on Wikipedia
This commit is contained in:
parent
f5c3077cb4
commit
5e6dc9ffe2
1
.gitignore
vendored
1
.gitignore
vendored
@ -3,3 +3,4 @@ __pycache__
|
||||
.venv
|
||||
neuralWeights.json
|
||||
neuralWeights.json.bak
|
||||
.imgLinkCache.json
|
||||
|
154
caliGraph.py
154
caliGraph.py
@ -5,6 +5,7 @@ import json
|
||||
import math
|
||||
import copy
|
||||
import random
|
||||
import requests
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
@ -13,7 +14,9 @@ from scipy.stats import norm
|
||||
import matplotlib.pyplot as plt
|
||||
import networkx as nx
|
||||
from pyvis.network import Network
|
||||
import plotly.graph_objects as go
|
||||
|
||||
import wikipedia
|
||||
|
||||
def getAllAuthors(books):
|
||||
authors = set()
|
||||
@ -562,6 +565,24 @@ def buildBookGraph(books, darkMode=False, extractKeywords=True, mergeTags=True):
|
||||
|
||||
return G
|
||||
|
||||
def getWikiImage(search_term):
    """Return the URL of the main image of the best-matching Wikipedia page.

    The term is searched on the English Wikipedia; the top hit is accepted
    only if its title is at least 50% similar (fuzzywuzzy ``fuzz.ratio``) to
    the search term. The original-size page image is then requested from the
    MediaWiki ``pageimages`` API.

    Args:
        search_term: Name to look up (e.g. an author or recommender name).

    Returns:
        The image URL as a string, or ``None`` when no sufficiently similar
        page exists, the page has no image, or any lookup step fails.
    """
    from fuzzywuzzy import fuzz
    WIKI_API = 'https://en.wikipedia.org/w/api.php'
    try:
        print('[i] Searching for >'+search_term+'< on WikiPedia...')
        # Select the language before searching so the search and the page
        # lookup hit the same wiki as the image query below.
        wikipedia.set_lang('en')
        results = wikipedia.search(search_term, results=1)
        # Compare against the top title itself, not the list of results, and
        # treat an empty result list as "no match".
        if not results or fuzz.ratio(search_term, results[0]) < 50:
            raise LookupError('no sufficiently similar page for '+search_term)
        title = wikipedia.WikipediaPage(title=results[0]).title
        # Let requests URL-encode the title (spaces, '&', non-ASCII, ...), and
        # bound the wait so one stalled lookup cannot hang the whole run.
        response = requests.get(
            WIKI_API,
            params={
                'action': 'query',
                'prop': 'pageimages',
                'format': 'json',
                'piprop': 'original',
                'titles': title,
            },
            timeout=10,
        )
        pages = response.json()['query']['pages']
        return next(iter(pages.values()))['original']['source']
    except Exception:
        # Best-effort lookup: any failure (no hit, disambiguation page,
        # network error, page without image) simply means "no image".
        # Unlike a bare except, Ctrl-C and SystemExit still propagate.
        print('[!] No match for '+search_term+' on WikiPedia...')
        return None
|
||||
|
||||
def graphAddAuthors(G, books, darkMode=False):
|
||||
for author in getAllAuthors(books):
|
||||
@ -571,7 +592,6 @@ def graphAddAuthors(G, books, darkMode=False):
|
||||
G.add_edge('a/'+author, book['id'], color=readColor(book))
|
||||
return G
|
||||
|
||||
|
||||
def graphAddRecommenders(G, books, darkMode=False):
|
||||
for rec in getAllRecommenders(books):
|
||||
G.add_node('r/'+rec, color='orange', t='recommender', label=rec)
|
||||
@ -580,7 +600,6 @@ def graphAddRecommenders(G, books, darkMode=False):
|
||||
G.add_edge('r/'+rec, book['id'], color=readColor(book))
|
||||
return G
|
||||
|
||||
|
||||
def graphAddTopLists(G, books, darkMode=False):
|
||||
for tl in getAllTopLists(books):
|
||||
G.add_node('t/'+tl, color='yellow', t='topList', label=tl)
|
||||
@ -664,6 +683,90 @@ def genAndShowHTML(G, showButtons=False, darkMode=False, arrows=False):
|
||||
net.show('nx.html')
|
||||
|
||||
|
||||
def genAndShow3D(G, darkMode=False):
    """Draw graph ``G`` as a 3D plotly scatter plot and show it.

    Node positions come from a 3-dimensional networkx spring layout with a
    randomly drawn seed. Every node must carry the attributes ``t``,
    ``label`` and ``value``; ``color`` is optional.

    Args:
        G: Graph whose nodes and edges are drawn.
        darkMode: Selects the dark (``#181818``) instead of the white
            background for plot and paper.
    """
    def colorFor(attrs):
        # Tags and books get fixed colours; any other node type uses its own
        # 'color' attribute and falls back to black.
        kind = attrs['t']
        if kind == 'tag':
            return 'gray'
        if kind == 'book':
            # A book carrying a 'score' is treated as unread.
            return 'lightblue' if 'score' in attrs else 'magenta'
        return attrs.get('color', 'black')

    node_cols, node_labels, node_sizes = [], [], []
    for key in G.nodes:
        attrs = G.nodes[key]
        node_cols.append(colorFor(attrs))
        node_labels.append(attrs['label'])
        node_sizes.append((attrs['value']/8)**1.5)

    spring = nx.spring_layout(G, dim=3, seed=random.randint(0, 65536))
    positions = list(spring.values())
    x_nodes = [pos[0] for pos in positions]
    y_nodes = [pos[1] for pos in positions]
    z_nodes = [pos[2] for pos in positions]

    # Each edge contributes (start, end, None) per axis; the None entries
    # separate the individual line segments within the single trace.
    x_edges, y_edges, z_edges = [], [], []
    for src, dst in G.edges():
        start, end = spring[src], spring[dst]
        for axisIdx, coords in enumerate((x_edges, y_edges, z_edges)):
            coords.extend((start[axisIdx], end[axisIdx], None))

    trace_edges = go.Scatter3d(
        x=x_edges,
        y=y_edges,
        z=z_edges,
        mode='lines',
        line=dict(color='black', width=2),
        hoverinfo='none',
    )

    trace_nodes = go.Scatter3d(
        x=x_nodes,
        y=y_nodes,
        z=z_nodes,
        mode='markers',
        marker=dict(
            symbol='circle',
            size=node_sizes,
            color=node_cols,  # one colour per node, chosen by node type
            line=dict(color='gray', width=0.5),
        ),
        text=node_labels,
        hoverinfo='text',
    )

    # Shared settings that hide every axis decoration.
    axis = dict(
        showbackground=False,
        showline=False,
        zeroline=False,
        showgrid=False,
        showticklabels=False,
        title='',
    )

    background = ['#FFFFFF', '#181818'][darkMode]
    layout = go.Layout(
        title="",
        width=1920,
        height=1080,
        plot_bgcolor=background,
        paper_bgcolor=background,
        showlegend=False,
        scene=dict(
            xaxis=dict(axis),
            yaxis=dict(axis),
            zaxis=dict(axis),
        ),
        margin=dict(l=0, r=0, b=0, t=0),
        hovermode='closest',
    )

    fig = go.Figure(data=[trace_edges, trace_nodes], layout=layout)
    fig.show()
|
||||
|
||||
def buildFullGraph(darkMode=False):
|
||||
books = loadBooksFromDB()
|
||||
G = buildBookGraph(books, darkMode=darkMode)
|
||||
@ -684,6 +787,32 @@ def genScores(G, books, calcPagerank=True):
|
||||
scoreUnread(G, globMu, globStd)
|
||||
return globMu, globStd
|
||||
|
||||
def addImageToNode(node, cache, shape='circularImage'):
    """Attach an image to ``node``, looking it up via ``cache`` or Wikipedia.

    The lookup name is the node label up to the first ``' ('``. If the name
    is already in ``cache`` the stored link is used; otherwise
    ``getWikiImage`` is queried and a successful result is stored in
    ``cache``. When a link is available the node's ``imagePadding``,
    ``image`` and ``shape`` attributes are set; otherwise the node is left
    untouched.

    Args:
        node: Mutable node attribute mapping; must contain ``'label'``.
        cache: Mutable mapping of name -> image link; updated in place.
        shape: Value stored in ``node['shape']`` when an image is found.
    """
    # Drop everything from the first ' (' on - presumably a suffix appended
    # to the label elsewhere; confirm against the label-building code.
    name = node['label'].split(' (')[0]
    if name in cache:
        img = cache[name]
    else:
        img = getWikiImage(name)
        if img:
            # Only hits are stored, so failed lookups are retried next time.
            cache[name] = img
    if img:
        node['imagePadding'] = '100px'
        node['image'] = img
        node['shape'] = shape
|
||||
|
||||
def addImagesToNodes(G):
    """Add images to all recommender and author nodes of ``G``.

    Image links are cached in ``.imgLinkCache.json`` in the current working
    directory: the file is read at the start (a missing, unreadable or
    malformed file yields an empty cache) and rewritten at the end, even if
    a lookup raises, so links fetched so far are not lost.

    Args:
        G: Graph whose node attribute dicts carry a ``'t'`` type key. Nodes
            of type ``'author'`` get shape ``'image'``, nodes of type
            ``'recommender'`` get ``'circularImage'``.
    """
    cachePath = '.imgLinkCache.json'
    try:
        with open(cachePath, 'r') as cf:
            cache = json.load(cf)
    except (IOError, ValueError):
        # ValueError covers json.JSONDecodeError: a corrupt cache file should
        # not abort the run, it is simply rebuilt.
        cache = {}
    if not isinstance(cache, dict):
        # Valid JSON of the wrong shape is just as unusable as a corrupt file.
        cache = {}

    try:
        for n in list(G.nodes):
            node = G.nodes[n]
            if node['t'] in ('recommender', 'author'):
                shape = 'image' if node['t'] == 'author' else 'circularImage'
                addImageToNode(node, cache, shape)
    finally:
        # Serialize before opening so a serialization error cannot leave a
        # truncated cache file behind.
        payload = json.dumps(cache)
        with open(cachePath, 'w') as cf:
            cf.write(payload)
|
||||
|
||||
def recommendNBooksRecommenderBased(G, mu, std, n, removeTopListsB=True, removeUselessRecommenders=True):
|
||||
removeRestOfSeries(G)
|
||||
@ -1013,11 +1142,13 @@ def findNewBooks(G, books, num, minRecSco=5):
|
||||
scores.append(adj['score'])
|
||||
ses.append(adj['se'])
|
||||
ses.append(min(ses))
|
||||
if len(scores) < 2:
|
||||
if False and len(scores) < 2:
|
||||
G.remove_node(n)
|
||||
else:
|
||||
node['fake_se'] = sum(ses)/(len(ses)**1.2) # This is not how SE works. DILLIGAF?
|
||||
node['score'] = sum(scores)/len(scores)*1.2 - node['fake_se']*2.5 + 0.5 - 0.1/math.sqrt(len(scores))
|
||||
node['fake_se'] = sum(ses)/(len(ses)**1.2) + 0.5 + 0.5 * (len(scores)==1) # This is not how SE works. DILLIGAF?
|
||||
node['score'] = sum(scores)/len(scores)*1.2 - node['fake_se']*2 + 0.5 - 0.1/math.sqrt(len(scores))
|
||||
if len(scores)==1:
|
||||
node['score']*=0.80
|
||||
node['value'] = 20 + 5 * float(node['score'])
|
||||
node['label'] += " ({:.2f}±{:.1f})".format(node['score'], node['fake_se'])
|
||||
node['label'] += '\n ' + node['author']
|
||||
@ -1145,7 +1276,9 @@ def cliInterface():
|
||||
parser.add_argument('--remove-edge', action="store_true")
|
||||
parser.add_argument('--keep-top-lists', action="store_true")
|
||||
parser.add_argument('--keep-useless-recommenders', action="store_true")
|
||||
parser.add_argument('--dark-mode', action="store_true")
|
||||
parser.add_argument('--dark', action="store_true")
|
||||
parser.add_argument('--v3d', action="store_true")
|
||||
parser.add_argument('--imgs', action="store_true")
|
||||
cmds = parser.add_subparsers(required=True, dest='cmd')
|
||||
|
||||
p_rec = cmds.add_parser('recommend', description="TODO", aliases=['rec'])
|
||||
@ -1191,7 +1324,7 @@ def cliInterface():
|
||||
|
||||
bestListT = 'book'
|
||||
|
||||
G, books = buildFullGraph(darkMode=args.dark_mode)
|
||||
G, books = buildFullGraph(darkMode=args.dark)
|
||||
mu, std = genScores(G, books)
|
||||
|
||||
if not args.keep_whitepapers:
|
||||
@ -1249,7 +1382,12 @@ def cliInterface():
|
||||
if not args.no_list:
|
||||
printBestList(G, t=bestListT)
|
||||
if not args.no_web and not args.cmd in ['listScores']:
|
||||
genAndShowHTML(G, darkMode=args.dark_mode)
|
||||
if args.v3d:
|
||||
genAndShow3D(G, darkMode=args.dark)
|
||||
else:
|
||||
if args.imgs:
|
||||
addImagesToNodes(G)
|
||||
genAndShowHTML(G, darkMode=args.dark)
|
||||
|
||||
|
||||
weights = loadWeights()
|
||||
|
Loading…
Reference in New Issue
Block a user