diff --git a/.gitignore b/.gitignore
index 5359d8d..b0c914a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -3,3 +3,4 @@ __pycache__
 .venv
 neuralWeights.json
 neuralWeights.json.bak
+.imgLinkCache.json
diff --git a/caliGraph.py b/caliGraph.py
index 72013ad..abcdaab 100755
--- a/caliGraph.py
+++ b/caliGraph.py
@@ -5,6 +5,7 @@ import json
 import math
 import copy
 import random
+import requests
 
 import numpy as np
 import pandas as pd
@@ -13,7 +14,9 @@ from scipy.stats import norm
 import matplotlib.pyplot as plt
 import networkx as nx
 from pyvis.network import Network
+import plotly.graph_objects as go
+import wikipedia
 
 
 def getAllAuthors(books):
     authors = set()
@@ -562,6 +565,24 @@ def buildBookGraph(books, darkMode=False, extractKeywords=True, mergeTags=True):
     return G
 
 
+def getWikiImage(search_term):
+    from fuzzywuzzy import fuzz
+    WIKI_REQUEST = 'http://en.wikipedia.org/w/api.php?action=query&prop=pageimages&format=json&piprop=original&titles='
+    try:
+        print('[i] Searching for >'+search_term+'< on WikiPedia...')
+        result = wikipedia.search(search_term, results = 1)
+        if fuzz.ratio(search_term, result) < 50:
+            raise Exception('blub')
+        wikipedia.set_lang('en')
+        wkpage = wikipedia.WikipediaPage(title = result[0])
+        title = wkpage.title
+        response = requests.get(WIKI_REQUEST+title)
+        json_data = json.loads(response.text)
+        img_link = list(json_data['query']['pages'].values())[0]['original']['source']
+        return img_link
+    except:
+        print('[!] No match for '+search_term+' on WikiPedia...')
+        return None
 
 def graphAddAuthors(G, books, darkMode=False):
     for author in getAllAuthors(books):
         G.add_node('a/'+author, color='green', t='author', label=author)
@@ -571,7 +592,6 @@ def graphAddAuthors(G, books, darkMode=False):
             G.add_edge('a/'+author, book['id'], color=readColor(book))
     return G
 
-
 def graphAddRecommenders(G, books, darkMode=False):
     for rec in getAllRecommenders(books):
         G.add_node('r/'+rec, color='orange', t='recommender', label=rec)
@@ -580,7 +600,6 @@ def graphAddRecommenders(G, books, darkMode=False):
             G.add_edge('r/'+rec, book['id'], color=readColor(book))
     return G
 
-
 def graphAddTopLists(G, books, darkMode=False):
     for tl in getAllTopLists(books):
         G.add_node('t/'+tl, color='yellow', t='topList', label=tl)
@@ -664,6 +683,90 @@ def genAndShowHTML(G, showButtons=False, darkMode=False, arrows=False):
     net.show('nx.html')
 
 
+def genAndShow3D(G, darkMode=False):
+    node_sizes = []
+    node_labels = []
+    node_cols = []
+    for n in G.nodes:
+        node = G.nodes[n]
+        if node['t']=='tag':
+            node_cols.append('gray')
+        elif node['t']=='book':
+            if 'score' in node: # unread book
+                node_cols.append('lightblue')
+            else:
+                node_cols.append('magenta')
+        elif 'color' in node:
+            node_cols.append(node['color'])
+        else:
+            node_cols.append('black')
+
+        node_labels.append(node['label'])
+        node_sizes.append((node['value']/8)**1.5)
+
+    spring = nx.spring_layout(G,dim=3, seed=random.randint(0, 65536))
+    x_nodes = [spring[p][0] for p in spring]# x-coordinates of nodes
+    y_nodes = [spring[p][1] for p in spring]# y-coordinates
+    z_nodes = [spring[p][2] for p in spring]# z-coordinates
+
+    x_edges=[]
+    y_edges=[]
+    z_edges=[]
+
+    for edge in G.edges():
+        x_coords = [spring[edge[0]][0],spring[edge[1]][0],None]
+        x_edges += x_coords
+
+        y_coords = [spring[edge[0]][1],spring[edge[1]][1],None]
+        y_edges += y_coords
+
+        z_coords = [spring[edge[0]][2],spring[edge[1]][2],None]
+        z_edges += z_coords
+
+    trace_edges = go.Scatter3d(x=x_edges,
+                               y=y_edges,
+                               z=z_edges,
+                               mode='lines',
+                               line=dict(color='black', width=2),
+                               hoverinfo='none')
+
+    trace_nodes = go.Scatter3d(x=x_nodes,
+                               y=y_nodes,
+                               z=z_nodes,
+                               mode='markers',
+                               marker=dict(symbol='circle',
+                                           size=node_sizes,
+                                           color=node_cols, #color the nodes according to their community
+                                           #colorscale=['lightgreen','magenta'], #either green or mageneta
+                                           line=dict(color='gray', width=0.5)),
+                               text=node_labels,
+                               hoverinfo='text')
+
+    axis = dict(showbackground=False,
+                showline=False,
+                zeroline=False,
+                showgrid=False,
+                showticklabels=False,
+                title='')
+
+    layout = go.Layout(title="",
+                       width=1920,
+                       height=1080,
+                       plot_bgcolor=['#FFFFFF','#181818'][darkMode],
+                       paper_bgcolor=['#FFFFFF','#181818'][darkMode],
+                       showlegend=False,
+                       scene=dict(xaxis=dict(axis),
+                                  yaxis=dict(axis),
+                                  zaxis=dict(axis),
+                                  ),
+                       margin=dict(l=0, r=0, b=0, t=0),
+                       hovermode='closest')
+
+    data = [trace_edges, trace_nodes]
+    fig = go.Figure(data=data, layout=layout)
+
+    fig.show()
+
 def buildFullGraph(darkMode=False):
     books = loadBooksFromDB()
     G = buildBookGraph(books, darkMode=darkMode)
@@ -684,6 +787,32 @@ def genScores(G, books, calcPagerank=True):
         scoreUnread(G, globMu, globStd)
     return globMu, globStd
 
+def addImageToNode(node, cache, shape='circularImage'):
+    name = node['label'].split(' (')[0]
+    if not name in cache:
+        term = name
+        img = getWikiImage(term)
+        if img:
+            cache[name] = img
+    else:
+        img = cache[name]
+    if img:
+        node['imagePadding'] = '100px'
+        node['image']=img
+        node['shape']=shape
+
+def addImagesToNodes(G):
+    try:
+        with open('.imgLinkCache.json', 'r') as cf:
+            cache = json.loads(cf.read())
+    except IOError:
+        cache = {}
+    for n in list(G.nodes):
+        node = G.nodes[n]
+        if node['t'] in ['recommender', 'author']:
+            addImageToNode(node, cache, ['circularImage','image'][node['t']=='author'])
+    with open('.imgLinkCache.json', 'w') as cf:
+        cf.write(json.dumps(cache))
 
 def recommendNBooksRecommenderBased(G, mu, std, n, removeTopListsB=True, removeUselessRecommenders=True):
     removeRestOfSeries(G)
@@ -1013,11 +1142,13 @@ def findNewBooks(G, books, num, minRecSco=5):
             scores.append(adj['score'])
             ses.append(adj['se'])
         ses.append(min(ses))
-        if len(scores) < 2:
+        if False and len(scores) < 2:
             G.remove_node(n)
         else:
-            node['fake_se'] = sum(ses)/(len(ses)**1.2) # This is not how SE works. DILLIGAF?
-            node['score'] = sum(scores)/len(scores)*1.2 - node['fake_se']*2.5 + 0.5 - 0.1/math.sqrt(len(scores))
+            node['fake_se'] = sum(ses)/(len(ses)**1.2) + 0.5 + 0.5 * (len(scores)==1) # This is not how SE works. DILLIGAF?
+            node['score'] = sum(scores)/len(scores)*1.2 - node['fake_se']*2 + 0.5 - 0.1/math.sqrt(len(scores))
+            if len(scores)==1:
+                node['score']*=0.80
             node['value'] = 20 + 5 * float(node['score'])
             node['label'] += " ({:.2f}±{:.1f})".format(node['score'], node['fake_se'])
             node['label'] += '\n ' + node['author']
@@ -1145,7 +1276,9 @@ def cliInterface():
     parser.add_argument('--remove-edge', action="store_true")
    parser.add_argument('--keep-top-lists', action="store_true")
     parser.add_argument('--keep-useless-recommenders', action="store_true")
-    parser.add_argument('--dark-mode', action="store_true")
+    parser.add_argument('--dark', action="store_true")
+    parser.add_argument('--v3d', action="store_true")
+    parser.add_argument('--imgs', action="store_true")
 
     cmds = parser.add_subparsers(required=True, dest='cmd')
     p_rec = cmds.add_parser('recommend', description="TODO", aliases=['rec'])
@@ -1191,7 +1324,7 @@ def cliInterface():
         bestListT = 'book'
 
 
-    G, books = buildFullGraph(darkMode=args.dark_mode)
+    G, books = buildFullGraph(darkMode=args.dark)
     mu, std = genScores(G, books)
 
     if not args.keep_whitepapers:
@@ -1249,7 +1382,12 @@ def cliInterface():
     if not args.no_list:
         printBestList(G, t=bestListT)
     if not args.no_web and not args.cmd in ['listScores']:
-        genAndShowHTML(G, darkMode=args.dark_mode)
+        if args.v3d:
+            genAndShow3D(G, darkMode=args.dark)
+        else:
+            if args.imgs:
+                addImagesToNodes(G)
+            genAndShowHTML(G, darkMode=args.dark)
 
 
     weights = loadWeights()