From 9a473edfdc7487730f054d777c85751a280853bc Mon Sep 17 00:00:00 2001 From: Dominik Roth Date: Sat, 19 Nov 2022 16:18:58 +0100 Subject: [PATCH] Allow configurable curiosity and bug fixes --- caliGraph.py | 28 +++++++++++++++++++++------- 1 file changed, 21 insertions(+), 7 deletions(-) diff --git a/caliGraph.py b/caliGraph.py index dd1cfff..4a148dc 100755 --- a/caliGraph.py +++ b/caliGraph.py @@ -138,7 +138,7 @@ def removeWhitepapers(G): for n in list(G.nodes): node = G.nodes[n] if node['t'] == 'book': - if 'whitepaper' in node['tags']: + if 'whitepaper' in node['tags'] or 'Lernzettel' in node['tags']: G.remove_node(n) @@ -198,7 +198,7 @@ def removeTags(G): G.remove_node(n) -def pruneTags(G, minCons=2): +def pruneTags(G, minCons=2, forceKeepLabels=[]): for n in sorted(list(G.nodes), key=lambda i: G.nodes[i]['score'] + len(G.nodes[i]['feedbacks'])/5 if 'score' in G.nodes[i] and 'feedbacks' in G.nodes[i] else 0): node = G.nodes[n] if node['t'] == 'tag': @@ -213,7 +213,7 @@ def pruneTags(G, minCons=2): foundCon += 0.25 else: foundCon += 1 - if foundCon > minCons: + if foundCon > minCons and node['label'] not in forceKeepLabels: G.remove_node(n) @@ -288,11 +288,11 @@ def pruneAuthorCons(G, maxCons=3): G.remove_node(m) -def removeHighSpanTags(G, maxCons=5): +def removeHighSpanTags(G, maxCons=5, forceKeepLabels=[]): for n in list(G.nodes): node = G.nodes[n] if node['t'] == 'tag': - if len(G.adj[n]) > maxCons: + if len(G.adj[n]) > maxCons and not node['label'] in forceKeepLabels: G.remove_node(n) @@ -395,6 +395,16 @@ def removeUselessTags(G, minUnread=1): G.remove_node(n) +def curiosityReward(G, coeff=1, dTan=True): + for n in list(G.nodes): + node = G.nodes[n] + if 'score' in node and 'se' in node: + delta = node['se'] * coeff + if dTan: + delta *= (1- math.tanh((node['score']/10-0.5)*7)**2) + new = max(0.0, min(10.0, node['score'] + delta)) + node['score'] = new + def removeUselessSeries(G, minSco=0): for n in list(G.nodes): node = G.nodes[n] @@ -814,7 +824,7 @@ def scaleOpinionsByRating(G): def addScoreToLabels(G): for n in list(G.nodes): node = G.nodes[n] - if node['t'] not in ['tag', 'newBook']: + if node['t'] not in []: #['tag', 'newBook']: if 'rating' in node and node['rating'] != None: node['label'] += " ("+str(node['rating'])+")" else: @@ -1185,7 +1195,7 @@ def analyze(G, books, mu, type_name, name, dist=2.1): G.remove_node(n) if dist >= 2: removeThinRecs(G, 2) - removeHighSpanTags(G, 12) + removeHighSpanTags(G, 12, forceKeepLabels=[match['label']]) if dist > 1: removeDangling(G, True) @@ -1530,6 +1540,8 @@ def cliInterface(imgDef=False): parser.add_argument('--keep-top-lists', action="store_true") parser.add_argument('--keep-useless-recommenders', action="store_true") parser.add_argument('--dark', action="store_true") + parser.add_argument('--curiosity', type=float, default=0.0, + help='curiosity coefficient (higher = more speculative)') parser.add_argument('--v3d', action="store_true") if imgDef: parser.add_argument('--no-imgs', action="store_true") @@ -1634,6 +1646,8 @@ def mainCLI(args): G, books = buildFullGraph(darkMode=args.dark) mu, std = genScores(G, books) + curiosityReward(G, args.curiosity) + if not args.keep_whitepapers: removeWhitepapers(G)