From 92d1b33ee3439557054cd10d2eff48f6855bfc34 Mon Sep 17 00:00:00 2001 From: Dominik Roth Date: Sat, 25 Sep 2021 00:54:09 +0200 Subject: [PATCH] Made recommendation-graph way better; tweaks to bounds-loss --- caliGraph.py | 73 +++++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 64 insertions(+), 9 deletions(-) diff --git a/caliGraph.py b/caliGraph.py index 3ddbb6d..04713c1 100755 --- a/caliGraph.py +++ b/caliGraph.py @@ -186,6 +186,26 @@ def pruneTags(G, minCons=2): if foundCon > minCons: G.remove_node(n) + +def pruneRecommenders(G, minCons=2): + for n in list(G.nodes): + node = G.nodes[n] + if node['t'] == 'recommender': + foundCon = 0 + for book in G.adj[n]: + for con in G.adj[book]: + conType = G.nodes[con]['t'] + if conType not in ['topList']: + if conType in ['recommender']: + foundCon += 0.5 + elif conType in ['tag', 'series']: + foundCon += 0.25 + else: + foundCon += 1 + if foundCon > minCons: + G.remove_node(n) + + def pruneRecommenderCons(G, maxCons=5): for n in list(G.nodes): node = G.nodes[n] @@ -367,7 +387,7 @@ def scoreUnread(G, globMu, globStd): feedbacks.append(node['se']) ws.append(getWeightForType('se')) #node['score'] = sum([fb*w for fb, w in zip(feedbacks, weights)])/len(feedbacks) - node['score'] = sum([fb*w for fb, w in zip(feedbacks, ws)])/len(feedbacks) + node['score'] = sum([fb*w for fb, w in zip(feedbacks, ws)])/sum(ws) else: node['score'] = globMu + errorFac*globStd + len(feedbacks)*0.0000000001 if 'series' in node: @@ -552,7 +572,7 @@ def genScores(G, books): return globMu, globStd -def recommendNBooks(G, mu, std, n, removeTopListsB=True, removeUselessRecommenders=True): +def recommendNBooksRecommenderBased(G, mu, std, n, removeTopListsB=True, removeUselessRecommenders=True): removeRestOfSeries(G) removeBad(G, mu-std*2-1) removeKeepBest(G, int(n*2) + 5, maxDistForRead=2) @@ -608,6 +628,33 @@ def recommendNBooksTagBased(G, mu, std, n, removeTopListsB=True): scaleOpinionsByRating(G) addScoreToLabels(G) +def recommendNBooks(G, mu, std, n, removeTopListsB=True, removeUselessRecommenders=True): + removeRestOfSeries(G) + removeBad(G, mu-std*2-1) + removeKeepBest(G, int(n*2) + 5, maxDistForRead=2) + removeEdge(G) + removeHighSpanTags(G, 12) + removeHighSpanReadBooks(G, 6) + removeDangling(G, alsoBooks=False) + pruneRecommenders(G, 12) + pruneTags(G, 13) + removeBad(G, mu, groups=['book']) + removeUselessReadBooks(G) + pruneTags(G, 12) + pruneAuthorCons(G, int(n/5)) + pruneRecommenders(G, 11) + removeUselessTags(G) + if removeTopListsB: + removeTopLists(G) + removeDangling(G, alsoBooks=True) + removeKeepBest(G, n+math.ceil(n/20), maxDistForRead=1.5) + removeUselessReadBooks(G) + removeKeepBest(G, n, maxDistForRead=1.25) + + scaleBooksByRating(G) + scaleOpinionsByRating(G) + addScoreToLabels(G) + def fullGraph(G, removeTopLists=True): removeEdge(G) @@ -727,24 +774,27 @@ def evaluateFitness(books): ratedBooks = [n for n in list(G.nodes) if 'rating' in G.nodes[n] and G.nodes[n]['rating'] != None] boundsLoss = 0 errSq = [] - mu, std = genScores(G, books) + mu, sigma = genScores(G, books) for b in G.nodes: if b in ratedBooks: rating = G.nodes[b]['rating'] G.nodes[b]['rating'] = None - mu, std = genScores(G, books) + _, _ = genScores(G, books) if G.nodes[b]['score'] > rating: # over estimated errSq.append(((rating - G.nodes[b]['score'])**2)*2) else: errSq.append((rating - G.nodes[b]['score'])**2) G.nodes[b]['rating'] = rating if 'score' in G.nodes[b] and G.nodes[b]['score'] != None: - if G.nodes[b]['score'] > 10.0: - boundsLoss += (G.nodes[b]['score'] - 10)**2 - elif G.nodes[b]['score'] < 0.0: - boundsLoss += (G.nodes[b]['score'])**2 + score = G.nodes[b]['score'] + if score > 10.0: + boundsLoss += (score - 10)**2 + elif score < 0.0: + boundsLoss += (score)**2 + # reward seperation linearly + boundsLoss -= abs(score - mu)/10 regressionLoss = sum([(1-w)**2 for w in weights.values()]) - return sum(errSq)/len(errSq) + regressionLoss/100 + boundsLoss/1000 + return sum(errSq)/len(errSq) + regressionLoss/100 + boundsLoss/100 def train(gamma = 1): global weights @@ -807,6 +857,7 @@ def cliInterface(): p_rec = cmds.add_parser('recommend', description="TODO", aliases=['rec']) p_rec.add_argument('-n', type=int, default=25, help='number of books to recommend') p_rec.add_argument('--tag-based', action="store_true") + p_rec.add_argument('--recommender-based', action="store_true") p_read = cmds.add_parser('read', description="TODO", aliases=[]) p_read.add_argument('--min-rating', type=int, default=0) @@ -834,7 +885,11 @@ def cliInterface(): if args.cmd=="recommend": if args.tag_based: + if args.recommender_based: + raise Exception('tag-based and recommender-based can not be be combined') recommendNBooksTagBased(G, mu, std, args.n, not args.keep_top_lists) + elif args.recommender_based: + recommendNBooksRecommenderBased(G, mu, std, args.n, not args.keep_top_lists, not args.keep_useless_recommenders) else: recommendNBooks(G, mu, std, args.n, not args.keep_top_lists, not args.keep_useless_recommenders) elif args.cmd=="read":