Made recommendation-graph way better; tweaks to bounds-loss

This commit is contained in:
Dominik Moritz Roth 2021-09-25 00:54:09 +02:00
parent 32bac42c83
commit 92d1b33ee3

View File

@ -186,6 +186,26 @@ def pruneTags(G, minCons=2):
if foundCon > minCons: if foundCon > minCons:
G.remove_node(n) G.remove_node(n)
def pruneRecommenders(G, minCons=2):
    """Remove recommender nodes whose weighted neighbourhood exceeds ``minCons``.

    For every node whose ``'t'`` attribute is ``'recommender'``, each
    neighbour of that node (called ``book`` below) is visited, and every node
    adjacent to that neighbour adds to a running weight:

    * ``'topList'`` adds nothing,
    * ``'recommender'`` adds 0.5,
    * ``'tag'`` and ``'series'`` add 0.25,
    * any other type adds 1.

    If the adjacency is symmetric, the recommender being examined is itself
    among the nodes adjacent to its books and is counted too.

    Args:
        G: Graph object exposing ``nodes``, ``adj`` and ``remove_node``
            (used here like a networkx graph -- TODO confirm). It is
            modified through ``remove_node``.
        minCons: Threshold. A recommender is removed when its accumulated
            weight is strictly greater than this value.
            NOTE(review): despite the name this acts as an upper bound --
            confirm that is intended.

    Returns:
        None.
    """
    # Iterate over a snapshot because nodes are removed during the loop.
    for n in list(G.nodes):
        if G.nodes[n]['t'] != 'recommender':
            continue
        foundCon = 0
        for book in G.adj[n]:
            for con in G.adj[book]:
                conType = G.nodes[con]['t']
                if conType == 'topList':
                    continue
                if conType == 'recommender':
                    foundCon += 0.5
                elif conType in ('tag', 'series'):
                    foundCon += 0.25
                else:
                    foundCon += 1
        # Decide only after the whole walk: removing n inside the loops
        # would change G.adj[n] while it is being iterated.
        if foundCon > minCons:
            G.remove_node(n)
def pruneRecommenderCons(G, maxCons=5): def pruneRecommenderCons(G, maxCons=5):
for n in list(G.nodes): for n in list(G.nodes):
node = G.nodes[n] node = G.nodes[n]
@ -367,7 +387,7 @@ def scoreUnread(G, globMu, globStd):
feedbacks.append(node['se']) feedbacks.append(node['se'])
ws.append(getWeightForType('se')) ws.append(getWeightForType('se'))
#node['score'] = sum([fb*w for fb, w in zip(feedbacks, weights)])/len(feedbacks) #node['score'] = sum([fb*w for fb, w in zip(feedbacks, weights)])/len(feedbacks)
node['score'] = sum([fb*w for fb, w in zip(feedbacks, ws)])/len(feedbacks) node['score'] = sum([fb*w for fb, w in zip(feedbacks, ws)])/sum(ws)
else: else:
node['score'] = globMu + errorFac*globStd + len(feedbacks)*0.0000000001 node['score'] = globMu + errorFac*globStd + len(feedbacks)*0.0000000001
if 'series' in node: if 'series' in node:
@ -552,7 +572,7 @@ def genScores(G, books):
return globMu, globStd return globMu, globStd
def recommendNBooks(G, mu, std, n, removeTopListsB=True, removeUselessRecommenders=True): def recommendNBooksRecommenderBased(G, mu, std, n, removeTopListsB=True, removeUselessRecommenders=True):
removeRestOfSeries(G) removeRestOfSeries(G)
removeBad(G, mu-std*2-1) removeBad(G, mu-std*2-1)
removeKeepBest(G, int(n*2) + 5, maxDistForRead=2) removeKeepBest(G, int(n*2) + 5, maxDistForRead=2)
@ -608,6 +628,33 @@ def recommendNBooksTagBased(G, mu, std, n, removeTopListsB=True):
scaleOpinionsByRating(G) scaleOpinionsByRating(G)
addScoreToLabels(G) addScoreToLabels(G)
def recommendNBooks(G, mu, std, n, removeTopListsB=True, removeUselessRecommenders=True):
    """Reduce ``G`` to a recommendation graph of roughly ``n`` books.

    Runs a fixed sequence of pruning and scaling helpers on ``G``. The helpers
    are defined elsewhere in this file; their return values are not used here
    and nothing is returned, so they presumably modify ``G`` in place
    (TODO confirm).

    Args:
        G: The graph to prune; passed to every helper.
        mu: Score mean. ``mu - 2*std - 1`` is the first ``removeBad``
            threshold and ``mu`` itself the second (restricted to the
            ``'book'`` group).
        std: Score standard deviation, used only in the first ``removeBad``
            threshold.
        n: Target number of books. It sizes the ``removeKeepBest`` passes
            (``2n + 5``, then ``n + ceil(n/20)``, then ``n``) and the
            ``pruneAuthorCons`` limit (``int(n/5)``).
        removeTopListsB: When true, ``removeTopLists(G)`` is called before
            the final dangling-node cleanup.
        removeUselessRecommenders: Accepted for signature compatibility with
            the other ``recommendNBooks*`` entry points; this function does
            not read it.

    Returns:
        None.
    """
    # Coarse reduction: drop clearly bad nodes and keep a generous candidate set.
    removeRestOfSeries(G)
    removeBad(G, mu-std*2-1)
    removeKeepBest(G, int(n*2) + 5, maxDistForRead=2)
    removeEdge(G)
    removeHighSpanTags(G, 12)
    removeHighSpanReadBooks(G, 6)
    removeDangling(G, alsoBooks=False)

    # Alternate recommender and tag pruning with tightening thresholds.
    pruneRecommenders(G, 12)
    pruneTags(G, 13)
    removeBad(G, mu, groups=['book'])
    removeUselessReadBooks(G)
    pruneTags(G, 12)
    pruneAuthorCons(G, int(n/5))
    pruneRecommenders(G, 11)
    removeUselessTags(G)
    if removeTopListsB:
        removeTopLists(G)
    removeDangling(G, alsoBooks=True)

    # Narrow down to the final n books in two steps.
    removeKeepBest(G, n+math.ceil(n/20), maxDistForRead=1.5)
    removeUselessReadBooks(G)
    removeKeepBest(G, n, maxDistForRead=1.25)

    # Presentation passes.
    scaleBooksByRating(G)
    scaleOpinionsByRating(G)
    addScoreToLabels(G)
def fullGraph(G, removeTopLists=True): def fullGraph(G, removeTopLists=True):
removeEdge(G) removeEdge(G)
@ -727,24 +774,27 @@ def evaluateFitness(books):
ratedBooks = [n for n in list(G.nodes) if 'rating' in G.nodes[n] and G.nodes[n]['rating'] != None] ratedBooks = [n for n in list(G.nodes) if 'rating' in G.nodes[n] and G.nodes[n]['rating'] != None]
boundsLoss = 0 boundsLoss = 0
errSq = [] errSq = []
mu, std = genScores(G, books) mu, sigma = genScores(G, books)
for b in G.nodes: for b in G.nodes:
if b in ratedBooks: if b in ratedBooks:
rating = G.nodes[b]['rating'] rating = G.nodes[b]['rating']
G.nodes[b]['rating'] = None G.nodes[b]['rating'] = None
mu, std = genScores(G, books) _, _ = genScores(G, books)
if G.nodes[b]['score'] > rating: # over estimated if G.nodes[b]['score'] > rating: # over estimated
errSq.append(((rating - G.nodes[b]['score'])**2)*2) errSq.append(((rating - G.nodes[b]['score'])**2)*2)
else: else:
errSq.append((rating - G.nodes[b]['score'])**2) errSq.append((rating - G.nodes[b]['score'])**2)
G.nodes[b]['rating'] = rating G.nodes[b]['rating'] = rating
if 'score' in G.nodes[b] and G.nodes[b]['score'] != None: if 'score' in G.nodes[b] and G.nodes[b]['score'] != None:
if G.nodes[b]['score'] > 10.0: score = G.nodes[b]['score']
boundsLoss += (G.nodes[b]['score'] - 10)**2 if score > 10.0:
elif G.nodes[b]['score'] < 0.0: boundsLoss += (score - 10)**2
boundsLoss += (G.nodes[b]['score'])**2 elif score < 0.0:
boundsLoss += (score)**2
# reward separation linearly
boundsLoss -= abs(score - mu)/10
regressionLoss = sum([(1-w)**2 for w in weights.values()]) regressionLoss = sum([(1-w)**2 for w in weights.values()])
return sum(errSq)/len(errSq) + regressionLoss/100 + boundsLoss/1000 return sum(errSq)/len(errSq) + regressionLoss/100 + boundsLoss/100
def train(gamma = 1): def train(gamma = 1):
global weights global weights
@ -807,6 +857,7 @@ def cliInterface():
p_rec = cmds.add_parser('recommend', description="TODO", aliases=['rec']) p_rec = cmds.add_parser('recommend', description="TODO", aliases=['rec'])
p_rec.add_argument('-n', type=int, default=25, help='number of books to recommend') p_rec.add_argument('-n', type=int, default=25, help='number of books to recommend')
p_rec.add_argument('--tag-based', action="store_true") p_rec.add_argument('--tag-based', action="store_true")
p_rec.add_argument('--recommender-based', action="store_true")
p_read = cmds.add_parser('read', description="TODO", aliases=[]) p_read = cmds.add_parser('read', description="TODO", aliases=[])
p_read.add_argument('--min-rating', type=int, default=0) p_read.add_argument('--min-rating', type=int, default=0)
@ -834,7 +885,11 @@ def cliInterface():
if args.cmd=="recommend": if args.cmd=="recommend":
if args.tag_based: if args.tag_based:
if args.recommender_based:
raise Exception('tag-based and recommender-based can not be be combined')
recommendNBooksTagBased(G, mu, std, args.n, not args.keep_top_lists) recommendNBooksTagBased(G, mu, std, args.n, not args.keep_top_lists)
elif args.recommender_based:
recommendNBooksRecommenderBased(G, mu, std, args.n, not args.keep_top_lists, not args.keep_useless_recommenders)
else: else:
recommendNBooks(G, mu, std, args.n, not args.keep_top_lists, not args.keep_useless_recommenders) recommendNBooks(G, mu, std, args.n, not args.keep_top_lists, not args.keep_useless_recommenders)
elif args.cmd=="read": elif args.cmd=="read":