Made recommendation-graph way better; tweaks to bounds-loss
This commit is contained in:
parent
32bac42c83
commit
92d1b33ee3
73
caliGraph.py
73
caliGraph.py
@ -186,6 +186,26 @@ def pruneTags(G, minCons=2):
|
||||
if foundCon > minCons:
|
||||
G.remove_node(n)
|
||||
|
||||
|
||||
def pruneRecommenders(G, minCons=2):
    """Remove recommender nodes whose weighted connection count is too high.

    For every node whose ``'t'`` attribute is ``'recommender'``, all
    neighbours of all nodes adjacent to it are visited and each visited
    neighbour adds a weight chosen by its own ``'t'`` attribute:

    * ``'topList'``          -> 0 (ignored)
    * ``'recommender'``      -> 0.5
    * ``'tag'``, ``'series'`` -> 0.25
    * anything else          -> 1

    The recommender is removed from ``G`` when the accumulated weight is
    strictly greater than ``minCons``.

    Note: despite its name, ``minCons`` acts as an upper bound; nodes that
    score above it are the ones that get removed.

    Args:
        G: graph object exposing ``nodes``, ``adj`` and ``remove_node``
            (presumably a networkx graph; confirm against the caller).
        minCons: threshold the accumulated weight is compared against.
    """
    # Weight contributed by a neighbour, keyed by its node type. Types that
    # are not listed here count as a full connection (weight 1).
    conWeights = {
        'topList': 0,
        'recommender': 0.5,
        'tag': 0.25,
        'series': 0.25,
    }
    # Iterate over a snapshot because nodes are removed while looping.
    for n in list(G.nodes):
        if G.nodes[n]['t'] != 'recommender':
            continue
        foundCon = sum(
            conWeights.get(G.nodes[con]['t'], 1)
            for book in G.adj[n]
            for con in G.adj[book]
        )
        if foundCon > minCons:
            G.remove_node(n)
|
||||
|
||||
|
||||
def pruneRecommenderCons(G, maxCons=5):
|
||||
for n in list(G.nodes):
|
||||
node = G.nodes[n]
|
||||
@ -367,7 +387,7 @@ def scoreUnread(G, globMu, globStd):
|
||||
feedbacks.append(node['se'])
|
||||
ws.append(getWeightForType('se'))
|
||||
#node['score'] = sum([fb*w for fb, w in zip(feedbacks, weights)])/len(feedbacks)
|
||||
node['score'] = sum([fb*w for fb, w in zip(feedbacks, ws)])/len(feedbacks)
|
||||
node['score'] = sum([fb*w for fb, w in zip(feedbacks, ws)])/sum(ws)
|
||||
else:
|
||||
node['score'] = globMu + errorFac*globStd + len(feedbacks)*0.0000000001
|
||||
if 'series' in node:
|
||||
@ -552,7 +572,7 @@ def genScores(G, books):
|
||||
return globMu, globStd
|
||||
|
||||
|
||||
def recommendNBooks(G, mu, std, n, removeTopListsB=True, removeUselessRecommenders=True):
|
||||
def recommendNBooksRecommenderBased(G, mu, std, n, removeTopListsB=True, removeUselessRecommenders=True):
|
||||
removeRestOfSeries(G)
|
||||
removeBad(G, mu-std*2-1)
|
||||
removeKeepBest(G, int(n*2) + 5, maxDistForRead=2)
|
||||
@ -608,6 +628,33 @@ def recommendNBooksTagBased(G, mu, std, n, removeTopListsB=True):
|
||||
scaleOpinionsByRating(G)
|
||||
addScoreToLabels(G)
|
||||
|
||||
def recommendNBooks(G, mu, std, n, removeTopListsB=True, removeUselessRecommenders=True):
    """Run the default pruning pipeline on G to leave roughly n recommendations.

    The helpers below are called in a fixed order; each one receives ``G``
    and nothing is returned, so they presumably mutate the graph in place
    (confirm against their definitions). The order is kept exactly as in
    the original because later steps operate on what earlier steps left.

    Args:
        G: the graph to prune.
        mu: score value used to derive the ``removeBad`` thresholds.
        std: score spread used for the first, lenient ``removeBad`` threshold.
        n: number of books to keep in the final ``removeKeepBest`` pass.
        removeTopListsB: when true, ``removeTopLists(G)`` is called.
        removeUselessRecommenders: accepted for signature compatibility;
            this function does not read it.
    """
    # Coarse pass: lenient score cut-off and an over-sized candidate set.
    removeRestOfSeries(G)
    removeBad(G, mu-std*2-1)
    removeKeepBest(G, int(n*2) + 5, maxDistForRead=2)
    removeEdge(G)
    removeHighSpanTags(G, 12)
    removeHighSpanReadBooks(G, 6)
    removeDangling(G, alsoBooks=False)

    # Prune recommenders and tags, then tighten the score cut-off for books
    # and prune again with slightly lower thresholds.
    pruneRecommenders(G, 12)
    pruneTags(G, 13)
    removeBad(G, mu, groups=['book'])
    removeUselessReadBooks(G)
    pruneTags(G, 12)
    pruneAuthorCons(G, int(n/5))
    pruneRecommenders(G, 11)
    removeUselessTags(G)

    # NOTE(review): indentation was reconstructed from flattened text; only
    # removeTopLists(G) is assumed to be conditional. Confirm against the repo.
    if removeTopListsB:
        removeTopLists(G)
    removeDangling(G, alsoBooks=True)

    # Narrow down to n books in two steps.
    removeKeepBest(G, n+math.ceil(n/20), maxDistForRead=1.5)
    removeUselessReadBooks(G)
    removeKeepBest(G, n, maxDistForRead=1.25)

    scaleBooksByRating(G)
    scaleOpinionsByRating(G)
    addScoreToLabels(G)
|
||||
|
||||
|
||||
def fullGraph(G, removeTopLists=True):
|
||||
removeEdge(G)
|
||||
@ -727,24 +774,27 @@ def evaluateFitness(books):
|
||||
ratedBooks = [n for n in list(G.nodes) if 'rating' in G.nodes[n] and G.nodes[n]['rating'] != None]
|
||||
boundsLoss = 0
|
||||
errSq = []
|
||||
mu, std = genScores(G, books)
|
||||
mu, sigma = genScores(G, books)
|
||||
for b in G.nodes:
|
||||
if b in ratedBooks:
|
||||
rating = G.nodes[b]['rating']
|
||||
G.nodes[b]['rating'] = None
|
||||
mu, std = genScores(G, books)
|
||||
_, _ = genScores(G, books)
|
||||
if G.nodes[b]['score'] > rating: # over estimated
|
||||
errSq.append(((rating - G.nodes[b]['score'])**2)*2)
|
||||
else:
|
||||
errSq.append((rating - G.nodes[b]['score'])**2)
|
||||
G.nodes[b]['rating'] = rating
|
||||
if 'score' in G.nodes[b] and G.nodes[b]['score'] != None:
|
||||
if G.nodes[b]['score'] > 10.0:
|
||||
boundsLoss += (G.nodes[b]['score'] - 10)**2
|
||||
elif G.nodes[b]['score'] < 0.0:
|
||||
boundsLoss += (G.nodes[b]['score'])**2
|
||||
score = G.nodes[b]['score']
|
||||
if score > 10.0:
|
||||
boundsLoss += (score - 10)**2
|
||||
elif score < 0.0:
|
||||
boundsLoss += (score)**2
|
||||
# reward separation linearly
|
||||
boundsLoss -= abs(score - mu)/10
|
||||
regressionLoss = sum([(1-w)**2 for w in weights.values()])
|
||||
return sum(errSq)/len(errSq) + regressionLoss/100 + boundsLoss/1000
|
||||
return sum(errSq)/len(errSq) + regressionLoss/100 + boundsLoss/100
|
||||
|
||||
def train(gamma = 1):
|
||||
global weights
|
||||
@ -807,6 +857,7 @@ def cliInterface():
|
||||
p_rec = cmds.add_parser('recommend', description="TODO", aliases=['rec'])
|
||||
p_rec.add_argument('-n', type=int, default=25, help='number of books to recommend')
|
||||
p_rec.add_argument('--tag-based', action="store_true")
|
||||
p_rec.add_argument('--recommender-based', action="store_true")
|
||||
|
||||
p_read = cmds.add_parser('read', description="TODO", aliases=[])
|
||||
p_read.add_argument('--min-rating', type=int, default=0)
|
||||
@ -834,7 +885,11 @@ def cliInterface():
|
||||
|
||||
if args.cmd=="recommend":
|
||||
if args.tag_based:
|
||||
if args.recommender_based:
|
||||
raise Exception('tag-based and recommender-based can not be be combined')
|
||||
recommendNBooksTagBased(G, mu, std, args.n, not args.keep_top_lists)
|
||||
elif args.recommender_based:
|
||||
recommendNBooksRecommenderBased(G, mu, std, args.n, not args.keep_top_lists, not args.keep_useless_recommenders)
|
||||
else:
|
||||
recommendNBooks(G, mu, std, args.n, not args.keep_top_lists, not args.keep_useless_recommenders)
|
||||
elif args.cmd=="read":
|
||||
|
Loading…
Reference in New Issue
Block a user