Made recommendation-graph way better; tweaks to bounds-loss
This commit is contained in:
parent
32bac42c83
commit
92d1b33ee3
73
caliGraph.py
73
caliGraph.py
@ -186,6 +186,26 @@ def pruneTags(G, minCons=2):
|
|||||||
if foundCon > minCons:
|
if foundCon > minCons:
|
||||||
G.remove_node(n)
|
G.remove_node(n)
|
||||||
|
|
||||||
|
|
||||||
|
def pruneRecommenders(G, minCons=2):
|
||||||
|
for n in list(G.nodes):
|
||||||
|
node = G.nodes[n]
|
||||||
|
if node['t'] == 'recommender':
|
||||||
|
foundCon = 0
|
||||||
|
for book in G.adj[n]:
|
||||||
|
for con in G.adj[book]:
|
||||||
|
conType = G.nodes[con]['t']
|
||||||
|
if conType not in ['topList']:
|
||||||
|
if conType in ['recommender']:
|
||||||
|
foundCon += 0.5
|
||||||
|
elif conType in ['tag', 'series']:
|
||||||
|
foundCon += 0.25
|
||||||
|
else:
|
||||||
|
foundCon += 1
|
||||||
|
if foundCon > minCons:
|
||||||
|
G.remove_node(n)
|
||||||
|
|
||||||
|
|
||||||
def pruneRecommenderCons(G, maxCons=5):
|
def pruneRecommenderCons(G, maxCons=5):
|
||||||
for n in list(G.nodes):
|
for n in list(G.nodes):
|
||||||
node = G.nodes[n]
|
node = G.nodes[n]
|
||||||
@ -367,7 +387,7 @@ def scoreUnread(G, globMu, globStd):
|
|||||||
feedbacks.append(node['se'])
|
feedbacks.append(node['se'])
|
||||||
ws.append(getWeightForType('se'))
|
ws.append(getWeightForType('se'))
|
||||||
#node['score'] = sum([fb*w for fb, w in zip(feedbacks, weights)])/len(feedbacks)
|
#node['score'] = sum([fb*w for fb, w in zip(feedbacks, weights)])/len(feedbacks)
|
||||||
node['score'] = sum([fb*w for fb, w in zip(feedbacks, ws)])/len(feedbacks)
|
node['score'] = sum([fb*w for fb, w in zip(feedbacks, ws)])/sum(ws)
|
||||||
else:
|
else:
|
||||||
node['score'] = globMu + errorFac*globStd + len(feedbacks)*0.0000000001
|
node['score'] = globMu + errorFac*globStd + len(feedbacks)*0.0000000001
|
||||||
if 'series' in node:
|
if 'series' in node:
|
||||||
@ -552,7 +572,7 @@ def genScores(G, books):
|
|||||||
return globMu, globStd
|
return globMu, globStd
|
||||||
|
|
||||||
|
|
||||||
def recommendNBooks(G, mu, std, n, removeTopListsB=True, removeUselessRecommenders=True):
|
def recommendNBooksRecommenderBased(G, mu, std, n, removeTopListsB=True, removeUselessRecommenders=True):
|
||||||
removeRestOfSeries(G)
|
removeRestOfSeries(G)
|
||||||
removeBad(G, mu-std*2-1)
|
removeBad(G, mu-std*2-1)
|
||||||
removeKeepBest(G, int(n*2) + 5, maxDistForRead=2)
|
removeKeepBest(G, int(n*2) + 5, maxDistForRead=2)
|
||||||
@ -608,6 +628,33 @@ def recommendNBooksTagBased(G, mu, std, n, removeTopListsB=True):
|
|||||||
scaleOpinionsByRating(G)
|
scaleOpinionsByRating(G)
|
||||||
addScoreToLabels(G)
|
addScoreToLabels(G)
|
||||||
|
|
||||||
|
def recommendNBooks(G, mu, std, n, removeTopListsB=True, removeUselessRecommenders=True):
|
||||||
|
removeRestOfSeries(G)
|
||||||
|
removeBad(G, mu-std*2-1)
|
||||||
|
removeKeepBest(G, int(n*2) + 5, maxDistForRead=2)
|
||||||
|
removeEdge(G)
|
||||||
|
removeHighSpanTags(G, 12)
|
||||||
|
removeHighSpanReadBooks(G, 6)
|
||||||
|
removeDangling(G, alsoBooks=False)
|
||||||
|
pruneRecommenders(G, 12)
|
||||||
|
pruneTags(G, 13)
|
||||||
|
removeBad(G, mu, groups=['book'])
|
||||||
|
removeUselessReadBooks(G)
|
||||||
|
pruneTags(G, 12)
|
||||||
|
pruneAuthorCons(G, int(n/5))
|
||||||
|
pruneRecommenders(G, 11)
|
||||||
|
removeUselessTags(G)
|
||||||
|
if removeTopListsB:
|
||||||
|
removeTopLists(G)
|
||||||
|
removeDangling(G, alsoBooks=True)
|
||||||
|
removeKeepBest(G, n+math.ceil(n/20), maxDistForRead=1.5)
|
||||||
|
removeUselessReadBooks(G)
|
||||||
|
removeKeepBest(G, n, maxDistForRead=1.25)
|
||||||
|
|
||||||
|
scaleBooksByRating(G)
|
||||||
|
scaleOpinionsByRating(G)
|
||||||
|
addScoreToLabels(G)
|
||||||
|
|
||||||
|
|
||||||
def fullGraph(G, removeTopLists=True):
|
def fullGraph(G, removeTopLists=True):
|
||||||
removeEdge(G)
|
removeEdge(G)
|
||||||
@ -727,24 +774,27 @@ def evaluateFitness(books):
|
|||||||
ratedBooks = [n for n in list(G.nodes) if 'rating' in G.nodes[n] and G.nodes[n]['rating'] != None]
|
ratedBooks = [n for n in list(G.nodes) if 'rating' in G.nodes[n] and G.nodes[n]['rating'] != None]
|
||||||
boundsLoss = 0
|
boundsLoss = 0
|
||||||
errSq = []
|
errSq = []
|
||||||
mu, std = genScores(G, books)
|
mu, sigma = genScores(G, books)
|
||||||
for b in G.nodes:
|
for b in G.nodes:
|
||||||
if b in ratedBooks:
|
if b in ratedBooks:
|
||||||
rating = G.nodes[b]['rating']
|
rating = G.nodes[b]['rating']
|
||||||
G.nodes[b]['rating'] = None
|
G.nodes[b]['rating'] = None
|
||||||
mu, std = genScores(G, books)
|
_, _ = genScores(G, books)
|
||||||
if G.nodes[b]['score'] > rating: # over estimated
|
if G.nodes[b]['score'] > rating: # over estimated
|
||||||
errSq.append(((rating - G.nodes[b]['score'])**2)*2)
|
errSq.append(((rating - G.nodes[b]['score'])**2)*2)
|
||||||
else:
|
else:
|
||||||
errSq.append((rating - G.nodes[b]['score'])**2)
|
errSq.append((rating - G.nodes[b]['score'])**2)
|
||||||
G.nodes[b]['rating'] = rating
|
G.nodes[b]['rating'] = rating
|
||||||
if 'score' in G.nodes[b] and G.nodes[b]['score'] != None:
|
if 'score' in G.nodes[b] and G.nodes[b]['score'] != None:
|
||||||
if G.nodes[b]['score'] > 10.0:
|
score = G.nodes[b]['score']
|
||||||
boundsLoss += (G.nodes[b]['score'] - 10)**2
|
if score > 10.0:
|
||||||
elif G.nodes[b]['score'] < 0.0:
|
boundsLoss += (score - 10)**2
|
||||||
boundsLoss += (G.nodes[b]['score'])**2
|
elif score < 0.0:
|
||||||
|
boundsLoss += (score)**2
|
||||||
|
# reward separation linearly
|
||||||
|
boundsLoss -= abs(score - mu)/10
|
||||||
regressionLoss = sum([(1-w)**2 for w in weights.values()])
|
regressionLoss = sum([(1-w)**2 for w in weights.values()])
|
||||||
return sum(errSq)/len(errSq) + regressionLoss/100 + boundsLoss/1000
|
return sum(errSq)/len(errSq) + regressionLoss/100 + boundsLoss/100
|
||||||
|
|
||||||
def train(gamma = 1):
|
def train(gamma = 1):
|
||||||
global weights
|
global weights
|
||||||
@ -807,6 +857,7 @@ def cliInterface():
|
|||||||
p_rec = cmds.add_parser('recommend', description="TODO", aliases=['rec'])
|
p_rec = cmds.add_parser('recommend', description="TODO", aliases=['rec'])
|
||||||
p_rec.add_argument('-n', type=int, default=25, help='number of books to recommend')
|
p_rec.add_argument('-n', type=int, default=25, help='number of books to recommend')
|
||||||
p_rec.add_argument('--tag-based', action="store_true")
|
p_rec.add_argument('--tag-based', action="store_true")
|
||||||
|
p_rec.add_argument('--recommender-based', action="store_true")
|
||||||
|
|
||||||
p_read = cmds.add_parser('read', description="TODO", aliases=[])
|
p_read = cmds.add_parser('read', description="TODO", aliases=[])
|
||||||
p_read.add_argument('--min-rating', type=int, default=0)
|
p_read.add_argument('--min-rating', type=int, default=0)
|
||||||
@ -834,7 +885,11 @@ def cliInterface():
|
|||||||
|
|
||||||
if args.cmd=="recommend":
|
if args.cmd=="recommend":
|
||||||
if args.tag_based:
|
if args.tag_based:
|
||||||
|
if args.recommender_based:
|
||||||
|
raise Exception('tag-based and recommender-based can not be be combined')
|
||||||
recommendNBooksTagBased(G, mu, std, args.n, not args.keep_top_lists)
|
recommendNBooksTagBased(G, mu, std, args.n, not args.keep_top_lists)
|
||||||
|
elif args.recommender_based:
|
||||||
|
recommendNBooksRecommenderBased(G, mu, std, args.n, not args.keep_top_lists, not args.keep_useless_recommenders)
|
||||||
else:
|
else:
|
||||||
recommendNBooks(G, mu, std, args.n, not args.keep_top_lists, not args.keep_useless_recommenders)
|
recommendNBooks(G, mu, std, args.n, not args.keep_top_lists, not args.keep_useless_recommenders)
|
||||||
elif args.cmd=="read":
|
elif args.cmd=="read":
|
||||||
|
Loading…
Reference in New Issue
Block a user