Switched to a simpler Bayesian model for score generation

This commit is contained in:
Dominik Moritz Roth 2021-09-24 14:49:59 +02:00
parent 4fa3a57cc7
commit 0231d97a42

View File

@ -294,7 +294,7 @@ def removeUselessReadBooks(G):
else: # No unrated book in cousins else: # No unrated book in cousins
G.remove_node(n) G.remove_node(n)
def scoreOpinions(G, globMu, globStd, errorFac=0.5): def scoreOpinions(G, globMu, globStd, errorFac=-0.5):
for n in list(G.nodes): for n in list(G.nodes):
node = G.nodes[n] node = G.nodes[n]
feedbacks = [] feedbacks = []
@ -308,7 +308,7 @@ def scoreOpinions(G, globMu, globStd, errorFac=0.5):
node['mean'], node['std'] = norm.fit(feedbacks) node['mean'], node['std'] = norm.fit(feedbacks)
node['se'] = globStd / math.sqrt(len(feedbacks)) node['se'] = globStd / math.sqrt(len(feedbacks))
ratio = len(feedbacks) / len(adjacens) ratio = len(feedbacks) / len(adjacens)
node['score'] = node['mean'] - errorFac * \ node['score'] = node['mean'] + errorFac * \
node['se']*(6/7 + (1-ratio)/7) + 0.01 * \ node['se']*(6/7 + (1-ratio)/7) + 0.01 * \
(node['t'] == 'recommender') \ (node['t'] == 'recommender') \
- 0.5 / len(feedbacks)**2 - 0.5 / len(feedbacks)**2
@ -316,12 +316,10 @@ def scoreOpinions(G, globMu, globStd, errorFac=0.5):
else: else:
node['score'] = None node['score'] = None
def scoreUnread(G, globMu, globStd, errorFac=-0.6):
def scoreUnread(G, globMu, globStd, errorFac=0.6):
for n in list(G.nodes): for n in list(G.nodes):
feedbacks = [] feedbacks = [globMu]
deepFeedbacks = [globMu - globStd*0.5] wheights = [getWheightForType('mu')]
deepLen = 1
node = G.nodes[n] node = G.nodes[n]
if node['t'] == 'book': if node['t'] == 'book':
if node['rating'] == None: if node['rating'] == None:
@ -329,33 +327,27 @@ def scoreUnread(G, globMu, globStd, errorFac=0.6):
for adj in adjacens: for adj in adjacens:
adjNode = G.nodes[adj] adjNode = G.nodes[adj]
if 'score' in adjNode and adjNode['score'] != None: if 'score' in adjNode and adjNode['score'] != None:
if adjNode['t'] == 'tag': w = getWheightForType(adjNode['t'], G[n][adj]['wheight'] if 'wheight' in G[n][adj] else None)
w = int(10/(len(G.adj[adj])))
elif adjNode['t'] == 'topList':
w = int(G[n][adj]['wheight']*5)
else:
w = 10
feedbacks.append(adjNode['score'])
for fb in adjNode['feedbacks']: for fb in adjNode['feedbacks']:
for i in range(w): feedbacks.append(fb)
deepFeedbacks.append(fb) wheights.append(w)
deepLen += w
if len(feedbacks): if len(feedbacks):
node['mean'], node['std'] = norm.fit(deepFeedbacks) node['meanUnweighted'], node['std'] = norm.fit(feedbacks)
node['mean2'], node['std2'] = norm.fit(feedbacks) node['mean'] = sum([fb*w for fb, w in zip(feedbacks, wheights)])/len(feedbacks)
if deepLen: node['se'] = globStd / math.sqrt(len(feedbacks))
node['se'] = globStd / math.sqrt(deepLen) node['score'] = node['mean'] + errorFac*node['se']
# - errorFac*node['se']
node['score'] = (
(node['mean'] - errorFac*node['se'])*3 + node['mean2']*2)/5
else:
node['score'] = globMu - errorFac*globStd
if 'series' in node:
if node['series_index'] == 1.0:
node['score'] += 0.000000001
else: else:
node['score'] = None node['score'] = globMu + errorFac*globStd + len(feedbacks)*0.0000000001
if 'series' in node:
if node['series_index'] == 1.0:
node['score'] += 0.000000001
# TODO: Make this neural and train it
def getWheightForType(nodeType, edgeWheight=None):
    """Return the multiplier applied to feedback from a node of type *nodeType*.

    Args:
        nodeType: Type tag of the node the feedback comes from (the node's
            ``'t'`` attribute, or a pseudo-type such as ``'mu'``).
        edgeWheight: Optional ``'wheight'`` attribute of the edge leading to
            that node; only consulted for ``'topList'`` nodes.

    Returns:
        ``edgeWheight*0.5`` for ``'topList'`` nodes that carry an edge
        weight, ``1.0`` in every other case.
    """
    # An edge without a 'wheight' attribute is passed in as None; multiplying
    # None by 0.5 would raise TypeError, so fall back to the neutral weight.
    if nodeType == 'topList' and edgeWheight is not None:
        return edgeWheight*0.5
    return 1.0
def printBestList(G, num=-1): def printBestList(G, num=-1):
bestlist = [] bestlist = []