Revert "implemented neuralBins (performance is bad...)"

This reverts commit bd53a83058.
Dominik Moritz Roth 2022-02-24 20:19:00 +01:00
parent 53a7b07c06
commit 2642423289


@@ -7,8 +7,6 @@ import copy
 import random
 import requests
-from collections import defaultdict
-
 import numpy as np
 import pandas as pd
 from scipy.stats import norm
 
@@ -406,9 +404,9 @@ def scoreOpinions(G, globMu, globStd):
             node['score'] = None
 
 def scoreUnread(G, globMu, globStd):
-    neuralBins = defaultdict(list)
-    feedbacks = [globMu-globStd, globMu+globStd]
     for n in list(G.nodes):
+        feedbacks = [globMu]
+        ws = [['mu']]
         node = G.nodes[n]
         if node['t'] == 'book':
             if node['rating'] == None:
@@ -416,41 +414,42 @@ def scoreUnread(G, globMu, globStd):
                 for adj in adjacens:
                     adjNode = G.nodes[adj]
                     if 'score' in adjNode and adjNode['score'] != None:
-                        w = adjNode['t']
+                        w = [adjNode['t'], G[n][adj]['weight'] if 'weight' in G[n][adj] else 1]
                         for fb in adjNode['feedbacks']:
-                            neuralBins[w].append(fb)
                             feedbacks.append(fb)
-            node['mean'], node['std'] = norm.fit(feedbacks)
-            node['median'] = np.percentile(feedbacks, [50], method='linear')[0]
-            node['se'] = globStd / math.sqrt(len(feedbacks))
-            neuralBins['mean'] = [node['mean']]
-            neuralBins['sigma'] = [node['std']]
-            neuralBins['median'] = [node['median']]
-            neuralBins['se'] = [node['se']]
-            neuralBins['pagerank'] = [node['pagerank_score']]
-            if 'tgb_rank' in node:
-                neuralBins['tgbrank'] = [10/math.ln10(10+node['tgb_rank'])]
-            neuralBins['bias'] = [globMu]
-            score = 0
-            nb = dict(neuralBins)
-            act = {}
-            for b in nb:
-                act[b] = sum(nb[b])/len(nb[b])
-                score += act[b] * getWeightForType(b)
-            score /= sum([abs(getWeightForType(b)) for b in nb])
-            node['score'] = math.tanh(score/10)*10
-            node['_act'] = act
+                            ws.append(w)
+            if len(feedbacks):
+                node['mean'], node['std'] = norm.fit(feedbacks)
+                node['median'] = np.percentile(feedbacks, [50], method='linear')[0]
+                node['se'] = globStd / math.sqrt(len(feedbacks))
+                feedbacks.append(node['pagerank_score'])
+                ws.append(['pagerank'])
+                #feedbacks.append(10/math.ln10(10+node['tgb_rank']) if 'tgb_rank' in node else 0)
+                #ws.append(['tgb_rank'])
+                feedbacks.append(node['std'])
+                ws.append(['sigma'])
+                #feedbacks.append(node['median'])
+                #ws.append(['median'])
+                #feedbacks.append(node['se'])
+                #ws.append(['se'])
+                feedbacks.append(globMu)
+                ws.append(['bias'])
+                node['score'] = sum([fb*getWeightForType(w[0], w[1] if len(w)>1 else 1) for fb, w in zip(feedbacks, ws)])/sum([getWeightForType(w[0], w[1] if len(w)>1 else 1) for w in ws])
+                node['_act'] = feedbacks
+                node['_wgh'] = ws
+            else:
+                node['score'] = globMu + errorFac*globStd + len(feedbacks)*0.0000000001
             if 'series' in node:
                 if node['series_index'] == 1.0:
                     node['score'] += 0.000000001
 
-def getWeightForType(nodeType):
+def getWeightForType(nodeType, edgeWeight=1):
     global weights
-    if nodeType not in weights:
-        weights[nodeType] = 0.1
-        saveWeights(weights)
-        print('[i] neuralWeights-Vector extended with >'+nodeType+'<')
-    return weights[nodeType]
+    w = weights[nodeType]
+    if nodeType == 'topList':
+        return edgeWeight*w
+    else:
+        return w
 
 def printBestList(G, t='book', num=-1):
     bestlist = []
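In the restored path, each unread book's score is a plain weighted mean: every feedback value picked up from neighbouring nodes, plus virtual features such as 'pagerank', 'sigma' and 'bias', carries a tag, and getWeightForType maps tags to learned weights. A minimal standalone sketch of that reduction (not code from this repo; the weight values are invented placeholders for the learned ones, and the topList edge-weight handling is omitted):

    # Sketch of the restored weighted-mean scoring; tag names mirror the diff.
    def weighted_score(feedbacks, tags, weights):
        num = sum(fb * weights[t] for fb, t in zip(feedbacks, tags))
        den = sum(weights[t] for t in tags)          # raw weights, may be negative
        return num / den

    weights = {'mu': 0.5, 'author': 0.8, 'pagerank': 0.3, 'sigma': -0.2, 'bias': 0.4}
    feedbacks = [7.0, 8.5, 0.9, 1.2, 7.0]            # globMu, one rating, pagerank, std, globMu
    tags = ['mu', 'author', 'pagerank', 'sigma', 'bias']
    print(weighted_score(feedbacks, tags, weights))  # single scalar score for the node

Unlike the reverted neuralBins variant, there is no per-bin averaging and no tanh squashing; the score stays a linear combination of the feedbacks, which is what makes the per-weight gradient trick further down cheap.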
@@ -1199,7 +1198,7 @@ def findNewBooks(G, books, mu, num=-1, minRecSco=5):
 # while batchSize is implemented, we only get a good gonvergence when we disable it (batchSize=-1)
 # but might be necessary to enable later for a larger libary for better training performance...
 # maybe try again for 128 books?
-def evaluateFitness(books, batchSize=16, debugPrint=False):
+def evaluateFitness(books, batchSize=-1, debugPrint=False):
     global weights
     G = buildBookGraph(books)
     graphAddAuthors(G, books)
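The batchSize default reverts to -1, i.e. mini-batching stays implemented but disabled: per the comment above, full-batch passes converge better at this library size. A sketch of the sampling rule this encodes, assuming the names from the diff:

    import random

    # batch_size=-1 means "use every rated book each pass"; otherwise draw a
    # random subset, as in the batch = random.sample(...) line restored below.
    def pick_batch(rated_books, batch_size=-1):
        if batch_size != -1 and len(rated_books) > batch_size:
            return random.sample(rated_books, batch_size)
        return rated_books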
@@ -1214,13 +1213,12 @@ def evaluateFitness(books, batchSize=16, debugPrint=False):
     linSepLoss = []
     errSq = []
     gradient = {}
-    for w in weights:
-        gradient[w] = 0
+    for wt in weights:
+        gradient[wt] = 0
     mu, sigma = genScores(G, books)
-    batch = random.sample(ratedBooks, batchSize) if batchSize!=-1 and len(ratedBooks) > batchSize else ratedBooks
     for b in G.nodes:
-        if b in ratedBooks:
-            node = G.nodes[b]
+        batch = random.sample(ratedBooks, batchSize) if batchSize!=-1 and len(ratedBooks) > batchSize else ratedBooks
+        if b in batch:
             rating = G.nodes[b]['rating']
             G.nodes[b]['rating'] = None
             _, _ = genScores(G, books, calcPagerank=False)
@@ -1229,20 +1227,17 @@ def evaluateFitness(books, batchSize=16, debugPrint=False):
             else:
                 errSq.append((rating - G.nodes[b]['score'])**2)
             G.nodes[b]['rating'] = rating
-            if b in batch:
-                for wt in weights:
-                    scoreB = 0
-                    for w in node['_act']:
-                        scoreB += node['_act'][w] * (getWeightForType(w) + (0.001 if wt==w else 0))
-                    scoreB /= sum([abs(getWeightForType(w)) for w in node['_act']])
-                    scoreB = math.tanh(scoreB/10)*10
-                    gradient[wt] += ((rating - G.nodes[b]['score'])**2 - (rating - scoreB)**2)*1000
+            for wt in weights:
+                scoreB = sum([a*(1.001 if wt==w[0] else 1)*weights[w[0]]*(w[1] if len(w)>1 else 1) for a,w in zip(G.nodes[b]['_act'], G.nodes[b]['_wgh'])])/sum([(1.001 if wt==w[0] else 1)*weights[w[0]]*(w[1] if len(w)>1 else 1) for w in G.nodes[b]['_wgh']])
+                gradient[wt] += ((rating - G.nodes[b]['score'])**2 - (rating - scoreB)**2)*1000
     regressionLoss = sum([max(0,abs(w)-1)**2 for w in weights.values()]) # no punishment if w within -1 and 1
     for wt in weights:
         if abs(weights[wt]) > 1.0:
-            gradient[wt] -= weights[wt]*3
+            gradient[wt] -= weights[wt]*10
+        else:
+            gradient[wt] -= weights[wt]*1
     for g in gradient:
-        gradient[g] /= len(batch)
+        gradient[g] /= len(errSq)
     if debugPrint:
         print(sum(errSq)/len(errSq), 0.001*regressionLoss)
     fit = sum(errSq)/len(errSq) + 0.001*regressionLoss
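The restored gradient is a finite-difference estimate: for each weight, the held-out book is re-scored with that single weight scaled by 1.001, and the resulting change in squared error (times 1000) becomes the gradient component. A self-contained sketch of the idea, reusing the hypothetical weighted_score helper sketched above (acts/tags mirror the _act/_wgh fields in the diff):

    # Finite-difference gradient over the weights of a weighted-mean score.
    def finite_diff_gradient(rating, acts, tags, weights, bump=1.001, scale=1000):
        base = weighted_score(acts, tags, weights)
        grad = {}
        for wt in weights:
            bumped = {k: v * bump if k == wt else v for k, v in weights.items()}
            score_b = weighted_score(acts, tags, bumped)
            # positive when nudging wt upward reduces the squared error
            grad[wt] = ((rating - base)**2 - (rating - score_b)**2) * scale
        return grad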
@@ -1258,7 +1253,7 @@ def train(initGamma, full=True):
     books = loadBooksFromDB()
     bestWeights = copy.copy(weights)
     mse, gradient = evaluateFitness(books)
-    delta = math.sqrt(sum(gradient[g]**2 for g in gradient)/len(gradient))
+    delta = sum(gradient[g]**2 for g in gradient)
     best_mse = mse
     stagLen = 0
     goal = 1.0e-4
@@ -1271,10 +1266,7 @@ def train(initGamma, full=True):
         print({'mse': mse, 'gamma': gamma, 'delta': delta})
         delta = sum(gradient[g]**2 for g in gradient)
         for wt in weights:
-            if wt in gradient:
-                weights[wt] += gamma*gradient[wt]/math.sqrt(delta)
-            #else:
-            #    del weights[wt]
+            weights[wt] += gamma*gradient[wt]/math.sqrt(delta)
         mse, gradient = evaluateFitness(books)
         if mse < last_mse:
             gamma = gamma*1.25
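The restored update is a gradient step normalized by the root of the summed squared gradient, with gamma adapted multiplicatively (grown by 1.25 after an improving step, as above). A sketch of one such step under the same dict-of-weights layout (function name and delta guard are assumptions, not from the repo):

    import math

    # One normalized gradient step; gamma adaptation happens in the caller.
    def training_step(weights, gradient, gamma):
        delta = sum(g**2 for g in gradient.values())
        if delta > 0:
            for wt in weights:
                weights[wt] += gamma * gradient[wt] / math.sqrt(delta)
        return weights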