implemented neuralBins (performance is bad...)

This commit is contained in:
Dominik Moritz Roth 2022-02-22 10:37:16 +01:00
parent 880cb6ba7e
commit bd53a83058

View File

@ -7,6 +7,8 @@ import copy
import random import random
import requests import requests
from collections import defaultdict
import numpy as np import numpy as np
import pandas as pd import pandas as pd
from scipy.stats import norm from scipy.stats import norm
@ -404,9 +406,9 @@ def scoreOpinions(G, globMu, globStd):
node['score'] = None node['score'] = None
def scoreUnread(G, globMu, globStd): def scoreUnread(G, globMu, globStd):
neuralBins = defaultdict(list)
feedbacks = [globMu-globStd, globMu+globStd]
for n in list(G.nodes): for n in list(G.nodes):
feedbacks = [globMu]
ws = [['mu']]
node = G.nodes[n] node = G.nodes[n]
if node['t'] == 'book': if node['t'] == 'book':
if node['rating'] == None: if node['rating'] == None:
@ -414,42 +416,41 @@ def scoreUnread(G, globMu, globStd):
for adj in adjacens: for adj in adjacens:
adjNode = G.nodes[adj] adjNode = G.nodes[adj]
if 'score' in adjNode and adjNode['score'] != None: if 'score' in adjNode and adjNode['score'] != None:
w = [adjNode['t'], G[n][adj]['weight'] if 'weight' in G[n][adj] else 1] w = adjNode['t']
for fb in adjNode['feedbacks']: for fb in adjNode['feedbacks']:
neuralBins[w].append(fb)
feedbacks.append(fb) feedbacks.append(fb)
ws.append(w) node['mean'], node['std'] = norm.fit(feedbacks)
if len(feedbacks): node['median'] = np.percentile(feedbacks, [50], method='linear')[0]
node['mean'], node['std'] = norm.fit(feedbacks) node['se'] = globStd / math.sqrt(len(feedbacks))
node['median'] = np.percentile(feedbacks, [50], method='linear')[0] neuralBins['mean'] = [node['mean']]
node['se'] = globStd / math.sqrt(len(feedbacks)) neuralBins['sigma'] = [node['std']]
feedbacks.append(node['pagerank_score']) neuralBins['median'] = [node['median']]
ws.append(['pagerank']) neuralBins['se'] = [node['se']]
#feedbacks.append(10/math.ln10(10+node['tgb_rank']) if 'tgb_rank' in node else 0) neuralBins['pagerank'] = [node['pagerank_score']]
#ws.append(['tgb_rank']) if 'tgb_rank' in node:
feedbacks.append(node['std']) neuralBins['tgbrank'] = [10/math.ln10(10+node['tgb_rank'])]
ws.append(['sigma']) neuralBins['bias'] = [globMu]
#feedbacks.append(node['median']) score = 0
#ws.append(['median']) nb = dict(neuralBins)
#feedbacks.append(node['se']) act = {}
#ws.append(['se']) for b in nb:
feedbacks.append(globMu) act[b] = sum(nb[b])/len(nb[b])
ws.append(['bias']) score += act[b] * getWeightForType(b)
node['score'] = sum([fb*getWeightForType(w[0], w[1] if len(w)>1 else 1) for fb, w in zip(feedbacks, ws)])/sum([getWeightForType(w[0], w[1] if len(w)>1 else 1) for w in ws]) score /= sum([abs(getWeightForType(b)) for b in nb])
node['_act'] = feedbacks node['score'] = math.tanh(score/10)*10
node['_wgh'] = ws node['_act'] = act
else:
node['score'] = globMu + errorFac*globStd + len(feedbacks)*0.0000000001
if 'series' in node: if 'series' in node:
if node['series_index'] == 1.0: if node['series_index'] == 1.0:
node['score'] += 0.000000001 node['score'] += 0.000000001
def getWeightForType(nodeType, edgeWeight=1): def getWeightForType(nodeType):
global weights global weights
w = weights[nodeType] if nodeType not in weights:
if nodeType == 'topList': weights[nodeType] = 0.1
return edgeWeight*w saveWeights(weights)
else: print('[i] neuralWeights-Vector extended with >'+nodeType+'<')
return w return weights[nodeType]
def printBestList(G, t='book', num=-1): def printBestList(G, t='book', num=-1):
bestlist = [] bestlist = []
@ -1198,7 +1199,7 @@ def findNewBooks(G, books, mu, num=-1, minRecSco=5):
# while batchSize is implemented, we only get a good convergence when we disable it (batchSize=-1) # while batchSize is implemented, we only get a good convergence when we disable it (batchSize=-1)
# but might be necessary to enable later for a larger library for better training performance... # but might be necessary to enable later for a larger library for better training performance...
# maybe try again for 128 books? # maybe try again for 128 books?
def evaluateFitness(books, batchSize=-1, debugPrint=False): def evaluateFitness(books, batchSize=16, debugPrint=False):
global weights global weights
G = buildBookGraph(books) G = buildBookGraph(books)
graphAddAuthors(G, books) graphAddAuthors(G, books)
@ -1213,12 +1214,13 @@ def evaluateFitness(books, batchSize=-1, debugPrint=False):
linSepLoss = [] linSepLoss = []
errSq = [] errSq = []
gradient = {} gradient = {}
for wt in weights: for w in weights:
gradient[wt] = 0 gradient[w] = 0
mu, sigma = genScores(G, books) mu, sigma = genScores(G, books)
batch = random.sample(ratedBooks, batchSize) if batchSize!=-1 and len(ratedBooks) > batchSize else ratedBooks
for b in G.nodes: for b in G.nodes:
batch = random.sample(ratedBooks, batchSize) if batchSize!=-1 and len(ratedBooks) > batchSize else ratedBooks if b in ratedBooks:
if b in batch: node = G.nodes[b]
rating = G.nodes[b]['rating'] rating = G.nodes[b]['rating']
G.nodes[b]['rating'] = None G.nodes[b]['rating'] = None
_, _ = genScores(G, books, calcPagerank=False) _, _ = genScores(G, books, calcPagerank=False)
@ -1227,17 +1229,20 @@ def evaluateFitness(books, batchSize=-1, debugPrint=False):
else: else:
errSq.append((rating - G.nodes[b]['score'])**2) errSq.append((rating - G.nodes[b]['score'])**2)
G.nodes[b]['rating'] = rating G.nodes[b]['rating'] = rating
for wt in weights: if b in batch:
scoreB = sum([a*(1.001 if wt==w[0] else 1)*weights[w[0]]*(w[1] if len(w)>1 else 1) for a,w in zip(G.nodes[b]['_act'], G.nodes[b]['_wgh'])])/sum([(1.001 if wt==w[0] else 1)*weights[w[0]]*(w[1] if len(w)>1 else 1) for w in G.nodes[b]['_wgh']]) for wt in weights:
gradient[wt] += ((rating - G.nodes[b]['score'])**2 - (rating - scoreB)**2)*1000 scoreB = 0
for w in node['_act']:
scoreB += node['_act'][w] * (getWeightForType(w) + (0.001 if wt==w else 0))
scoreB /= sum([abs(getWeightForType(w)) for w in node['_act']])
scoreB = math.tanh(scoreB/10)*10
gradient[wt] += ((rating - G.nodes[b]['score'])**2 - (rating - scoreB)**2)*1000
regressionLoss = sum([max(0,abs(w)-1)**2 for w in weights.values()]) # no punishment if w within -1 and 1 regressionLoss = sum([max(0,abs(w)-1)**2 for w in weights.values()]) # no punishment if w within -1 and 1
for wt in weights: for wt in weights:
if abs(weights[wt]) > 1.0: if abs(weights[wt]) > 1.0:
gradient[wt] -= weights[wt]*10 gradient[wt] -= weights[wt]*3
else:
gradient[wt] -= weights[wt]*1
for g in gradient: for g in gradient:
gradient[g] /= len(errSq) gradient[g] /= len(batch)
if debugPrint: if debugPrint:
print(sum(errSq)/len(errSq), 0.001*regressionLoss) print(sum(errSq)/len(errSq), 0.001*regressionLoss)
fit = sum(errSq)/len(errSq) + 0.001*regressionLoss fit = sum(errSq)/len(errSq) + 0.001*regressionLoss
@ -1253,7 +1258,7 @@ def train(initGamma, full=True):
books = loadBooksFromDB() books = loadBooksFromDB()
bestWeights = copy.copy(weights) bestWeights = copy.copy(weights)
mse, gradient = evaluateFitness(books) mse, gradient = evaluateFitness(books)
delta = sum(gradient[g]**2 for g in gradient) delta = math.sqrt(sum(gradient[g]**2 for g in gradient)/len(gradient))
best_mse = mse best_mse = mse
stagLen = 0 stagLen = 0
goal = 1.0e-4 goal = 1.0e-4
@ -1266,7 +1271,10 @@ def train(initGamma, full=True):
print({'mse': mse, 'gamma': gamma, 'delta': delta}) print({'mse': mse, 'gamma': gamma, 'delta': delta})
delta = sum(gradient[g]**2 for g in gradient) delta = sum(gradient[g]**2 for g in gradient)
for wt in weights: for wt in weights:
weights[wt] += gamma*gradient[wt]/math.sqrt(delta) if wt in gradient:
weights[wt] += gamma*gradient[wt]/math.sqrt(delta)
#else:
# del weights[wt]
mse, gradient = evaluateFitness(books) mse, gradient = evaluateFitness(books)
if mse < last_mse: if mse < last_mse:
gamma = gamma*1.25 gamma = gamma*1.25