Score unread books from per-type "neural bins" instead of a flat weighted list.

Summary of what this patch changes in caliGraph.py:

- scoreUnread(): neighbour feedbacks are grouped into bins keyed by the
  neighbour's node type; extra single-value bins (mean, sigma, median, se,
  pagerank, tgbrank when present, bias) are added; every bin is averaged,
  multiplied by getWeightForType(bin), normalised by the sum of absolute
  weights and squashed with tanh(score/10)*10. The per-bin averages are
  stored in node['_act'] (now a dict instead of a list; node['_wgh'] is no
  longer written).
- getWeightForType(): the edgeWeight parameter is removed; a type missing
  from the global weights dict is initialised to 0.1, persisted through
  saveWeights() and reported on stdout.
- evaluateFitness(): default batchSize becomes 16; the batch is sampled once
  per call instead of once per node; squared errors are still collected for
  every rated book, while gradient contributions are computed only for books
  in the batch (finite difference of +0.001 on one weight at a time) and
  normalised by len(batch). The pull on weights with |w| > 1 changes from
  10x to 3x and the pull on weights within [-1, 1] is dropped.
- train(): the initial delta is the root mean square of the gradient; a
  weight is only updated when the gradient dict has an entry for it.

NOTE(review): the line breaks of this patch were restored from the hunk
line counts; indentation was reconstructed as 4 spaces from the Python
block structure and should be verified against the target file (or apply
with `git apply --ignore-whitespace`).
NOTE(review): two added lines deliberately differ from the originally
recorded patch, so the post-image hash in the `index` line no longer
describes the result exactly:
  1. `math.ln10` -> `math.log10` (the math module has no `ln10`).
  2. `neuralBins` / `feedbacks` are initialised inside the per-node loop,
     replacing the removed per-node `feedbacks` / `ws` initialisation, so
     feedbacks of one book no longer leak into the scores of later books.

diff --git a/caliGraph.py b/caliGraph.py
index 2dc974a..703874d 100755
--- a/caliGraph.py
+++ b/caliGraph.py
@@ -7,6 +7,8 @@ import copy
 import random
 import requests
 
+from collections import defaultdict
+
 import numpy as np
 import pandas as pd
 from scipy.stats import norm
@@ -404,9 +406,9 @@ def scoreOpinions(G, globMu, globStd):
                 node['score'] = None
 
 def scoreUnread(G, globMu, globStd):
     for n in list(G.nodes):
-        feedbacks = [globMu]
-        ws = [['mu']]
+        neuralBins = defaultdict(list)
+        feedbacks = [globMu-globStd, globMu+globStd]
         node = G.nodes[n]
         if node['t'] == 'book':
             if node['rating'] == None:
@@ -414,42 +416,41 @@ def scoreUnread(G, globMu, globStd):
                 for adj in adjacens:
                     adjNode = G.nodes[adj]
                     if 'score' in adjNode and adjNode['score'] != None:
-                        w = [adjNode['t'], G[n][adj]['weight'] if 'weight' in G[n][adj] else 1]
+                        w = adjNode['t']
                         for fb in adjNode['feedbacks']:
+                            neuralBins[w].append(fb)
                             feedbacks.append(fb)
-                            ws.append(w)
-                if len(feedbacks):
-                    node['mean'], node['std'] = norm.fit(feedbacks)
-                    node['median'] = np.percentile(feedbacks, [50], method='linear')[0]
-                    node['se'] = globStd / math.sqrt(len(feedbacks))
-                    feedbacks.append(node['pagerank_score'])
-                    ws.append(['pagerank'])
-                    #feedbacks.append(10/math.ln10(10+node['tgb_rank']) if 'tgb_rank' in node else 0)
-                    #ws.append(['tgb_rank'])
-                    feedbacks.append(node['std'])
-                    ws.append(['sigma'])
-                    #feedbacks.append(node['median'])
-                    #ws.append(['median'])
-                    #feedbacks.append(node['se'])
-                    #ws.append(['se'])
-                    feedbacks.append(globMu)
-                    ws.append(['bias'])
-                    node['score'] = sum([fb*getWeightForType(w[0], w[1] if len(w)>1 else 1) for fb, w in zip(feedbacks, ws)])/sum([getWeightForType(w[0], w[1] if len(w)>1 else 1) for w in ws])
-                    node['_act'] = feedbacks
-                    node['_wgh'] = ws
-                else:
-                    node['score'] = globMu + errorFac*globStd + len(feedbacks)*0.0000000001
+                node['mean'], node['std'] = norm.fit(feedbacks)
+                node['median'] = np.percentile(feedbacks, [50], method='linear')[0]
+                node['se'] = globStd / math.sqrt(len(feedbacks))
+                neuralBins['mean'] = [node['mean']]
+                neuralBins['sigma'] = [node['std']]
+                neuralBins['median'] = [node['median']]
+                neuralBins['se'] = [node['se']]
+                neuralBins['pagerank'] = [node['pagerank_score']]
+                if 'tgb_rank' in node:
+                    neuralBins['tgbrank'] = [10/math.log10(10+node['tgb_rank'])]
+                neuralBins['bias'] = [globMu]
+                score = 0
+                nb = dict(neuralBins)
+                act = {}
+                for b in nb:
+                    act[b] = sum(nb[b])/len(nb[b])
+                    score += act[b] * getWeightForType(b)
+                score /= sum([abs(getWeightForType(b)) for b in nb])
+                node['score'] = math.tanh(score/10)*10
+                node['_act'] = act
                 if 'series' in node:
                     if node['series_index'] == 1.0:
                         node['score'] += 0.000000001
 
-def getWeightForType(nodeType, edgeWeight=1):
+def getWeightForType(nodeType):
     global weights
-    w = weights[nodeType]
-    if nodeType == 'topList':
-        return edgeWeight*w
-    else:
-        return w
+    if nodeType not in weights:
+        weights[nodeType] = 0.1
+        saveWeights(weights)
+        print('[i] neuralWeights-Vector extended with >'+nodeType+'<')
+    return weights[nodeType]
 
 def printBestList(G, t='book', num=-1):
     bestlist = []
@@ -1198,7 +1199,7 @@ def findNewBooks(G, books, mu, num=-1, minRecSco=5):
 # while batchSize is implemented, we only get a good gonvergence when we disable it (batchSize=-1)
 # but might be necessary to enable later for a larger libary for better training performance...
 # maybe try again for 128 books?
-def evaluateFitness(books, batchSize=-1, debugPrint=False):
+def evaluateFitness(books, batchSize=16, debugPrint=False):
     global weights
     G = buildBookGraph(books)
     graphAddAuthors(G, books)
@@ -1213,12 +1214,13 @@ def evaluateFitness(books, batchSize=-1, debugPrint=False):
     linSepLoss = []
     errSq = []
     gradient = {}
-    for wt in weights:
-        gradient[wt] = 0
+    for w in weights:
+        gradient[w] = 0
     mu, sigma = genScores(G, books)
+    batch = random.sample(ratedBooks, batchSize) if batchSize!=-1 and len(ratedBooks) > batchSize else ratedBooks
     for b in G.nodes:
-        batch = random.sample(ratedBooks, batchSize) if batchSize!=-1 and len(ratedBooks) > batchSize else ratedBooks
-        if b in batch:
+        if b in ratedBooks:
+            node = G.nodes[b]
             rating = G.nodes[b]['rating']
             G.nodes[b]['rating'] = None
             _, _ = genScores(G, books, calcPagerank=False)
@@ -1227,17 +1229,20 @@ def evaluateFitness(books, batchSize=-1, debugPrint=False):
             else:
                 errSq.append((rating - G.nodes[b]['score'])**2)
             G.nodes[b]['rating'] = rating
-            for wt in weights:
-                scoreB = sum([a*(1.001 if wt==w[0] else 1)*weights[w[0]]*(w[1] if len(w)>1 else 1) for a,w in zip(G.nodes[b]['_act'], G.nodes[b]['_wgh'])])/sum([(1.001 if wt==w[0] else 1)*weights[w[0]]*(w[1] if len(w)>1 else 1) for w in G.nodes[b]['_wgh']])
-                gradient[wt] += ((rating - G.nodes[b]['score'])**2 - (rating - scoreB)**2)*1000
+            if b in batch:
+                for wt in weights:
+                    scoreB = 0
+                    for w in node['_act']:
+                        scoreB += node['_act'][w] * (getWeightForType(w) + (0.001 if wt==w else 0))
+                    scoreB /= sum([abs(getWeightForType(w)) for w in node['_act']])
+                    scoreB = math.tanh(scoreB/10)*10
+                    gradient[wt] += ((rating - G.nodes[b]['score'])**2 - (rating - scoreB)**2)*1000
     regressionLoss = sum([max(0,abs(w)-1)**2 for w in weights.values()]) # no punishment if w within -1 and 1
     for wt in weights:
         if abs(weights[wt]) > 1.0:
-            gradient[wt] -= weights[wt]*10
-        else:
-            gradient[wt] -= weights[wt]*1
+            gradient[wt] -= weights[wt]*3
     for g in gradient:
-        gradient[g] /= len(errSq)
+        gradient[g] /= len(batch)
     if debugPrint:
         print(sum(errSq)/len(errSq), 0.001*regressionLoss)
     fit = sum(errSq)/len(errSq) + 0.001*regressionLoss
@@ -1253,7 +1258,7 @@ def train(initGamma, full=True):
     books = loadBooksFromDB()
     bestWeights = copy.copy(weights)
     mse, gradient = evaluateFitness(books)
-    delta = sum(gradient[g]**2 for g in gradient)
+    delta = math.sqrt(sum(gradient[g]**2 for g in gradient)/len(gradient))
     best_mse = mse
     stagLen = 0
     goal = 1.0e-4
@@ -1266,7 +1271,10 @@ def train(initGamma, full=True):
         print({'mse': mse, 'gamma': gamma, 'delta': delta})
         delta = sum(gradient[g]**2 for g in gradient)
         for wt in weights:
-            weights[wt] += gamma*gradient[wt]/math.sqrt(delta)
+            if wt in gradient:
+                weights[wt] += gamma*gradient[wt]/math.sqrt(delta)
+            #else:
+            #    del weights[wt]
         mse, gradient = evaluateFitness(books)
         if mse < last_mse:
             gamma = gamma*1.25