diff --git a/caliGraph.py b/caliGraph.py index b585cce..f968712 100755 --- a/caliGraph.py +++ b/caliGraph.py @@ -7,8 +7,6 @@ import copy import random import requests -from collections import defaultdict - import numpy as np import pandas as pd from scipy.stats import norm @@ -406,9 +404,9 @@ def scoreOpinions(G, globMu, globStd): node['score'] = None def scoreUnread(G, globMu, globStd): - neuralBins = defaultdict(list) - feedbacks = [globMu-globStd, globMu+globStd] for n in list(G.nodes): + feedbacks = [globMu] + ws = [['mu']] node = G.nodes[n] if node['t'] == 'book': if node['rating'] == None: @@ -416,41 +414,42 @@ def scoreUnread(G, globMu, globStd): for adj in adjacens: adjNode = G.nodes[adj] if 'score' in adjNode and adjNode['score'] != None: - w = adjNode['t'] + w = [adjNode['t'], G[n][adj]['weight'] if 'weight' in G[n][adj] else 1] for fb in adjNode['feedbacks']: - neuralBins[w].append(fb) feedbacks.append(fb) - node['mean'], node['std'] = norm.fit(feedbacks) - node['median'] = np.percentile(feedbacks, [50], method='linear')[0] - node['se'] = globStd / math.sqrt(len(feedbacks)) - neuralBins['mean'] = [node['mean']] - neuralBins['sigma'] = [node['std']] - neuralBins['median'] = [node['median']] - neuralBins['se'] = [node['se']] - neuralBins['pagerank'] = [node['pagerank_score']] - if 'tgb_rank' in node: - neuralBins['tgbrank'] = [10/math.ln10(10+node['tgb_rank'])] - neuralBins['bias'] = [globMu] - score = 0 - nb = dict(neuralBins) - act = {} - for b in nb: - act[b] = sum(nb[b])/len(nb[b]) - score += act[b] * getWeightForType(b) - score /= sum([abs(getWeightForType(b)) for b in nb]) - node['score'] = math.tanh(score/10)*10 - node['_act'] = act + ws.append(w) + if len(feedbacks): + node['mean'], node['std'] = norm.fit(feedbacks) + node['median'] = np.percentile(feedbacks, [50], method='linear')[0] + node['se'] = globStd / math.sqrt(len(feedbacks)) + feedbacks.append(node['pagerank_score']) + ws.append(['pagerank']) + #feedbacks.append(10/math.ln10(10+node['tgb_rank']) if 'tgb_rank' in node else 0) + #ws.append(['tgb_rank']) + feedbacks.append(node['std']) + ws.append(['sigma']) + #feedbacks.append(node['median']) + #ws.append(['median']) + #feedbacks.append(node['se']) + #ws.append(['se']) + feedbacks.append(globMu) + ws.append(['bias']) + node['score'] = sum([fb*getWeightForType(w[0], w[1] if len(w)>1 else 1) for fb, w in zip(feedbacks, ws)])/sum([getWeightForType(w[0], w[1] if len(w)>1 else 1) for w in ws]) + node['_act'] = feedbacks + node['_wgh'] = ws + else: + node['score'] = globMu + errorFac*globStd + len(feedbacks)*0.0000000001 if 'series' in node: if node['series_index'] == 1.0: node['score'] += 0.000000001 -def getWeightForType(nodeType): +def getWeightForType(nodeType, edgeWeight=1): global weights - if nodeType not in weights: - weights[nodeType] = 0.1 - saveWeights(weights) - print('[i] neuralWeights-Vector extended with >'+nodeType+'<') - return weights[nodeType] + w = weights[nodeType] + if nodeType == 'topList': + return edgeWeight*w + else: + return w def printBestList(G, t='book', num=-1): bestlist = [] @@ -1199,7 +1198,7 @@ def findNewBooks(G, books, mu, num=-1, minRecSco=5): # while batchSize is implemented, we only get a good gonvergence when we disable it (batchSize=-1) # but might be necessary to enable later for a larger libary for better training performance... # maybe try again for 128 books? -def evaluateFitness(books, batchSize=16, debugPrint=False): +def evaluateFitness(books, batchSize=-1, debugPrint=False): global weights G = buildBookGraph(books) graphAddAuthors(G, books) @@ -1214,13 +1213,12 @@ def evaluateFitness(books, batchSize=16, debugPrint=False): linSepLoss = [] errSq = [] gradient = {} - for w in weights: - gradient[w] = 0 + for wt in weights: + gradient[wt] = 0 mu, sigma = genScores(G, books) - batch = random.sample(ratedBooks, batchSize) if batchSize!=-1 and len(ratedBooks) > batchSize else ratedBooks for b in G.nodes: - if b in ratedBooks: - node = G.nodes[b] + batch = random.sample(ratedBooks, batchSize) if batchSize!=-1 and len(ratedBooks) > batchSize else ratedBooks + if b in batch: rating = G.nodes[b]['rating'] G.nodes[b]['rating'] = None _, _ = genScores(G, books, calcPagerank=False) @@ -1229,20 +1227,17 @@ def evaluateFitness(books, batchSize=16, debugPrint=False): else: errSq.append((rating - G.nodes[b]['score'])**2) G.nodes[b]['rating'] = rating - if b in batch: - for wt in weights: - scoreB = 0 - for w in node['_act']: - scoreB += node['_act'][w] * (getWeightForType(w) + (0.001 if wt==w else 0)) - scoreB /= sum([abs(getWeightForType(w)) for w in node['_act']]) - scoreB = math.tanh(scoreB/10)*10 - gradient[wt] += ((rating - G.nodes[b]['score'])**2 - (rating - scoreB)**2)*1000 + for wt in weights: + scoreB = sum([a*(1.001 if wt==w[0] else 1)*weights[w[0]]*(w[1] if len(w)>1 else 1) for a,w in zip(G.nodes[b]['_act'], G.nodes[b]['_wgh'])])/sum([(1.001 if wt==w[0] else 1)*weights[w[0]]*(w[1] if len(w)>1 else 1) for w in G.nodes[b]['_wgh']]) + gradient[wt] += ((rating - G.nodes[b]['score'])**2 - (rating - scoreB)**2)*1000 regressionLoss = sum([max(0,abs(w)-1)**2 for w in weights.values()]) # no punishment if w within -1 and 1 for wt in weights: if abs(weights[wt]) > 1.0: - gradient[wt] -= weights[wt]*3 + gradient[wt] -= weights[wt]*10 + else: + gradient[wt] -= weights[wt]*1 for g in gradient: - gradient[g] /= len(batch) + gradient[g] /= len(errSq) if debugPrint: print(sum(errSq)/len(errSq), 0.001*regressionLoss) fit = sum(errSq)/len(errSq) + 0.001*regressionLoss @@ -1258,7 +1253,7 @@ def train(initGamma, full=True): books = loadBooksFromDB() bestWeights = copy.copy(weights) mse, gradient = evaluateFitness(books) - delta = math.sqrt(sum(gradient[g]**2 for g in gradient)/len(gradient)) + delta = sum(gradient[g]**2 for g in gradient) best_mse = mse stagLen = 0 goal = 1.0e-4 @@ -1271,10 +1266,7 @@ def train(initGamma, full=True): print({'mse': mse, 'gamma': gamma, 'delta': delta}) delta = sum(gradient[g]**2 for g in gradient) for wt in weights: - if wt in gradient: - weights[wt] += gamma*gradient[wt]/math.sqrt(delta) - #else: - # del weights[wt] + weights[wt] += gamma*gradient[wt]/math.sqrt(delta) mse, gradient = evaluateFitness(books) if mse < last_mse: gamma = gamma*1.25