implemented neuralBins (performance is bad...)
This commit is contained in:
parent
880cb6ba7e
commit
bd53a83058
98
caliGraph.py
98
caliGraph.py
@ -7,6 +7,8 @@ import copy
|
||||
import random
|
||||
import requests
|
||||
|
||||
from collections import defaultdict
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
from scipy.stats import norm
|
||||
@ -404,9 +406,9 @@ def scoreOpinions(G, globMu, globStd):
|
||||
node['score'] = None
|
||||
|
||||
def scoreUnread(G, globMu, globStd):
|
||||
neuralBins = defaultdict(list)
|
||||
feedbacks = [globMu-globStd, globMu+globStd]
|
||||
for n in list(G.nodes):
|
||||
feedbacks = [globMu]
|
||||
ws = [['mu']]
|
||||
node = G.nodes[n]
|
||||
if node['t'] == 'book':
|
||||
if node['rating'] == None:
|
||||
@ -414,42 +416,41 @@ def scoreUnread(G, globMu, globStd):
|
||||
for adj in adjacens:
|
||||
adjNode = G.nodes[adj]
|
||||
if 'score' in adjNode and adjNode['score'] != None:
|
||||
w = [adjNode['t'], G[n][adj]['weight'] if 'weight' in G[n][adj] else 1]
|
||||
w = adjNode['t']
|
||||
for fb in adjNode['feedbacks']:
|
||||
neuralBins[w].append(fb)
|
||||
feedbacks.append(fb)
|
||||
ws.append(w)
|
||||
if len(feedbacks):
|
||||
node['mean'], node['std'] = norm.fit(feedbacks)
|
||||
node['median'] = np.percentile(feedbacks, [50], method='linear')[0]
|
||||
node['se'] = globStd / math.sqrt(len(feedbacks))
|
||||
feedbacks.append(node['pagerank_score'])
|
||||
ws.append(['pagerank'])
|
||||
#feedbacks.append(10/math.ln10(10+node['tgb_rank']) if 'tgb_rank' in node else 0)
|
||||
#ws.append(['tgb_rank'])
|
||||
feedbacks.append(node['std'])
|
||||
ws.append(['sigma'])
|
||||
#feedbacks.append(node['median'])
|
||||
#ws.append(['median'])
|
||||
#feedbacks.append(node['se'])
|
||||
#ws.append(['se'])
|
||||
feedbacks.append(globMu)
|
||||
ws.append(['bias'])
|
||||
node['score'] = sum([fb*getWeightForType(w[0], w[1] if len(w)>1 else 1) for fb, w in zip(feedbacks, ws)])/sum([getWeightForType(w[0], w[1] if len(w)>1 else 1) for w in ws])
|
||||
node['_act'] = feedbacks
|
||||
node['_wgh'] = ws
|
||||
else:
|
||||
node['score'] = globMu + errorFac*globStd + len(feedbacks)*0.0000000001
|
||||
node['mean'], node['std'] = norm.fit(feedbacks)
|
||||
node['median'] = np.percentile(feedbacks, [50], method='linear')[0]
|
||||
node['se'] = globStd / math.sqrt(len(feedbacks))
|
||||
neuralBins['mean'] = [node['mean']]
|
||||
neuralBins['sigma'] = [node['std']]
|
||||
neuralBins['median'] = [node['median']]
|
||||
neuralBins['se'] = [node['se']]
|
||||
neuralBins['pagerank'] = [node['pagerank_score']]
|
||||
if 'tgb_rank' in node:
|
||||
neuralBins['tgbrank'] = [10/math.ln10(10+node['tgb_rank'])]
|
||||
neuralBins['bias'] = [globMu]
|
||||
score = 0
|
||||
nb = dict(neuralBins)
|
||||
act = {}
|
||||
for b in nb:
|
||||
act[b] = sum(nb[b])/len(nb[b])
|
||||
score += act[b] * getWeightForType(b)
|
||||
score /= sum([abs(getWeightForType(b)) for b in nb])
|
||||
node['score'] = math.tanh(score/10)*10
|
||||
node['_act'] = act
|
||||
if 'series' in node:
|
||||
if node['series_index'] == 1.0:
|
||||
node['score'] += 0.000000001
|
||||
|
||||
def getWeightForType(nodeType, edgeWeight=1):
|
||||
def getWeightForType(nodeType):
|
||||
global weights
|
||||
w = weights[nodeType]
|
||||
if nodeType == 'topList':
|
||||
return edgeWeight*w
|
||||
else:
|
||||
return w
|
||||
if nodeType not in weights:
|
||||
weights[nodeType] = 0.1
|
||||
saveWeights(weights)
|
||||
print('[i] neuralWeights-Vector extended with >'+nodeType+'<')
|
||||
return weights[nodeType]
|
||||
|
||||
def printBestList(G, t='book', num=-1):
|
||||
bestlist = []
|
||||
@ -1198,7 +1199,7 @@ def findNewBooks(G, books, mu, num=-1, minRecSco=5):
|
||||
# while batchSize is implemented, we only get good convergence when we disable it (batchSize=-1)
|
||||
# but it might be necessary to enable it later for a larger library for better training performance...
|
||||
# maybe try again for 128 books?
|
||||
def evaluateFitness(books, batchSize=-1, debugPrint=False):
|
||||
def evaluateFitness(books, batchSize=16, debugPrint=False):
|
||||
global weights
|
||||
G = buildBookGraph(books)
|
||||
graphAddAuthors(G, books)
|
||||
@ -1213,12 +1214,13 @@ def evaluateFitness(books, batchSize=-1, debugPrint=False):
|
||||
linSepLoss = []
|
||||
errSq = []
|
||||
gradient = {}
|
||||
for wt in weights:
|
||||
gradient[wt] = 0
|
||||
for w in weights:
|
||||
gradient[w] = 0
|
||||
mu, sigma = genScores(G, books)
|
||||
batch = random.sample(ratedBooks, batchSize) if batchSize!=-1 and len(ratedBooks) > batchSize else ratedBooks
|
||||
for b in G.nodes:
|
||||
batch = random.sample(ratedBooks, batchSize) if batchSize!=-1 and len(ratedBooks) > batchSize else ratedBooks
|
||||
if b in batch:
|
||||
if b in ratedBooks:
|
||||
node = G.nodes[b]
|
||||
rating = G.nodes[b]['rating']
|
||||
G.nodes[b]['rating'] = None
|
||||
_, _ = genScores(G, books, calcPagerank=False)
|
||||
@ -1227,17 +1229,20 @@ def evaluateFitness(books, batchSize=-1, debugPrint=False):
|
||||
else:
|
||||
errSq.append((rating - G.nodes[b]['score'])**2)
|
||||
G.nodes[b]['rating'] = rating
|
||||
for wt in weights:
|
||||
scoreB = sum([a*(1.001 if wt==w[0] else 1)*weights[w[0]]*(w[1] if len(w)>1 else 1) for a,w in zip(G.nodes[b]['_act'], G.nodes[b]['_wgh'])])/sum([(1.001 if wt==w[0] else 1)*weights[w[0]]*(w[1] if len(w)>1 else 1) for w in G.nodes[b]['_wgh']])
|
||||
gradient[wt] += ((rating - G.nodes[b]['score'])**2 - (rating - scoreB)**2)*1000
|
||||
if b in batch:
|
||||
for wt in weights:
|
||||
scoreB = 0
|
||||
for w in node['_act']:
|
||||
scoreB += node['_act'][w] * (getWeightForType(w) + (0.001 if wt==w else 0))
|
||||
scoreB /= sum([abs(getWeightForType(w)) for w in node['_act']])
|
||||
scoreB = math.tanh(scoreB/10)*10
|
||||
gradient[wt] += ((rating - G.nodes[b]['score'])**2 - (rating - scoreB)**2)*1000
|
||||
regressionLoss = sum([max(0,abs(w)-1)**2 for w in weights.values()]) # no punishment if w within -1 and 1
|
||||
for wt in weights:
|
||||
if abs(weights[wt]) > 1.0:
|
||||
gradient[wt] -= weights[wt]*10
|
||||
else:
|
||||
gradient[wt] -= weights[wt]*1
|
||||
gradient[wt] -= weights[wt]*3
|
||||
for g in gradient:
|
||||
gradient[g] /= len(errSq)
|
||||
gradient[g] /= len(batch)
|
||||
if debugPrint:
|
||||
print(sum(errSq)/len(errSq), 0.001*regressionLoss)
|
||||
fit = sum(errSq)/len(errSq) + 0.001*regressionLoss
|
||||
@ -1253,7 +1258,7 @@ def train(initGamma, full=True):
|
||||
books = loadBooksFromDB()
|
||||
bestWeights = copy.copy(weights)
|
||||
mse, gradient = evaluateFitness(books)
|
||||
delta = sum(gradient[g]**2 for g in gradient)
|
||||
delta = math.sqrt(sum(gradient[g]**2 for g in gradient)/len(gradient))
|
||||
best_mse = mse
|
||||
stagLen = 0
|
||||
goal = 1.0e-4
|
||||
@ -1266,7 +1271,10 @@ def train(initGamma, full=True):
|
||||
print({'mse': mse, 'gamma': gamma, 'delta': delta})
|
||||
delta = sum(gradient[g]**2 for g in gradient)
|
||||
for wt in weights:
|
||||
weights[wt] += gamma*gradient[wt]/math.sqrt(delta)
|
||||
if wt in gradient:
|
||||
weights[wt] += gamma*gradient[wt]/math.sqrt(delta)
|
||||
#else:
|
||||
# del weights[wt]
|
||||
mse, gradient = evaluateFitness(books)
|
||||
if mse < last_mse:
|
||||
gamma = gamma*1.25
|
||||
|
Loading…
Reference in New Issue
Block a user