Compare commits


3 Commits

SHA1        Message                                  Date
cbb884b377  smashing 'deees bugs                     2022-03-07 17:56:59 +01:00
9a02bdc2a8  Allow disabling pagerank                 2022-02-22 15:22:36 +01:00
01d41f3a82  Fixed bin accumulation; added perf-test  2022-02-22 15:03:51 +01:00
2 changed files with 36 additions and 12 deletions

.gitignore

```diff
@@ -2,5 +2,5 @@ __pycache__
 *.html
 .venv
 neuralWeights.json
-neuralWeights.json.bak
+neuralWeights.json.*
 .imgLinkCache.json
```


```diff
@@ -418,8 +418,8 @@ def scoreUnread(G, globMu, globStd):
         if 'score' in adjNode and adjNode['score'] != None:
             w = adjNode['t']
             for fb in adjNode['feedbacks']:
+                neuralBins[w].append(fb)
                 feedbacks.append(fb)
-            neuralBins[w].append(adjNode['score'])
     node['mean'], node['std'] = norm.fit(feedbacks)
     node['median'] = np.percentile(feedbacks, [50], method='linear')[0]
     node['se'] = globStd / math.sqrt(len(feedbacks))
@@ -427,15 +427,17 @@ def scoreUnread(G, globMu, globStd):
     neuralBins['sigma'] = [node['std']]
     neuralBins['median'] = [node['median']]
     neuralBins['se'] = [node['se']]
-    neuralBins['pagerank'] = [node['pagerank_score']]
+    if 'pagerank_score' in node:
+        neuralBins['pagerank'] = [node['pagerank_score']]
     if 'tgb_rank' in node:
         neuralBins['tgbrank'] = [10/math.ln10(10+node['tgb_rank'])]
     neuralBins['bias'] = [globMu]
     score = 0
     nb = dict(neuralBins)
     act = {}
+    jig = {}
     for b in nb:
-        act[b] = sum(nb[b])/len(nb[b])
+        act[b], jig[b] = norm.fit(nb[b])
         score += act[b] * getWeightForType(b)
     score /= sum([abs(getWeightForType(b)) for b in nb])
     node['score'] = math.tanh(score/10)*10
```
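The `scoreUnread` fix above changes what goes into each bin: raw per-neighbour feedback values rather than each neighbour's single pre-aggregated score, and `scipy.stats.norm.fit` replaces the manual average, yielding a per-bin mean (`act`) and standard deviation (`jig`). A minimal standalone sketch of the fixed accumulation, with hypothetical neighbour data and type weights standing in for the repo's graph structures:

```python
# Sketch of the fixed bin accumulation (hypothetical data, not the repo's API).
# Each neighbour type gets a bin of raw feedback values; norm.fit returns the
# maximum-likelihood mean and standard deviation of each bin.
from collections import defaultdict
from scipy.stats import norm

neighbours = [  # hypothetical adjacent nodes
    {'t': 'author', 'feedbacks': [7.0, 8.5, 6.0]},
    {'t': 'series', 'feedbacks': [9.0, 8.0]},
]
weightForType = {'author': 0.6, 'series': 0.4}  # assumed weights

neuralBins = defaultdict(list)
for adjNode in neighbours:
    for fb in adjNode['feedbacks']:
        neuralBins[adjNode['t']].append(fb)  # raw feedbacks, not one score per neighbour

act, jig, score = {}, {}, 0
for b in neuralBins:
    act[b], jig[b] = norm.fit(neuralBins[b])  # per-bin mean and std
    score += act[b] * weightForType[b]
score /= sum(abs(w) for w in weightForType.values())
print(round(score, 3))
```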
```diff
@@ -1199,7 +1201,7 @@ def findNewBooks(G, books, mu, num=-1, minRecSco=5):
 # while batchSize is implemented, we only get a good gonvergence when we disable it (batchSize=-1)
 # but might be necessary to enable later for a larger libary for better training performance...
 # maybe try again for 128 books?
-def evaluateFitness(books, batchSize=16, debugPrint=False):
+def evaluateFitness(books, batchSize=16, debugPrint=False, calcPagerank=True):
     global weights
     G = buildBookGraph(books)
     graphAddAuthors(G, books)
@@ -1207,7 +1209,8 @@ def evaluateFitness(books, batchSize=16, debugPrint=False):
     graphAddTopLists(G, books)
     graphAddSeries(G, books)
     graphAddTags(G, books)
-    runPagerank(G)
+    if calcPagerank:
+        runPagerank(G)
     ratedBooks = [n for n in list(G.nodes) if 'rating' in G.nodes[n] and G.nodes[n]['rating'] != None]

     boundsLoss = 0
```
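`evaluateFitness` gains a `calcPagerank` switch so callers can skip the graph-wide pagerank pass. `runPagerank`'s body is not part of this diff; the sketch below assumes it wraps `networkx.pagerank` and stores each node's rank under `pagerank_score`, which would also explain the `'pagerank_score' in node` guard added to `scoreUnread` above:

```python
# Assumed shape of the pagerank step this flag disables (runPagerank's real
# body is not shown in the diff). networkx.pagerank visits the whole graph,
# which is what makes it worth skipping inside tight training loops.
import networkx as nx

def run_pagerank_sketch(G):
    for node, rank in nx.pagerank(G).items():
        G.nodes[node]['pagerank_score'] = rank  # read back by scoreUnread

def evaluate_fitness_sketch(G, calcPagerank=True):
    if calcPagerank:  # opt out during training
        run_pagerank_sketch(G)
    # ...scoring proceeds; nodes without 'pagerank_score' simply
    # contribute no pagerank bin, thanks to the new guard.
```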
```diff
@@ -1216,7 +1219,7 @@ def evaluateFitness(books, batchSize=16, debugPrint=False):
     gradient = {}
     for w in weights:
         gradient[w] = 0
-    mu, sigma = genScores(G, books)
+    mu, sigma = genScores(G, books, calcPagerank=runPagerank)
     batch = random.sample(ratedBooks, batchSize) if batchSize!=-1 and len(ratedBooks) > batchSize else ratedBooks
     for b in G.nodes:
         if b in ratedBooks:
@@ -1248,7 +1251,7 @@ def evaluateFitness(books, batchSize=16, debugPrint=False):
     fit = sum(errSq)/len(errSq) + 0.001*regressionLoss
     return fit, gradient

-def train(initGamma, full=True):
+def train(initGamma, full=True, noPagerank=False):
     global weights
     if full:
         for wt in weights:
@@ -1257,7 +1260,7 @@ def train(initGamma, full=True):
     gamma = initGamma
     books = loadBooksFromDB()
     bestWeights = copy.copy(weights)
-    mse, gradient = evaluateFitness(books)
+    mse, gradient = evaluateFitness(books, calcPagerank=not noPagerank)
     delta = math.sqrt(sum(gradient[g]**2 for g in gradient)/len(gradient))
     best_mse = mse
     stagLen = 0
@@ -1275,7 +1278,7 @@ def train(initGamma, full=True):
                 weights[wt] += gamma*gradient[wt]/math.sqrt(delta)
             #else:
             #    del weights[wt]
-        mse, gradient = evaluateFitness(books)
+        mse, gradient = evaluateFitness(books, calcPagerank=not noPagerank)
         if mse < last_mse:
             gamma = gamma*1.25
         else:
```
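`train` threads the same switch through as `noPagerank` and keeps its adaptive step size: `gamma` grows by 1.25x whenever the MSE improves. (Note that `mu, sigma = genScores(G, books, calcPagerank=runPagerank)` above passes the always-truthy function object `runPagerank` rather than a boolean, so that particular call never actually disables pagerank.) The shrink branch of the step-size rule falls outside this diff, so the 0.25 back-off in the sketch below is an assumption, as is the toy quadratic objective:

```python
# Sketch of the adaptive-step-size loop visible in train(). The 1.25 growth
# factor comes from the diff; the 0.25 back-off and the objective are assumed.
def fitness(x):
    return (x - 3.0) ** 2  # toy stand-in for the book-rating MSE

x, gamma = 0.0, 1.0
last_mse = fitness(x)
for _ in range(50):
    grad = 2 * (x - 3.0)
    x -= gamma * grad * 0.01
    mse = fitness(x)
    if mse < last_mse:
        gamma = gamma * 1.25  # reward progress with a bigger step
    else:
        gamma = gamma * 0.25  # assumed back-off after a bad step
    last_mse = mse
print(x)  # converges near 3.0
```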
```diff
@@ -1331,6 +1334,8 @@ def cliInterface():
     parser.add_argument('--dark', action="store_true")
     parser.add_argument('--v3d', action="store_true")
     parser.add_argument('--imgs', action="store_true")
+    parser.add_argument('--perf-test', action="store_true")
+    parser.add_argument('--no-pagerank', action="store_true")

     cmds = parser.add_subparsers(required=True, dest='cmd')
     p_rec = cmds.add_parser('recommend', description="TODO", aliases=['rec'])
```
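The two new flags rely on argparse's standard name mangling: dashes become underscores on the parsed namespace, so `--perf-test` and `--no-pagerank` surface as `args.perf_test` and `args.no_pagerank` in the dispatch code below. A self-contained check:

```python
# Verifies the attribute names the new flags produce on the namespace.
import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--perf-test', action="store_true")
parser.add_argument('--no-pagerank', action="store_true")

args = parser.parse_args(['--no-pagerank'])
print(args.perf_test, args.no_pagerank)  # False True
```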
```diff
@@ -1370,14 +1375,33 @@ def cliInterface():
     args = parser.parse_args()

+    if args.perf_test:
+        perfTestCLI(args)
+    else:
+        mainCLI(args)
+
+def perfTestCLI(args):
+    import time
+    from pycallgraph import PyCallGraph
+    from pycallgraph import Config
+    from pycallgraph import GlobbingFilter
+    from pycallgraph.output import GraphvizOutput
+    config = Config()
+    config.trace_filter = GlobbingFilter(exclude=[
+        "pycallgraph.*",
+    ])
+    with PyCallGraph(output=GraphvizOutput(output_file='perfTests/' + str(int(time.time())) + '.png'), config=config):
+        mainCLI(args)
+
+def mainCLI(args):
     if args.cmd=="train":
-        train(args.g, args.full)
+        train(args.g, args.full, args.no_pagerank)
         exit()

     bestListT = 'book'

     G, books = buildFullGraph(darkMode=args.dark)
-    mu, std = genScores(G, books)
+    mu, std = genScores(G, books, calcPagerank=not args.no_pagerank)

     if not args.keep_whitepapers:
         removeWhitepapers(G)
```
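`perfTestCLI` wraps the whole CLI run in pycallgraph's tracer (`PyCallGraph`, `Config`, `GlobbingFilter`, and `GraphvizOutput` are the package's real API, as imported above) and renders the call graph to a timestamped PNG; the `GlobbingFilter` keeps pycallgraph's own frames out of the picture. A minimal standalone version of the same pattern, with a toy `work()` standing in for `mainCLI(args)`:

```python
# Same pycallgraph pattern as perfTestCLI, reduced to a toy workload.
# Requires the pycallgraph package and a Graphviz 'dot' binary on PATH.
from pycallgraph import PyCallGraph, Config, GlobbingFilter
from pycallgraph.output import GraphvizOutput

def work():
    return sum(i * i for i in range(10000))  # stand-in for mainCLI(args)

config = Config()
config.trace_filter = GlobbingFilter(exclude=["pycallgraph.*"])
with PyCallGraph(output=GraphvizOutput(output_file='profile.png'), config=config):
    work()
```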