Implemented Neural Net + training
This commit is contained in:
parent
0231d97a42
commit
cb0ad906eb
126
caliGraph.py
126
caliGraph.py
@ -2,6 +2,7 @@
|
||||
import os
|
||||
import json
|
||||
import math
|
||||
import copy
|
||||
import random
|
||||
|
||||
import numpy as np
|
||||
@ -78,7 +79,7 @@ def getAllTags(books):
|
||||
return list(tags)
|
||||
|
||||
|
||||
def getTopListWheight(book, topList):
|
||||
def getTopListWeight(book, topList):
|
||||
minScope = 100000
|
||||
for tag in book['tags']:
|
||||
if tag.find(topList+" Top ") != -1:
|
||||
@ -294,7 +295,24 @@ def removeUselessReadBooks(G):
|
||||
else: # No unrated book in cousins
|
||||
G.remove_node(n)
|
||||
|
||||
def scoreOpinions(G, globMu, globStd, errorFac=-0.5):
|
||||
def removeUselessTags(G):
    """Remove every tag node that has no scored book among its neighbours.

    A neighbour counts when its ``'t'`` attribute is ``'book'`` and it carries
    a ``'score'`` key. Tag nodes without such a neighbour (including tags
    with no neighbours at all) are removed from ``G`` in place. Nodes whose
    ``'t'`` is not ``'tag'`` are left untouched.

    Args:
        G: graph object exposing ``nodes``, ``adj`` and ``remove_node``.
    """
    # Iterate over a snapshot: remove_node mutates the graph while we walk it.
    for n in list(G.nodes):
        if G.nodes[n]['t'] != 'tag':
            continue
        hasScoredBook = any(
            G.nodes[adj]['t'] == 'book' and 'score' in G.nodes[adj]
            for adj in G.adj[n]
        )
        if not hasScoredBook:
            G.remove_node(n)
|
||||
|
||||
|
||||
def scoreOpinions(G, globMu, globStd, errorFac=0):
|
||||
for n in list(G.nodes):
|
||||
node = G.nodes[n]
|
||||
feedbacks = []
|
||||
@ -319,7 +337,7 @@ def scoreOpinions(G, globMu, globStd, errorFac=-0.5):
|
||||
def scoreUnread(G, globMu, globStd, errorFac=-0.6):
|
||||
for n in list(G.nodes):
|
||||
feedbacks = [globMu]
|
||||
wheights = [getWheightForType('mu')]
|
||||
weights = [getWeightForType('mu')]
|
||||
node = G.nodes[n]
|
||||
if node['t'] == 'book':
|
||||
if node['rating'] == None:
|
||||
@ -327,13 +345,15 @@ def scoreUnread(G, globMu, globStd, errorFac=-0.6):
|
||||
for adj in adjacens:
|
||||
adjNode = G.nodes[adj]
|
||||
if 'score' in adjNode and adjNode['score'] != None:
|
||||
w = getWheightForType(adjNode['t'], G[n][adj]['wheight'] if 'wheight' in G[n][adj] else None)
|
||||
w = getWeightForType(adjNode['t'], G[n][adj]['weight'] if 'weight' in G[n][adj] else 1)
|
||||
for fb in adjNode['feedbacks']:
|
||||
feedbacks.append(fb)
|
||||
wheights.append(w)
|
||||
weights.append(w)
|
||||
if len(feedbacks):
|
||||
node['meanUnweighted'], node['std'] = norm.fit(feedbacks)
|
||||
node['mean'] = sum([fb*w for fb, w in zip(feedbacks, wheights)])/len(feedbacks)
|
||||
feedbacks.append(node['std'])
|
||||
weights.append(getWeightForType('sigma'))
|
||||
node['mean'] = sum([fb*w for fb, w in zip(feedbacks, weights)])/len(feedbacks)
|
||||
node['se'] = globStd / math.sqrt(len(feedbacks))
|
||||
node['score'] = node['mean'] + errorFac*node['se']
|
||||
else:
|
||||
@ -343,11 +363,13 @@ def scoreUnread(G, globMu, globStd, errorFac=-0.6):
|
||||
node['score'] += 0.000000001
|
||||
|
||||
# TODO: Make this neural and train it
|
||||
def getWheightForType(nodeType, edgeWheight=None):
|
||||
def getWeightForType(nodeType, edgeWeight=1):
|
||||
global weights
|
||||
w = weights[nodeType]
|
||||
if nodeType == 'topList':
|
||||
return edgeWheight*0.5
|
||||
return edgeWeight*w
|
||||
else:
|
||||
return 1.0
|
||||
return w
|
||||
|
||||
def printBestList(G, num=-1):
|
||||
bestlist = []
|
||||
@ -423,7 +445,7 @@ def graphAddTopLists(G, books):
|
||||
G.add_node('t/'+tl, color='yellow', t='topList', label=tl)
|
||||
for book in books:
|
||||
for top in getTopLists(book):
|
||||
G.add_edge('t/'+top, book['id'], wheight=getTopListWheight(
|
||||
G.add_edge('t/'+top, book['id'], weight=getTopListWeight(
|
||||
book, top), color=readColor(book))
|
||||
return G
|
||||
|
||||
@ -520,25 +542,27 @@ def genScores(G, books):
|
||||
|
||||
def recommendNBooks(G, mu, std, n, removeTopListsB=True, removeUselessRecommenders=True):
|
||||
removeRestOfSeries(G)
|
||||
removeBad(G, mu-std-1.5)
|
||||
removeKeepBest(G, int(n*2) + 5, maxDistForRead=1.5)
|
||||
removeBad(G, mu-std*2-1)
|
||||
removeKeepBest(G, int(n*2) + 5, maxDistForRead=1.5*2)
|
||||
removeEdge(G)
|
||||
removeHighSpanTags(G, 9)
|
||||
removeDangling(G, alsoBooks=False)
|
||||
pruneTags(G, 6)
|
||||
pruneTags(G, 6*2)
|
||||
removeBad(G, mu, groups=['book'])
|
||||
removeUselessReadBooks(G)
|
||||
pruneTags(G, 4.25)
|
||||
pruneTags(G, 4.25*2)
|
||||
pruneRecommenderCons(G, int(n/7)+1)
|
||||
pruneAuthorCons(G, int(n/15))
|
||||
removeUselessTags(G)
|
||||
if removeTopListsB:
|
||||
removeTopLists(G)
|
||||
removeDangling(G, alsoBooks=True)
|
||||
removeKeepBest(G, n, maxDistForRead=0.75)
|
||||
removeKeepBest(G, n, maxDistForRead=0.75*2)
|
||||
removeEdge(G)
|
||||
removeDangling(G, alsoBooks=True)
|
||||
if removeUselessRecommenders:
|
||||
removeUnusedRecommenders(G)
|
||||
removeDangling(G, alsoBooks=True)
|
||||
|
||||
scaleBooksByRating(G)
|
||||
scaleOpinionsByRating(G)
|
||||
@ -650,6 +674,54 @@ def waveFlow(G, node, n, dist, menge, firstEdge=False):
|
||||
if node in bestlist or node in keeplist:
|
||||
waveFlow(G, node, m, dist, menge, firstEdge=firstEdge)
|
||||
|
||||
def evaluateFitness():
    """Return the leave-one-out mean squared error of the scoring model.

    Builds the full graph, then for every node that carries a non-None
    ``'rating'``: hides the rating, regenerates all scores via ``genScores``
    and compares the ``'score'`` produced for that node with the hidden
    rating. The rating is put back afterwards.

    Returns:
        The mean of the squared differences over all rated nodes.

    Raises:
        ValueError: if the graph contains no rated node, since the error
            would otherwise be an opaque division by zero.
    """
    G, books = buildFullGraph()
    ratedBooks = [
        n for n in G.nodes
        if 'rating' in G.nodes[n] and G.nodes[n]['rating'] is not None
    ]
    if not ratedBooks:
        raise ValueError("evaluateFitness needs at least one rated book")

    errSq = []
    for m in ratedBooks:
        rating = G.nodes[m]['rating']
        G.nodes[m]['rating'] = None
        try:
            genScores(G, books)
            errSq.append((rating - G.nodes[m]['score'])**2)
        finally:
            # Always restore the rating so a failure cannot corrupt the graph
            # for the remaining iterations or for the caller.
            G.nodes[m]['rating'] = rating
    return sum(errSq) / len(errSq)
|
||||
|
||||
def _randomStep(gamma):
    """Return a randomly signed step with magnitude in [0.25*gamma, 0.5*gamma)."""
    magnitude = gamma * (-0.5 + (0.75 + 0.25*random.random()))
    # Without a random sign every step is positive and a weight that has
    # grown too large could never be lowered again.
    return magnitude if random.random() < 0.5 else -magnitude


def train(gamma = 0.1):
    """Tune the global ``weights`` by stochastic coordinate hill climbing.

    Repeatedly perturbs one randomly chosen weight, re-evaluates the model
    with ``evaluateFitness`` and keeps the change only when the error drops.
    Every improvement is persisted with ``saveWeights``. The loop never
    terminates on its own; stop it by interrupting the process.

    Args:
        gamma: initial step scale. It grows by 10% after an improvement and
            shrinks by 20% after a failed attempt.
    """
    global weights
    bestWeights = copy.copy(weights)
    best_mse = evaluateFitness()
    w = list(weights.keys())
    attr = random.choice(w)
    delta = _randomStep(gamma)

    while True:
        print({'mse': best_mse, 'w': weights, 'gamma': gamma})
        # Always start the trial from the best known weights.
        weights = copy.copy(bestWeights)
        weights[attr] += delta
        mse = evaluateFitness()
        if mse < best_mse: # got better
            saveWeights(weights)
            gamma *= 1.1
            bestWeights = copy.copy(weights)
            best_mse = mse
            # Keep moving in the successful direction with a larger step.
            delta *= 2
            if random.random() < 0.10:
                attr = random.choice(w)
        else:
            # Roll back so the progress line printed next shows the best weights.
            weights = copy.copy(bestWeights)
            gamma *= 0.8
            attr = random.choice(w)
            delta = _randomStep(gamma)
|
||||
|
||||
def saveWeights(weights):
    """Write ``weights`` as JSON to ``neuralWeights.json`` in the working directory.

    The data is first written to a temporary sibling file and then moved into
    place with ``os.replace``, so an interrupt during the write cannot leave a
    truncated weights file behind.

    Args:
        weights: JSON-serialisable mapping of weight name to value.
    """
    tmpPath = 'neuralWeights.json.tmp'
    with open(tmpPath, 'w') as f:
        f.write(json.dumps(weights))
    os.replace(tmpPath, 'neuralWeights.json')
|
||||
|
||||
def loadWeights():
    """Read the weight table from ``neuralWeights.json`` in the working directory.

    Returns:
        The decoded content of the file (a mapping of weight name to value).

    Raises:
        OSError: if the file cannot be opened.
        ValueError, SyntaxError: if the content is neither valid JSON nor a
            Python literal.
    """
    import ast

    with open('neuralWeights.json', 'r') as f:
        raw = f.read()
    try:
        return json.loads(raw)
    except json.JSONDecodeError:
        # A dict dumped via str()/print() uses single quotes, which JSON
        # rejects. literal_eval parses such a file without executing code.
        return ast.literal_eval(raw)
|
||||
|
||||
def cliInterface():
|
||||
import argparse
|
||||
|
||||
@ -677,18 +749,19 @@ def cliInterface():
|
||||
p_show.add_argument('name', type=str)
|
||||
p_show.add_argument('-d', type=float, default=2.7, help='depth of expansion')
|
||||
|
||||
p_train = cmds.add_parser('train', description="TODO", aliases=[])
|
||||
p_train.add_argument('-g', type=float, default=0.1, help='learning rate gamma')
|
||||
|
||||
p_full = cmds.add_parser('full', description="TODO", aliases=[])
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
if args.cmd=="train":
|
||||
train(args.g)
|
||||
exit()
|
||||
|
||||
G, books = buildFullGraph()
|
||||
mu, std = genScores(G, books)
|
||||
if not args.keep_priv:
|
||||
removePriv(G)
|
||||
if args.remove_read:
|
||||
removeRead(G)
|
||||
elif args.remove_unread:
|
||||
removeUnread(G)
|
||||
|
||||
if args.cmd=="recommend":
|
||||
recommendNBooks(G, mu, std, args.n, not args.keep_top_lists, not args.keep_useless_recommenders)
|
||||
@ -701,6 +774,15 @@ def cliInterface():
|
||||
else:
|
||||
raise Exception("Bad")
|
||||
|
||||
if not args.keep_priv:
|
||||
removePriv(G)
|
||||
if args.remove_read:
|
||||
removeRead(G)
|
||||
elif args.remove_unread:
|
||||
removeUnread(G)
|
||||
|
||||
removeDangling(G, alsoBooks=True)
|
||||
|
||||
if args.remove_edge:
|
||||
removeEdge(G)
|
||||
|
||||
@ -709,5 +791,7 @@ def cliInterface():
|
||||
if not args.no_web:
|
||||
genAndShowHTML(G)
|
||||
|
||||
|
||||
weights = loadWeights()
|
||||
if __name__ == "__main__":
|
||||
cliInterface()
|
||||
|
1
neuralWeights.json
Normal file
1
neuralWeights.json
Normal file
@ -0,0 +1 @@
|
||||
{"topList": 2.2007417599641768, "recommender": 1.338876321897804, "author": 2.1427159240782587, "series": 1.0, "tag": 1.0, "mu": 1.3256374080828757, "sigma": -0.9188502564501485}
|
Loading…
Reference in New Issue
Block a user