Implemented Neural Net + training
This commit is contained in:
parent
0231d97a42
commit
cb0ad906eb
126
caliGraph.py
126
caliGraph.py
@ -2,6 +2,7 @@
|
|||||||
import os
|
import os
|
||||||
import json
|
import json
|
||||||
import math
|
import math
|
||||||
|
import copy
|
||||||
import random
|
import random
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
@ -78,7 +79,7 @@ def getAllTags(books):
|
|||||||
return list(tags)
|
return list(tags)
|
||||||
|
|
||||||
|
|
||||||
def getTopListWheight(book, topList):
|
def getTopListWeight(book, topList):
|
||||||
minScope = 100000
|
minScope = 100000
|
||||||
for tag in book['tags']:
|
for tag in book['tags']:
|
||||||
if tag.find(topList+" Top ") != -1:
|
if tag.find(topList+" Top ") != -1:
|
||||||
@ -294,7 +295,24 @@ def removeUselessReadBooks(G):
|
|||||||
else: # No unrated book in cousins
|
else: # No unrated book in cousins
|
||||||
G.remove_node(n)
|
G.remove_node(n)
|
||||||
|
|
||||||
def scoreOpinions(G, globMu, globStd, errorFac=-0.5):
|
def removeUselessTags(G):
    """Remove every tag node that has no adjacent book carrying a 'score'.

    A node counts as a tag when its 't' attribute equals 'tag'. The tag is
    kept as soon as one neighbour is a node with t == 'book' that has a
    'score' key; otherwise (including a tag with no neighbours at all) the
    tag node is removed from G in place.

    NOTE(review): presumably only unread/unrated books carry 'score' at this
    point, so this drops tags that cannot lead to a recommendation - confirm
    against the scoring code.

    G -- graph object exposing .nodes, .adj and .remove_node (modified in place)
    """
    # Iterate over a snapshot because nodes are removed during the loop.
    for n in list(G.nodes):
        if G.nodes[n]['t'] != 'tag':
            continue
        hasScoredBook = any(
            G.nodes[adj]['t'] == 'book' and 'score' in G.nodes[adj]
            for adj in G.adj[n]
        )
        if not hasScoredBook:  # No scored book next to this tag
            G.remove_node(n)
|
||||||
|
|
||||||
|
|
||||||
|
def scoreOpinions(G, globMu, globStd, errorFac=0):
|
||||||
for n in list(G.nodes):
|
for n in list(G.nodes):
|
||||||
node = G.nodes[n]
|
node = G.nodes[n]
|
||||||
feedbacks = []
|
feedbacks = []
|
||||||
@ -319,7 +337,7 @@ def scoreOpinions(G, globMu, globStd, errorFac=-0.5):
|
|||||||
def scoreUnread(G, globMu, globStd, errorFac=-0.6):
|
def scoreUnread(G, globMu, globStd, errorFac=-0.6):
|
||||||
for n in list(G.nodes):
|
for n in list(G.nodes):
|
||||||
feedbacks = [globMu]
|
feedbacks = [globMu]
|
||||||
wheights = [getWheightForType('mu')]
|
weights = [getWeightForType('mu')]
|
||||||
node = G.nodes[n]
|
node = G.nodes[n]
|
||||||
if node['t'] == 'book':
|
if node['t'] == 'book':
|
||||||
if node['rating'] == None:
|
if node['rating'] == None:
|
||||||
@ -327,13 +345,15 @@ def scoreUnread(G, globMu, globStd, errorFac=-0.6):
|
|||||||
for adj in adjacens:
|
for adj in adjacens:
|
||||||
adjNode = G.nodes[adj]
|
adjNode = G.nodes[adj]
|
||||||
if 'score' in adjNode and adjNode['score'] != None:
|
if 'score' in adjNode and adjNode['score'] != None:
|
||||||
w = getWheightForType(adjNode['t'], G[n][adj]['wheight'] if 'wheight' in G[n][adj] else None)
|
w = getWeightForType(adjNode['t'], G[n][adj]['weight'] if 'weight' in G[n][adj] else 1)
|
||||||
for fb in adjNode['feedbacks']:
|
for fb in adjNode['feedbacks']:
|
||||||
feedbacks.append(fb)
|
feedbacks.append(fb)
|
||||||
wheights.append(w)
|
weights.append(w)
|
||||||
if len(feedbacks):
|
if len(feedbacks):
|
||||||
node['meanUnweighted'], node['std'] = norm.fit(feedbacks)
|
node['meanUnweighted'], node['std'] = norm.fit(feedbacks)
|
||||||
node['mean'] = sum([fb*w for fb, w in zip(feedbacks, wheights)])/len(feedbacks)
|
feedbacks.append(node['std'])
|
||||||
|
weights.append(getWeightForType('sigma'))
|
||||||
|
node['mean'] = sum([fb*w for fb, w in zip(feedbacks, weights)])/len(feedbacks)
|
||||||
node['se'] = globStd / math.sqrt(len(feedbacks))
|
node['se'] = globStd / math.sqrt(len(feedbacks))
|
||||||
node['score'] = node['mean'] + errorFac*node['se']
|
node['score'] = node['mean'] + errorFac*node['se']
|
||||||
else:
|
else:
|
||||||
@ -343,11 +363,13 @@ def scoreUnread(G, globMu, globStd, errorFac=-0.6):
|
|||||||
node['score'] += 0.000000001
|
node['score'] += 0.000000001
|
||||||
|
|
||||||
# TODO: Make this neural and train it
|
# TODO: Make this neural and train it
|
||||||
def getWeightForType(nodeType, edgeWeight=1):
    """Return the weight applied to feedback coming from a node of nodeType.

    The base weight is looked up in the module-level `weights` mapping.
    For 'topList' nodes the base weight is multiplied by edgeWeight; for
    every other type edgeWeight is ignored.

    nodeType   -- key into `weights` (e.g. 'topList', 'tag', 'mu', 'sigma')
    edgeWeight -- per-edge factor, only used for 'topList' (default 1)
    """
    # Types missing from the weights file fall back to the neutral weight 1.0
    # instead of raising KeyError, so an incomplete file does not abort scoring.
    w = weights.get(nodeType, 1.0)
    if nodeType == 'topList':
        return edgeWeight*w
    return w
|
||||||
|
|
||||||
def printBestList(G, num=-1):
|
def printBestList(G, num=-1):
|
||||||
bestlist = []
|
bestlist = []
|
||||||
@ -423,7 +445,7 @@ def graphAddTopLists(G, books):
|
|||||||
G.add_node('t/'+tl, color='yellow', t='topList', label=tl)
|
G.add_node('t/'+tl, color='yellow', t='topList', label=tl)
|
||||||
for book in books:
|
for book in books:
|
||||||
for top in getTopLists(book):
|
for top in getTopLists(book):
|
||||||
G.add_edge('t/'+top, book['id'], wheight=getTopListWheight(
|
G.add_edge('t/'+top, book['id'], weight=getTopListWeight(
|
||||||
book, top), color=readColor(book))
|
book, top), color=readColor(book))
|
||||||
return G
|
return G
|
||||||
|
|
||||||
@ -520,25 +542,27 @@ def genScores(G, books):
|
|||||||
|
|
||||||
def recommendNBooks(G, mu, std, n, removeTopListsB=True, removeUselessRecommenders=True):
|
def recommendNBooks(G, mu, std, n, removeTopListsB=True, removeUselessRecommenders=True):
|
||||||
removeRestOfSeries(G)
|
removeRestOfSeries(G)
|
||||||
removeBad(G, mu-std-1.5)
|
removeBad(G, mu-std*2-1)
|
||||||
removeKeepBest(G, int(n*2) + 5, maxDistForRead=1.5)
|
removeKeepBest(G, int(n*2) + 5, maxDistForRead=1.5*2)
|
||||||
removeEdge(G)
|
removeEdge(G)
|
||||||
removeHighSpanTags(G, 9)
|
removeHighSpanTags(G, 9)
|
||||||
removeDangling(G, alsoBooks=False)
|
removeDangling(G, alsoBooks=False)
|
||||||
pruneTags(G, 6)
|
pruneTags(G, 6*2)
|
||||||
removeBad(G, mu, groups=['book'])
|
removeBad(G, mu, groups=['book'])
|
||||||
removeUselessReadBooks(G)
|
removeUselessReadBooks(G)
|
||||||
pruneTags(G, 4.25)
|
pruneTags(G, 4.25*2)
|
||||||
pruneRecommenderCons(G, int(n/7)+1)
|
pruneRecommenderCons(G, int(n/7)+1)
|
||||||
pruneAuthorCons(G, int(n/15))
|
pruneAuthorCons(G, int(n/15))
|
||||||
|
removeUselessTags(G)
|
||||||
if removeTopListsB:
|
if removeTopListsB:
|
||||||
removeTopLists(G)
|
removeTopLists(G)
|
||||||
removeDangling(G, alsoBooks=True)
|
removeDangling(G, alsoBooks=True)
|
||||||
removeKeepBest(G, n, maxDistForRead=0.75)
|
removeKeepBest(G, n, maxDistForRead=0.75*2)
|
||||||
removeEdge(G)
|
removeEdge(G)
|
||||||
removeDangling(G, alsoBooks=True)
|
removeDangling(G, alsoBooks=True)
|
||||||
if removeUselessRecommenders:
|
if removeUselessRecommenders:
|
||||||
removeUnusedRecommenders(G)
|
removeUnusedRecommenders(G)
|
||||||
|
removeDangling(G, alsoBooks=True)
|
||||||
|
|
||||||
scaleBooksByRating(G)
|
scaleBooksByRating(G)
|
||||||
scaleOpinionsByRating(G)
|
scaleOpinionsByRating(G)
|
||||||
@ -650,6 +674,54 @@ def waveFlow(G, node, n, dist, menge, firstEdge=False):
|
|||||||
if node in bestlist or node in keeplist:
|
if node in bestlist or node in keeplist:
|
||||||
waveFlow(G, node, m, dist, menge, firstEdge=firstEdge)
|
waveFlow(G, node, m, dist, menge, firstEdge=firstEdge)
|
||||||
|
|
||||||
|
def evaluateFitness():
    """Return the mean squared error between real ratings and predicted scores.

    Builds the full graph, then for each node that has a non-None 'rating':
    temporarily sets the rating to None, re-runs genScores, compares the
    resulting 'score' of that node with the hidden rating, and restores the
    rating afterwards. The mean of the squared differences is returned.

    Raises ValueError when the graph contains no rated node (the mean would
    otherwise be a division by zero).
    """
    G, books = buildFullGraph()
    ratedBooks = [n for n in list(G.nodes)
                  if 'rating' in G.nodes[n] and G.nodes[n]['rating'] is not None]
    if not ratedBooks:
        raise ValueError("evaluateFitness needs at least one rated book")

    errSq = []
    for m in ratedBooks:
        rating = G.nodes[m]['rating']
        G.nodes[m]['rating'] = None  # hide the rating so the node gets scored
        try:
            genScores(G, books)
            errSq.append((rating - G.nodes[m]['score'])**2)
        finally:
            # Always put the real rating back, even if scoring failed, so the
            # graph is never left with a hidden rating.
            G.nodes[m]['rating'] = rating
    return sum(errSq) / len(errSq)
|
||||||
|
|
||||||
|
def train(gamma = 0.1):
    """Tune the module-level `weights` by random single-weight perturbation.

    Repeatedly changes one randomly chosen weight by `delta`, re-evaluates
    the error with evaluateFitness(), and keeps the change only if the error
    decreased. On an improvement the weights are saved via saveWeights(),
    gamma grows by 10 %, delta is doubled, and the same weight is usually
    (90 %) tried again. On a failure the best weights are restored, gamma
    shrinks by 20 % and a new weight and a new delta are drawn.

    The loop never terminates on its own; stop it by interrupting the
    process. The best weights found so far are already on disk.

    gamma -- initial step size (learning rate)
    """
    global weights

    def _randomDelta(stepSize):
        # Step magnitude lies in [0.25, 0.5) * stepSize. The sign is random so
        # that weights can be lowered as well as raised; a purely positive
        # step could never reduce an over-sized weight.
        magnitude = stepSize * (0.25 + 0.25*random.random())
        return magnitude if random.random() < 0.5 else -magnitude

    bestWeights = copy.copy(weights)
    best_mse = evaluateFitness()
    w = list(weights.keys())
    attr = random.choice(w)
    delta = _randomDelta(gamma)

    while True:
        print({'mse': best_mse, 'w': weights, 'gamma': gamma})
        # Always start the trial from the best known weights.
        weights = copy.copy(bestWeights)
        weights[attr] += delta
        mse = evaluateFitness()
        if mse < best_mse: # got better
            saveWeights(weights)
            gamma *= 1.1
            bestWeights = copy.copy(weights)
            best_mse = mse
            delta *= 2  # keep moving further in the direction that helped
            if random.random() < 0.10:
                attr = random.choice(w)
        else:
            weights = copy.copy(bestWeights)
            gamma *= 0.8
            attr = random.choice(w)
            delta = _randomDelta(gamma)
|
||||||
|
|
||||||
|
def saveWeights(weights):
    """Write `weights` as JSON to 'neuralWeights.json', replacing the file.

    weights -- JSON-serialisable mapping of weight name to value
    """
    with open('neuralWeights.json', 'w') as f:
        serialised = json.dumps(weights)
        f.write(serialised)
|
||||||
|
|
||||||
|
def loadWeights():
    """Read 'neuralWeights.json' and return its parsed JSON content."""
    with open('neuralWeights.json', 'r') as f:
        return json.load(f)
|
||||||
|
|
||||||
def cliInterface():
|
def cliInterface():
|
||||||
import argparse
|
import argparse
|
||||||
|
|
||||||
@ -677,18 +749,19 @@ def cliInterface():
|
|||||||
p_show.add_argument('name', type=str)
|
p_show.add_argument('name', type=str)
|
||||||
p_show.add_argument('-d', type=float, default=2.7, help='depth of expansion')
|
p_show.add_argument('-d', type=float, default=2.7, help='depth of expansion')
|
||||||
|
|
||||||
|
p_train = cmds.add_parser('train', description="TODO", aliases=[])
|
||||||
|
p_train.add_argument('-g', type=float, default=0.1, help='learning rate gamma')
|
||||||
|
|
||||||
p_full = cmds.add_parser('full', description="TODO", aliases=[])
|
p_full = cmds.add_parser('full', description="TODO", aliases=[])
|
||||||
|
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|
||||||
|
if args.cmd=="train":
|
||||||
|
train(args.g)
|
||||||
|
exit()
|
||||||
|
|
||||||
G, books = buildFullGraph()
|
G, books = buildFullGraph()
|
||||||
mu, std = genScores(G, books)
|
mu, std = genScores(G, books)
|
||||||
if not args.keep_priv:
|
|
||||||
removePriv(G)
|
|
||||||
if args.remove_read:
|
|
||||||
removeRead(G)
|
|
||||||
elif args.remove_unread:
|
|
||||||
removeUnread(G)
|
|
||||||
|
|
||||||
if args.cmd=="recommend":
|
if args.cmd=="recommend":
|
||||||
recommendNBooks(G, mu, std, args.n, not args.keep_top_lists, not args.keep_useless_recommenders)
|
recommendNBooks(G, mu, std, args.n, not args.keep_top_lists, not args.keep_useless_recommenders)
|
||||||
@ -701,6 +774,15 @@ def cliInterface():
|
|||||||
else:
|
else:
|
||||||
raise Exception("Bad")
|
raise Exception("Bad")
|
||||||
|
|
||||||
|
if not args.keep_priv:
|
||||||
|
removePriv(G)
|
||||||
|
if args.remove_read:
|
||||||
|
removeRead(G)
|
||||||
|
elif args.remove_unread:
|
||||||
|
removeUnread(G)
|
||||||
|
|
||||||
|
removeDangling(G, alsoBooks=True)
|
||||||
|
|
||||||
if args.remove_edge:
|
if args.remove_edge:
|
||||||
removeEdge(G)
|
removeEdge(G)
|
||||||
|
|
||||||
@ -709,5 +791,7 @@ def cliInterface():
|
|||||||
if not args.no_web:
|
if not args.no_web:
|
||||||
genAndShowHTML(G)
|
genAndShowHTML(G)
|
||||||
|
|
||||||
|
|
||||||
|
weights = loadWeights()
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
cliInterface()
|
cliInterface()
|
||||||
|
1
neuralWeights.json
Normal file
1
neuralWeights.json
Normal file
@ -0,0 +1 @@
|
|||||||
|
{"topList": 2.2007417599641768, "recommender": 1.338876321897804, "author": 2.1427159240782587, "series": 1.0, "tag": 1.0, "mu": 1.3256374080828757, "sigma": -0.9188502564501485}
|
Loading…
Reference in New Issue
Block a user