Compare commits
3 Commits
master
...
neuralBins
Author | SHA1 | Date | |
---|---|---|---|
cbb884b377 | |||
9a02bdc2a8 | |||
01d41f3a82 |
2
.gitignore
vendored
2
.gitignore
vendored
@ -2,5 +2,5 @@ __pycache__
|
|||||||
*.html
|
*.html
|
||||||
.venv
|
.venv
|
||||||
neuralWeights.json
|
neuralWeights.json
|
||||||
neuralWeights.json.bak
|
neuralWeights.json.*
|
||||||
.imgLinkCache.json
|
.imgLinkCache.json
|
||||||
|
46
caliGraph.py
46
caliGraph.py
@ -418,8 +418,8 @@ def scoreUnread(G, globMu, globStd):
|
|||||||
if 'score' in adjNode and adjNode['score'] != None:
|
if 'score' in adjNode and adjNode['score'] != None:
|
||||||
w = adjNode['t']
|
w = adjNode['t']
|
||||||
for fb in adjNode['feedbacks']:
|
for fb in adjNode['feedbacks']:
|
||||||
neuralBins[w].append(fb)
|
|
||||||
feedbacks.append(fb)
|
feedbacks.append(fb)
|
||||||
|
neuralBins[w].append(adjNode['score'])
|
||||||
node['mean'], node['std'] = norm.fit(feedbacks)
|
node['mean'], node['std'] = norm.fit(feedbacks)
|
||||||
node['median'] = np.percentile(feedbacks, [50], method='linear')[0]
|
node['median'] = np.percentile(feedbacks, [50], method='linear')[0]
|
||||||
node['se'] = globStd / math.sqrt(len(feedbacks))
|
node['se'] = globStd / math.sqrt(len(feedbacks))
|
||||||
@ -427,15 +427,17 @@ def scoreUnread(G, globMu, globStd):
|
|||||||
neuralBins['sigma'] = [node['std']]
|
neuralBins['sigma'] = [node['std']]
|
||||||
neuralBins['median'] = [node['median']]
|
neuralBins['median'] = [node['median']]
|
||||||
neuralBins['se'] = [node['se']]
|
neuralBins['se'] = [node['se']]
|
||||||
neuralBins['pagerank'] = [node['pagerank_score']]
|
if 'pagerank_score' in node:
|
||||||
|
neuralBins['pagerank'] = [node['pagerank_score']]
|
||||||
if 'tgb_rank' in node:
|
if 'tgb_rank' in node:
|
||||||
neuralBins['tgbrank'] = [10/math.ln10(10+node['tgb_rank'])]
|
neuralBins['tgbrank'] = [10/math.ln10(10+node['tgb_rank'])]
|
||||||
neuralBins['bias'] = [globMu]
|
neuralBins['bias'] = [globMu]
|
||||||
score = 0
|
score = 0
|
||||||
nb = dict(neuralBins)
|
nb = dict(neuralBins)
|
||||||
act = {}
|
act = {}
|
||||||
|
jig = {}
|
||||||
for b in nb:
|
for b in nb:
|
||||||
act[b] = sum(nb[b])/len(nb[b])
|
act[b], jig[b] = norm.fit(nb[b])
|
||||||
score += act[b] * getWeightForType(b)
|
score += act[b] * getWeightForType(b)
|
||||||
score /= sum([abs(getWeightForType(b)) for b in nb])
|
score /= sum([abs(getWeightForType(b)) for b in nb])
|
||||||
node['score'] = math.tanh(score/10)*10
|
node['score'] = math.tanh(score/10)*10
|
||||||
@ -1199,7 +1201,7 @@ def findNewBooks(G, books, mu, num=-1, minRecSco=5):
|
|||||||
# while batchSize is implemented, we only get a good convergence when we disable it (batchSize=-1)
|
# while batchSize is implemented, we only get a good convergence when we disable it (batchSize=-1)
|
||||||
# but might be necessary to enable later for a larger library for better training performance...
|
# but might be necessary to enable later for a larger library for better training performance...
|
||||||
# maybe try again for 128 books?
|
# maybe try again for 128 books?
|
||||||
def evaluateFitness(books, batchSize=16, debugPrint=False):
|
def evaluateFitness(books, batchSize=16, debugPrint=False, calcPagerank=True):
|
||||||
global weights
|
global weights
|
||||||
G = buildBookGraph(books)
|
G = buildBookGraph(books)
|
||||||
graphAddAuthors(G, books)
|
graphAddAuthors(G, books)
|
||||||
@ -1207,7 +1209,8 @@ def evaluateFitness(books, batchSize=16, debugPrint=False):
|
|||||||
graphAddTopLists(G, books)
|
graphAddTopLists(G, books)
|
||||||
graphAddSeries(G, books)
|
graphAddSeries(G, books)
|
||||||
graphAddTags(G, books)
|
graphAddTags(G, books)
|
||||||
runPagerank(G)
|
if calcPagerank:
|
||||||
|
runPagerank(G)
|
||||||
|
|
||||||
ratedBooks = [n for n in list(G.nodes) if 'rating' in G.nodes[n] and G.nodes[n]['rating'] != None]
|
ratedBooks = [n for n in list(G.nodes) if 'rating' in G.nodes[n] and G.nodes[n]['rating'] != None]
|
||||||
boundsLoss = 0
|
boundsLoss = 0
|
||||||
@ -1216,7 +1219,7 @@ def evaluateFitness(books, batchSize=16, debugPrint=False):
|
|||||||
gradient = {}
|
gradient = {}
|
||||||
for w in weights:
|
for w in weights:
|
||||||
gradient[w] = 0
|
gradient[w] = 0
|
||||||
mu, sigma = genScores(G, books)
|
mu, sigma = genScores(G, books, calcPagerank=runPagerank)
|
||||||
batch = random.sample(ratedBooks, batchSize) if batchSize!=-1 and len(ratedBooks) > batchSize else ratedBooks
|
batch = random.sample(ratedBooks, batchSize) if batchSize!=-1 and len(ratedBooks) > batchSize else ratedBooks
|
||||||
for b in G.nodes:
|
for b in G.nodes:
|
||||||
if b in ratedBooks:
|
if b in ratedBooks:
|
||||||
@ -1248,7 +1251,7 @@ def evaluateFitness(books, batchSize=16, debugPrint=False):
|
|||||||
fit = sum(errSq)/len(errSq) + 0.001*regressionLoss
|
fit = sum(errSq)/len(errSq) + 0.001*regressionLoss
|
||||||
return fit, gradient
|
return fit, gradient
|
||||||
|
|
||||||
def train(initGamma, full=True):
|
def train(initGamma, full=True, noPagerank=False):
|
||||||
global weights
|
global weights
|
||||||
if full:
|
if full:
|
||||||
for wt in weights:
|
for wt in weights:
|
||||||
@ -1257,7 +1260,7 @@ def train(initGamma, full=True):
|
|||||||
gamma = initGamma
|
gamma = initGamma
|
||||||
books = loadBooksFromDB()
|
books = loadBooksFromDB()
|
||||||
bestWeights = copy.copy(weights)
|
bestWeights = copy.copy(weights)
|
||||||
mse, gradient = evaluateFitness(books)
|
mse, gradient = evaluateFitness(books, calcPagerank=not noPagerank)
|
||||||
delta = math.sqrt(sum(gradient[g]**2 for g in gradient)/len(gradient))
|
delta = math.sqrt(sum(gradient[g]**2 for g in gradient)/len(gradient))
|
||||||
best_mse = mse
|
best_mse = mse
|
||||||
stagLen = 0
|
stagLen = 0
|
||||||
@ -1275,7 +1278,7 @@ def train(initGamma, full=True):
|
|||||||
weights[wt] += gamma*gradient[wt]/math.sqrt(delta)
|
weights[wt] += gamma*gradient[wt]/math.sqrt(delta)
|
||||||
#else:
|
#else:
|
||||||
# del weights[wt]
|
# del weights[wt]
|
||||||
mse, gradient = evaluateFitness(books)
|
mse, gradient = evaluateFitness(books, calcPagerank=not noPagerank)
|
||||||
if mse < last_mse:
|
if mse < last_mse:
|
||||||
gamma = gamma*1.25
|
gamma = gamma*1.25
|
||||||
else:
|
else:
|
||||||
@ -1331,6 +1334,8 @@ def cliInterface():
|
|||||||
parser.add_argument('--dark', action="store_true")
|
parser.add_argument('--dark', action="store_true")
|
||||||
parser.add_argument('--v3d', action="store_true")
|
parser.add_argument('--v3d', action="store_true")
|
||||||
parser.add_argument('--imgs', action="store_true")
|
parser.add_argument('--imgs', action="store_true")
|
||||||
|
parser.add_argument('--perf-test', action="store_true")
|
||||||
|
parser.add_argument('--no-pagerank', action="store_true")
|
||||||
cmds = parser.add_subparsers(required=True, dest='cmd')
|
cmds = parser.add_subparsers(required=True, dest='cmd')
|
||||||
|
|
||||||
p_rec = cmds.add_parser('recommend', description="TODO", aliases=['rec'])
|
p_rec = cmds.add_parser('recommend', description="TODO", aliases=['rec'])
|
||||||
@ -1370,14 +1375,33 @@ def cliInterface():
|
|||||||
|
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|
||||||
|
if args.perf_test:
|
||||||
|
perfTestCLI(args)
|
||||||
|
else:
|
||||||
|
mainCLI(args)
|
||||||
|
|
||||||
|
def perfTestCLI(args):
|
||||||
|
import time
|
||||||
|
from pycallgraph import PyCallGraph
|
||||||
|
from pycallgraph import Config
|
||||||
|
from pycallgraph import GlobbingFilter
|
||||||
|
from pycallgraph.output import GraphvizOutput
|
||||||
|
config = Config()
|
||||||
|
config.trace_filter = GlobbingFilter(exclude=[
|
||||||
|
"pycallgraph.*",
|
||||||
|
])
|
||||||
|
with PyCallGraph(output=GraphvizOutput(output_file='perfTests/' + str(int(time.time())) + '.png'), config=config):
|
||||||
|
mainCLI(args)
|
||||||
|
|
||||||
|
def mainCLI(args):
|
||||||
if args.cmd=="train":
|
if args.cmd=="train":
|
||||||
train(args.g, args.full)
|
train(args.g, args.full, args.no_pagerank)
|
||||||
exit()
|
exit()
|
||||||
|
|
||||||
bestListT = 'book'
|
bestListT = 'book'
|
||||||
|
|
||||||
G, books = buildFullGraph(darkMode=args.dark)
|
G, books = buildFullGraph(darkMode=args.dark)
|
||||||
mu, std = genScores(G, books)
|
mu, std = genScores(G, books, calcPagerank=not args.no_pagerank)
|
||||||
|
|
||||||
if not args.keep_whitepapers:
|
if not args.keep_whitepapers:
|
||||||
removeWhitepapers(G)
|
removeWhitepapers(G)
|
||||||
|
Loading…
Reference in New Issue
Block a user