Allow disabling pagerank
This commit is contained in:
parent
01d41f3a82
commit
9a02bdc2a8
26
caliGraph.py
26
caliGraph.py
@ -427,7 +427,8 @@ def scoreUnread(G, globMu, globStd):
|
||||
neuralBins['sigma'] = [node['std']]
|
||||
neuralBins['median'] = [node['median']]
|
||||
neuralBins['se'] = [node['se']]
|
||||
neuralBins['pagerank'] = [node['pagerank_score']]
|
||||
if 'pagerank_score' in node:
|
||||
neuralBins['pagerank'] = [node['pagerank_score']]
|
||||
if 'tgb_rank' in node:
|
||||
neuralBins['tgbrank'] = [10/math.ln10(10+node['tgb_rank'])]
|
||||
neuralBins['bias'] = [globMu]
|
||||
@ -1199,7 +1200,7 @@ def findNewBooks(G, books, mu, num=-1, minRecSco=5):
|
||||
# while batchSize is implemented, we only get a good convergence when we disable it (batchSize=-1)
|
||||
# but might be necessary to enable later for a larger library for better training performance...
|
||||
# maybe try again for 128 books?
|
||||
def evaluateFitness(books, batchSize=16, debugPrint=False):
|
||||
def evaluateFitness(books, batchSize=16, debugPrint=False, runPagerank=True):
|
||||
global weights
|
||||
G = buildBookGraph(books)
|
||||
graphAddAuthors(G, books)
|
||||
@ -1207,7 +1208,8 @@ def evaluateFitness(books, batchSize=16, debugPrint=False):
|
||||
graphAddTopLists(G, books)
|
||||
graphAddSeries(G, books)
|
||||
graphAddTags(G, books)
|
||||
runPagerank(G)
|
||||
if runPagerank:
|
||||
runPagerank(G)
|
||||
|
||||
ratedBooks = [n for n in list(G.nodes) if 'rating' in G.nodes[n] and G.nodes[n]['rating'] != None]
|
||||
boundsLoss = 0
|
||||
@ -1216,7 +1218,7 @@ def evaluateFitness(books, batchSize=16, debugPrint=False):
|
||||
gradient = {}
|
||||
for w in weights:
|
||||
gradient[w] = 0
|
||||
mu, sigma = genScores(G, books)
|
||||
mu, sigma = genScores(G, books, calcPagerank=runPagerank)
|
||||
batch = random.sample(ratedBooks, batchSize) if batchSize!=-1 and len(ratedBooks) > batchSize else ratedBooks
|
||||
for b in G.nodes:
|
||||
if b in ratedBooks:
|
||||
@ -1248,7 +1250,7 @@ def evaluateFitness(books, batchSize=16, debugPrint=False):
|
||||
fit = sum(errSq)/len(errSq) + 0.001*regressionLoss
|
||||
return fit, gradient
|
||||
|
||||
def train(initGamma, full=True):
|
||||
def train(initGamma, full=True, noPagerank=False):
|
||||
global weights
|
||||
if full:
|
||||
for wt in weights:
|
||||
@ -1257,7 +1259,7 @@ def train(initGamma, full=True):
|
||||
gamma = initGamma
|
||||
books = loadBooksFromDB()
|
||||
bestWeights = copy.copy(weights)
|
||||
mse, gradient = evaluateFitness(books)
|
||||
mse, gradient = evaluateFitness(books, runPagerank=not noPagerank)
|
||||
delta = math.sqrt(sum(gradient[g]**2 for g in gradient)/len(gradient))
|
||||
best_mse = mse
|
||||
stagLen = 0
|
||||
@ -1275,7 +1277,7 @@ def train(initGamma, full=True):
|
||||
weights[wt] += gamma*gradient[wt]/math.sqrt(delta)
|
||||
#else:
|
||||
# del weights[wt]
|
||||
mse, gradient = evaluateFitness(books)
|
||||
mse, gradient = evaluateFitness(books, runPagerank=not noPagerank)
|
||||
if mse < last_mse:
|
||||
gamma = gamma*1.25
|
||||
else:
|
||||
@ -1332,6 +1334,7 @@ def cliInterface():
|
||||
parser.add_argument('--v3d', action="store_true")
|
||||
parser.add_argument('--imgs', action="store_true")
|
||||
parser.add_argument('--perf-test', action="store_true")
|
||||
parser.add_argument('--no-pagerank', action="store_true")
|
||||
cmds = parser.add_subparsers(required=True, dest='cmd')
|
||||
|
||||
p_rec = cmds.add_parser('recommend', description="TODO", aliases=['rec'])
|
||||
@ -1371,12 +1374,13 @@ def cliInterface():
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
if args.perfTest:
|
||||
if args.perf_test:
|
||||
perfTestCLI(args)
|
||||
else:
|
||||
mainCLI(args)
|
||||
|
||||
def perfTestCLI(args):
|
||||
import time
|
||||
from pycallgraph import PyCallGraph
|
||||
from pycallgraph import Config
|
||||
from pycallgraph import GlobbingFilter
|
||||
@ -1385,18 +1389,18 @@ def perfTestCLI(args):
|
||||
config.trace_filter = GlobbingFilter(exclude=[
|
||||
"pycallgraph.*",
|
||||
])
|
||||
with PyCallGraph(output=GraphvizOutput(output_file='perfTests/serve_httpd_' + str(int(time.time())) + '.png'), config=config):
|
||||
with PyCallGraph(output=GraphvizOutput(output_file='perfTests/' + str(int(time.time())) + '.png'), config=config):
|
||||
mainCLI(args)
|
||||
|
||||
def mainCLI(args):
|
||||
if args.cmd=="train":
|
||||
train(args.g, args.full)
|
||||
train(args.g, args.full, args.no_pagerank)
|
||||
exit()
|
||||
|
||||
bestListT = 'book'
|
||||
|
||||
G, books = buildFullGraph(darkMode=args.dark)
|
||||
mu, std = genScores(G, books)
|
||||
mu, std = genScores(G, books, calcPagerank=not args.no_pagerank)
|
||||
|
||||
if not args.keep_whitepapers:
|
||||
removeWhitepapers(G)
|
||||
|
Loading…
Reference in New Issue
Block a user