Better Tag-Pruning (keep more good tags)
This commit is contained in:
parent
a8d9f96e70
commit
cc6606f468
24
caliGraph.py
24
caliGraph.py
@ -28,11 +28,13 @@ def getRecommenders(book):
|
|||||||
for tag in book['tags']:
|
for tag in book['tags']:
|
||||||
if tag.find(" Recommendation") != -1:
|
if tag.find(" Recommendation") != -1:
|
||||||
yield tag.replace(" Recommendation", "")
|
yield tag.replace(" Recommendation", "")
|
||||||
|
elif tag.find("s Literature Club") != -1:
|
||||||
|
yield tag.replace("s Literature Club", "")
|
||||||
|
|
||||||
|
|
||||||
def getTags(book):
|
def getTags(book):
|
||||||
for tag in book['tags']:
|
for tag in book['tags']:
|
||||||
if tag.find(" Recommendation") == -1 and tag.find(" Top ") == -1:
|
if tag.find(" Recommendation") == -1 and tag.find("s Literature Club") == -1 and tag.find(" Top ") == -1:
|
||||||
yield tag
|
yield tag
|
||||||
|
|
||||||
|
|
||||||
@ -166,8 +168,14 @@ def pruneTags(G, minCons=2):
|
|||||||
foundCon = 0
|
foundCon = 0
|
||||||
for book in G.adj[n]:
|
for book in G.adj[n]:
|
||||||
for con in G.adj[book]:
|
for con in G.adj[book]:
|
||||||
if G.nodes[con]['t'] not in ['tag', 'topList', 'series']:
|
conType = G.nodes[con]['t']
|
||||||
foundCon += 1
|
if conType not in ['topList']:
|
||||||
|
if conType in ['recommender']:
|
||||||
|
foundCon += 0.5
|
||||||
|
elif conType in ['tag', 'series']:
|
||||||
|
foundCon += 0.25
|
||||||
|
else:
|
||||||
|
foundCon += 1
|
||||||
if foundCon > minCons:
|
if foundCon > minCons:
|
||||||
G.remove_node(n)
|
G.remove_node(n)
|
||||||
|
|
||||||
@ -483,9 +491,9 @@ def recommendNBooks(G, mu, std, n):
|
|||||||
removeEdge(G)
|
removeEdge(G)
|
||||||
removeHighSpanTags(G, 9)
|
removeHighSpanTags(G, 9)
|
||||||
removeDangling(G, alsoBooks=False)
|
removeDangling(G, alsoBooks=False)
|
||||||
pruneTags(G, 4)
|
pruneTags(G, 6)
|
||||||
removeBad(G, mu, groups=['book'])
|
removeBad(G, mu, groups=['book'])
|
||||||
pruneTags(G, 3)
|
pruneTags(G, 4.25)
|
||||||
pruneRecommenderCons(G, int(n/7)+1)
|
pruneRecommenderCons(G, int(n/7)+1)
|
||||||
pruneAuthorCons(G, int(n/15))
|
pruneAuthorCons(G, int(n/15))
|
||||||
removeTopLists(G)
|
removeTopLists(G)
|
||||||
@ -544,7 +552,7 @@ def analyze(G, type_name, name, dist=2.7):
|
|||||||
print("Best Match: "+match['label'])
|
print("Best Match: "+match['label'])
|
||||||
|
|
||||||
menge = set()
|
menge = set()
|
||||||
pruneDist(G, match, n, dist, menge)
|
waveFlow(G, match, n, dist, menge)
|
||||||
for n in list(G.nodes):
|
for n in list(G.nodes):
|
||||||
if n not in menge:
|
if n not in menge:
|
||||||
G.remove_node(n)
|
G.remove_node(n)
|
||||||
@ -560,7 +568,7 @@ def analyze(G, type_name, name, dist=2.7):
|
|||||||
addScoreToLabels(G)
|
addScoreToLabels(G)
|
||||||
match['label'] = "*"+match['label']+"*"
|
match['label'] = "*"+match['label']+"*"
|
||||||
|
|
||||||
def pruneDist(G, node, n, dist, menge, firstEdge=False):
|
def waveFlow(G, node, n, dist, menge, firstEdge=False):
|
||||||
if dist <= 0:
|
if dist <= 0:
|
||||||
return
|
return
|
||||||
dist -= 1
|
dist -= 1
|
||||||
@ -600,7 +608,7 @@ def pruneDist(G, node, n, dist, menge, firstEdge=False):
|
|||||||
for m in list(G.adj[n]):
|
for m in list(G.adj[n]):
|
||||||
node = G.nodes[m]
|
node = G.nodes[m]
|
||||||
if node in bestlist or node in keeplist:
|
if node in bestlist or node in keeplist:
|
||||||
pruneDist(G, node, m, dist, menge, firstEdge=firstEdge)
|
waveFlow(G, node, m, dist, menge, firstEdge=firstEdge)
|
||||||
|
|
||||||
def cliInterface():
|
def cliInterface():
|
||||||
import argparse
|
import argparse
|
||||||
|
Loading…
Reference in New Issue
Block a user