New shell-command, nltk for keyword extraction from description
This commit is contained in:
parent
36baf1aaec
commit
199fab7875
64
caliGraph.py
64
caliGraph.py
@ -1,5 +1,6 @@
|
||||
#!./.venv/bin/python3.10
|
||||
import os
|
||||
import re
|
||||
import json
|
||||
import math
|
||||
import copy
|
||||
@ -454,28 +455,55 @@ def readColor(book):
|
||||
def loadBooksFromDB():
    """Return the decoded output of ``calibredb list --for-machine -f all``.

    The command's stdout is parsed with ``json.loads``; the result is whatever
    JSON document calibredb prints (presumably a list of book dicts -- the
    callers iterate over it and index entries by key).
    """
    # The context manager closes the pipe once the output has been read;
    # previously the pipe object was left open.
    with os.popen("calibredb list --for-machine -f all") as pipe:
        return json.loads(pipe.read())
|
||||
|
||||
def remove_html_tags(text):
    """Return *text* with every ``<...>`` span removed.

    The match is non-greedy, so each tag is stripped on its own and the text
    between tags is kept. Nothing else is altered (entities such as ``&amp;``
    stay as they are).
    """
    # DOTALL lets '.' cross line breaks, so a tag whose attributes wrap onto
    # the next line is removed too instead of being left in the text.
    return re.sub(r'<.*?>', '', text, flags=re.DOTALL)
|
||||
|
||||
def buildBookGraph(books, darkMode=False):
|
||||
def getKeywords(txt, rake):
    """Extract the strongest keyword phrases from an HTML description.

    Parameters:
        txt:  description text; HTML tags are stripped before extraction.
        rake: object providing ``extract_keywords_from_text(text)`` and
              ``get_ranked_phrases_with_scores()`` (the caller passes a
              ``rake_nltk`` ``Rake`` instance).

    Returns:
        A list of ``(score, phrase)`` tuples, best first, with every phrase
        title-cased. Each raw score is re-weighted to
        ``score ** (1 / (word_count * 0.5))`` and only phrases scoring at
        least two thirds of the best re-weighted score are kept. An empty
        list is returned when no phrases were found.
    """
    txt = remove_html_tags(txt)
    rake.extract_keywords_from_text(txt)
    ranked = rake.get_ranked_phrases_with_scores()
    if not ranked:
        # Nothing extracted (e.g. empty description): there is no best score
        # to derive a cutoff from.
        return []

    rescored = [
        (score ** (1 / (len(phrase.split(' ')) * 0.5)), phrase)
        for score, phrase in ranked
    ]
    rescored.sort(key=lambda pair: pair[0], reverse=True)

    cutoff = rescored[0][0] / 3 * 2
    # Title-case every kept phrase, including when nothing falls below the
    # cutoff (that path used to return the phrases un-titled).
    return [(score, phrase.title()) for score, phrase in rescored if score >= cutoff]
|
||||
|
||||
def buildBookGraph(books, darkMode=False, extractKeywords=True, mergeTags=True):
    """Build a graph containing one ``t='book'`` node per entry in *books*.

    Parameters:
        books: iterable of book dicts. Every dict must provide ``id``,
            ``title``, ``cover``, ``tags``, ``isbn`` and ``formats``;
            ``rating``, ``comments`` and ``series`` are optional
            (``series_index`` is read whenever ``series`` is present).
        darkMode: accepted for interface compatibility; not used in this
            function.
        extractKeywords: when true, keywords are extracted from each book's
            ``comments`` via ``getKeywords`` and stored on the node.
        mergeTags: when true, the extracted keyword phrases are appended to
            the node's tag list.

    Returns:
        The populated ``nx.Graph``.
    """
    G = nx.Graph()
    rake = None
    if extractKeywords:
        # Imported lazily so rake_nltk is only needed when extraction is on.
        from rake_nltk.rake import Rake
        rake = Rake()

    # Books
    for book in books:
        tags = book['tags']
        rating = book.get('rating')
        desc = book.get('comments', '')

        if extractKeywords and 'comments' in book:
            keywords = getKeywords(book['comments'], rake)
        else:
            keywords = []
        if mergeTags:
            # Builds a new list; the book's own tag list is left untouched.
            tags = tags + [word for (score, word) in keywords]

        if 'series' in book:
            series = book['series']
            series_index = book['series_index']
        else:
            series = None
            series_index = None

        # Single node insertion: an earlier add_node call with the unmerged
        # tags was immediately overwritten by this one and has been dropped.
        G.add_node(
            book['id'],
            t='book',
            label=book['title'],
            title=book['title'],
            shape='image',
            image=book['cover'],
            rating=rating,
            tags=tags,
            keywords=keywords,
            desc=desc,
            isbn=book['isbn'],
            files=book['formats'],
            authors=getAuthors(book),
            series=series,
            series_index=series_index,
        )

    return G
|
||||
|
||||
@ -837,6 +865,27 @@ def waveFlow(G, node, n, dist, menge, firstEdge=False):
|
||||
if node in bestlist or node in keeplist:
|
||||
waveFlow(G, node, m, dist, menge, firstEdge=firstEdge)
|
||||
|
||||
def gensimTokensForLines(lines, tokens_only=False):
    """Yield gensim-preprocessed tokens for each entry of *lines*.

    Parameters:
        lines: iterable of text strings, one document per entry.
        tokens_only: when true, yield the bare token list of each line;
            otherwise (the default) yield a ``TaggedDocument`` tagged with
            the line's position in *lines*, as used for training data.

    ``tokens_only`` used to be read without being defined anywhere in this
    function, which raised ``NameError`` on the first iteration; it is now an
    optional parameter so existing single-argument calls keep working.
    """
    # Local import: the visible part of the file only imports gensim inside
    # buildDoc2Vec, so the name is not guaranteed to exist at module level.
    import gensim

    for i, line in enumerate(lines):
        tokens = gensim.utils.simple_preprocess(line)
        if tokens_only:
            yield tokens
        else:
            # For training data, add tags
            yield gensim.models.doc2vec.TaggedDocument(tokens, [i])
|
||||
|
||||
def buildDoc2Vec(books):
    """Prepare the book descriptions of *books* as gensim documents.

    Builds the book graph, collects the ``desc`` attribute of every
    ``t='book'`` node and hands those texts to ``gensimTokensForLines``.

    Returns:
        The (lazy) generator produced by ``gensimTokensForLines``.

    NOTE(review): this function previously referenced ``G`` and ``lines``
    without defining them and discarded the generator; deriving both from
    *books* is the presumed intent -- confirm. No model is trained here yet.
    """
    import gensim  # noqa: F401 -- kept so a missing gensim fails early

    # Keywords are not needed for the descriptions, so skip extraction.
    G = buildBookGraph(books, extractKeywords=False)
    lines = []
    for n in G.nodes:
        node = G.nodes[n]
        if node['t'] == 'book':
            lines.append(node['desc'])
    return gensimTokensForLines(lines)
|
||||
|
||||
def shell(G, books, mu, std):
    """Open an interactive ptpython REPL.

    The REPL is given this module's globals and this function's locals, so
    ``G``, ``books``, ``mu`` and ``std`` are available by name inside it.
    """
    from ptpython.repl import embed
    embed(globals=globals(), locals=locals())
|
||||
|
||||
def evaluateFitness(books, debugPrint=False):
|
||||
global weights
|
||||
G = buildBookGraph(books)
|
||||
@ -985,6 +1034,8 @@ def cliInterface():
|
||||
|
||||
p_comp = cmds.add_parser('competence', description="TODO", aliases=[])
|
||||
|
||||
p_shell = cmds.add_parser('shell', description="TODO", aliases=[])
|
||||
|
||||
p_full = cmds.add_parser('full', description="TODO", aliases=[])
|
||||
|
||||
args = parser.parse_args()
|
||||
@ -1018,7 +1069,10 @@ def cliInterface():
|
||||
fullGraph(G, not args.keep_top_lists)
|
||||
elif args.cmd=="competence":
|
||||
recommenderCompetence(G)
|
||||
elif args.cmd=="progress":
|
||||
elif args.cmd=="shell":
|
||||
shell(G, books, mu, std)
|
||||
elif args.cmd=="competence":
|
||||
recommenderCompetence(G)
|
||||
progress(G, args.m)
|
||||
return
|
||||
else:
|
||||
|
Loading…
Reference in New Issue
Block a user