diff --git a/caliGraph.py b/caliGraph.py
index 8376ba1..2bb17e1 100755
--- a/caliGraph.py
+++ b/caliGraph.py
@@ -1,5 +1,6 @@
 #!./.venv/bin/python3.10
 import os
+import re
 import json
 import math
 import copy
@@ -454,28 +455,55 @@ def readColor(book):
 def loadBooksFromDB():
     return json.loads(os.popen("calibredb list --for-machine -f all").read())
 
+def remove_html_tags(text):
+    """Return text with every <...> tag removed, including tags that span lines."""
+    return re.sub(r'<[^>]*>', '', text)
 
-def buildBookGraph(books, darkMode=False):
+def getKeywords(txt,rake):
+    """Return [(score, Title Cased Phrase)] for RAKE phrases scoring >= 2/3 of the best."""
+    rake.extract_keywords_from_text(remove_html_tags(txt))
+    k = []
+    for score, kw in rake.get_ranked_phrases_with_scores():
+        l = len(kw.split(' '))
+        # Exponent is 2/l: single-word scores are squared, longer phrases are rooted.
+        k.append((score**(1/(l*0.5)), kw))
+    if not k:
+        # No phrases (e.g. tag-only or empty text): k[0] below would raise IndexError.
+        return []
+    k.sort(key=lambda x: x[0], reverse=True)
+    minSco = k[0][0]/3*2
+    return [(sco, word.title()) for sco, word in k if sco >= minSco]
+
+def buildBookGraph(books, darkMode=False, extractKeywords=True, mergeTags=True):
+    """Return a graph with one t='book' node per entry of books, keyed by book id.
+    extractKeywords: RAKE keywords from 'comments'; mergeTags: append them to tags."""
     G = nx.Graph()
+    if extractKeywords:
+        from rake_nltk.rake import Rake
+        rake = Rake()  # only built (and only referenced) when extracting keywords
 
     # Books
     for book in books:
+        tags = book['tags']
         if 'rating' in book:
             rating = book['rating']
         else:
             rating = None
         if 'comments' in book:
-            desc = ''  # book['comments']
+            desc = book['comments']
         else:
             desc = ''
+        wantKeywords = extractKeywords and 'comments' in book
+        keywords = getKeywords(book['comments'], rake) if wantKeywords else []
+        if mergeTags:
+            tags = tags + [word for (score, word) in keywords]
         if 'series' in book:
             series = book['series']
             series_index = book['series_index']
         else:
             series = None
             series_index = None
-        G.add_node(book['id'], t='book', label=book['title'], title=book['title'], shape='image', image=book['cover'], rating=rating,
-                   tags=book['tags'], desc=desc, isbn=book['isbn'], files=book['formats'], authors=getAuthors(book), series=series, series_index=series_index)
+        G.add_node(book['id'], t='book', label=book['title'], title=book['title'], shape='image', image=book['cover'], rating=rating, tags=tags, keywords=keywords, desc=desc, isbn=book['isbn'], files=book['formats'], authors=getAuthors(book), series=series, series_index=series_index)
 
     return G
 
@@ -837,6 +865,27 @@ def waveFlow(G, node, n, dist, menge, firstEdge=False):
         if node in bestlist or node in keeplist:
             waveFlow(G, node, m, dist, menge, firstEdge=firstEdge)
 
+def gensimTokensForLines(lines, tokens_only=False):
+    """Yield one item per line: its token list, or a TaggedDocument tagged [index]."""
+    # Default (tokens_only=False) yields tagged documents, i.e. training data.
+    import gensim  # imported here so the name is bound in this function's own scope
+    for i, line in enumerate(lines):
+        tokens = gensim.utils.simple_preprocess(line)
+        # For training data, add tags
+        yield tokens if tokens_only else gensim.models.doc2vec.TaggedDocument(tokens, [i])
+
+def buildDoc2Vec(books):  # NOTE(review): work-in-progress stub; `books` is not used in the body
+    import gensim  # binds gensim only inside this function's scope
+    for n in list(G.nodes):  # NOTE(review): G is neither a parameter nor assigned here — confirm a global G exists
+        node = G.nodes[n]
+        if node['t'] == 'book':
+            pass  # placeholder: nothing is collected from book nodes yet
+    gensimTokensForLines(lines)  # NOTE(review): `lines` is not defined here; the returned generator is discarded
+
+def shell(G, books, mu, std):
+    from ptpython.repl import embed  # imported lazily, when shell() is actually called
+    embed(globals(), locals())  # hands the REPL the module globals plus this call's locals (G, books, mu, std, embed)
+
 def evaluateFitness(books, debugPrint=False):
     global weights
     G = buildBookGraph(books)
@@ -985,6 +1034,8 @@ def cliInterface():
 
     p_comp = cmds.add_parser('competence', description="TODO", aliases=[])
 
+    p_shell = cmds.add_parser('shell', description="TODO", aliases=[])
+    
     p_full = cmds.add_parser('full', description="TODO", aliases=[])
 
     args = parser.parse_args()
@@ -1018,7 +1069,10 @@ def cliInterface():
         fullGraph(G, not args.keep_top_lists)
     elif args.cmd=="competence":
         recommenderCompetence(G)
+    elif args.cmd=="shell":
+        # Open the interactive shell with G, books, mu and std passed through.
+        shell(G, books, mu, std)
     elif args.cmd=="progress":
         progress(G, args.m)
         return
     else: