New feature: Dissonance

2022-09-11 18:56:47 +02:00 · 2022-09-11 18:56:47 +02:00 · 1c34d2876f
commit 1c34d2876f
parent 6af38c686f
1 changed files with 329 additions and 167 deletions
--- a/caliGraph.py
+++ b/caliGraph.py
@ -18,9 +18,11 @@ import plotly.graph_objects as go
 import wikipedia
 class Error(Exception):
    """Application-specific exception type raised by this module's own checks."""
 def getAllAuthors(books):
    authors = set()
    for book in books:
@ -131,6 +133,7 @@ def removePriv(G):
            if 'priv' in node['tags']:
                G.remove_node(n)
 def removeWhitepapers(G):
    for n in list(G.nodes):
        node = G.nodes[n]
@ -146,6 +149,7 @@ def removeDangling(G, alsoBooks=False):
            if not len(G.adj[n]):
                G.remove_node(n)
 def removeThinRecs(G, minCons=3):
    for n in list(G.nodes):
        node = G.nodes[n]
@ -153,6 +157,7 @@ def removeThinRecs(G, minCons=3):
            if not len(G.adj[n]) >= minCons:
                G.remove_node(n)
 def removeEdge(G):
    for n in list(G.nodes):
        node = G.nodes[n]
@ -256,6 +261,7 @@ def pruneRecommenderCons(G, maxCons=5):
                            if foundCon < 2:
                                G.remove_node(m)
 def pruneAuthorCons(G, maxCons=3):
    for n in list(G.nodes):
        node = G.nodes[n]
@ -281,6 +287,7 @@ def pruneAuthorCons(G, maxCons=3):
                            if foundCon < 2:
                                G.remove_node(m)
 def removeHighSpanTags(G, maxCons=5):
    for n in list(G.nodes):
        node = G.nodes[n]
@ -303,24 +310,28 @@ def removeTopLists(G):
        if node['t'] == 'topList':
            G.remove_node(n)
 def removeRecommenders(G):
    """Remove, in place, every node of G whose type tag ``t`` is 'recommender'."""
    # Walk a snapshot of the node keys so that removals cannot disturb iteration.
    for key in tuple(G.nodes):
        if G.nodes[key]['t'] != 'recommender':
            continue
        G.remove_node(key)
 def removeAuthors(G):
    """Remove, in place, every node of G whose type tag ``t`` is 'author'."""
    # Walk a snapshot of the node keys so that removals cannot disturb iteration.
    for key in tuple(G.nodes):
        if G.nodes[key]['t'] != 'author':
            continue
        G.remove_node(key)
 def removeSeries(G):
    """Remove, in place, every node of G whose type tag ``t`` is 'series'."""
    # Walk a snapshot of the node keys so that removals cannot disturb iteration.
    for key in tuple(G.nodes):
        if G.nodes[key]['t'] != 'series':
            continue
        G.remove_node(key)
 def removeRestOfSeries(G):
    for n in list(G.nodes):
        node = G.nodes[n]
@ -336,6 +347,7 @@ def removeRestOfSeries(G):
                if adjNode['series_index'] > seriesState + 1.0001:
                    G.remove_node(adj)
 def removeUnusedRecommenders(G):
    for n in list(G.nodes):
        node = G.nodes[n]
@ -347,6 +359,7 @@ def removeUnusedRecommenders(G):
            else:  # No unrated recommendation
                G.remove_node(n)
 def removeUselessReadBooks(G):
    minForce = 1.5
    minContact = 2
@ -368,6 +381,7 @@ def removeUselessReadBooks(G):
            if force < minForce or contacts < minContact:
                G.remove_node(n)
 def removeUselessTags(G, minUnread=1):
    for n in list(G.nodes):
        node = G.nodes[n]
@ -380,6 +394,7 @@ def removeUselessTags(G, minUnread=1):
            if foundUnread < minUnread:
                G.remove_node(n)
 def removeUselessSeries(G, minSco=0):
    for n in list(G.nodes):
        node = G.nodes[n]
@ -387,6 +402,7 @@ def removeUselessSeries(G, minSco=0):
            if len(G.adj[n]) < 2 or node['score'] < minSco:
                G.remove_node(n)
 def scoreOpinions(G, globMu, globStd):
    for n in list(G.nodes):
        node = G.nodes[n]
@ -406,6 +422,7 @@ def scoreOpinions(G, globMu, globStd):
            else:
                node['score'] = None
 def scoreUnread(G, globMu, globStd):
    for n in list(G.nodes):
        feedbacks = [globMu]
@ -417,13 +434,15 @@ def scoreUnread(G, globMu, globStd):
                for adj in adjacens:
                    adjNode = G.nodes[adj]
                    if 'score' in adjNode and adjNode['score'] != None:
-                        w = [adjNode['t'], G[n][adj]['weight'] if 'weight' in G[n][adj] else 1]
+                        w = [adjNode['t'], G[n][adj]['weight']
                             if 'weight' in G[n][adj] else 1]
                        for fb in adjNode['feedbacks']:
                            feedbacks.append(fb)
                            ws.append(w)
                if len(feedbacks):
                    node['mean'], node['std'] = norm.fit(feedbacks)
-                    node['median'] = np.percentile(feedbacks, [50], method='linear')[0]
+                    node['median'] = np.percentile(
                        feedbacks, [50], method='linear')[0]
                    node['se'] = globStd / math.sqrt(len(feedbacks))
                    feedbacks.append(node['pagerank_score'])
                    ws.append(['pagerank'])
@ -437,15 +456,18 @@ def scoreUnread(G, globMu, globStd):
                    # ws.append(['se'])
                    feedbacks.append(globMu)
                    ws.append(['bias'])
-                    node['score'] = sum([fb*getWeightForType(w[0], w[1] if len(w)>1 else 1) for fb, w in zip(feedbacks, ws)])/sum([getWeightForType(w[0], w[1] if len(w)>1 else 1) for w in ws])
+                    node['score'] = sum([fb*getWeightForType(w[0], w[1] if len(w) > 1 else 1) for fb, w in zip(
                        feedbacks, ws)])/sum([getWeightForType(w[0], w[1] if len(w) > 1 else 1) for w in ws])
                    node['_act'] = feedbacks
                    node['_wgh'] = ws
                else:
-                    node['score'] = globMu + errorFac*globStd + len(feedbacks)*0.0000000001
+                    node['score'] = globMu + errorFac * \
                        globStd + len(feedbacks)*0.0000000001
                if 'series' in node:
                    if node['series_index'] == 1.0:
                        node['score'] += 0.000000001
 def getWeightForType(nodeType, edgeWeight=1):
    global weights
    w = weights[nodeType]
@ -454,6 +476,7 @@ def getWeightForType(nodeType, edgeWeight=1):
    else:
        return w
 def printBestList(G, t='book', num=-1):
    bestlist = []
    for n in list(G.nodes):
@ -461,10 +484,12 @@ def printBestList(G, t='book', num=-1):
        if node['t'] == t:
            if 'score' in node and node['score'] != None:
                bestlist.append(node)
-    bestlist.sort(key=lambda node: node['score'] + 0.00001*(node['se'] if 'se' in node else 0), reverse=True)
+    bestlist.sort(key=lambda node: node['score'] + 0.00001 *
                  (node['se'] if 'se' in node else 0), reverse=True)
    for i, book in enumerate(bestlist):
        if t == 'book':
-            line = book['title'] + " ("+" & ".join(book['authors'])+")"+": {:.5f}".format(book['score'])
+            line = book['title'] + " ("+" & ".join(book['authors'])+")" + \
                ": {:.5f}".format(book['score'])
        else:
            line = book['label']
        print("["+str(i+1).zfill(int((math.log10(num) if num != -1 else 3)+1))+"] "+line)
@ -478,12 +503,14 @@ def readColor(book):
    else:
        return 'gray'
 def loadBooksFromDB():
    """Fetch the books from calibreDB and run the MRB data infusion on them.

    Returns whatever ``calibreDB.getBooks()`` produced, after it has been
    handed to ``infuseDataFromMRB``.
    """
    library = calibreDB.getBooks()
    # The TGB step (infuseDataFromTGB) is currently disabled.
    infuseDataFromMRB(library)
    return library
 def mrbGetBook(mrbdf, title, authors):
    title = title.split('(')[0]
    title = title.replace('*', '')
@ -499,6 +526,7 @@ def mrbGetBook(mrbdf, title, authors):
                return d
    return False
 def tgbGetBook(df, title, authors):
    title = title.split('(')[0]
    title = title.replace('*', '')
@ -514,6 +542,7 @@ def tgbGetBook(df, title, authors):
                return d
    return False
 def infuseDataFromMRB(books):
    mrbdf = pd.read_csv('rec_dbs/mrb_db.csv')
    for book in books:
@ -522,6 +551,7 @@ def infuseDataFromMRB(books):
            for rec in str(mrb['recommender']).split('|'):
                book['tags'] += [rec + ':MRB']
 def infuseDataFromTGB(books):
    for i in range(1, 3):
        df = pd.read_csv('rec_dbs/tgb_'+str(i)+'.csv')
@ -530,12 +560,14 @@ def infuseDataFromTGB(books):
            if tgb:
                book['tgb_rank'] = int(tgb['id'])
 class calibreDB():
    @classmethod
    def _getTxt(cls, request):
        ret = os.popen("calibredb "+request).read()
        if not ret:
-            raise Error('Unable to connect to CalibreDB. Please close all open instances of Calibre.')
+            raise Error(
                'Unable to connect to CalibreDB. Please close all open instances of Calibre.')
        return ret
    @classmethod
@ -557,7 +589,8 @@ class calibreDB():
        cols = cls.getCustomColumns()
        avai = ['calice_score' in cols, 'calice_rating' in cols]
        if not any(avai):
-            raise Error('Custom Columns missing from CalibreDB. Create columns for "Calice Score" and/or "Calice Rating" using the "createCaliceColumn" command.')
+            raise Error(
                'Custom Columns missing from CalibreDB. Create columns for "Calice Score" and/or "Calice Rating" using the "createCaliceColumn" command.')
        return avai
    @classmethod
@ -586,9 +619,12 @@ class calibreDB():
                cls._getTxt('set_custom calice_score '+str(bookId)+' ""')
            else:
                if sco:
-                    cls._getTxt('set_custom calice_score '+str(bookId)+' '+str(round(score,5)))
+                    cls._getTxt('set_custom calice_score ' +
                                str(bookId)+' '+str(round(score, 5)))
                if rat:
-                    cls._getTxt('set_custom calice_rating '+str(bookId)+' '+str(int(round(score))))
+                    cls._getTxt('set_custom calice_rating ' +
                                str(bookId)+' '+str(int(round(score))))
 def calice(G):
    scores = {}
@ -602,10 +638,12 @@ def calice(G):
    calibreDB.writeCaliceColumnMultiple(scores)
    print('Done.')
 def remove_html_tags(text):
    """Return ``text`` with every ``<...>`` span (non-greedy, single line) removed."""
    return re.sub('<.*?>', '', text)
 def getKeywords(txt, rake):
    txt = remove_html_tags(txt)
    k = []
@ -624,6 +662,7 @@ def getKeywords(txt,rake):
        return k
    return []
 def runPagerank(G):
    try:
        scores = nx.pagerank(G=G)
@ -634,6 +673,7 @@ def runPagerank(G):
    for n in list(G.nodes):
        G.nodes[n]['pagerank_score'] = scores[n] if n in scores else 0
 def buildBookGraph(books, darkMode=False, extractKeywords=True, mergeTags=True):
    G = nx.Graph()
    if extractKeywords:
@ -652,7 +692,8 @@ def buildBookGraph(books, darkMode=False, extractKeywords=True, mergeTags=True):
        else:
            desc = ''
        if 'comments' in book and extractKeywords:
-            sanitized = re.sub(r'[^a-zA-Z0-9\s\.äöü]+', '', book['comments']).replace('\n',' ')
+            sanitized = re.sub(r'[^a-zA-Z0-9\s\.äöü]+',
                               '', book['comments']).replace('\n', ' ')
            keywords = getKeywords(sanitized, rake)
        else:
            keywords = []
@ -664,10 +705,12 @@ def buildBookGraph(books, darkMode=False, extractKeywords=True, mergeTags=True):
        else:
            series = None
            series_index = None
-        G.add_node(book['id'], t='book', label=book['title'], title=book['title'], shape='image', image=book['cover'], rating=rating, tags=tags, keywords=keywords, desc=desc, isbn=book['isbn'], files=book['formats'], authors=getAuthors(book), series=series, series_index=series_index, calibreID=book['id'])
+        G.add_node(book['id'], t='book', label=book['title'], title=book['title'], shape='image', image=book['cover'], rating=rating, tags=tags, keywords=keywords,
                   desc=desc, isbn=book['isbn'], files=book['formats'], authors=getAuthors(book), series=series, series_index=series_index, calibreID=book['id'])
    return G
 def getWikiImage(search_term):
    from fuzzywuzzy import fuzz
    WIKI_REQUEST = 'http://en.wikipedia.org/w/api.php?action=query&prop=pageimages&format=json&piprop=original&titles='
@ -681,12 +724,14 @@ def getWikiImage(search_term):
        title = wkpage.title
        response = requests.get(WIKI_REQUEST+title)
        json_data = json.loads(response.text)
-        img_link = list(json_data['query']['pages'].values())[0]['original']['source']
+        img_link = list(json_data['query']['pages'].values())[
            0]['original']['source']
        return img_link
    except:
        print('[!] No match for '+search_term+' on WikiPedia...')
        return None
 def graphAddAuthors(G, books, darkMode=False):
    for author in getAllAuthors(books):
        G.add_node('a/'+author, color='green', t='author', label=author)
@ -695,6 +740,7 @@ def graphAddAuthors(G, books, darkMode=False):
            G.add_edge('a/'+author, book['id'], color=readColor(book))
    return G
 def graphAddRecommenders(G, books, darkMode=False):
    for rec in getAllRecommenders(books):
        G.add_node('r/'+rec, color='orange', t='recommender', label=rec)
@ -703,6 +749,7 @@ def graphAddRecommenders(G, books, darkMode=False):
            G.add_edge('r/'+rec, book['id'], color=readColor(book))
    return G
 def graphAddTopLists(G, books, darkMode=False):
    for tl in getAllTopLists(books):
        G.add_node('t/'+tl, color='yellow', t='topList', label=tl)
@ -715,7 +762,8 @@ def graphAddTopLists(G, books, darkMode=False):
 def graphAddSeries(G, books, darkMode=False):
    for series in getAllSeries(books):
-        G.add_node('s/'+series, color='red', t='series', label=series, shape='triangle')
+        G.add_node('s/'+series, color='red', t='series',
                   label=series, shape='triangle')
    for book in books:
        if 'series' in book:
            G.add_edge('s/'+book['series'], book['id'], color=readColor(book))
@ -724,7 +772,8 @@ def graphAddSeries(G, books, darkMode=False):
 def graphAddTags(G, books, darkMode=False):
    for tag in getAllTags(books):
-        G.add_node('t/'+tag, color=['lightGray','darkgray'][darkMode], t='tag', label=tag, shape='box')
+        G.add_node('t/'+tag, color=['lightGray', 'darkgray']
                   [darkMode], t='tag', label=tag, shape='box')
    for book in books:
        for tag in getTags(book):
            G.add_edge('t/'+tag, book['id'], color=readColor(book))
@ -770,7 +819,8 @@ def addScoreToLabels(G):
                node['label'] += " ("+str(node['rating'])+")"
            else:
                if 'score' in node and node['score'] != None and 'se' in node:
-                    node['label'] += " ({:.2f}±{:.1f})".format(node['score'], node['se'])
+                    node['label'] += " ({:.2f}±{:.1f})".format(
                        node['score'], node['se'])
                else:
                    node['label'] += " (0±∞)"
@ -870,6 +920,7 @@ def genAndShow3D(G, darkMode=False):
    fig.show()
 def buildFullGraph(darkMode=False):
    books = loadBooksFromDB()
    G = buildBookGraph(books, darkMode=darkMode)
@ -890,6 +941,7 @@ def genScores(G, books, calcPagerank=True):
    scoreUnread(G, globMu, globStd)
    return globMu, globStd
 def addImageToNode(node, cache, shape='circularImage'):
    name = node['label'].split(' (')[0].replace('*', '')
    if not name in cache or (cache[name] == False and random.random() < 0.05):
@ -906,6 +958,7 @@ def addImageToNode(node, cache, shape='circularImage'):
        node['image'] = img
        node['shape'] = shape
 def addImagesToNodes(G):
    try:
        with open('.imgLinkCache.json', 'r') as cf:
@ -915,10 +968,12 @@ def addImagesToNodes(G):
    for n in list(G.nodes):
        node = G.nodes[n]
        if node['t'] in ['recommender', 'author']:
-            addImageToNode(node, cache, ['circularImage','image'][node['t']=='author'])
+            addImageToNode(
                node, cache, ['circularImage', 'image'][node['t'] == 'author'])
    with open('.imgLinkCache.json', 'w') as cf:
        cf.write(json.dumps(cache))
 def recommendNBooksRecommenderBased(G, mu, std, n, removeTopListsB=True, removeUselessRecommenders=True):
    removeRestOfSeries(G)
    removeBad(G, mu-std*2-1)
@ -976,6 +1031,7 @@ def recommendNBooksTagBased(G, mu, std, n, removeTopListsB=True):
    scaleOpinionsByRating(G)
    addScoreToLabels(G)
 def recommendNBooks(G, mu, std, n, removeTopListsB=True, removeUselessRecommenders=True, v3d=False):
    removeRestOfSeries(G)
    removeBad(G, mu-std-0.5)
@ -1035,6 +1091,7 @@ def fullGraph(G, removeTopListsB=True):
    scaleOpinionsByRating(G)
    addScoreToLabels(G)
 def recommenderCompetence(G):
    # removeRead(G)
    removeUnread(G)
@ -1060,6 +1117,7 @@ def recommenderCompetence(G):
                    node['score'] = 0
                node['score'] /= 2
 def readBooksAnalysis(G, minRating=0, showAllTags=True, removeUnconnected=False, removeTopListsB=True):
    removeUnread(G)
    removeBad(G, minRating)
@ -1075,6 +1133,7 @@ def readBooksAnalysis(G, minRating=0, showAllTags=True, removeUnconnected=False,
    scaleOpinionsByRating(G)
    addScoreToLabels(G)
 def progress(G, books, mu, minimum=3.5):
    findNewBooks(G, books, mu, -1, minRecSco=minimum)
    bookCount = 0
@ -1138,6 +1197,7 @@ def analyze(G, books, mu, type_name, name, dist=2.1):
    addScoreToLabels(G)
    match['label'] = "*"+match['label']+"*"
 def waveFlow(G, node, n, dist, menge, firstEdge=False):
    if dist <= 0:
        return
@ -1167,7 +1227,8 @@ def waveFlow(G, node, n, dist, menge, firstEdge=False):
                book['score'] = 0
                bestlist.append(book)
    bestlist.sort(key=lambda node: node['score'], reverse=True)
-    toKeep = min(int(dist*10), math.ceil(len(bestlist) * dist - len(keeplist)*0.5))
+    toKeep = min(int(dist*10), math.ceil(len(bestlist)
                 * dist - len(keeplist)*0.5))
    if toKeep <= 0:
        keeplist.sort(key=lambda node: node['rating'], reverse=True)
        keeplist = keeplist[:min(int(dist*10), int(len(keeplist) * dist))]
@ -1180,6 +1241,7 @@ def waveFlow(G, node, n, dist, menge, firstEdge=False):
        if node in bestlist or node in keeplist:
            waveFlow(G, node, m, dist, menge, firstEdge=firstEdge)
 def gensimTokensForLines(lines):
    for i, line in enumerate(lines):
        tokens = gensim.utils.simple_preprocess(line)
@ -1189,6 +1251,7 @@ def gensimTokensForLines(lines):
            # For training data, add tags
            yield gensim.models.doc2vec.TaggedDocument(tokens, [i])
 def buildDoc2Vec(books):
    import gensim
    for n in list(G.nodes):
@ -1197,10 +1260,12 @@ def buildDoc2Vec(books):
            pass
    gensimTokensForLines(lines)
 def shell(G, books, mu, std):
    """Start an interactive ptpython REPL with this module's state available.

    The parameters are not used directly; they are exposed to the REPL
    through ``locals()`` together with the module globals.
    """
    # Imported lazily so ptpython is only needed when the shell is opened.
    from ptpython.repl import embed
    embed(globals(), locals())
 def newBooks(G, books, num, mu, std):
    removeBad(G, mu-std*2)
    findNewBooks(G, books, mu, num, minRecSco=mu-std)
@ -1226,21 +1291,29 @@ def findNewBooks(G, books, mu, num=-1, minRecSco=5):
        if node['t'] == 'recommender' and 'score' in node:
            oldBooks = []
            newBooks = []
-            recBooks = mrbdf[mrbdf['recommender'].str.contains(node['label'])].to_dict(orient='records')
+            recBooks = mrbdf[mrbdf['recommender'].str.contains(
                node['label'])].to_dict(orient='records')
            for book in recBooks:
                if book['title'] in [b['title'] for b in books]:
-                    oldBooks.append({'title': book['title'], 'author': book['author']})
+                    oldBooks.append(
                        {'title': book['title'], 'author': book['author']})
                else:
-                    newBooks.append({'title': book['title'], 'author': book['author']})
+                    newBooks.append(
-            recs.append({'name': node['label'], 'rec': node, 'newBooks': newBooks, 'oldBooks': oldBooks})
+                        {'title': book['title'], 'author': book['author']})
            recs.append({'name': node['label'], 'rec': node,
                        'newBooks': newBooks, 'oldBooks': oldBooks})
    for rec in recs:
        for book in rec['newBooks']:
-            G.add_node('n/'+book['title'], color='blue', t='newBook', label=book['title'], author=book['author'])
+            G.add_node('n/'+book['title'], color='blue', t='newBook',
                       label=book['title'], author=book['author'])
-            G.add_node('r/'+rec['rec']['label'], color='orange', t='recommender', label=rec['rec']['label'], score=rec['rec']['score'])
+            G.add_node('r/'+rec['rec']['label'], color='orange', t='recommender',
-            G.add_edge('r/'+rec['rec']['label'], 'n/'+book['title'], color='blue')
+                       label=rec['rec']['label'], score=rec['rec']['score'])
            G.add_edge('r/'+rec['rec']['label'], 'n/' +
                       book['title'], color='blue')
-            G.add_node('a/'+book['author'], color='green', t='author', label=book['author'])
+            G.add_node('a/'+book['author'], color='green',
                       t='author', label=book['author'])
            G.add_edge('a/'+book['author'], 'n/'+book['title'], color='blue')
    for n in list(G.nodes):
        node = G.nodes[n]
@ -1257,12 +1330,16 @@ def findNewBooks(G, books, mu, num=-1, minRecSco=5):
            else:
                ses.append(min(ses))
                scores.append(mu)
-                node['fake_se'] = sum(ses)/(len(ses)**1.2) + 0.5 + 0.5 * (len(scores)==2) # This is not how SE works. DILLIGAF?
+                # This is not how SE works. DILLIGAF?
-                node['score'] = sum(scores)/len(scores)*1.2 - node['fake_se']*1.6 + 0.5 - 0.1/math.sqrt(len(scores))
+                node['fake_se'] = sum(ses)/(len(ses)**1.2) + \
                    0.5 + 0.5 * (len(scores) == 2)
                node['score'] = sum(
                    scores)/len(scores)*1.2 - node['fake_se']*1.6 + 0.5 - 0.1/math.sqrt(len(scores))
                if len(scores) == 2:
                    node['score'] *= 0.80
                node['value'] = 20 + 5 * float(node['score'])
-                node['label'] += " ({:.2f}±{:.1f})".format(node['score'], node['fake_se'])
+                node['label'] += " ({:.2f}±{:.1f})".format(node['score'],
                                                           node['fake_se'])
                node['label'] += '\n ' + node['author']
    if num != -1:
        removeKeepBest(G, num, 10, 'newBook')
@ -1270,6 +1347,8 @@ def findNewBooks(G, books, mu, num=-1, minRecSco=5):
 # While batchSize is implemented, we only get good convergence when it is disabled (batchSize=-1),
 # but it might be necessary to enable it later for a larger library, for better training performance...
 # Maybe try again for 128 books?
 def evaluateFitness(books, batchSize=-1, debugPrint=False):
    global weights
    G = buildBookGraph(books)
@ -1280,7 +1359,8 @@ def evaluateFitness(books, batchSize=-1, debugPrint=False):
    graphAddTags(G, books)
    runPagerank(G)
-    ratedBooks = [n for n in list(G.nodes) if 'rating' in G.nodes[n] and G.nodes[n]['rating'] != None]
+    ratedBooks = [n for n in list(
        G.nodes) if 'rating' in G.nodes[n] and G.nodes[n]['rating'] != None]
    boundsLoss = 0
    linSepLoss = []
    errSq = []
@ -1289,7 +1369,8 @@ def evaluateFitness(books, batchSize=-1, debugPrint=False):
        gradient[wt] = 0
    mu, sigma = genScores(G, books)
    for b in G.nodes:
-        batch = random.sample(ratedBooks, batchSize) if batchSize!=-1 and len(ratedBooks) > batchSize else ratedBooks
+        batch = random.sample(ratedBooks, batchSize) if batchSize != - \
            1 and len(ratedBooks) > batchSize else ratedBooks
        if b in batch:
            rating = G.nodes[b]['rating']
            G.nodes[b]['rating'] = None
@ -1300,9 +1381,12 @@ def evaluateFitness(books, batchSize=-1, debugPrint=False):
                errSq.append((rating - G.nodes[b]['score'])**2)
            G.nodes[b]['rating'] = rating
            for wt in weights:
-                scoreB = sum([a*(1.001 if wt==w[0] else 1)*weights[w[0]]*(w[1] if len(w)>1 else 1) for a,w in zip(G.nodes[b]['_act'], G.nodes[b]['_wgh'])])/sum([(1.001 if wt==w[0] else 1)*weights[w[0]]*(w[1] if len(w)>1 else 1) for w in G.nodes[b]['_wgh']])
+                scoreB = sum([a*(1.001 if wt == w[0] else 1)*weights[w[0]]*(w[1] if len(w) > 1 else 1) for a, w in zip(G.nodes[b]['_act'],
-                gradient[wt] += ((rating - G.nodes[b]['score'])**2 - (rating - scoreB)**2)*1000
+                             G.nodes[b]['_wgh'])])/sum([(1.001 if wt == w[0] else 1)*weights[w[0]]*(w[1] if len(w) > 1 else 1) for w in G.nodes[b]['_wgh']])
-    regressionLoss = sum([max(0,abs(w)-1)**2 for w in weights.values()]) # no punishment if w within -1 and 1
+                gradient[wt] += ((rating - G.nodes[b]['score'])
                                 ** 2 - (rating - scoreB)**2)*1000
    # no punishment if w within -1 and 1
    regressionLoss = sum([max(0, abs(w)-1)**2 for w in weights.values()])
    for wt in weights:
        if abs(weights[wt]) > 1.0:
            gradient[wt] -= weights[wt]*10
@ -1315,6 +1399,55 @@ def evaluateFitness(books, batchSize=-1, debugPrint=False):
    fit = sum(errSq)/len(errSq) + 0.001*regressionLoss
    return fit, gradient
 def calcDissonance(books):
    """Compare each rated book's own rating with a leave-one-out prediction.

    Builds the graph (books, authors, recommenders, top lists, series and
    tags), runs pagerank, and then, for every node that carries a rating,
    temporarily hides that rating, re-runs ``genScores`` and records the
    resulting score.

    Each rated node gains three attributes:
      * ``_test_score``    - the score computed while its rating was hidden
      * ``dissonance_off`` - rating minus that score (signed)
      * ``dissonance_abs`` - absolute value of ``dissonance_off``

    Returns the graph G.
    """
    G = buildBookGraph(books)
    graphAddAuthors(G, books)
    graphAddRecommenders(G, books)
    graphAddTopLists(G, books)
    graphAddSeries(G, books)
    graphAddTags(G, books)
    runPagerank(G)

    # Snapshot the rated nodes first; they are visited in graph order.
    ratedBooks = [n for n in G.nodes
                  if G.nodes[n].get('rating') is not None]

    # Initial scoring pass with all ratings in place.
    genScores(G, books)

    for b in ratedBooks:
        rating = G.nodes[b]['rating']
        # Hide the rating so the score has to come from the rest of the graph.
        G.nodes[b]['rating'] = None
        genScores(G, books, calcPagerank=False)
        predicted = G.nodes[b]['score']
        G.nodes[b]['rating'] = rating
        G.nodes[b]['_test_score'] = predicted
        G.nodes[b]['dissonance_off'] = rating - predicted
        G.nodes[b]['dissonance_abs'] = abs(rating - predicted)
    return G
 def describeDissonance(books, num=-1, sortKey='dissonance_abs', sortDir=True):
    """Print rated books ordered by how far the computed score is from the rating.

    books   -- book records handed to ``calcDissonance``.
    num     -- maximum number of lines to print; -1 prints all of them.
               Zero or other negative values print nothing (previously they
               raised ``ValueError`` from ``math.log10``).
    sortKey -- node attribute to sort by (e.g. 'dissonance_abs',
               'dissonance_off', 'score').
    sortDir -- passed as ``reverse`` to the sort; True puts the largest first.
    """
    G = calcDissonance(books)
    ranked = [G.nodes[n] for n in G.nodes if 'dissonance_abs' in G.nodes[n]]
    ranked.sort(key=lambda node: node[sortKey], reverse=sortDir)
    if num != -1:
        ranked = ranked[:max(num, 0)]

    # Pad the index to the digit count of num; 4 digits when unlimited.
    width = len(str(num)) if num > 0 else 4
    for i, book in enumerate(ranked, start=1):
        line = book['title'] + " ("+" & ".join(book['authors'])+")" + \
            ": You: {:.5f}, AI: {:.5f}, Delta: {:.5f}".format(
                book['rating'], book['_test_score'], book['dissonance_off'])
        print("["+str(i).zfill(width)+"] "+line)
 def train(initGamma, full=True):
    global weights
    if full:
@ -1367,18 +1500,22 @@ def train(initGamma, full=True):
                break
    print('Done.')
 def saveWeights(weights):
    """Serialise ``weights`` as JSON into 'neuralWeights.json' in the working directory."""
    with open('neuralWeights.json', 'w') as out:
        serialized = json.dumps(weights)
        out.write(serialized)
 def loadWeights():
    try:
        with open('neuralWeights.json', 'r') as f:
            weights = json.loads(f.read())
    except IOError:
-        weights = {"topList": 0.15, "recommender": 0.30, "author": 0.70, "series": 0.05, "tag": 0.05, "pagerank": 0.05, "mu": 0.50, "sigma": 0.30, "bias": 0.25, "median": 0.10} #, "tgb_rank": 0.10}
+        weights = {"topList": 0.15, "recommender": 0.30, "author": 0.70, "series": 0.05, "tag": 0.05,
                   "pagerank": 0.05, "mu": 0.50, "sigma": 0.30, "bias": 0.25, "median": 0.10}  # , "tgb_rank": 0.10}
    return weights
 def cliInterface(imgDef=False):
    import argparse
@ -1403,13 +1540,16 @@ def cliInterface(imgDef=False):
    cmds = parser.add_subparsers(required=True, dest='cmd')
    p_rec = cmds.add_parser('recommend', description="TODO", aliases=['rec'])
-    p_rec.add_argument('-n', type=int, default=20, help='number of books to recommend')
+    p_rec.add_argument('-n', type=int, default=20,
                       help='number of books to recommend')
    p_rec.add_argument('--tag-based', action="store_true")
    p_rec.add_argument('--recommender-based', action="store_true")
-    p_rec.add_argument('--new', type=int, default=-1, help='number of new books to recommend')
+    p_rec.add_argument('--new', type=int, default=-1,
                       help='number of new books to recommend')
    p_rec = cmds.add_parser('listScores', description="TODO", aliases=['ls'])
-    p_rec.add_argument('-n', type=int, default=50, help='number of books to recommend')
+    p_rec.add_argument('-n', type=int, default=50,
                       help='number of books to recommend')
    p_read = cmds.add_parser('read', description="TODO", aliases=[])
    p_read.add_argument('--min-rating', type=int, default=0)
@ -1417,27 +1557,40 @@ def cliInterface(imgDef=False):
    p_read.add_argument('--only-connected', action="store_true")
    p_show = cmds.add_parser('analyze', description="TODO", aliases=[])
-    p_show.add_argument('type', choices=['any', 'book', 'recommender', 'author', 'series', 'tag'])
+    p_show.add_argument(
        'type', choices=['any', 'book', 'recommender', 'author', 'series', 'tag'])
    p_show.add_argument('name', type=str)
-    p_show.add_argument('-d', type=float, default=2.1, help='depth of expansion')
+    p_show.add_argument('-d', type=float, default=2.1,
                        help='depth of expansion')
    p_train = cmds.add_parser('train', description="TODO", aliases=[])
-    p_train.add_argument('-g', type=float, default=0.2, help='learning rate gamma')
+    p_train.add_argument('-g', type=float, default=0.2,
                         help='learning rate gamma')
    p_train.add_argument('--full', action="store_true")
    p_prog = cmds.add_parser('progress', description="TODO", aliases=[])
-    p_prog.add_argument('-m', type=float, default=7, help='Mimimum Score to read')
+    p_prog.add_argument('-m', type=float, default=7,
                        help='Mimimum Score to read')
    p_comp = cmds.add_parser('competence', description="TODO", aliases=[])
    p_shell = cmds.add_parser('shell', description="TODO", aliases=[])
    p_new = cmds.add_parser('newBooks', description="TODO", aliases=[])
-    p_new.add_argument('-n', type=int, default=10, help='number of books to recommend')
+    p_new.add_argument('-n', type=int, default=10,
                       help='number of books to recommend')
-    p_col = cmds.add_parser('calice', description="TODO", aliases=[])
+    p_cal = cmds.add_parser('calice', description="TODO", aliases=[])
-    p_createCol = cmds.add_parser('createCaliceColumn', description="TODO", aliases=[])
+    p_dis = cmds.add_parser('dissonance', description="TODO", aliases=['dis'])
    p_dis.add_argument('-n', type=int, default=-1,
                       help='Maximum number of books to lost')
    p_dis.add_argument(
        '--sort', choices=['dissonance_abs', 'dissonance_off', 'score'], default='dissonance_abs', const='dissonance_abs', nargs='?')
    p_dis.add_argument('--reversed', action="store_true")
    p_createCol = cmds.add_parser(
        'createCaliceColumn', description="TODO", aliases=[])
    p_createCol.add_argument('type', choices=['score', 'rating', 'both'])
    p_full = cmds.add_parser('full', description="TODO", aliases=[])
@ -1452,6 +1605,7 @@ def cliInterface(imgDef=False):
    else:
        mainCLI(args)
 def perfTestCLI(args):
    import time
    from pycallgraph import PyCallGraph
@ -1466,6 +1620,7 @@ def perfTestCLI(args):
    with PyCallGraph(output=GraphvizOutput(output_file='perfTests/' + str(int(time.time())) + '.png'), config=config):
        mainCLI(args)
 def mainCLI(args):
    if args.cmd == "train":
        train(args.g, args.full)
@ -1482,7 +1637,6 @@ def mainCLI(args):
    if not args.keep_whitepapers:
        removeWhitepapers(G)
    if args.cmd == "recommend":
        if args.new == -1:
            args.new = int(args.n / 5)
@ -1490,16 +1644,21 @@ def mainCLI(args):
            findNewBooks(G, books, mu, args.new, minRecSco=mu-std)
        if args.tag_based:
            if args.recommender_based:
-                raise Exception('tag-based and recommender-based can not be be combined')
+                raise Exception(
-            recommendNBooksTagBased(G, mu, std, args.n, not args.keep_top_lists)
+                    'tag-based and recommender-based can not be be combined')
            recommendNBooksTagBased(
                G, mu, std, args.n, not args.keep_top_lists)
        elif args.recommender_based:
-            recommendNBooksRecommenderBased(G, mu, std, args.n, not args.keep_top_lists, not args.keep_useless_recommenders)
+            recommendNBooksRecommenderBased(
                G, mu, std, args.n, not args.keep_top_lists, not args.keep_useless_recommenders)
        else:
-            recommendNBooks(G, mu, std, args.n, not args.keep_top_lists, not args.keep_useless_recommenders, args.v3d)
+            recommendNBooks(G, mu, std, args.n, not args.keep_top_lists,
                            not args.keep_useless_recommenders, args.v3d)
    elif args.cmd == "listScores":
        listScores(G, mu, std, args.n)
    elif args.cmd == "read":
-        readBooksAnalysis(G, args.min_rating, args.all_tags, args.only_connected, not args.keep_top_lists)
+        readBooksAnalysis(G, args.min_rating, args.all_tags,
                          args.only_connected, not args.keep_top_lists)
    elif args.cmd == "analyze":
        analyze(G, books, mu, args.type, args.name, args.d)
    elif args.cmd == "full":
@ -1518,6 +1677,9 @@ def mainCLI(args):
    elif args.cmd == "calice":
        calice(G)
        exit()
    elif args.cmd == "dissonance":
        describeDissonance(books, args.n, args.sort, not args.reversed)
        exit()
    elif args.cmd == "createCaliceColumn":
        if args.type in ['score', 'both']:
            calibreDB.createCaliceScoreColumn()
@ -1525,12 +1687,12 @@ def mainCLI(args):
        if args.type in ['rating', 'both']:
            calibreDB.createCaliceRatingColumn()
            print('[*] Column "Calice Rating" was created.')
-            print('[i] To allow displaying half-stars, please active them manually in the calibre-settings.')
+            print(
                '[i] To allow displaying half-stars, please active them manually in the calibre-settings.')
        exit()
    else:
        raise Exception("Bad")
    if not args.keep_priv:
        removePriv(G)
    if args.remove_read: