Compare commits

...

101 Commits

Author SHA1 Message Date
4580f67920 cli autocomplete and defered imports 2023-01-17 23:18:26 +01:00
2f2da60626 Fix curiosity tanh bug 2022-11-30 17:44:12 +01:00
73c7e5e6c2 Syntax Bug fixed 2022-11-19 16:35:04 +01:00
e45423aaa4 Dont show score for tags 2022-11-19 16:28:06 +01:00
9a473edfdc Allow configurable curiosity and bug fixes 2022-11-19 16:18:58 +01:00
1c34d2876f New feature: Dissonance 2022-09-11 18:56:47 +02:00
6af38c686f Updated requirements.txt 2022-07-01 12:19:41 +02:00
5ef60d340e Made README domain independent 2022-06-19 15:18:40 +02:00
29b5959623 Added .idea to .gitignore 2022-06-16 20:31:13 +02:00
4f116bc8bf Remove calice-score from read books 2022-03-20 18:11:56 +01:00
6ebe7d03fc Reduced Calice Score to only 6 digits 2022-03-19 12:21:48 +01:00
8e8592bb29 Added Calice Score; Renamed Calice to Calice Rating 2022-03-19 12:18:14 +01:00
f9c70a8ee4 Fixed bugs in createCaliceColumn 2022-03-19 11:42:27 +01:00
0f35ae691e Info about half-stars added 2022-03-19 11:39:10 +01:00
9193e6b3e6 New command to add calice-column (no half-stars yet) 2022-03-19 11:37:24 +01:00
63895953c0 Allow inserting scores into calibreDB 2022-03-19 11:35:30 +01:00
ac6d85fa99 Enabled Imgs per default; Chrome recommendet 2022-03-08 14:21:36 +01:00
44c0c189d7 Better recommender scores 2022-03-07 13:51:26 +01:00
b75ede5d89 Added a default toggle for --imgs / --no-imgs 2022-03-07 13:26:29 +01:00
84deaa2f64 Better training and reenabled median 2022-03-07 13:21:16 +01:00
26527e83c3 Ignore all neuralWeights endings 2022-02-25 00:46:44 +01:00
2642423289 Revert "implemented neuralBins (performance is bad...)"
This reverts commit bd53a83058.
2022-02-24 20:19:00 +01:00
53a7b07c06 Merged perfTests and fixed bugs 2022-02-22 15:26:04 +01:00
20afb205e3 Added perf-testing 2022-02-22 15:02:48 +01:00
bd53a83058 implemented neuralBins (performance is bad...) 2022-02-22 10:37:16 +01:00
880cb6ba7e Added tgb (but disabled, because adds no accuracy) 2022-02-15 19:54:14 +01:00
0529172af2 Fixed MRB bug 2022-02-15 19:35:03 +01:00
3a14e32e58 Tiny change 2022-02-11 18:16:43 +01:00
07da41f1f5 Allow a little more recommenders 2022-02-11 18:12:49 +01:00
87433cc97f Tiny change in recommender 2022-02-11 18:04:47 +01:00
05a51cd5e6 Better newBooks 2022-02-11 17:50:07 +01:00
bcec24fbf7 'analyze' also finds newBooks 2022-02-11 17:37:23 +01:00
08fb19c6b9 Fixed Bug (getting images for analyze-center) 2022-02-11 14:07:51 +01:00
e8871f823e Also cache, if a wikipage does not exist 2022-02-11 12:28:03 +01:00
558b9d4eba No more neuralWeights in the repo 2022-02-11 12:17:01 +01:00
5e6dc9ffe2 Lookup images for authors and recommenders on WikiPedia 2022-02-11 12:14:24 +01:00
f5c3077cb4 Updated Image in Readme 2022-02-10 18:18:36 +01:00
cbca64aaf3 Newly trained net 2022-02-10 18:18:24 +01:00
6d502af029 Tweaking Recommendation-Visualization 2022-02-10 18:18:05 +01:00
685d4b2ccd Playing with adding z-Libary binding (download gets blocked by server) 2022-02-10 15:16:44 +01:00
e9eae631b4 Newly trained net 2022-02-10 15:16:27 +01:00
0486bf1384 Newly trained net 2022-02-07 19:57:14 +01:00
93230c4285 Simpler and faster training 2022-02-07 19:57:03 +01:00
dfa2f89f29 Allow analyzing tags 2022-02-06 22:30:44 +01:00
d6dda44ae8 Improved HTML rendering (no scrolling in Fullscreen) 2022-02-06 22:17:23 +01:00
46fe3390b0 Less pre-filter for newBook 2022-02-06 21:45:08 +01:00
5f812a6b85 Lower default dist for Analyze 2022-02-06 21:38:41 +01:00
ab97f8e390 Prune Recommender on Analyze 2022-02-06 21:35:00 +01:00
7c1585120e Removed Debug Print 2022-02-06 21:31:02 +01:00
51f80e0867 Display author for new books 2022-02-06 19:10:21 +01:00
4d3925b193 Newly trained net 2022-02-06 18:57:05 +01:00
e599a851cf Smol Tweaks (for newBooks) 2022-02-06 18:56:53 +01:00
9373e66e29 Removed debug print 2022-02-06 18:38:07 +01:00
1a82cac16f smoller bug fix 2022-02-06 18:30:55 +01:00
936558779b smol bug fix 2022-02-06 18:28:35 +01:00
3032dea8a0 Better rankings-list in cli and newly trained net 2022-02-06 18:22:46 +01:00
a2d747e41e Integrated MRB-DB and added new-book-finder (from MRB) 2022-02-06 17:59:21 +01:00
7c168f3532 Newly trained net 2022-02-05 22:08:09 +01:00
92d69cf7cb Newly trained net 2022-02-04 20:43:13 +01:00
5a229a4b2b Approximate Pagerank while training (+performance) and implemented
batch-training (disabled do to non-convergence)
2022-02-04 20:34:59 +01:00
aa2e5a41bb Fixed 'progress' command 2022-02-04 17:20:46 +01:00
9529cf2970 Newly trained net 2022-02-03 15:24:19 +01:00
b854b38349 Fixed tiny bug when training with --full 2022-02-03 15:24:06 +01:00
54e9f0e8fc Implemented median; disabled median & se (low utility) 2022-02-03 15:10:26 +01:00
b45561ceca Newly trained net 2022-02-01 12:22:46 +01:00
bd17ac4906 Better Recommendation Generation 2022-02-01 12:22:13 +01:00
6e64e76310 Better error-message on pagerank ev-iter fail 2022-01-31 14:02:00 +01:00
3c0f1b18b4 Newly trained net 2022-01-31 13:45:44 +01:00
33ba27e2d0 Implemented Pagerank 2022-01-31 13:45:26 +01:00
96258ae19b Newly trained 2022-01-27 18:20:01 +01:00
b729bb61c4 Typo in gitignore 2022-01-24 16:15:51 +01:00
9ce65f52f1 Merge branch 'master' of dominik.roth.ml:dodox/CaliGraph 2022-01-24 16:15:32 +01:00
f3bc194488 Trying to make to ignore the neuralWeights file 2022-01-24 16:15:27 +01:00
9bcd35a48d Trying to make to ignore the neuralWeights file 2022-01-24 16:14:51 +01:00
496a733cb2 Revert "Delete 'neuralWeights.json'"
This reverts commit d5f9a0b1b4.
2022-01-23 22:44:59 +01:00
7d5a3ad2c5 Merge branch 'master' of dominik.roth.ml:dodox/CaliGraph 2022-01-23 22:43:10 +01:00
f1021fd6a0 New gitignore 2022-01-23 22:42:43 +01:00
d5f9a0b1b4 Delete 'neuralWeights.json' 2022-01-23 22:39:49 +01:00
077f850808 No more nets in the repo 2022-01-23 22:38:03 +01:00
c13a500420 Newly trained net 2022-01-22 18:13:46 +01:00
e5d2ca532c Shorter Training 2022-01-22 18:12:57 +01:00
efc309c962 Newly trained net 2022-01-22 17:51:03 +01:00
a6d9a75030 Sanitize description of books before rake 2021-12-18 18:09:45 +01:00
7af5109e7f Newly trained net 2021-12-11 14:02:08 +01:00
c9c2d75377 Updated requirements.txt 2021-12-11 14:00:48 +01:00
199fab7875 New shell-command, nltk for keyword extraction from description 2021-12-11 13:58:01 +01:00
36baf1aaec Bug: Typo in train 2021-12-11 11:54:25 +01:00
81fa6ca4d6 Faster training (earlier stopping) 2021-12-11 11:52:49 +01:00
1f5dea6aff Added a requirements.txt 2021-12-08 11:54:27 +01:00
16cc68dfed chmod +x (again) 2021-12-05 19:57:05 +01:00
da9569fd4c Added 'competence' command (displays recommenders and justifications of
their scores)
2021-12-05 19:56:26 +01:00
aa95a9b16b chmod +x 2021-12-05 19:54:03 +01:00
39930d1233 kp 2021-12-05 19:53:30 +01:00
f2fad859dc Added a progress-command (and newly trained weights) 2021-11-24 22:35:39 +01:00
ef7fceacea Better 'Gradientenabstieg' 2021-11-23 20:51:24 +01:00
5f7366e556 Newly trained net 2021-11-07 11:41:01 +01:00
7f51f139f2 Actually, we will also train on whitepapers (because otherwise I would
have to fix an ugly bug)
2021-10-17 15:52:52 +02:00
ce99e5301b Smashed bug with argument-parsing 2021-10-17 15:51:26 +02:00
8778cfdae6 Also dont train on whitepapers 2021-10-17 15:50:33 +02:00
3588587c92 Earlier removal of whitepapers (dont extrapolate from them) 2021-10-17 15:48:44 +02:00
65e8948202 Filter out the whitepapers I have in my libary 2021-10-17 15:47:37 +02:00
11 changed files with 8086 additions and 114 deletions

4
.gitignore vendored
View File

@ -1,3 +1,7 @@
__pycache__
*.html
.venv
neuralWeights.json
neuralWeights.json.*
.imgLinkCache.json
.idea

View File

@ -1,4 +1,4 @@
# CaliGraph
CaliGraph connects to the database of your local Calibre-Instance in order to recommend which unread books are likely to be enjoyed. The recommendations are displayed in a graph which explains the recommendation by showing correlations to previously read books, authors, recommendations by same individuals, tags...
![Screenshot](https://gitea.dominik.roth.ml/dodox/CaliGraph/raw/branch/master/Screenshot_README.png)
![Screenshot](./Screenshot_README.png)

Binary file not shown.

Before

Width:  |  Height:  |  Size: 428 KiB

After

Width:  |  Height:  |  Size: 243 KiB

File diff suppressed because it is too large Load Diff

View File

@ -1 +0,0 @@
{"topList": 0.8269026935523768, "recommender": 0.5687397308864482, "author": 0.9602300111040548, "series": 0.0015315822252870478, "tag": 0.0028789924118737056, "mu": 0.48730278196967397, "sigma": 0.02394878388858184, "se": 0.45207554705083647, "bias": 0.555294008129175}

BIN
perfTests/1645539905.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.5 MiB

3004
rec_dbs/mrb_db.csv Normal file

File diff suppressed because it is too large Load Diff

2691
rec_dbs/tgb_1.csv Normal file

File diff suppressed because it is too large Load Diff

1318
rec_dbs/tgb_2.csv Normal file

File diff suppressed because it is too large Load Diff

14
requirements.txt Normal file
View File

@ -0,0 +1,14 @@
numpy
scipy
matplotlib
networkx
pyvis
fuzzywuzzy
rake_nltk
ptpython
requests
pandas
plotly
wikipedia
argcomplete
pyzshcomplete

155
zSearch.py Normal file
View File

@ -0,0 +1,155 @@
import requests
from bs4 import BeautifulSoup
class Search:
    """Builds and executes a search against the z-library mirrors.

    ``searchResults`` selects the target site: ``"books"`` queries b-ok.cc,
    any other value queries booksc.xyz (articles).  All options are sent
    as GET query parameters by ``executeSearch``.
    """

    def __init__(self):
        # "general" or phrase search; controls the "matchPhrase" option.
        self.searchType = "general"
        # GET parameters; page is kept as a string because it is sent verbatim.
        self.searchOptions = {
            "e": "",
            "yearFrom": "",
            "yearTo": "",
            "language": "",
            "extension": "",
            "page": "1",
            "order": "",
        }
        self.searchResults = "books"
        # The raw query text, appended to the /s/ URL path.
        self.input = ""

    def nextPage(self):
        """Advance to the next result page."""
        self.searchOptions["page"] = str(int(self.searchOptions["page"]) + 1)

    def previousPage(self):
        """Go back one result page.

        Returns 1 when already on the first page (legacy error code —
        callers check the return value rather than catching an exception).
        """
        if self.searchOptions["page"] != "1":
            self.searchOptions["page"] = str(int(self.searchOptions["page"]) - 1)
        else:
            return 1

    def updateSearchOptions(self):
        """Sync ``searchOptions`` with ``searchType`` and ``searchResults``.

        Adds/removes "matchPhrase" for non-general searches, and
        adds/removes "language"/"extension" depending on whether books
        or articles are being searched.
        """
        if self.searchType == "general":
            self.searchOptions.pop("matchPhrase", None)
        else:
            self.searchOptions["matchPhrase"] = ""
        if self.searchResults == "books":
            self.searchOptions["language"] = ""
            self.searchOptions["extension"] = ""
        else:
            self.searchOptions.pop("language", None)
            self.searchOptions.pop("extension", None)

    def executeSearch(self):
        """Execute the GET request and return the raw response object."""
        if self.searchResults == "books":
            url = "http://b-ok.cc"
        else:
            url = "http://booksc.xyz"
        r = requests.get(url + "/s/" + self.input, params=self.searchOptions)
        return r

    def getResults(self):
        """Fetch and parse search results.  Returns a list of ``Book``."""
        r = self.executeSearch()
        soup = BeautifulSoup(r.text, "html.parser")
        match = soup.find_all(class_="resItemBox")
        results = []
        for e in match:
            title = e.find(itemprop="name")
            author = e.find(class_="authors")
            year = e.find(class_="bookProperty property_year")
            language = e.find(class_="bookProperty property_language")
            fileInfo = e.find(class_="bookProperty property__file")
            link = e.find("a", href=True)
            link = link["href"]
            # NOTE(review): detail links use https for books but a
            # scheme-less host for articles — looks inconsistent; confirm.
            if self.searchResults == "books":
                fullLink = "https://b-ok.cc" + link
            else:
                fullLink = "booksc.xyz" + link
            # isNone() null-guards optional fields and flattens their text.
            title = isNone(title)
            author = isNone(author)
            year = isNone(year)
            language = isNone(language)
            fileInfo = isNone(fileInfo)
            book = Book(title, author, year, language, fileInfo, fullLink)
            results += [book]
        return results

    def reset(self):
        """Clear the search and restore default options."""
        # Defaults live in __init__ only; delegate instead of duplicating
        # the option dict (the two copies had to be kept in sync by hand).
        self.__init__()
def isNone(e):
    """Collapse a BeautifulSoup tag to single-line text, or "" if ``e`` is None.

    Despite the name, this is a null-guard: it returns the tag's ``.text``
    with all newlines removed, and the empty string for a missing tag.
    """
    # Identity comparison is the correct None check (was `e != None`).
    if e is not None:
        return "".join(e.text.splitlines())
    return ""
class Book:
    """A single z-library search result.

    ``fileInfo`` must look like ``"<type>, <size>"`` (e.g. ``"pdf, 2.4 MB"``);
    it is split into ``fType`` and ``size`` (raises ValueError otherwise).
    The detail page and download link are fetched lazily and cached.
    """

    def __init__(self, title, author, year, language, fileInfo, link):
        self.title = title
        self.author = author
        self.year = year
        self.language = language
        # "epub, 1.2 MB" -> ("epub", "1.2 MB")
        self.fType, self.size = fileInfo.split(', ')
        self.link = link
        self.dlLink = None  # download href, resolved lazily by getDownloadURL
        self.page = None    # cached detail-page HTML, fetched lazily

    def __repr__(self):
        return '<zBook: ' + str(self) + '>'

    def __str__(self):
        return " / ".join([self.title, self.author, self.year,
                           self.language, self.fType, self.size])

    def getDetails(self):
        """Return the book's property list as newline-separated text.

        Fetched (and cached) from ``self.link`` on first call.
        """
        if self.page is None:  # was `== None`; identity check is correct
            self.page = requests.get(self.link).text
        soup = BeautifulSoup(self.page, "html.parser")
        # for some reason, bookProperty also shows properties from other books
        # the line below prevents this
        soup = soup.find(class_="row cardBooks")
        match = soup.find_all(class_="bookProperty")
        results = ""
        for e in match:
            results += "".join(e.text.splitlines())
            results += "\n"
        # this makes writing the category easier for some books
        results = results.replace("\\\\", " \\ ")
        return results

    def getDownloadURL(self):
        """Resolve and return the absolute download URL (cached)."""
        if self.dlLink is None:  # was `== None`
            if self.page is None:  # was `== None`
                self.page = requests.get(self.link).text
            soup = BeautifulSoup(self.page, "html.parser")
            self.dlLink = soup.find(
                'a', 'btn btn-primary dlButton addDownloadedBook')['href']
        return 'https://b-ok.cc' + self.dlLink + '?dsource=recommend'

    def saveBook(self, path):
        """Download the book and write it to ``path``.

        NOTE(review): the whole file is buffered in memory (r.content);
        consider stream=True + chunked writes for large books.
        """
        r = requests.get(self.getDownloadURL(), allow_redirects=True)
        with open(path, 'wb') as f:
            f.write(r.content)