Compare commits

...

101 Commits

Author SHA1 Message Date
4580f67920 cli autocomplete and defered imports 2023-01-17 23:18:26 +01:00
2f2da60626 Fix curiosity tanh bug 2022-11-30 17:44:12 +01:00
73c7e5e6c2 Syntax Bug fixed 2022-11-19 16:35:04 +01:00
e45423aaa4 Dont show score for tags 2022-11-19 16:28:06 +01:00
9a473edfdc Allow configurable curiosity and bug fixes 2022-11-19 16:18:58 +01:00
1c34d2876f New feature: Dissonance 2022-09-11 18:56:47 +02:00
6af38c686f Updated requirements.txt 2022-07-01 12:19:41 +02:00
5ef60d340e Made README domain independent 2022-06-19 15:18:40 +02:00
29b5959623 Added .idea to .gitignore 2022-06-16 20:31:13 +02:00
4f116bc8bf Remove calice-score from read books 2022-03-20 18:11:56 +01:00
6ebe7d03fc Reduced Calice Score to only 6 digits 2022-03-19 12:21:48 +01:00
8e8592bb29 Added Calice Score; Renamed Calice to Calice Rating 2022-03-19 12:18:14 +01:00
f9c70a8ee4 Fixed bugs in createCaliceColumn 2022-03-19 11:42:27 +01:00
0f35ae691e Info about half-stars added 2022-03-19 11:39:10 +01:00
9193e6b3e6 New command to add calice-column (no half-stars yet) 2022-03-19 11:37:24 +01:00
63895953c0 Allow inserting scores into calibreDB 2022-03-19 11:35:30 +01:00
ac6d85fa99 Enabled Imgs per default; Chrome recommendet 2022-03-08 14:21:36 +01:00
44c0c189d7 Better recommender scores 2022-03-07 13:51:26 +01:00
b75ede5d89 Added a default toggle for --imgs / --no-imgs 2022-03-07 13:26:29 +01:00
84deaa2f64 Better training and reenabled median 2022-03-07 13:21:16 +01:00
26527e83c3 Ignore all neuralWeights endings 2022-02-25 00:46:44 +01:00
2642423289 Revert "implemented neuralBins (performance is bad...)"
This reverts commit bd53a83058.
2022-02-24 20:19:00 +01:00
53a7b07c06 Merged perfTests and fixed bugs 2022-02-22 15:26:04 +01:00
20afb205e3 Added perf-testing 2022-02-22 15:02:48 +01:00
bd53a83058 implemented neuralBins (performance is bad...) 2022-02-22 10:37:16 +01:00
880cb6ba7e Added tgb (but disabled, because adds no accuracy) 2022-02-15 19:54:14 +01:00
0529172af2 Fixed MRB bug 2022-02-15 19:35:03 +01:00
3a14e32e58 Tiny change 2022-02-11 18:16:43 +01:00
07da41f1f5 Allow a little more recommenders 2022-02-11 18:12:49 +01:00
87433cc97f Tiny change in recommender 2022-02-11 18:04:47 +01:00
05a51cd5e6 Better newBooks 2022-02-11 17:50:07 +01:00
bcec24fbf7 'analyze' also finds newBooks 2022-02-11 17:37:23 +01:00
08fb19c6b9 Fixed Bug (getting images for analyze-center) 2022-02-11 14:07:51 +01:00
e8871f823e Also cache, if a wikipage does not exist 2022-02-11 12:28:03 +01:00
558b9d4eba No more neuralWeights in the repo 2022-02-11 12:17:01 +01:00
5e6dc9ffe2 Lookup images for authors and recommenders on WikiPedia 2022-02-11 12:14:24 +01:00
f5c3077cb4 Updated Image in Readme 2022-02-10 18:18:36 +01:00
cbca64aaf3 Newly trained net 2022-02-10 18:18:24 +01:00
6d502af029 Tweaking Recommendation-Visualization 2022-02-10 18:18:05 +01:00
685d4b2ccd Playing with adding z-Libary binding (download gets blocked by server) 2022-02-10 15:16:44 +01:00
e9eae631b4 Newly trained net 2022-02-10 15:16:27 +01:00
0486bf1384 Newly trained net 2022-02-07 19:57:14 +01:00
93230c4285 Simpler and faster training 2022-02-07 19:57:03 +01:00
dfa2f89f29 Allow analyzing tags 2022-02-06 22:30:44 +01:00
d6dda44ae8 Improved HTML rendering (no scrolling in Fullscreen) 2022-02-06 22:17:23 +01:00
46fe3390b0 Less pre-filter for newBook 2022-02-06 21:45:08 +01:00
5f812a6b85 Lower default dist for Analyze 2022-02-06 21:38:41 +01:00
ab97f8e390 Prune Recommender on Analyze 2022-02-06 21:35:00 +01:00
7c1585120e Removed Debug Print 2022-02-06 21:31:02 +01:00
51f80e0867 Display author for new books 2022-02-06 19:10:21 +01:00
4d3925b193 Newly trained net 2022-02-06 18:57:05 +01:00
e599a851cf Smol Tweaks (for newBooks) 2022-02-06 18:56:53 +01:00
9373e66e29 Removed debug print 2022-02-06 18:38:07 +01:00
1a82cac16f smoller bug fix 2022-02-06 18:30:55 +01:00
936558779b smol bug fix 2022-02-06 18:28:35 +01:00
3032dea8a0 Better rankings-list in cli and newly trained net 2022-02-06 18:22:46 +01:00
a2d747e41e Integrated MRB-DB and added new-book-finder (from MRB) 2022-02-06 17:59:21 +01:00
7c168f3532 Newly trained net 2022-02-05 22:08:09 +01:00
92d69cf7cb Newly trained net 2022-02-04 20:43:13 +01:00
5a229a4b2b Approximate Pagerank while training (+performance) and implemented
batch-training (disabled do to non-convergence)
2022-02-04 20:34:59 +01:00
aa2e5a41bb Fixed 'progress' command 2022-02-04 17:20:46 +01:00
9529cf2970 Newly trained net 2022-02-03 15:24:19 +01:00
b854b38349 Fixed tiny bug when training with --full 2022-02-03 15:24:06 +01:00
54e9f0e8fc Implemented median; disabled median & se (low utility) 2022-02-03 15:10:26 +01:00
b45561ceca Newly trained net 2022-02-01 12:22:46 +01:00
bd17ac4906 Better Recommendation Generation 2022-02-01 12:22:13 +01:00
6e64e76310 Better error-message on pagerank ev-iter fail 2022-01-31 14:02:00 +01:00
3c0f1b18b4 Newly trained net 2022-01-31 13:45:44 +01:00
33ba27e2d0 Implemented Pagerank 2022-01-31 13:45:26 +01:00
96258ae19b Newly trained 2022-01-27 18:20:01 +01:00
b729bb61c4 Typo in gitignore 2022-01-24 16:15:51 +01:00
9ce65f52f1 Merge branch 'master' of dominik.roth.ml:dodox/CaliGraph 2022-01-24 16:15:32 +01:00
f3bc194488 Trying to make to ignore the neuralWeights file 2022-01-24 16:15:27 +01:00
9bcd35a48d Trying to make to ignore the neuralWeights file 2022-01-24 16:14:51 +01:00
496a733cb2 Revert "Delete 'neuralWeights.json'"
This reverts commit d5f9a0b1b4.
2022-01-23 22:44:59 +01:00
7d5a3ad2c5 Merge branch 'master' of dominik.roth.ml:dodox/CaliGraph 2022-01-23 22:43:10 +01:00
f1021fd6a0 New gitignore 2022-01-23 22:42:43 +01:00
d5f9a0b1b4 Delete 'neuralWeights.json' 2022-01-23 22:39:49 +01:00
077f850808 No more nets in the repo 2022-01-23 22:38:03 +01:00
c13a500420 Newly trained net 2022-01-22 18:13:46 +01:00
e5d2ca532c Shorter Training 2022-01-22 18:12:57 +01:00
efc309c962 Newly trained net 2022-01-22 17:51:03 +01:00
a6d9a75030 Sanitize description of books before rake 2021-12-18 18:09:45 +01:00
7af5109e7f Newly trained net 2021-12-11 14:02:08 +01:00
c9c2d75377 Updated requirements.txt 2021-12-11 14:00:48 +01:00
199fab7875 New shell-command, nltk for keyword extraction from description 2021-12-11 13:58:01 +01:00
36baf1aaec Bug: Typo in train 2021-12-11 11:54:25 +01:00
81fa6ca4d6 Faster training (earlier stopping) 2021-12-11 11:52:49 +01:00
1f5dea6aff Added a requirements.txt 2021-12-08 11:54:27 +01:00
16cc68dfed chmod +x (again) 2021-12-05 19:57:05 +01:00
da9569fd4c Added 'competence' command (displays recommenders and justifications of
their scores)
2021-12-05 19:56:26 +01:00
aa95a9b16b chmod +x 2021-12-05 19:54:03 +01:00
39930d1233 kp 2021-12-05 19:53:30 +01:00
f2fad859dc Added a progress-command (and newly trained weights) 2021-11-24 22:35:39 +01:00
ef7fceacea Better 'Gradientenabstieg' 2021-11-23 20:51:24 +01:00
5f7366e556 Newly trained net 2021-11-07 11:41:01 +01:00
7f51f139f2 Actually, we will also train on whitepapers (because otherwise I would
have to fix an ugly bug)
2021-10-17 15:52:52 +02:00
ce99e5301b Smashed bug with argument-parsing 2021-10-17 15:51:26 +02:00
8778cfdae6 Also dont train on whitepapers 2021-10-17 15:50:33 +02:00
3588587c92 Earlier removal of whitepapers (dont extrapolate from them) 2021-10-17 15:48:44 +02:00
65e8948202 Filter out the whitepapers I have in my libary 2021-10-17 15:47:37 +02:00
11 changed files with 8086 additions and 114 deletions

4
.gitignore vendored
View File

@ -1,3 +1,7 @@
__pycache__
*.html
.venv
neuralWeights.json
neuralWeights.json.*
.imgLinkCache.json
.idea

View File

@ -1,4 +1,4 @@
# CaliGraph
CaliGraph connects to the database of your local Calibre-Instance in order to recommend which unread books are likely to be enjoyed. The recommendations are displayed in a graph which explains the recommendation by showing correlations to previously read books, authors, recommendations by same individuals, tags...
![Screenshot](https://gitea.dominik.roth.ml/dodox/CaliGraph/raw/branch/master/Screenshot_README.png)
![Screenshot](./Screenshot_README.png)

Binary file not shown.

Before

Width:  |  Height:  |  Size: 428 KiB

After

Width:  |  Height:  |  Size: 243 KiB

File diff suppressed because it is too large Load Diff

View File

@ -1 +0,0 @@
{"topList": 0.8269026935523768, "recommender": 0.5687397308864482, "author": 0.9602300111040548, "series": 0.0015315822252870478, "tag": 0.0028789924118737056, "mu": 0.48730278196967397, "sigma": 0.02394878388858184, "se": 0.45207554705083647, "bias": 0.555294008129175}

BIN
perfTests/1645539905.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.5 MiB

3004
rec_dbs/mrb_db.csv Normal file

File diff suppressed because it is too large Load Diff

2691
rec_dbs/tgb_1.csv Normal file

File diff suppressed because it is too large Load Diff

1318
rec_dbs/tgb_2.csv Normal file

File diff suppressed because it is too large Load Diff

14
requirements.txt Normal file
View File

@ -0,0 +1,14 @@
numpy
scipy
matplotlib
networkx
pyvis
fuzzywuzzy
rake_nltk
ptpython
requests
pandas
plotly
wikipedia
argcomplete
pyzshcomplete

155
zSearch.py Normal file
View File

@ -0,0 +1,155 @@
import requests
from bs4 import BeautifulSoup
class Search:
    """Builds and executes a search against the z-library mirrors.

    ``searchResults`` selects the target site: ``"books"`` queries b-ok.cc,
    any other value queries booksc.xyz (articles).  All options are sent
    as GET query parameters by ``executeSearch``.
    """

    def __init__(self):
        # "general" or phrase search; controls the "matchPhrase" option.
        self.searchType = "general"
        # GET parameters; page is kept as a string because it is sent verbatim.
        self.searchOptions = {
            "e": "",
            "yearFrom": "",
            "yearTo": "",
            "language": "",
            "extension": "",
            "page": "1",
            "order": "",
        }
        self.searchResults = "books"
        # The raw query text, appended to the /s/ URL path.
        self.input = ""

    def nextPage(self):
        """Advance to the next result page."""
        self.searchOptions["page"] = str(int(self.searchOptions["page"]) + 1)

    def previousPage(self):
        """Go back one result page.

        Returns 1 when already on the first page (legacy error code —
        callers check the return value rather than catching an exception).
        """
        if self.searchOptions["page"] != "1":
            self.searchOptions["page"] = str(int(self.searchOptions["page"]) - 1)
        else:
            return 1

    def updateSearchOptions(self):
        """Sync ``searchOptions`` with ``searchType`` and ``searchResults``.

        Adds/removes "matchPhrase" for non-general searches, and
        adds/removes "language"/"extension" depending on whether books
        or articles are being searched.
        """
        if self.searchType == "general":
            self.searchOptions.pop("matchPhrase", None)
        else:
            self.searchOptions["matchPhrase"] = ""
        if self.searchResults == "books":
            self.searchOptions["language"] = ""
            self.searchOptions["extension"] = ""
        else:
            self.searchOptions.pop("language", None)
            self.searchOptions.pop("extension", None)

    def executeSearch(self):
        """Execute the GET request and return the raw response object."""
        if self.searchResults == "books":
            url = "http://b-ok.cc"
        else:
            url = "http://booksc.xyz"
        r = requests.get(url + "/s/" + self.input, params=self.searchOptions)
        return r

    def getResults(self):
        """Fetch and parse search results.  Returns a list of ``Book``."""
        r = self.executeSearch()
        soup = BeautifulSoup(r.text, "html.parser")
        match = soup.find_all(class_="resItemBox")
        results = []
        for e in match:
            title = e.find(itemprop="name")
            author = e.find(class_="authors")
            year = e.find(class_="bookProperty property_year")
            language = e.find(class_="bookProperty property_language")
            fileInfo = e.find(class_="bookProperty property__file")
            link = e.find("a", href=True)
            link = link["href"]
            # NOTE(review): detail links use https for books but a
            # scheme-less host for articles — looks inconsistent; confirm.
            if self.searchResults == "books":
                fullLink = "https://b-ok.cc" + link
            else:
                fullLink = "booksc.xyz" + link
            # isNone() null-guards optional fields and flattens their text.
            title = isNone(title)
            author = isNone(author)
            year = isNone(year)
            language = isNone(language)
            fileInfo = isNone(fileInfo)
            book = Book(title, author, year, language, fileInfo, fullLink)
            results += [book]
        return results

    def reset(self):
        """Clear the search and restore default options."""
        # Defaults live in __init__ only; delegate instead of duplicating
        # the option dict (the two copies had to be kept in sync by hand).
        self.__init__()
def isNone(e):
    """Collapse a BeautifulSoup tag to single-line text, or "" if ``e`` is None.

    Despite the name, this is a null-guard: it returns the tag's ``.text``
    with all newlines removed, and the empty string for a missing tag.
    """
    # Identity comparison is the correct None check (was `e != None`).
    if e is not None:
        return "".join(e.text.splitlines())
    return ""
class Book:
    """A single z-library search result.

    ``fileInfo`` must look like ``"<type>, <size>"`` (e.g. ``"pdf, 2.4 MB"``);
    it is split into ``fType`` and ``size`` (raises ValueError otherwise).
    The detail page and download link are fetched lazily and cached.
    """

    def __init__(self, title, author, year, language, fileInfo, link):
        self.title = title
        self.author = author
        self.year = year
        self.language = language
        # "epub, 1.2 MB" -> ("epub", "1.2 MB")
        self.fType, self.size = fileInfo.split(', ')
        self.link = link
        self.dlLink = None  # download href, resolved lazily by getDownloadURL
        self.page = None    # cached detail-page HTML, fetched lazily

    def __repr__(self):
        return '<zBook: ' + str(self) + '>'

    def __str__(self):
        return " / ".join([self.title, self.author, self.year,
                           self.language, self.fType, self.size])

    def getDetails(self):
        """Return the book's property list as newline-separated text.

        Fetched (and cached) from ``self.link`` on first call.
        """
        if self.page is None:  # was `== None`; identity check is correct
            self.page = requests.get(self.link).text
        soup = BeautifulSoup(self.page, "html.parser")
        # for some reason, bookProperty also shows properties from other books
        # the line below prevents this
        soup = soup.find(class_="row cardBooks")
        match = soup.find_all(class_="bookProperty")
        results = ""
        for e in match:
            results += "".join(e.text.splitlines())
            results += "\n"
        # this makes writing the category easier for some books
        results = results.replace("\\\\", " \\ ")
        return results

    def getDownloadURL(self):
        """Resolve and return the absolute download URL (cached)."""
        if self.dlLink is None:  # was `== None`
            if self.page is None:  # was `== None`
                self.page = requests.get(self.link).text
            soup = BeautifulSoup(self.page, "html.parser")
            self.dlLink = soup.find(
                'a', 'btn btn-primary dlButton addDownloadedBook')['href']
        return 'https://b-ok.cc' + self.dlLink + '?dsource=recommend'

    def saveBook(self, path):
        """Download the book and write it to ``path``.

        NOTE(review): the whole file is buffered in memory (r.content);
        consider stream=True + chunked writes for large books.
        """
        r = requests.get(self.getDownloadURL(), allow_redirects=True)
        with open(path, 'wb') as f:
            f.write(r.content)