Compare commits

..

1 Commits

Author SHA1 Message Date
0fbc9d7729 WIP 2021-10-13 15:43:11 +02:00
11 changed files with 129 additions and 8086 deletions

4
.gitignore vendored
View File

@ -1,7 +1,3 @@
__pycache__
*.html
.venv
neuralWeights.json
neuralWeights.json.*
.imgLinkCache.json
.idea

View File

@ -1,4 +1,4 @@
# CaliGraph
CaliGraph connects to the database of your local Calibre instance in order to recommend which unread books you are likely to enjoy. The recommendations are displayed in a graph that explains each recommendation by showing its correlations to previously read books, authors, recommendations by the same individuals, tags, and more.
![Screenshot](./Screenshot_README.png)
![Screenshot](https://gitea.dominik.roth.ml/dodox/CaliGraph/raw/branch/master/Screenshot_README.png)

Binary file not shown.

Before

Width:  |  Height:  |  Size: 243 KiB

After

Width:  |  Height:  |  Size: 428 KiB

File diff suppressed because it is too large Load Diff

1
neuralWeights.json Normal file
View File

@ -0,0 +1 @@
{"topList": 0.6813597954924836, "recommender": 0.7373820050627435, "author": 0.9322587113850848, "series": 0.9256656037821412, "tag": 0.9917141164721258, "mu": 0.5619155530016923, "sigma": 0.05411831944351521, "se": 0.6823826056339866, "bias": 0.7987740969341441, "norm": 0.9443733783393993}

Binary file not shown.

Before

Width:  |  Height:  |  Size: 2.5 MiB

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -1,14 +0,0 @@
numpy
scipy
matplotlib
networkx
pyvis
fuzzywuzzy
rake_nltk
ptpython
requests
pandas
plotly
wikipedia
argcomplete
pyzshcomplete

View File

@ -1,155 +0,0 @@
import requests
from bs4 import BeautifulSoup
class Search:
    '''Builds and runs a search query against b-ok.cc / booksc.xyz.

    The attributes are plain values that the caller sets before calling
    getResults():
        searchType    -- "general"; any other value enables the "matchPhrase"
                         option (see updateSearchOptions)
        searchResults -- "books" targets b-ok.cc; any other value targets
                         booksc.xyz
        searchOptions -- dict sent as the query parameters of the request
        input         -- search string, appended to the "/s/" URL path
    '''

    def __init__(self):
        # a new instance starts in exactly the state reset() restores
        self.reset()

    def nextPage(self):
        '''Goes to next page'''
        self.searchOptions["page"] = str(int(self.searchOptions["page"]) + 1)

    def previousPage(self):
        '''Goes to previous page

        Returns 1 (and changes nothing) when already on page "1"; returns
        None after a successful step back.
        '''
        if self.searchOptions["page"] == "1":
            return 1
        self.searchOptions["page"] = str(int(self.searchOptions["page"]) - 1)

    def updateSearchOptions(self):
        '''Depending on searchType and searchResults, updates appropriately
        searchOptions

        NOTE(review): the options that apply are reset to "", which discards
        a value set earlier -- confirm callers invoke this before filling
        them in.
        '''
        if self.searchType == "general":
            self.searchOptions.pop("matchPhrase", None)
        else:
            self.searchOptions["matchPhrase"] = ""
        if self.searchResults == "books":
            self.searchOptions["language"] = ""
            self.searchOptions["extension"] = ""
        else:
            self.searchOptions.pop("language", None)
            self.searchOptions.pop("extension", None)

    def _linkBaseUrl(self):
        '''Returns the scheme-qualified prefix for the links of the results'''
        if self.searchResults == "books":
            return "https://b-ok.cc"
        # The scheme is required: Book fetches this link with requests.get(),
        # which rejects a URL without one. It matches the URL executeSearch()
        # uses for this site.
        return "http://booksc.xyz"

    def executeSearch(self):
        '''Executes get request and returns response'''
        if self.searchResults == "books":
            url = "http://b-ok.cc"
        else:
            url = "http://booksc.xyz"
        return requests.get(url + "/s/" + self.input, params=self.searchOptions)

    def getResults(self):
        '''Fetches search results. Returns a list of books

        Every element of class "resItemBox" in the response becomes one Book.
        Properties missing from an entry are passed on as "". An entry that
        contains no link at all is skipped.
        '''
        response = self.executeSearch()
        soup = BeautifulSoup(response.text, "html.parser")
        linkBase = self._linkBaseUrl()
        results = []
        for entry in soup.find_all(class_="resItemBox"):
            anchor = entry.find("a", href=True)
            if anchor is None:
                # find() yields None when nothing matches; there is no page
                # to build a Book around in that case
                continue
            results.append(Book(
                isNone(entry.find(itemprop="name")),
                isNone(entry.find(class_="authors")),
                isNone(entry.find(class_="bookProperty property_year")),
                isNone(entry.find(class_="bookProperty property_language")),
                isNone(entry.find(class_="bookProperty property__file")),
                linkBase + anchor["href"],
            ))
        return results

    def reset(self):
        '''Clears the search and resets to default options'''
        self.searchType = "general"
        # a fresh dict on every call, so instances never share their options
        self.searchOptions = {
            "e": "",
            "yearFrom": "",
            "yearTo": "",
            "language": "",
            "extension": "",
            "page": "1",
            "order": "",
        }
        self.searchResults = "books"
        self.input = ""
def isNone(e):
    '''Returns the text of e collapsed onto one line, or "" if e is None

    e must expose a .text string. Line breaks inside the text are removed,
    not replaced by spaces.
    '''
    # identity test: an element type that overrides ==/!= cannot be mistaken
    # for None
    if e is None:
        return ""
    return "".join(e.text.splitlines())
class Book:
    '''One search result, plus lazily fetched data from its detail page.

    title, author, year, language and link are stored as given. fType and
    size are derived from fileInfo. page caches the HTML of the detail page
    and dlLink the download path found on it; both stay None until needed.
    '''

    def __init__(self,title,author,year,language,fileInfo,link):
        self.title = title
        self.author = author
        self.year = year
        self.language = language
        # fileInfo normally reads "<type>, <size>". partition() also copes
        # with a missing separator (size becomes "") and with further commas
        # (they stay in size), where unpacking a split() would raise.
        self.fType, _, self.size = fileInfo.partition(', ')
        self.link = link
        self.dlLink = None
        self.page = None

    def __repr__(self):
        return '<zBook: ' + str(self) + '>'

    def __str__(self):
        return " / ".join([self.title, self.author, self.year,
                           self.language, self.fType, self.size])

    def _getPage(self):
        '''Returns the HTML of the detail page, fetching it on first use only'''
        if self.page is None:
            self.page = requests.get(self.link).text
        return self.page

    def getDetails(self):
        '''Returns more specific info about the book. The info is retrieved by the
        link attribute

        The result holds one line per "bookProperty" element; it is "" when
        the page has no "row cardBooks" container.
        '''
        soup = BeautifulSoup(self._getPage(), "html.parser")
        # for some reason, bookProperty also shows properties from other books
        # restricting the search to this container prevents this
        card = soup.find(class_="row cardBooks")
        if card is None:
            return ""
        results = "".join(
            "".join(prop.text.splitlines()) + "\n"
            for prop in card.find_all(class_="bookProperty")
        )
        # this makes writing the category easier for some books
        return results.replace("\\\\", " \\ ")

    def getDownloadURL(self):
        '''Returns the download URL, looking up the download button on first use

        Raises TypeError if the detail page contains no download button.
        '''
        if self.dlLink is None:
            soup = BeautifulSoup(self._getPage(), "html.parser")
            self.dlLink = soup.find('a', 'btn btn-primary dlButton addDownloadedBook')['href']
        return 'https://b-ok.cc' + self.dlLink + '?dsource=recommend'

    def saveBook(self, path):
        '''Downloads the book and writes the response body to path'''
        r = requests.get(self.getDownloadURL(), allow_redirects=True)
        with open(path, 'wb') as f:
            f.write(r.content)