CaliGraph/zSearch.py

156 lines
5.1 KiB
Python
Raw Permalink Normal View History

import requests
from bs4 import BeautifulSoup
class Search:
    '''Builds and runs a search against the book or article site.

    Attributes:
        searchType: "general" by default; any other value makes
            updateSearchOptions add a "matchPhrase" option.
        searchResults: "books" selects the b-ok.cc site; any other value
            selects booksc.xyz.
        searchOptions: dict of query-string parameters sent with the search
            request ("page" is kept as a string).
        input: the search text placed after "/s/" in the request path.
    '''

    def __init__(self):
        # reset() is the single source of the default state.
        self.reset()

    def nextPage(self):
        '''Goes to next page'''
        self.searchOptions["page"] = str(int(self.searchOptions["page"]) + 1)

    def previousPage(self):
        '''Goes to previous page.

        Returns 1 (and changes nothing) when already on page "1";
        returns None after a successful step back.
        '''
        if self.searchOptions["page"] == "1":
            return 1
        self.searchOptions["page"] = str(int(self.searchOptions["page"]) - 1)
        return None

    def updateSearchOptions(self):
        '''Depending on searchType and searchResults, updates appropriately
        searchOptions.

        Note that "matchPhrase", "language" and "extension" are set back to
        "" whenever they apply, so any previous value is discarded.
        '''
        if self.searchType == "general":
            self.searchOptions.pop("matchPhrase", None)
        else:
            self.searchOptions["matchPhrase"] = ""
        if self.searchResults == "books":
            self.searchOptions["language"] = ""
            self.searchOptions["extension"] = ""
        else:
            self.searchOptions.pop("language", None)
            self.searchOptions.pop("extension", None)

    def _searchURL(self):
        '''Returns the search URL (without query string) for the current input.'''
        from urllib.parse import quote

        if self.searchResults == "books":
            url = "http://b-ok.cc"
        else:
            url = "http://booksc.xyz"
        # Percent-encode the search text so characters such as "?", "#" and
        # "%" stay part of the path instead of being read as URL syntax.
        # "/" is left as-is (quote's default) to keep the previous behaviour
        # for inputs that contain slashes.
        return url + "/s/" + quote(self.input)

    def _fullLink(self, link):
        '''Turns a site-relative href into an absolute URL for the active site.'''
        if self.searchResults == "books":
            return "https://b-ok.cc" + link
        # The scheme is required: a bare "booksc.xyz/..." cannot be fetched
        # by requests later on.
        return "http://booksc.xyz" + link

    def executeSearch(self):
        '''Executes get request and returns response'''
        return requests.get(self._searchURL(), params=self.searchOptions)

    def getResults(self):
        '''Fetches search results. Returns a list of books.

        Every element with class "resItemBox" in the response becomes one
        Book; entries without any link are skipped because they cannot be
        opened or downloaded.
        '''
        r = self.executeSearch()
        soup = BeautifulSoup(r.text, "html.parser")
        results = []
        for e in soup.find_all(class_="resItemBox"):
            anchor = e.find("a", href=True)
            if anchor is None:
                continue
            # isNone turns a missing element into "" and strips line breaks.
            title = isNone(e.find(itemprop="name"))
            author = isNone(e.find(class_="authors"))
            year = isNone(e.find(class_="bookProperty property_year"))
            language = isNone(e.find(class_="bookProperty property_language"))
            fileInfo = isNone(e.find(class_="bookProperty property__file"))
            fullLink = self._fullLink(anchor["href"])
            results.append(Book(title, author, year, language, fileInfo, fullLink))
        return results

    def reset(self):
        '''Clears the search and resets to default options'''
        self.searchType = "general"
        self.searchOptions = {
            "e": "",
            "yearFrom": "",
            "yearTo": "",
            "language": "",
            "extension": "",
            "page": "1",
            "order": "",
        }
        self.searchResults = "books"
        self.input = ""
def isNone(e):
    '''Returns the text of e with all line breaks removed, or "" if e is None.

    e must expose a ``text`` attribute when it is not None.
    '''
    # Identity check rather than "!=": an element type may overload the
    # comparison operators, and only "is" reliably detects a missing element.
    if e is None:
        return ""
    return "".join(e.text.splitlines())
class Book:
    '''One search result, with lazy access to its detail page and download.

    Attributes:
        title, author, year, language: display strings ("" when unknown).
        fType, size: the two parts of the "type, size" file info string.
        link: URL of the book's detail page.
        dlLink: cached href of the download button (None until looked up).
        page: cached HTML of the detail page (None until fetched).
    '''

    def __init__(self, title, author, year, language, fileInfo, link):
        self.title = title
        self.author = author
        self.year = year
        self.language = language
        # partition never fails: an empty or comma-less fileInfo yields ""
        # for the missing part instead of raising on tuple unpacking.
        self.fType, _, self.size = fileInfo.partition(', ')
        self.link = link
        self.dlLink = None
        self.page = None

    def __repr__(self):
        return '<zBook: ' + str(self) + '>'

    def __str__(self):
        return " / ".join([self.title, self.author, self.year,
                           self.language, self.fType, self.size])

    def _loadPage(self):
        '''Returns the detail page HTML, fetching it from link on first use.'''
        if self.page is None:
            self.page = requests.get(self.link).text
        return self.page

    def getDetails(self):
        '''Returns more specific info about the book. The info is retrieved by the
        link attribute.

        The result has one line per "bookProperty" element; it is "" when the
        page has no "row cardBooks" container.
        '''
        soup = BeautifulSoup(self._loadPage(), "html.parser")
        # for some reason, bookProperty also shows properties from other books
        # restricting the lookup to the card container prevents this
        card = soup.find(class_="row cardBooks")
        if card is None:
            return ""
        lines = ["".join(e.text.splitlines()) + "\n"
                 for e in card.find_all(class_="bookProperty")]
        # this makes writing the category easier for some books
        return "".join(lines).replace("\\\\", " \\ ")

    def getDownloadURL(self):
        '''Returns the absolute download URL for this book.

        The download button's href is looked up once and cached in dlLink.
        Raises TypeError if the page has no download button.
        '''
        from urllib.parse import urlsplit

        if self.dlLink is None:
            soup = BeautifulSoup(self._loadPage(), "html.parser")
            self.dlLink = soup.find('a', 'btn btn-primary dlButton addDownloadedBook')['href']
        # The href is site-relative, so it belongs to the host that served the
        # detail page; fall back to the book site when link has no usable
        # scheme and host.
        parts = urlsplit(self.link)
        if parts.scheme and parts.netloc:
            origin = parts.scheme + '://' + parts.netloc
        else:
            origin = 'https://b-ok.cc'
        return origin + self.dlLink + '?dsource=recommend'

    def saveBook(self, path):
        '''Downloads the book and writes it to path.

        Raises requests.HTTPError on an error response so that an error page
        is never saved as if it were the book.
        '''
        r = requests.get(self.getDownloadURL(), allow_redirects=True)
        r.raise_for_status()
        with open(path, 'wb') as f:
            f.write(r.content)