from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup


class Search:
    '''Holds the state of one search and runs it against the site.

    Callers configure a search by assigning the public attributes directly:

    * searchType    -- "general", or any other value to enable the
                       "matchPhrase" option (see updateSearchOptions).
    * searchResults -- "books" to search books; any other value selects the
                       second site (see executeSearch).
    * searchOptions -- dict of query-string parameters sent with the request.
    * input         -- the search text, appended to the URL path.
    '''

    def __init__(self):
        # The initial state is exactly the "reset" state, so share the code.
        self.reset()

    def nextPage(self):
        '''Goes to next page'''
        self.searchOptions["page"] = str(int(self.searchOptions["page"]) + 1)

    def previousPage(self):
        '''Goes to previous page.

        Returns 1 (and leaves the page unchanged) when already on page "1";
        otherwise decrements the page and returns None.
        '''
        if self.searchOptions["page"] == "1":
            return 1
        self.searchOptions["page"] = str(int(self.searchOptions["page"]) - 1)

    def updateSearchOptions(self):
        '''Depending on searchType and searchResults, updates
        appropriately searchOptions.

        Note that "language" and "extension" are reset to "" whenever
        searchResults is "books", discarding any value set earlier.
        '''
        if self.searchType == "general":
            self.searchOptions.pop("matchPhrase", None)
        else:
            self.searchOptions["matchPhrase"] = ""

        if self.searchResults == "books":
            self.searchOptions["language"] = ""
            self.searchOptions["extension"] = ""
        else:
            self.searchOptions.pop("language", None)
            self.searchOptions.pop("extension", None)

    def executeSearch(self):
        '''Executes get request and returns response.

        The search text is appended to "<site>/s/" and searchOptions is
        sent as the query string.
        '''
        if self.searchResults == "books":
            url = "http://b-ok.cc"
        else:
            url = "http://booksc.xyz"
        return requests.get(url + "/s/" + self.input,
                            params=self.searchOptions)

    def getResults(self):
        '''Fetches search results.

        Returns a list of Book objects, one per "resItemBox" element found
        in the response. Missing fields become empty strings.
        '''
        response = self.executeSearch()
        soup = BeautifulSoup(response.text, "html.parser")

        # Result links are site-relative, so prefix them with the site they
        # came from. The scheme is required: requests refuses a URL such as
        # "booksc.xyz/..." that has none.
        if self.searchResults == "books":
            base = "https://b-ok.cc"
        else:
            base = "http://booksc.xyz"

        results = []
        for item in soup.find_all(class_="resItemBox"):
            link = item.find("a", href=True)["href"]
            results.append(Book(
                isNone(item.find(itemprop="name")),
                isNone(item.find(class_="authors")),
                isNone(item.find(class_="bookProperty property_year")),
                isNone(item.find(class_="bookProperty property_language")),
                isNone(item.find(class_="bookProperty property__file")),
                base + link,
            ))
        return results

    def reset(self):
        '''Clears the search and resets to default options'''
        self.searchType = "general"
        self.searchOptions = {
            "e": "",
            "yearFrom": "",
            "yearTo": "",
            "language": "",
            "extension": "",
            "page": "1",
            "order": "",
        }
        self.searchResults = "books"
        self.input = ""


def isNone(e):
    '''Returns the text of element e with line breaks removed.

    Returns "" when e is None (i.e. the element was not found).
    '''
    if e is None:
        return ""
    return "".join(e.text.splitlines())


class Book:
    '''One search result, plus lazily fetched data from its own page.

    title, author, year, language, fType and size are plain strings;
    link is the URL of the book's page. page caches the HTML of that page
    and dlLink caches the download href found on it; both start as None.
    '''

    def __init__(self, title, author, year, language, fileInfo, link):
        self.title = title
        self.author = author
        self.year = year
        self.language = language
        # fileInfo looks like "<type>, <size>". partition never raises, so a
        # missing ("") or unusually shaped value no longer aborts the
        # construction of the whole result list.
        self.fType, _, self.size = fileInfo.partition(', ')
        self.link = link
        self.dlLink = None
        self.page = None

    def __repr__(self):
        return '<Book {!r} by {!r}>'.format(self.title, self.author)

    def __str__(self):
        return " / ".join([self.title, self.author, self.year,
                           self.language, self.fType, self.size])

    def _loadPage(self):
        '''Returns the HTML of the book's page, downloading it only once.'''
        if self.page is None:
            self.page = requests.get(self.link).text
        return self.page

    def getDetails(self):
        '''Returns more specific info about the book.

        The info is retrieved by the link attribute. The result is one line
        of text per "bookProperty" element.

        Raises AttributeError if the page has no "row cardBooks" element.
        '''
        soup = BeautifulSoup(self._loadPage(), "html.parser")
        # for some reason, bookProperty also shows properties from other books
        # the line below prevents this
        card = soup.find(class_="row cardBooks")
        details = "".join(
            "".join(prop.text.splitlines()) + "\n"
            for prop in card.find_all(class_="bookProperty")
        )
        # this makes writing the category easier for some books
        return details.replace("\\\\", " \\ ")

    def getDownloadURL(self):
        '''Returns the download URL of the book.

        The href of the download button is looked up once and cached in
        dlLink. It is resolved against the book's own page URL, so the
        result points at the site the book was found on.

        Raises TypeError if the page has no download button.
        '''
        if self.dlLink is None:
            soup = BeautifulSoup(self._loadPage(), "html.parser")
            button = soup.find(
                'a', 'btn btn-primary dlButton addDownloadedBook')
            self.dlLink = button['href']
        return urljoin(self.link, self.dlLink) + '?dsource=recommend'

    def saveBook(self, path):
        '''Downloads the book and writes the response body to path.'''
        r = requests.get(self.getDownloadURL(), allow_redirects=True)
        with open(path, 'wb') as f:
            f.write(r.content)