# Parassharmaa/booky.py

## booky.py
import requests
from bs4 import BeautifulSoup

# Root of the mirror that relative image and detail links are joined onto.
base_url = "http://gen.lib.rus.ec"


# Browser-like headers sent with every HTTP request made by this module.
headers = {
	"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
	"Accept-Encoding": "gzip, deflate, sdch",
	"Accept-Language": "en-US,en;q=0.8",
	"Upgrade-Insecure-Requests": "1",
	"User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:41.0) Gecko/20100101 Firefox/41.0",
}


def get_download_url(u, timeout=30):
	"""Return the href of the second anchor on the ads page for an MD5.

	Args:
		u: Value appended to the ``md5=`` query parameter of the ads page URL.
		timeout: Seconds to wait for the server before ``requests`` gives up.
			Without a timeout a stalled connection would block forever.

	Returns:
		The ``href`` attribute of the second ``<a>`` element in the response.

	Raises:
		requests.RequestException: If the request fails or times out.
		IndexError: If the page contains fewer than two ``<a>`` elements.
		KeyError: If the second ``<a>`` element has no ``href`` attribute.
	"""
	ads_url = "http://libgen.io/ads.php?md5=" + u
	response = requests.get(ads_url, headers=headers, timeout=timeout)
	soup = BeautifulSoup(response.text, "html.parser")
	# NOTE(review): the second anchor is presumably the download link;
	# confirm against the live page markup.
	return soup.find_all("a")[1].attrs['href']

class Booky:
	"""Search libgen.io for a phrase and collect details of the matches.

	Attributes:
		name: The search phrase given to the constructor.
		data: ``{"books": [...]}``; ``crawl`` appends one dict per parsed
			result with the keys ``image``, ``title``, ``author``, ``file``,
			``ext`` and ``size``.
	"""

	def __init__(self, name):
		"""Store the search phrase and start with an empty result list."""
		self.name = name
		self.data = {"books":[]}

	def _search_params(self):
		"""Return the query parameters for the detailed-view search page.

		The phrase is returned unescaped on purpose: ``requests`` encodes
		each value when it builds the URL, so characters such as ``&``,
		``#`` or ``+`` in the phrase can no longer corrupt the query string.
		"""
		return {
			"req": self.name,
			"open": "0",
			"res": "25",
			"view": "detailed",
			"phrase": "0",
			"column": "def",
		}

	@staticmethod
	def _parse_book(trs):
		"""Build one result dict from the rows of a detailed-view table."""
		title_row = trs[1]
		file_cells = trs[9].find_all('td')
		return {
			"image": base_url + title_row.find_all("img")[0].attrs['src'],
			"title": title_row.find_all('td')[2].text,
			# Only the text before the first comma of the author cell is kept.
			"author": trs[2].find_all('td')[1].text.split(',')[0],
			"file": base_url + title_row.find_all("a")[0].attrs['href'],
			"ext": file_cells[3].text,
			"size": file_cells[1].text,
		}

	def crawl(self, timeout=30):
		"""Fetch the search results and append each parsed book to ``data``.

		Results accumulate: calling ``crawl`` again appends to the existing
		``data["books"]`` list rather than replacing it.

		Args:
			timeout: Seconds to wait for the server before ``requests``
				gives up; without it a stalled connection blocks forever.

		Raises:
			requests.RequestException: If the request fails or times out.
			IndexError: If a 14-row table lacks an expected cell, image
				or link.
			KeyError: If the expected image or link has no ``src``/``href``.
		"""
		response = requests.get(
			"http://libgen.io/search.php",
			params=self._search_params(),
			headers=headers,
			timeout=timeout,
		)
		soup = BeautifulSoup(response.content, "html.parser")
		# NOTE(review): only the tables at positions 3..16 are inspected,
		# presumably to skip page chrome; confirm against the live markup.
		for table in soup.find_all("table")[3:17]:
			trs = table.find_all('tr')
			# Only tables with exactly 14 rows are treated as book records.
			if len(trs) != 14:
				continue
			self.data['books'].append(self._parse_book(trs))


if __name__=="__main__":
	# Demo run: search for "Python" and print whatever was collected.
	t = Booky("Python")
	t.crawl()
	print(t.data)