shymega/scrape.py

## scrape.py
#!/usr/bin/env python3

# Original code from @sircmpwm
# License: wtfpl

# Modified by @shymega to print the top quotes from bash.org to stdout - *without* PostgreSQL.

import requests
import sys
from bs4 import BeautifulSoup

def handle_page():
    """Fetch the bash.org "top" page and print every quote to stdout.

    Each quote is written as its text (carriage returns removed) followed
    by a line containing only ``%``.

    Raises:
        requests.exceptions.RequestException: if the request fails, times
            out, or the server answers with an HTTP error status.
    """
    url = "http://www.bash.org/?top"
    # Without a timeout, requests may block forever on an unresponsive server.
    response = requests.get(url, timeout=30)
    # Fail loudly on 4xx/5xx instead of parsing an error page and printing nothing.
    response.raise_for_status()
    html = BeautifulSoup(response.text, "html.parser")

    # Walk the document once; both lists are filtered from the same result.
    paragraphs = html.find_all('p')
    quote_headers = [p for p in paragraphs if p.get('class') == ['quote']]
    quote_data = [p for p in paragraphs if p.get('class') == ['qt']]

    # zip() pairs each header with its body and stops at the shorter list, so
    # a header without a matching body no longer raises IndexError.  The
    # header's quote number and score are not parsed: they were never used,
    # and an unexpected header format would have aborted the whole run.
    for _header, body in zip(quote_headers, quote_data):
        text = body.get_text().replace('\r', '')
        print("{}\n%".format(text))

# Run the scraper only when this file is executed as a script, not on import.
if __name__ == "__main__":
    handle_page()
	#!/usr/bin/env python3

	# Original code from @sircmpwm
	# License: wtfpl

	# Modified by @shymega to print the top quotes from bash.org to stdout - without PostgreSQL.

	import requests
	import sys
	from bs4 import BeautifulSoup

	def handle_page():
	    """Fetch the bash.org "top" page and print every quote to stdout.

	    Each quote is written as its text (carriage returns removed) followed
	    by a line containing only ``%``.

	    Raises:
	        requests.exceptions.RequestException: if the request fails, times
	            out, or the server answers with an HTTP error status.
	    """
	    url = "http://www.bash.org/?top"
	    # Without a timeout, requests may block forever on an unresponsive server.
	    response = requests.get(url, timeout=30)
	    # Fail loudly on 4xx/5xx instead of parsing an error page and printing nothing.
	    response.raise_for_status()
	    html = BeautifulSoup(response.text, "html.parser")

	    # Walk the document once; both lists are filtered from the same result.
	    paragraphs = html.find_all('p')
	    quote_headers = [p for p in paragraphs if p.get('class') == ['quote']]
	    quote_data = [p for p in paragraphs if p.get('class') == ['qt']]

	    # zip() pairs each header with its body and stops at the shorter list, so
	    # a header without a matching body no longer raises IndexError.  The
	    # header's quote number and score are not parsed: they were never used,
	    # and an unexpected header format would have aborted the whole run.
	    for _header, body in zip(quote_headers, quote_data):
	        text = body.get_text().replace('\r', '')
	        print("{}\n%".format(text))

	# Run the scraper only when this file is executed as a script, not on import.
	if __name__ == "__main__":
	    handle_page()