Skip to content

Instantly share code, notes, and snippets.

@shymega
Last active July 29, 2019 01:22
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save shymega/17e51057f129e5f1cf3f to your computer and use it in GitHub Desktop.
Save shymega/17e51057f129e5f1cf3f to your computer and use it in GitHub Desktop.
scrape.py
#!/usr/bin/env python3
# Original code from @sircmpwm
# License: wtfpl
# Modified by @shymega to print the top quotes from bash.org to stdout - *without* PostgreSQL.
import requests
import sys
from bs4 import BeautifulSoup
def handle_page(url="http://www.bash.org/?top", timeout=30):
    """Fetch a bash.org listing page and print its quotes to stdout.

    Every quote body found on the page is printed, followed by a line
    containing only ``%``.

    Args:
        url: Page to fetch. Defaults to the bash.org "top" listing.
        timeout: Seconds passed to ``requests.get`` so that a stalled
            server cannot hang the script indefinitely.

    Raises:
        requests.RequestException: If the request fails, times out, or
            the server answers with an HTTP error status.
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly on 4xx/5xx rather than parsing an error page and
    # silently printing nothing.
    response.raise_for_status()
    html = BeautifulSoup(response.text, "html.parser")

    # Only the quote bodies (<p class="qt">) are printed. The quote
    # number and score from the header paragraphs were parsed but never
    # used, and a malformed header would abort the whole run, so that
    # parsing is dropped.
    for body in html.find_all("p", class_="qt"):
        # Strip carriage returns so the output has plain "\n" line ends.
        text = body.get_text().replace("\r", "")
        print("{}\n%".format(text))
# Run the scraper only when executed as a script, not when imported.
if __name__ == "__main__":
    handle_page()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment