Skip to content

Instantly share code, notes, and snippets.

@Kevin-Mok
Last active April 15, 2019 12:39
Show Gist options
  • Save Kevin-Mok/ab854bcf02aae0799955716b5fcbed55 to your computer and use it in GitHub Desktop.
Save Kevin-Mok/ab854bcf02aae0799955716b5fcbed55 to your computer and use it in GitHub Desktop.
Scrape Top Box Office movies from Rotten Tomatoes.
#!/usr/bin/python3
# Display Top Box Office movies from Rotten Tomatoes.
import requests
import colored
from pprint import pprint
from bs4 import BeautifulSoup
# from colored import fg, bg, attr
from colored import stylize
from prettytable import PrettyTable
# Fetch the Rotten Tomatoes front page. The timeout keeps the script from
# hanging indefinitely on a stalled connection, and raise_for_status() turns
# an HTTP error response into an explicit failure instead of parsing an
# error page as if it were the leaderboard.
data = requests.get('https://www.rottentomatoes.com/', timeout=10)
data.raise_for_status()

# Parse the HTML and locate the box-office table by its element id.
soup = BeautifulSoup(data.text, 'html.parser')
leaderboard = soup.find('table', {'id': 'Top-Box-Office'})
if leaderboard is None:
    # find() returns None when nothing matches; fail with a clear message
    # rather than an AttributeError on the find_all() call below.
    raise SystemExit("Could not find the 'Top-Box-Office' table on the page.")

# Output table: score, movie name and box-office figure, with the two text
# columns left-aligned.
movie_table = PrettyTable()
movie_table.field_names = ["Score", "Name", "BO"]
movie_table.align["Name"] = "l"
movie_table.align["BO"] = "l"

for tr in leaderboard.find_all('tr'):
    # A row may lack a score cell/span; find() then returns None and the
    # chained call raises AttributeError, so fall back to 'n/a'.
    try:
        score_cell = tr.find('td', class_="left_col")
        movie_score = score_cell.find('span', class_="tMeterScore").getText()
    except AttributeError:
        movie_score = 'n/a'

    # Rows without a name or box-office link cannot be shown meaningfully;
    # skip them instead of aborting the whole script.
    try:
        movie_name = tr.find('td', class_="middle_col").find('a').getText()
        box_office = tr.find('td', class_="right_col").find('a').getText()
    except AttributeError:
        continue

    movie_table.add_row([movie_score, movie_name, box_office])

print(movie_table)
  • venv
  • put data into table (with coloring)
    • header
    • learn pandas?
  • convert % into number
  • opening movies
  • sort
  • cache
    • show diff since last checked
  • pip
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment