Skip to content

Instantly share code, notes, and snippets.

@drewgillievfx
Created July 30, 2018 23:45
Show Gist options
  • Save drewgillievfx/2ce3ddc634da18ed7ef2dfcdec756cd4 to your computer and use it in GitHub Desktop.
Save drewgillievfx/2ce3ddc634da18ed7ef2dfcdec756cd4 to your computer and use it in GitHub Desktop.
version 1.0
import io
import urllib2
from urllib import urlopen as uReq

from bs4 import BeautifulSoup as soup
## Create a .csv file ##
filename = "episodes.csv"
# Unicode literal so the header can be written through io.open on Python 2 too.
headers = u"Title, Season, Episode, Description\n"
################################################################################
# add new shows here and in the list
# to find a show's code, go to IMDb and search for the show, then open its
# episode listing and select a season. The code is the id that starts with tt
Monk = 'tt0312172'
Psych = 'tt0491738'
HIMYM = 'tt0460649'
# list for grabbing episodes from
shows = [Monk, Psych, HIMYM]


def season_count(show):
    """Return how many seasons to scrape for *show*: 9 for HIMYM, otherwise 8."""
    return 9 if show == HIMYM else 8


def format_row(title, number):
    """Build one CSV line for an episode.

    title:  episode title; its commas are replaced by spaces so they cannot
            split the field.
    number: episode label text exactly as scraped. It is written unescaped;
            presumably it looks like "S1, Ep1", so its own comma fills the
            Season and Episode columns -- TODO confirm against the live page.

    The trailing Description column is left empty; descriptions are not
    written yet.
    """
    return title.replace(u",", u" ") + u"," + number + u"," + u"\n"


def fetch_season(show, season):
    """Download one season's episode page and return its "list_item" divs.

    show:   IMDb title id (the "tt..." code).
    season: season number placed in the page URL.

    Returns the (possibly empty) result of findAll for div.list_item.
    """
    my_url = "https://www.imdb.com/title/{0}/episodes?season={1}".format(show, season)
    # open connection and grab page
    uClient = uReq(my_url)
    try:
        page_html = uClient.read()
    finally:
        # close the connection even when the read fails
        uClient.close()
    # html parsing
    page_soup = soup(page_html, "html.parser")
    # finds the episodes in this particular season
    return page_soup.findAll("div", {"class": "list_item"})


def main():
    """Scrape every season of every show in `shows` into the CSV file."""
    # io.open with an explicit encoding accepts non-ASCII scraped text on both
    # Python 2 and 3; the with-block closes the file even if a request fails.
    with io.open(filename, "w", encoding="utf-8") as f:
        f.write(headers)
        for show in shows:
            for season in range(1, season_count(show) + 1):
                # A season page with no episodes yields an empty list and is
                # simply skipped instead of raising IndexError.
                for container in fetch_season(show, season):
                    # First nested div pair holds the thumbnail: the image's
                    # alt text is the title, the inner div the episode label.
                    thumb = container.div.div
                    f.write(format_row(thumb.img["alt"], thumb.div.text))
    print('done')


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment