# drewgillievfx/tvScramble

## tvScramble
import io

try:  # Python 2 module layout
    import urllib2
    from urllib import urlopen as uReq
except ImportError:  # Python 3 moved urlopen into urllib.request
    from urllib.request import urlopen as uReq
from bs4 import BeautifulSoup as soup

# Scrape the IMDb episode listing of every season of each configured show and
# write one CSV row per episode to `filename`.

## Create a .csv file ##
filename = "episodes.csv"
headers = u"Title, Season, Episode, Description\n"

################################################################################
# add new shows here and in the list
# To find a show's code, search for the show on IMDb, open its episode listing
# and select a season; the code is the part of the URL that starts with "tt".
Monk = 'tt0312172'
Psych = 'tt0491738'
HIMYM = 'tt0460649'

# list for grabbing episodes from
shows = [Monk, Psych, HIMYM]

# Number of seasons to fetch per show; any show not listed here falls back to
# DEFAULT_SEASONS.
DEFAULT_SEASONS = 8
season_counts = {HIMYM: 9}

url_template = "https://www.imdb.com/title/{0}/episodes?season={1}"

# io.open with an explicit encoding accepts unicode text on both Python 2 and
# Python 3, so non-ASCII episode titles no longer fail on an implicit ASCII
# encode. The `with` block closes the file even if a request or parse fails.
with io.open(filename, "w", encoding="utf-8") as f:
    f.write(headers)

    ## start for loop for getting episode info
    for show in shows:
        last_season = season_counts.get(show, DEFAULT_SEASONS)

        for season in range(1, last_season + 1):
            my_url = url_template.format(show, season)

            # open connection and grab page; always release the connection,
            # even when reading the response raises
            uClient = uReq(my_url)
            try:
                page_html = uClient.read()
            finally:
                uClient.close()

            # html parsing
            page_soup = soup(page_html, "html.parser")

            # finds the episodes in this particular season; an empty result
            # simply produces no rows for that season
            containers = page_soup.findAll("div", {"class": "list_item"})

            for container in containers:
                # Episode Title (alt text of the episode thumbnail)
                episodeTitle = container.div.div.img["alt"]

                # Episode Number
                # NOTE(review): presumably text such as "S1, Ep1", whose own
                # comma fills the Season and Episode columns -- confirm
                # against the page markup.
                episodeNumber = container.div.div.div.text

                # Commas in the title are replaced so they do not split the
                # column. The Description column is left empty: the original
                # script had the description output commented out.
                f.write(u"{0},{1},\n".format(
                    episodeTitle.replace(u",", u" "), episodeNumber))

print('done')
	import io

	try:  # Python 2 module layout
	    import urllib2
	    from urllib import urlopen as uReq
	except ImportError:  # Python 3 moved urlopen into urllib.request
	    from urllib.request import urlopen as uReq
	from bs4 import BeautifulSoup as soup

	# Scrape the IMDb episode listing of every season of each configured show and
	# write one CSV row per episode to `filename`.

	## Create a .csv file ##
	filename = "episodes.csv"
	headers = u"Title, Season, Episode, Description\n"

	################################################################################
	# add new shows here and in the list
	# To find a show's code, search for the show on IMDb, open its episode listing
	# and select a season; the code is the part of the URL that starts with "tt".
	Monk = 'tt0312172'
	Psych = 'tt0491738'
	HIMYM = 'tt0460649'

	# list for grabbing episodes from
	shows = [Monk, Psych, HIMYM]

	# Number of seasons to fetch per show; any show not listed here falls back to
	# DEFAULT_SEASONS.
	DEFAULT_SEASONS = 8
	season_counts = {HIMYM: 9}

	url_template = "https://www.imdb.com/title/{0}/episodes?season={1}"

	# io.open with an explicit encoding accepts unicode text on both Python 2 and
	# Python 3, so non-ASCII episode titles no longer fail on an implicit ASCII
	# encode. The `with` block closes the file even if a request or parse fails.
	with io.open(filename, "w", encoding="utf-8") as f:
	    f.write(headers)

	    ## start for loop for getting episode info
	    for show in shows:
	        last_season = season_counts.get(show, DEFAULT_SEASONS)

	        for season in range(1, last_season + 1):
	            my_url = url_template.format(show, season)

	            # open connection and grab page; always release the connection,
	            # even when reading the response raises
	            uClient = uReq(my_url)
	            try:
	                page_html = uClient.read()
	            finally:
	                uClient.close()

	            # html parsing
	            page_soup = soup(page_html, "html.parser")

	            # finds the episodes in this particular season; an empty result
	            # simply produces no rows for that season
	            containers = page_soup.findAll("div", {"class": "list_item"})

	            for container in containers:
	                # Episode Title (alt text of the episode thumbnail)
	                episodeTitle = container.div.div.img["alt"]

	                # Episode Number
	                # NOTE(review): presumably text such as "S1, Ep1", whose own
	                # comma fills the Season and Episode columns -- confirm
	                # against the page markup.
	                episodeNumber = container.div.div.div.text

	                # Commas in the title are replaced so they do not split the
	                # column. The Description column is left empty: the original
	                # script had the description output commented out.
	                f.write(u"{0},{1},\n".format(
	                    episodeTitle.replace(u",", u" "), episodeNumber))

	print('done')