# siemiatj/movies.py

## movies.py
import bs4 as bs
import lxml
import urllib.request
import csv

def get_html():
    """Download the IMDb Top 250 chart and return every ``<a>`` tag on it.

    Despite the name, the return value is not raw HTML: it is the result of
    ``find_all('a')`` on the page after parsing it with BeautifulSoup and the
    ``lxml`` parser.

    Raises:
        urllib.error.URLError: if the page cannot be fetched.
    """
    url = "http://www.imdb.com/chart/top?ref=ft_250"
    # The context manager closes the HTTP response even if reading fails.
    with urllib.request.urlopen(url) as response:
        html = response.read()
    soup = bs.BeautifulSoup(html, "lxml")
    return soup.find_all('a')

def _movie_id_from_href(href):
    """Return the ``tt0...`` id that follows ``title/`` in *href*, or ``None``."""
    _, marker, tail = href.partition("title/tt0")
    if not marker:
        return None
    end = 0
    while end < len(tail) and tail[end].isdigit():
        end += 1
    return "tt0" + tail[:end]


def find_movies(anchors, limit=100):
    """Collect unique movie ids from anchor tags, preserving their order.

    The id is read from each anchor's ``href`` attribute rather than from a
    fixed character offset in the rendered tag, so it no longer matters where
    ``href`` sits among the tag's attributes.

    Args:
        anchors: iterable of objects exposing ``get('href')``.
        limit: maximum number of ids to collect (defaults to 100).

    Returns:
        A list of distinct id strings. Only ids that start with ``tt0`` are
        collected; anchors without an ``href`` are skipped.
    """
    id_list = []
    seen = set()  # O(1) duplicate check alongside the ordered result list
    for anchor in anchors:
        if len(id_list) >= limit:
            break
        href = anchor.get('href')
        if not href:
            continue
        movie_id = _movie_id_from_href(href)
        if movie_id is None or movie_id in seen:
            continue
        seen.add(movie_id)
        id_list.append(movie_id)
    return id_list

def get_movies_details(id_list):
    """Fetch the OMDb record for every id and return the raw responses.

    Each response body is parsed with BeautifulSoup (``lxml`` parser) and the
    string form of its ``find_all("p")`` result is stored, one entry per id,
    in the same order as *id_list*.

    Args:
        id_list: iterable of id strings appended to the OMDb query URL.

    Returns:
        A list of strings, one per id.

    Raises:
        urllib.error.URLError: if a request cannot be completed.
    """
    list_of_movies = []
    address = "http://www.omdbapi.com/?i="
    for id_number in id_list:
        # Previously referenced the misspelled name ``addres`` (NameError).
        # The context manager also makes sure each response is closed.
        with urllib.request.urlopen(address + id_number) as response:
            html2 = response.read()
        soup2 = bs.BeautifulSoup(html2, "lxml")
        list_of_movies.append(str(soup2.find_all("p")))
    return list_of_movies

def get_movies_data(list_of_movies):
    """Extract ``[title, year]`` pairs and return them sorted by year.

    Every input string must contain the markers ``"Title":"``, ``","Year":``,
    ``"Year":"`` and ``","Rated"``; the title and year are the text found
    between them.

    Args:
        list_of_movies: iterable of raw record strings.

    Returns:
        A list whose first row is ``["title", "year"]`` followed by one
        ``[title, year]`` row per record, ordered by ascending integer year
        (records with the same year keep their input order).

    Raises:
        ValueError: if a marker is missing or a year is not an integer.
    """
    def between(text, opening, closing):
        # Text enclosed by the first ``opening`` and the next ``closing``.
        begin = text.index(opening) + len(opening)
        return text[begin:text.index(closing, begin)]

    rows = [
        [
            between(raw, '"Title":"', '","Year":'),
            between(raw, '"Year":"', '","Rated"'),
        ]
        for raw in list_of_movies
    ]
    rows.sort(key=lambda row: int(row[1]))
    return [["title", "year"]] + rows

def save_data_to_file(movies_data):
    """Write *movies_data* to ``movies.csv`` in the current working directory.

    Args:
        movies_data: iterable of rows, each row an iterable of fields.

    Any existing ``movies.csv`` is overwritten.
    """
    # newline='' leaves line endings to the csv module; the explicit encoding
    # keeps non-ASCII titles identical across platforms.
    with open('movies.csv', 'w', newline='', encoding='utf-8') as file:
        writer = csv.writer(file, delimiter=',')
        # Previously wrote the undefined name ``year_title`` (NameError).
        writer.writerows(movies_data)

def main():
    """Run the scraper end to end and report completion.

    Fetches the anchors, extracts the movie ids, downloads the details for
    each id, turns them into title/year rows, saves the rows and finally
    prints ``Done!``.
    """
    anchors = get_html()
    ids = find_movies(anchors)
    raw_details = get_movies_details(ids)
    rows = get_movies_data(raw_details)
    save_data_to_file(rows)

    print("Done!")

# Run the pipeline only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
	import bs4 as bs
	import lxml
	import urllib.request
	import csv

	def get_html():
		"""Download the IMDb Top 250 chart and return every ``<a>`` tag on it.

		Despite the name, the return value is not raw HTML: it is the result of
		``find_all('a')`` on the page after parsing it with BeautifulSoup and
		the ``lxml`` parser.

		Raises:
			urllib.error.URLError: if the page cannot be fetched.
		"""
		url = "http://www.imdb.com/chart/top?ref=ft_250"
		# The context manager closes the HTTP response even if reading fails.
		with urllib.request.urlopen(url) as response:
			html = response.read()
		soup = bs.BeautifulSoup(html, "lxml")
		return soup.find_all('a')

	def _movie_id_from_href(href):
		"""Return the ``tt0...`` id that follows ``title/`` in *href*, or ``None``."""
		_, marker, tail = href.partition("title/tt0")
		if not marker:
			return None
		end = 0
		while end < len(tail) and tail[end].isdigit():
			end += 1
		return "tt0" + tail[:end]


	def find_movies(anchors, limit=100):
		"""Collect unique movie ids from anchor tags, preserving their order.

		The id is read from each anchor's ``href`` attribute rather than from a
		fixed character offset in the rendered tag, so it no longer matters
		where ``href`` sits among the tag's attributes.

		Args:
			anchors: iterable of objects exposing ``get('href')``.
			limit: maximum number of ids to collect (defaults to 100).

		Returns:
			A list of distinct id strings. Only ids that start with ``tt0``
			are collected; anchors without an ``href`` are skipped.
		"""
		id_list = []
		seen = set()  # O(1) duplicate check alongside the ordered result list
		for anchor in anchors:
			if len(id_list) >= limit:
				break
			href = anchor.get('href')
			if not href:
				continue
			movie_id = _movie_id_from_href(href)
			if movie_id is None or movie_id in seen:
				continue
			seen.add(movie_id)
			id_list.append(movie_id)
		return id_list

	def get_movies_details(id_list):
		"""Fetch the OMDb record for every id and return the raw responses.

		Each response body is parsed with BeautifulSoup (``lxml`` parser) and
		the string form of its ``find_all("p")`` result is stored, one entry
		per id, in the same order as *id_list*.

		Args:
			id_list: iterable of id strings appended to the OMDb query URL.

		Returns:
			A list of strings, one per id.

		Raises:
			urllib.error.URLError: if a request cannot be completed.
		"""
		list_of_movies = []
		address = "http://www.omdbapi.com/?i="
		for id_number in id_list:
			# Previously referenced the misspelled name ``addres`` (NameError).
			# The context manager also makes sure each response is closed.
			with urllib.request.urlopen(address + id_number) as response:
				html2 = response.read()
			soup2 = bs.BeautifulSoup(html2, "lxml")
			list_of_movies.append(str(soup2.find_all("p")))
		return list_of_movies

	def get_movies_data(list_of_movies):
		"""Extract ``[title, year]`` pairs and return them sorted by year.

		Every input string must contain the markers ``"Title":"``,
		``","Year":``, ``"Year":"`` and ``","Rated"``; the title and year are
		the text found between them.

		Args:
			list_of_movies: iterable of raw record strings.

		Returns:
			A list whose first row is ``["title", "year"]`` followed by one
			``[title, year]`` row per record, ordered by ascending integer
			year (records with the same year keep their input order).

		Raises:
			ValueError: if a marker is missing or a year is not an integer.
		"""
		def between(text, opening, closing):
			# Text enclosed by the first ``opening`` and the next ``closing``.
			begin = text.index(opening) + len(opening)
			return text[begin:text.index(closing, begin)]

		rows = [
			[
				between(raw, '"Title":"', '","Year":'),
				between(raw, '"Year":"', '","Rated"'),
			]
			for raw in list_of_movies
		]
		rows.sort(key=lambda row: int(row[1]))
		return [["title", "year"]] + rows

	def save_data_to_file(movies_data):
		"""Write *movies_data* to ``movies.csv`` in the current working directory.

		Args:
			movies_data: iterable of rows, each row an iterable of fields.

		Any existing ``movies.csv`` is overwritten.
		"""
		# newline='' leaves line endings to the csv module; the explicit
		# encoding keeps non-ASCII titles identical across platforms.
		with open('movies.csv', 'w', newline='', encoding='utf-8') as file:
			writer = csv.writer(file, delimiter=',')
			# Previously wrote the undefined name ``year_title`` (NameError).
			writer.writerows(movies_data)

	def main():
		"""Run the scraper end to end and report completion.

		Fetches the anchors, extracts the movie ids, downloads the details for
		each id, turns them into title/year rows, saves the rows and finally
		prints ``Done!``.
		"""
		html = get_html()
		movies_ids = find_movies(html)
		movies_details = get_movies_details(movies_ids)
		movies_data = get_movies_data(movies_details)
		save_data_to_file(movies_data)

		print("Done!")

	# Run the pipeline only when this file is executed as a script, not on import.
	if __name__ == "__main__":
		main()