Created
June 11, 2015 09:07
-
-
Save vikas17a/cdf576e0dfcf7a05047c to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Scrape the IMDb chart at `url`, fetch the cast of its first n movies, then
# answer interactive "which of those movies does actor X appear in?" queries.
#
# Flow:
#   1. read n (how many chart entries to index) from stdin
#   2. download the chart page and collect each movie's title cell and link
#   3. download each of the first n movie pages and record its cast
#   4. loop: ask for an actor name and a count m, print up to m matching movies
#
# Runs on both Python 2 and Python 3.

#### Importing Libraries #####
from bs4 import BeautifulSoup
import requests
import sys
##############################

# Pick a line reader that never evaluates what the user typed.
# Python 2's input() is eval(raw_input()), which would execute arbitrary
# expressions from stdin; Python 3 has no raw_input() at all.
try:
    _read_line = raw_input  # Python 2
except NameError:
    _read_line = input  # Python 3


def _ask(prompt_text=''):
    """Prompt on stdout and return one line from stdin as text.

    On Python 2 the reader returns bytes; they are decoded so that the
    answer compares equal to the text strings BeautifulSoup produces
    (otherwise a non-ASCII actor name could never match a key in `casts`).
    """
    answer = _read_line(prompt_text)
    if isinstance(answer, bytes):
        answer = answer.decode(sys.stdin.encoding or 'utf-8')
    return answer


########## IMDB URL #############
url = 'http://www.imdb.com/chart/top'
#################################

# Seconds to wait for any single HTTP request; without a timeout
# requests.get() can block forever on a stalled connection.
REQUEST_TIMEOUT = 30

# Parser passed explicitly so results do not depend on which optional
# parser packages happen to be installed alongside bs4.
HTML_PARSER = 'html.parser'

##### No of top movies to be extracted ########
# Raises ValueError if the line is not an integer.
n = int(_ask())
###############################################

###### Initiating request to the url ###########
response = requests.get(url, timeout=REQUEST_TIMEOUT)
# Fail loudly on an HTTP error instead of parsing an error page and
# silently ending up with zero movies.
response.raise_for_status()
soup = BeautifulSoup(response.text, HTML_PARSER)

# Movies: one title cell per chart row ####
movies = soup.select('td.titleColumn')

# Link to their cast: taken from inside each movie's own cell so that
# links[i] always belongs to movies[i], even if a cell has no anchor.
links = []
for cell in movies:
    anchor = cell.find('a')
    links.append(anchor.get('href') if anchor is not None else None)

# actor name -> list of movie indexes (in chart order) the actor appears in
casts = {}
# movie index (chart order, 0-based) -> display text of the movie
movies_base = {}

# Slicing clamps to the number of movies actually found; max() keeps a
# negative n from being interpreted as "all but the last -n entries".
for index, cell in enumerate(movies[:max(n, 0)]):
    movies_base[index] = cell.get_text().replace('\n', ' ')

    href = links[index]
    if not href:
        # No link for this movie: keep the title, but there is no cast to fetch.
        continue

    # NOTE(review): assumes href is a site-relative path such as "/title/..." -
    # confirm against the live page. lstrip avoids a "//" in the joined URL.
    url2 = "http://www.imdb.com/" + href.lstrip('/')
    try:
        response = requests.get(url2, timeout=REQUEST_TIMEOUT)  # Request to cast list
    except requests.RequestException:
        # Best effort: one unreachable movie page should not abort the run.
        continue
    soup = BeautifulSoup(response.text, HTML_PARSER)  # Parsing response

    # Extracting cast names
    for span in soup.select('td.itemprop a span'):
        name = span.get_text()
        appearances = casts.setdefault(name, [])
        # Guard against the same name being listed twice on one page,
        # which would print the movie twice for that actor.
        if index not in appearances:
            appearances.append(index)

###################### User Action to get results ############################
inp = 'Y'
while inp in ('Y', 'y'):
    actor = _ask("Actor Name:")
    try:
        m = int(_ask("Top M movies:"))
    except ValueError:
        print("Top M movies must be a whole number")
        continue  # ask again from the top; inp is still 'Y'

    # Unknown actors simply produce no output; a negative m prints nothing.
    for movie_index in casts.get(actor, [])[:max(m, 0)]:
        print(movies_base[movie_index])

    inp = _ask('Want to enter another name (Y/N):')
########################## End of Program ####################################
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment