Created
February 26, 2012 19:22
-
-
Save mushfiq/1918439 to your computer and use it in GitHub Desktop.
IMDB Movie Info CRAWLER
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re | |
import requests | |
import urllib | |
import json | |
# MOVIE_NAME = 'The Other Dream Team' | |
# Query endpoint; get_movie_info appends the URL-encoded parameters to it.
BASE_URL = 'http://www.imdbapi.com/?'
# Open handle on the file of movie titles that the script entry point passes
# to get_movi_name, which iterates it line by line.
# open() replaces file(): the file() builtin exists only on Python 2, while
# open() returns the same kind of object on every Python version.
NAME_LIST = open('movies.txt', 'r')
def get_movie_info(movi_name):
    """Fetch selected details for one movie title from the API at BASE_URL.

    The reply is decoded as JSON and fields are looked up by exact key
    name, so values containing commas (typically the plot) are returned
    whole instead of being cut at the first comma.

    Args:
        movi_name: Movie title, sent as the ``t`` query parameter.

    Returns:
        A dict with the keys 'Plot', 'Title', 'Director', 'tomatoRating',
        'Rating' and 'IMDB Rating' (a copy of 'Rating'). A field missing
        from the reply maps to the empty string.
    """
    # urlencode lives in urllib on Python 2 and in urllib.parse on Python 3.
    try:
        from urllib import urlencode
    except ImportError:
        from urllib.parse import urlencode

    query = {'i': '', 't': movi_name, 'tomatoes': 'true'}
    response = requests.get(BASE_URL + urlencode(query))

    # Decode the reply rather than re-encoding it with json.dumps and
    # scraping the resulting text.
    try:
        payload = json.loads(response.content)
    except ValueError as e:
        # Non-JSON reply: report it and fall back to empty fields.
        print("Error Occured! %s" % e)
        payload = {}
    if not isinstance(payload, dict):
        payload = {}

    movie_info = {}
    for field in ('Plot', 'Title', 'Director', 'tomatoRating', 'Rating'):
        value = payload.get(field, "")
        movie_info[field] = value
        if field == 'Rating':
            # The rating is exposed under both names, as before.
            movie_info['IMDB Rating'] = value
    return movie_info
def get_and_clean_data(tag, data):
    """Extract the value that follows ``tag`` in an escaped JSON text.

    ``data`` is expected to contain backslash-escaped quotes (``\\"``), as
    produced by JSON-encoding a JSON document a second time. The text
    after the first occurrence of ``tag`` is cut at the next comma, then
    the ``:\\"`` separator and any remaining escaped quotes are removed.
    A value that itself contains a comma is therefore truncated there.

    Args:
        tag: Key name to search for.
        data: Escaped JSON text to search in.

    Returns:
        The cleaned value, or "" (after printing an error message) when
        ``tag`` does not occur in ``data``.
    """
    try:
        # Only this lookup can fail: split() yields a single element when
        # the tag is absent, so index 1 does not exist.
        after_tag = data.split(tag)[1]
    except IndexError as e:
        print("Error Occured! %s" % e)
        return ""
    raw_value = after_tag.split(",")[0]
    return re.sub(r':\\"+', '', raw_value).replace('\\"', '')
def get_movi_name(name_list):
    """Look up and print the details of every movie title in ``name_list``.

    Args:
        name_list: Iterable of movie titles, e.g. an open text file with
            one title per line.

    Returns:
        None. Results are printed.
    """
    for raw_name in name_list:
        # Lines read from a file keep their trailing newline; strip it so
        # it ends up neither in the query nor in the log line.
        name = raw_name.strip()
        if not name:
            # Blank line: nothing to look up.
            continue
        print("Getting Movi %s " % name)
        print(get_movie_info(name))
if __name__ == '__main__':
    try:
        get_movi_name(NAME_LIST)
    finally:
        # NAME_LIST is opened at import time; release the handle once the
        # run is over, even if a lookup raised.
        NAME_LIST.close()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment