Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
# IMDB Movie Info CRAWLER: prints selected movie fields for every title listed in movies.txt.
import re
import requests
import urllib
import json
# MOVIE_NAME = 'The Other Dream Team'
# Endpoint of the movie lookup API.  The trailing '?' allows an encoded query
# string to be appended directly.
BASE_URL = 'http://www.imdbapi.com/?'
# File handle over the list of movie names; it is iterated line by line by
# the __main__ block.  open() is used instead of the file() constructor
# because file() only exists on Python 2, while open() works everywhere.
NAME_LIST = open('movies.txt', 'r')
def get_movie_info(movi_name):
    """Look up a movie by title and return a few selected fields.

    Sends a GET request to ``BASE_URL`` with the title in the ``t``
    parameter (plus an empty ``i`` and ``tomatoes=true``) and decodes the
    JSON reply.

    Args:
        movi_name: Title of the movie to search for.

    Returns:
        A dict with the keys 'Plot', 'Title', 'Director', 'tomatoRating',
        'Rating' and 'IMDB Rating' (same value as 'Rating').  A field that
        is missing from the reply is returned as ''.  If the reply is not a
        JSON object every field is ''.

    Raises:
        requests.exceptions.RequestException: if the HTTP request fails or
            times out.
    """
    query = {'i': '', 't': movi_name, 'tomatoes': 'true'}
    # Let requests build the query string: urllib.urlencode only exists on
    # Python 2.  The timeout keeps a stalled server from blocking forever.
    response = requests.get(BASE_URL, params=query, timeout=30)

    # Decode the reply as JSON rather than scraping its text: splitting on
    # the key name and the first comma truncated any value containing ','.
    try:
        payload = json.loads(response.text)
    except ValueError as e:
        print("Error Occured! %s" % e)
        payload = {}
    if not isinstance(payload, dict):
        payload = {}

    movie_info = {}
    for info in ('Plot', 'Title', 'Director', 'tomatoRating', 'Rating'):
        value = payload.get(info)
        movie_info[info] = '' if value is None else value
    # The rating is exposed under a second, more descriptive key as well.
    movie_info['IMDB Rating'] = movie_info['Rating']
    return movie_info
def get_and_clean_data(tag, data):
    r"""Extract the text that follows *tag* in a JSON-escaped string.

    The text after the first occurrence of *tag* is cut at the first comma,
    then the leading ``:\"`` separator and every remaining ``\"`` sequence
    are removed.  Because of the cut, a value that itself contains a comma
    is returned only up to that comma.

    Args:
        tag: Key name to search for in *data*.
        data: String to search; quotes are expected to appear escaped
            as ``\"``.

    Returns:
        The cleaned value, or '' (after printing an error message) when
        *tag* does not occur in *data*.
    """
    pieces = data.split(tag)
    try:
        # pieces[1] is missing when the tag never occurs in the data.
        raw_value = pieces[1].split(",")[0]
    except IndexError as e:
        # Parenthesised single-argument print works on Python 2 and 3.
        print("Error Occured! %s" % e)
        return ""
    return re.sub(r':\\"+', '', raw_value).replace('\\"', '')
def get_movi_name(name_list):
    """Fetch and print the movie info for every name in *name_list*.

    Args:
        name_list: Iterable of movie names, for example an open text file
            that yields one name per line.

    Surrounding whitespace (including the newline that ends each line of a
    file) is stripped from every name so it is not sent as part of the
    query, and entries that are empty after stripping are skipped.

    Returns:
        None.  The results are printed.
    """
    for raw_name in name_list:
        name = raw_name.strip()
        if not name:
            continue
        # Parenthesised single-argument print works on Python 2 and 3.
        print("Getting Movi %s " % name)
        print(get_movie_info(name))
if __name__ == '__main__':
    # get_imdb_id('The Pianist')
    try:
        get_movi_name(NAME_LIST)
    finally:
        # NAME_LIST is a file handle opened at import time; close it once
        # the run is over, even if a lookup raised.
        NAME_LIST.close()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.