Skip to content

Instantly share code, notes, and snippets.

@mushfiq
Created February 26, 2012 19:22
Show Gist options
  • Save mushfiq/1918439 to your computer and use it in GitHub Desktop.
Save mushfiq/1918439 to your computer and use it in GitHub Desktop.
IMDB Movie Info CRAWLER
import re
import requests
import urllib
import json
# MOVIE_NAME = 'The Other Dream Team'

# Endpoint that is queried for movie data. The trailing '?' lets a
# URL-encoded query string be appended directly.
BASE_URL = 'http://www.imdbapi.com/?'

# Open handle on 'movies.txt' (resolved against the current working
# directory). get_movi_name iterates it and treats every line as one title.
# ``open`` is used rather than ``file``: ``file`` only exists on Python 2,
# where the documentation already recommends ``open`` for opening files.
NAME_LIST = open('movies.txt', 'r')
def get_movie_info(movi_name, timeout=10):
    """Fetch selected details about one movie from the web API at BASE_URL.

    Args:
        movi_name: Title to search for. Surrounding whitespace (for example
            the newline left on a line read from a file) is removed before
            the request is made.
        timeout: Seconds to wait for the HTTP response before ``requests``
            gives up instead of blocking indefinitely.

    Returns:
        dict with the keys 'Plot', 'Title', 'Director', 'tomatoRating',
        'IMDB Rating' and 'Rating'. Each value is the text extracted by
        get_and_clean_data, which is an empty string when the field name
        does not occur in the response. 'IMDB Rating' and 'Rating' hold the
        same value.
    """
    # urlencode lives in urllib.parse on Python 3 and in urllib on Python 2.
    try:
        from urllib.parse import urlencode
    except ImportError:
        from urllib import urlencode

    query = {'i': '', 't': movi_name.strip(), 'tomatoes': 'true'}
    url = BASE_URL + urlencode(query)
    response = requests.get(url, timeout=timeout)

    # get_and_clean_data works on the JSON-escaped form of the body (every
    # quote written as \"), so the decoded text is re-encoded as one JSON
    # string. ``response.text`` is used because ``json.dumps`` cannot
    # serialise the raw bytes of ``response.content`` on Python 3.
    output = json.dumps(response.text, separators=(',', ':'))

    movie_info = {}
    for info in ('Plot', 'Title', 'Director', 'tomatoRating', 'Rating'):
        # Extract once per field; the rating is published under two keys.
        value = get_and_clean_data(info, output)
        if info == 'Rating':
            movie_info['IMDB Rating'] = value
        movie_info[info] = value
    return movie_info
def get_and_clean_data(tag, data):
    r"""Return the value that follows the first occurrence of ``tag`` in ``data``.

    ``data`` is the response text after ``json.dumps`` has been applied to
    it, so every double quote in it appears as ``\"``.

    Args:
        tag: Text to look for, e.g. a field name such as 'Title'. Matching is
            by substring, so 'Rating' also matches inside 'tomatoRating'.
        data: The escaped response string to search.

    Returns:
        The extracted value with the escaped quotes around it removed, or an
        empty string (after printing a message) when ``tag`` does not occur
        in ``data``.
    """
    try:
        after_tag = data.split(tag)[1]
    except IndexError as e:
        print("Error Occured! %s" % e)
        return ""

    # A quoted value ends at its closing escaped quote, not at the first
    # comma: values such as "Joel Coen, Ethan Coen" contain commas. The
    # look-behind skips \" sequences that are themselves preceded by a
    # backslash, i.e. quotes that were escaped inside the value.
    quoted = re.match(r'\\":\\"(.*?)(?<!\\)\\"', after_tag)
    if quoted:
        temp_data = quoted.group(1)
    else:
        # Not a quoted string (e.g. a number): fall back to cutting at the
        # first comma.
        temp_data = after_tag.split(",")[0]
    return re.sub(r':\\"+', '', temp_data).replace('\\"', '')
def get_movi_name(name_list):
    """Look up every movie title in ``name_list`` and print the result.

    Args:
        name_list: Iterable of title strings, for example an open text file
            that is read line by line. Surrounding whitespace (including
            the trailing newline of a file line) is removed from each entry
            and entries that are empty afterwards are skipped.

    Returns:
        None. The output is written to stdout.
    """
    for raw_name in name_list:
        name = raw_name.strip()
        if not name:
            # Blank line: nothing to look up, so avoid an empty-title request.
            continue
        print("Getting Movi %s " % name)
        print(get_movie_info(name))
if __name__ == '__main__':
    # get_imdb_id('The Pianist')
    try:
        get_movi_name(NAME_LIST)
    finally:
        # NAME_LIST is a file handle opened at import time; release it once
        # the run is over, even when a lookup raises.
        NAME_LIST.close()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment