Skip to content

Instantly share code, notes, and snippets.

@vinaychittora
Created April 24, 2014 13:10
Show Gist options
  • Save vinaychittora/11254080 to your computer and use it in GitHub Desktop.
Save vinaychittora/11254080 to your computer and use it in GitHub Desktop.
To fetch summary of any movie from IMDB
import urllib2
from BeautifulSoup import BeautifulSoup
from mechanize import Browser
import re
def getunicode(soup):
body=''
if isinstance(soup, unicode):
soup = soup.replace("'","'")
soup = soup.replace('"','"')
soup = soup.replace(' ',' ')
body = body + soup
else:
if not soup.contents:
return ''
con_list = soup.contents
for con in con_list:
body = body + getunicode(con)
return body
def main():
movie = str(raw_input('Tell me the Movie Name: '))
movie_search = '+'.join(movie.split())
base_url = 'http://www.imdb.com/find?q='
url = base_url+movie_search+'&s=all'
title_search = re.compile('/title/tt\d+')
br = Browser()
br.open(url)
link = br.find_link(url_regex = re.compile(r'/title/tt.*'))
res = br.follow_link(link)
soup = BeautifulSoup(res.read())
movie_title = getunicode(soup.find('title'))
rate = soup.find('span',itemprop='ratingValue')
rating = getunicode(rate)
actors=[]
actors_soup = soup.find(**{'class':"cast_list"}).findAll('span',itemprop='name')
for i in range(len(actors_soup)):
actors.append(getunicode(actors_soup[i]))
des = soup.find('meta',{'name':'description'})['content']
genre=[]
infobar = soup.find('div',{'class':'infobar'})
r = infobar.find('',{'title':True})['title']
genrelist = infobar.findAll('a',{'href':True})
for i in range(len(genrelist)-1):
genre.append(getunicode(genrelist[i]))
release_date = getunicode(genrelist[-1])
print "+"*80
print movie_title,rating+'/10.0'
print 'Relase Date:',release_date
print 'Rated',r
print ''
print 'Genre:',
print ', '.join(genre)
print '\nActors:',
print ', '.join(actors)
print '\nDescription:'
print des
print "-"*80
if __name__ == '__main__':
main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment