Skip to content

Instantly share code, notes, and snippets.

@jayrambhia
Last active April 15, 2023 07:57
Show Gist options
  • Star 8 You must be signed in to star a gist
  • Fork 6 You must be signed in to fork a gist
  • Save jayrambhia/1678382 to your computer and use it in GitHub Desktop.
Save jayrambhia/1678382 to your computer and use it in GitHub Desktop.
Fetch movie information from IMDB using Python!
'''
Author : Jay Rambhia
Git : https://github.com/jayrambhia
gist : https://gist.github.com/jayrambhia
'''
import urllib2
from BeautifulSoup import BeautifulSoup
from mechanize import Browser
import re
def getunicode(soup):
    """Return the text contained in a parsed-HTML node as a single string.

    ``soup`` may be a text string, or any object exposing a ``contents``
    list of child nodes; children are flattened recursively in document
    order. In text, the character references ``&#39;``, ``&quot;`` and
    ``&nbsp;`` are replaced by ``'``, ``"`` and a plain space.

    Returns '' for ``None`` (what ``find()`` yields when the markup is
    missing) and for nodes without children.
    """
    try:
        text_type = basestring  # Python 2: covers both str and unicode
    except NameError:
        text_type = str  # fallback keeps the helper usable on Python 3

    if soup is None:
        return ''

    if isinstance(soup, text_type):
        # The entity names must stay spelled out here: if they are decoded
        # the replacements become no-ops (or a syntax error for the quote).
        for entity, char in (('&#39;', "'"), ('&quot;', '"'), ('&nbsp;', ' ')):
            soup = soup.replace(entity, char)
        return soup

    children = soup.contents
    if not children:
        return ''
    return ''.join(getunicode(child) for child in children)
def main(proxies=None):
    """Prompt for a movie name and print its IMDb details.

    Searches IMDb for the entered name, follows the first ``/title/tt...``
    link found on the results page, and prints the title, rating, release
    date, certificate, genres, actors and description scraped from the
    movie page. Fields whose markup is not found are printed empty instead
    of raising. Network and link-lookup errors from mechanize are not
    caught.

    Args:
        proxies: Optional mapping handed unchanged to
            ``Browser.set_proxies``. When omitted no proxy is configured
            here (previously a placeholder ``username:password@proxy:port``
            was always installed, which made every request fail).
    """
    # Local import: the file's top-level imports only bring in urllib2.
    from urllib import quote_plus

    movie = raw_input('Movie Name: ')
    # Collapse runs of whitespace, then percent-encode so characters such as
    # '&' or '#' in a title cannot corrupt the query string.
    query = quote_plus(' '.join(movie.split()))
    url = 'http://www.imdb.com/find?q=' + query + '&s=all'

    br = Browser()
    if proxies:
        br.set_proxies(proxies)
    br.open(url)
    link = br.find_link(url_regex=re.compile(r'/title/tt.*'))
    soup = BeautifulSoup(br.follow_link(link).read())

    def text_of(node):
        # find() returns None for missing markup; treat that as empty text.
        return getunicode(node) if node is not None else ''

    movie_title = text_of(soup.find('title'))
    rating = text_of(soup.find('span', itemprop='ratingValue'))
    actors = [getunicode(a) for a in soup.findAll('a', itemprop='actors')]

    des_tag = soup.find('meta', {'name': 'description'})
    des = des_tag['content'] if des_tag is not None else ''

    r = ''
    genre = []
    release_date = ''
    infobar = soup.find('div', {'class': 'infobar'})
    if infobar is not None:
        rated_tag = infobar.find('', {'title': True})
        if rated_tag is not None:
            r = rated_tag['title']
        links = infobar.findAll('a', {'href': True})
        if links:
            # The last infobar link is treated as the release date; every
            # link before it is treated as a genre.
            genre = [getunicode(a) for a in links[:-1]]
            release_date = getunicode(links[-1])

    # Single-argument print() calls produce the same output as the print
    # statement on Python 2.
    print('%s %s/10.0' % (movie_title, rating))
    print('Release Date: %s' % release_date)
    print('Rated %s' % r)
    print('')
    print('Genre: ' + ', '.join(genre))
    print('\nActors: ' + ', '.join(actors))
    print('\nDescription:')
    print(des)
# Run the scraper only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()
'''
Author : Jay Rambhia
Git : https://github.com/jayrambhia
gist : https://gist.github.com/jayrambhia
'''
import urllib2
from BeautifulSoup import BeautifulSoup
from mechanize import Browser
import re
def getunicode(soup):
    """Return the text contained in a parsed-HTML node as a single string.

    ``soup`` may be a text string, or any object exposing a ``contents``
    list of child nodes; children are flattened recursively in document
    order. In text, the character references ``&#39;``, ``&quot;`` and
    ``&nbsp;`` are replaced by ``'``, ``"`` and a plain space.

    Returns '' for ``None`` (what ``find()`` yields when the markup is
    missing) and for nodes without children.
    """
    try:
        text_type = basestring  # Python 2: covers both str and unicode
    except NameError:
        text_type = str  # fallback keeps the helper usable on Python 3

    if soup is None:
        return ''

    if isinstance(soup, text_type):
        # The entity names must stay spelled out here: if they are decoded
        # the replacements become no-ops (or a syntax error for the quote).
        for entity, char in (('&#39;', "'"), ('&quot;', '"'), ('&nbsp;', ' ')):
            soup = soup.replace(entity, char)
        return soup

    children = soup.contents
    if not children:
        return ''
    return ''.join(getunicode(child) for child in children)
def main(proxies=None):
    """Prompt for a movie name and print its IMDb details.

    Searches IMDb for the entered name, follows the first ``/title/tt...``
    link found on the results page, and prints the title, rating, release
    date, certificate, genres, actors and description scraped from the
    movie page. Fields whose markup is not found are printed empty instead
    of raising. Network and link-lookup errors from mechanize are not
    caught.

    Args:
        proxies: Optional mapping handed unchanged to
            ``Browser.set_proxies``. When omitted no proxy is configured
            here (previously a placeholder ``username:password@proxy:port``
            was always installed, which made every request fail).
    """
    # Local import: the file's top-level imports only bring in urllib2.
    from urllib import quote_plus

    movie = raw_input('Movie Name: ')
    # Collapse runs of whitespace, then percent-encode so characters such as
    # '&' or '#' in a title cannot corrupt the query string.
    query = quote_plus(' '.join(movie.split()))
    url = 'http://www.imdb.com/find?q=' + query + '&s=all'

    br = Browser()
    if proxies:
        br.set_proxies(proxies)
    br.open(url)
    link = br.find_link(url_regex=re.compile(r'/title/tt.*'))
    soup = BeautifulSoup(br.follow_link(link).read())

    def text_of(node):
        # find() returns None for missing markup; treat that as empty text.
        return getunicode(node) if node is not None else ''

    movie_title = text_of(soup.find('title'))
    rating = text_of(soup.find('span', itemprop='ratingValue'))
    actors = [getunicode(a) for a in soup.findAll('a', itemprop='actors')]

    des_tag = soup.find('meta', {'name': 'description'})
    des = des_tag['content'] if des_tag is not None else ''

    r = ''
    genre = []
    release_date = ''
    infobar = soup.find('div', {'class': 'infobar'})
    if infobar is not None:
        rated_tag = infobar.find('', {'title': True})
        if rated_tag is not None:
            r = rated_tag['title']
        links = infobar.findAll('a', {'href': True})
        if links:
            # The last infobar link is treated as the release date; every
            # link before it is treated as a genre.
            genre = [getunicode(a) for a in links[:-1]]
            release_date = getunicode(links[-1])

    # Single-argument print() calls produce the same output as the print
    # statement on Python 2.
    print('%s %s/10.0' % (movie_title, rating))
    print('Release Date: %s' % release_date)
    print('Rated %s' % r)
    print('')
    print('Genre: ' + ', '.join(genre))
    print('\nActors: ' + ', '.join(actors))
    print('\nDescription:')
    print(des)
# Run the scraper only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()
@rpryzant
Copy link

rpryzant commented Aug 22, 2017

+1 ... same here!

@shubhamjanhere @ricardorqr this does the same thing but works for me:
https://gist.github.com/rpryzant/cb4fe2c4d676262d667a68fcbf4e4c91

@DineshReddyK
Copy link

@shubhamjanhere @ricardorqr you have to pass your own proxy address to the br.set_proxies function.

@Dob-The-Duilder
Copy link

Dob-The-Duilder commented Dec 10, 2022

Remade after IMDb changed the way their page works

https://gist.github.com/Dob-The-Duilder/98f0765ce6dd9c1e11c3a649619654ac

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment