Skip to content

Instantly share code, notes, and snippets.

@Dob-The-Duilder
Created December 10, 2022 22:43
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Dob-The-Duilder/98f0765ce6dd9c1e11c3a649619654ac to your computer and use it in GitHub Desktop.
Save Dob-The-Duilder/98f0765ce6dd9c1e11c3a649619654ac to your computer and use it in GitHub Desktop.
Extracts key information from IMDb pages based on movie name
import html
import json
import re
from urllib.parse import quote_plus

from bs4 import BeautifulSoup
from mechanize import Browser
def _build_search_url(movie):
    """Return the IMDb "find" URL for *movie*.

    Runs of whitespace are collapsed and the query is percent-encoded so
    that characters such as ``&`` or ``#`` cannot corrupt the query string.
    """
    query = quote_plus(' '.join(movie.split()))
    return 'http://www.imdb.com/find?q=' + query + '&s=all'


def _text(value, default='N/A'):
    """Return *value* as display text with HTML entities decoded, or *default* if empty."""
    if value is None or value == '':
        return default
    return html.unescape(str(value))


def _parse_movie_json(raw):
    """Parse the ld+json payload of a title page into a flat dict.

    Args:
        raw: Text content of the ``application/ld+json`` script tag.

    Returns:
        A dict with the keys ``title``, ``rating`` (``None`` when absent),
        ``content_rating``, ``actors`` (list of str), ``description``,
        ``genres`` (list of str) and ``release_date``. Missing text fields
        are reported as ``'N/A'``.

    Raises:
        ValueError: If *raw* is not valid JSON or is not a JSON object.
    """
    # Parse the payload untouched: rewriting quote characters beforehand
    # breaks any string value that contains an apostrophe.
    data = json.loads(raw)
    if not isinstance(data, dict):
        raise ValueError('expected a JSON object in the ld+json payload')

    # A single actor/genre may appear as a bare object/string, not a list.
    people = data.get('actor') or []
    if isinstance(people, dict):
        people = [people]
    genres = data.get('genre') or []
    if isinstance(genres, str):
        genres = [genres]

    aggregate = data.get('aggregateRating')
    rating = aggregate.get('ratingValue') if isinstance(aggregate, dict) else None

    return {
        'title': _text(data.get('name')),
        'rating': rating,
        'content_rating': _text(data.get('contentRating')),
        'actors': [
            _text(person.get('name'))
            for person in people
            if isinstance(person, dict) and person.get('name')
        ],
        'description': _text(data.get('description')),
        'genres': [_text(genre) for genre in genres],
        'release_date': _text(data.get('datePublished')),
    }


def _format_report(info):
    """Return the multi-line, human-readable summary for a parsed movie dict."""
    if info['rating'] is None:
        rating = 'N/A'
    else:
        rating = str(info['rating']) + '/10.0'
    lines = [
        '{} {}'.format(info['title'], rating),
        'Release Date: {}'.format(info['release_date']),
        'Rated {}'.format(info['content_rating']),
        '',
        'Genre:',
        ', '.join(info['genres']) or 'N/A',
        '',
        'Actors:',
        ', '.join(info['actors']) or 'N/A',
        '',
        'Description:',
        info['description'],
    ]
    return '\n'.join(lines)


def main():
    """Prompt for a movie name and print key facts from its IMDb page.

    Searches IMDb for the entered name, follows the first link that points
    at a title page, reads the page's embedded ld+json metadata and prints
    the title, rating, release date, content rating, genres, actors and
    description. Prints a short message and returns early when nothing
    usable is found.
    """
    # Local import: the exception is only needed once the browser is used.
    from mechanize import LinkNotFoundError

    movie = input('Movie Name: ')
    if not movie.strip():
        print('No movie name given.')
        return

    br = Browser()
    # The script fetches pages regardless of robots.txt, with a browser-like UA.
    br.set_handle_robots(False)
    br.addheaders = [('User-agent', 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.1) Gecko/2008071615 Fedora/3.0.1-1.fc9 Firefox/3.0.1')]
    # Behind a proxy, enable:
    #br.set_proxies({'http':'http://username:password@proxy:port','https':'https://username:password@proxy:port'})
    br.open(_build_search_url(movie))

    try:
        # First search hit whose URL looks like a title page (/title/tt<digits>).
        link = br.find_link(url_regex=re.compile(r'/title/tt\d+'))
    except LinkNotFoundError:
        print('No IMDb title found for:', movie)
        return

    res = br.follow_link(link)
    soup = BeautifulSoup(res.read(), features="lxml")
    script_tag = soup.find('script', type='application/ld+json')
    if script_tag is None:
        print('No movie metadata found on the IMDb page.')
        return

    info = _parse_movie_json(script_tag.string or script_tag.get_text())
    print(_format_report(info))
# Run the lookup only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment