Skip to content

Instantly share code, notes, and snippets.

@michelkana
Created September 22, 2021 13:17
Show Gist options
  • Save michelkana/861c381f228f5503fadb2c3fe8f717a6 to your computer and use it in GitHub Desktop.
Save michelkana/861c381f228f5503fadb2c3fe8f717a6 to your computer and use it in GitHub Desktop.
# Build `star_table`: one dict per star found in the already-parsed listing
# page `star_soup`, enriched with details from the star's own profile page.
# Relies on `star_soup`, `BeautifulSoup` and `requests` being defined earlier.

# find all star <div> elements
stars = star_soup.find_all(class_="lister-item mode-detail")
star_table = []
# rank is the 1-based position of the star in the listing
rank = 1
# loop through each star
for star in stars:
    # find the star name
    name_soup = star.find(class_='lister-item-header').find('a')
    name = name_soup.get_text().replace('\n', '').strip()

    # find the star's 2020 movie; this must be read from the current `star`
    # (reading it from stars[0] would repeat the first star's movie for everyone)
    try:
        best_movie_soup = star.find(class_='text-muted text-small').find('a')
        best_movie = best_movie_soup.get_text().replace('\n', '').strip()
    except AttributeError:
        # the listing entry has no movie link
        best_movie = ''

    # open the star's profile page
    url = "https://www.imdb.com" + name_soup["href"]
    gender = 'Female' if 'Actress' in star.find('p').get_text() else 'Male'
    star_page = BeautifulSoup(requests.get(url).content, 'html.parser')

    # find the star's birth date (second link inside the <time> element);
    # falls back to 0 when the profile page has no such element
    try:
        year_born = star_page.find(id='name-born-info').find('time').find_all('a')[1].get_text()
    except (AttributeError, IndexError):
        year_born = 0

    # find the star's first movie's title and date: the last 'filmo-row' of the
    # second <div> under the filmography section
    try:
        year_first_movie_soup = star_page.find(id='filmography').find_all('div')[1].find_all('div', class_='filmo-row')[-1]
        # keep only the first year of a range ("2001-2005") or of a "2001/I" style label
        year_first_movie = year_first_movie_soup.find('span').get_text().replace('\n', '').replace('\xa0', '').split('-')[0].split('/')[0]
        # an empty year cell falls back to 2020
        year_first_movie = 2020 if year_first_movie == '' else year_first_movie
        first_movie = year_first_movie_soup.find('a').get_text().replace('\n', '').replace('\xa0', '')
    except (AttributeError, IndexError):
        year_first_movie = 2020
        first_movie = ''

    # find the star's credits: the text between '(' and 'credits' in the first
    # filmography <div>; guarded like the other profile lookups so that one
    # profile without a filmography section does not abort the whole scrape
    try:
        credits = star_page.find(id='filmography').find('div').get_text().split('(')[1].split('credits')[0].strip()
    except (AttributeError, IndexError):
        credits = 0

    # store all data in a dictionary (existing keys keep their order;
    # 'best_movie' is appended last)
    star_dict = {'name': name,
                 'rank': rank,
                 'gender': gender,
                 'url': url,
                 'year_born': year_born,
                 'first_movie': first_movie,
                 'year_first_movie': year_first_movie,
                 'credits': credits,
                 'best_movie': best_movie}
    star_table.append(star_dict)
    rank = rank + 1
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment