Created
September 22, 2021 13:17
-
-
Save michelkana/861c381f228f5503fadb2c3fe8f717a6 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Build `star_table`: one dict per star found on the already-parsed list page
# `star_soup` (expected to be defined before this point), enriched with data
# scraped from each star's own profile page.

# find all star <div> elements
stars = star_soup.find_all(class_="lister-item mode-detail")
star_table = []
rank = 1  # 1-based position of the star within `stars`

# loop through each star
for star in stars:
    # find the star name (the link inside the item header)
    name_soup = star.find(class_='lister-item-header').find('a')
    name = name_soup.get_text().replace('\n', '').strip()

    # find the star's 2020 movie.
    # Read it from the current `star`; indexing `stars[0]` here would return
    # the first star's movie on every iteration.
    # NOTE(review): `best_movie` is computed but not stored in `star_dict`;
    # the dict keys are left unchanged so downstream consumers are unaffected.
    try:
        best_movie_soup = star.find(class_='text-muted text-small').find('a')
        best_movie = best_movie_soup.get_text().replace('\n', '').strip()
    except AttributeError:
        # the list entry has no movie link
        best_movie = ''

    # open the star's profile page
    url = "https://www.imdb.com" + name_soup["href"]
    # 'Female' when the entry's first <p> mentions 'Actress', otherwise 'Male'
    gender = 'Female' if 'Actress' in star.find('p').get_text() else 'Male'
    star_page = BeautifulSoup(requests.get(url).content, 'html.parser')

    # find the star's birth date (second link inside the <time> element)
    try:
        year_born = (
            star_page.find(id='name-born-info')
            .find('time')
            .find_all('a')[1]
            .get_text()
        )
    except (AttributeError, IndexError):
        # element missing (find() returned None) or fewer than two links
        year_born = 0

    # find the star's first movie's title and date.
    # Takes the last 'filmo-row' in the second <div> of the filmography
    # section -- presumably the earliest credit; verify against page layout.
    try:
        year_first_movie_soup = (
            star_page.find(id='filmography')
            .find_all('div')[1]
            .find_all('div', class_='filmo-row')[-1]
        )
        year_text = (
            year_first_movie_soup.find('span')
            .get_text()
            .replace('\n', '')
            .replace('\xa0', '')
        )
        # keep only the first year of ranges ("2001-2005") or "2001/I" forms
        year_first_movie = year_text.split('-')[0].split('/')[0]
        year_first_movie = 2020 if year_first_movie == '' else year_first_movie
        first_movie = (
            year_first_movie_soup.find('a')
            .get_text()
            .replace('\n', '')
            .replace('\xa0', '')
        )
    except (AttributeError, IndexError):
        year_first_movie = 2020
        first_movie = ''

    # find the star's credits: the text between the first '(' and the word
    # 'credits' in the first <div> of the filmography section.
    # Guarded like the lookups above so that one profile page without a
    # filmography section does not abort the whole scrape.
    try:
        credits = (
            star_page.find(id='filmography')
            .find('div')
            .get_text()
            .split('(')[1]
            .split('credits')[0]
            .strip()
        )
    except (AttributeError, IndexError):
        credits = 0

    # store all data in a dictionary
    star_dict = {
        'name': name,
        'rank': rank,
        'gender': gender,
        'url': url,
        'year_born': year_born,
        'first_movie': first_movie,
        'year_first_movie': year_first_movie,
        'credits': credits,
    }
    star_table.append(star_dict)
    rank = rank + 1
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment