Skip to content

Instantly share code, notes, and snippets.

@zperk13
Last active August 7, 2019 22:34
Show Gist options
  • Save zperk13/c4b2f9775bf6fd7e5ee456222c72465e to your computer and use it in GitHub Desktop.
Save zperk13/c4b2f9775bf6fd7e5ee456222c72465e to your computer and use it in GitHub Desktop.
You give it several IMDb title IDs (e.g. tt4154796); it looks at each title's full cast and crew and finds the people who appear in all of them.
import html
import re
from time import sleep

import requests
def unique(iter):
    """Return the items of *iter* as a list with repeats removed.

    The first occurrence of each item is kept and the original order is
    preserved.  Two items count as the same when they compare equal.

    Hashable items are tracked in a set so that the common case (e.g. a
    long list of names) runs in linear time; unhashable items fall back
    to a linear scan of the result so they keep working as before.

    Args:
        iter: any iterable.  (The name shadows the builtin but is kept so
            existing keyword callers continue to work.)

    Returns:
        A new list containing each distinct item once.
    """
    result = []
    seen = set()           # hashable items already placed in ``result``
    only_hashable = True   # flips once an unhashable item has been kept/seen
    for x in iter:
        try:
            hash(x)
        except TypeError:
            # Unhashable: cannot go in the set, compare against the result.
            only_hashable = False
            if x not in result:
                result.append(x)
            continue
        # A hashable item may still equal an earlier unhashable one
        # (e.g. frozenset vs set), so scan the result in that case too.
        if x in seen or (not only_hashable and x in result):
            continue
        seen.add(x)
        result.append(x)
    return result
class Movie(object):
    """An IMDb title plus the names scraped from its full-credits page.

    Constructing an instance downloads the page and fills in:

    * ``id`` -- the title id that was passed in (e.g. ``tt4154796``).
    * ``cast_url`` -- URL of the title's ``fullcredits`` page.
    * ``cast_txt`` -- raw HTML of that page.
    * ``title`` -- the title text parsed from the page.
    * ``cast`` -- lower-cased names taken from the page's ``/name/`` links,
      in first-occurrence order with repeats removed.
    """

    # Seconds to wait for the server before giving up instead of hanging.
    REQUEST_TIMEOUT = 10

    # A credit link: the anchor's closing ``>`` sits after whitespace, then
    # comes the name, whitespace and ``</a>``.  The name is captured directly
    # so no fixed-offset slicing of the match is needed.
    _NAME_LINK = re.compile(r'<a href="/name/.+?/"\s>(.+?)\s</a>')

    def __init__(self, id):
        """Fetch and parse the full-credits page for *id*.

        Args:
            id: IMDb title id, e.g. ``'tt4154796'``.

        Raises:
            requests.RequestException: the page could not be downloaded
                (connection error, timeout, or an HTTP error status).
            ValueError: the page does not contain a title link for *id*.
        """
        self.id = id
        self.cast_url = f'https://www.imdb.com/title/{id}/fullcredits'
        response = requests.get(self.cast_url, timeout=self.REQUEST_TIMEOUT)
        # Fail here on 404/5xx rather than later while parsing an error page.
        response.raise_for_status()
        self.cast_txt = response.text
        self.title = self._parse_title(id, self.cast_txt)
        self.cast = self._parse_cast(self.cast_txt)

    @staticmethod
    def _parse_title(id, page):
        """Return the text of the title link for *id* found in *page*.

        Raises:
            ValueError: no such link is present in *page*.
        """
        # The id comes from user input, so match it literally.
        title_id = re.escape(str(id))
        pattern = rf'''<a href="/title/{title_id}/"\sitemprop='url'>(.+?)</a>'''
        match = re.search(pattern, page)
        if match is None:
            raise ValueError(
                f'Could not find the title for IMDb id {id!r} on its '
                'full-credits page; check the id'
            )
        # Decode character references such as ``&amp;``.
        return html.unescape(match.group(1))

    @classmethod
    def _parse_cast(cls, page):
        """Return the distinct lower-cased names linked from *page*, in order."""
        names = []
        for raw in cls._NAME_LINK.findall(page):
            name = html.unescape(raw).strip().lower()
            if name:  # skip links whose text is only whitespace
                names.append(name)
        # dict keys keep insertion order, so this drops repeats in one pass.
        return list(dict.fromkeys(names))

    def print_cast(self):
        """Print the cast on one line, comma-separated, each word capitalised."""
        pretty = (
            ' '.join(word.capitalize() for word in name.split(' '))
            for name in self.cast
        )
        print(' ', ', '.join(pretty))
def display_name(name):
    """Return *name* with each space-separated word capitalised."""
    return ' '.join(word.capitalize() for word in name.split(' '))


def common_cast(casts):
    """Return the names from ``casts[0]`` that occur in every other cast.

    Args:
        casts: a sequence of name lists (names must be hashable, e.g. str).

    Returns:
        The matching names in the order they appear in ``casts[0]``;
        an empty list when *casts* is empty.
    """
    if not casts:
        return []
    first, *rest = casts
    # Sets make each membership test O(1) instead of scanning a list.
    lookups = [set(cast) for cast in rest]
    return [person for person in first
            if all(person in names for names in lookups)]


def main():
    """Prompt for IMDb title ids and print the people credited in all of them.

    Raises:
        ValueError: the count entered is not an integer or is below 2.
    """
    num_of_movies = int(input('How many movies/episodes/shows do you want to check? '))
    if num_of_movies < 2:
        raise ValueError('Number of movies must be at least 2')

    movies = []
    for x in range(1, num_of_movies + 1):
        # Strip stray whitespace so a trailing space does not corrupt the URL.
        title_id = input(f'Please type in the IMDB title id for #{x}: ').strip()
        movie = Movie(title_id)
        movies.append(movie)
        print(movie.title, 'added.', end=' ')
        movie.print_cast()
        # NOTE(review): the source lost its indentation; this separator is
        # assumed to follow each title's cast (i.e. to be inside the loop).
        print('\n\n')

    duplicates = [display_name(person)
                  for person in common_cast([movie.cast for movie in movies])]
    num_of_duplicates = len(duplicates)
    print('\n\n')
    if num_of_duplicates == 0:
        print('No duplicates found')
    elif num_of_duplicates == 1:
        print('One duplicate found:')
        print(f' {duplicates[0]}')
    else:
        print(f'{num_of_duplicates} duplicates found:')
        # Pause so the count can be read before the list is printed.
        sleep(3)
        for dupe in duplicates:
            print(' ', dupe)


# Only prompt when run as a script, so importing this module has no side effects.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment