-
-
Save eparikh/36e08f304cba67428c56345ea28bd93d to your computer and use it in GitHub Desktop.
Uses IMDbPY to look up information about TV shows, with the show titles gathered from Wikipedia as the search terms.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Columns carried over unchanged from the Wikipedia-derived rows.
wiki_fields = ["title", "original_network", "running_time", "url", "start_date"]

# Keys read from the matched IMDb entry for each show.
imdb_fields = ["rating", "votes", "series years", "genres"]

# Accumulator for the merged result: wiki columns first, then IMDb columns.
# It starts empty and gains one row per successfully processed show.
wiki_imdb_df = pd.DataFrame(columns=wiki_fields + imdb_fields)
def populate_wiki_imdb__df(row):
    """Append one Wikipedia show row, enriched with IMDb data, to ``wiki_imdb_df``.

    The row's ``"title"`` is used as the IMDb search term. The first search
    result whose ``"year"`` is within three years of the year parsed from the
    row's ``"start_date"`` is taken as the match; it is then fully loaded via
    ``i.update`` and each key of ``imdb_fields`` present on it is copied into
    the new row (``"genres"`` is joined with ``"|"``).

    Args:
        row: Anything ``dict()`` accepts that yields the wiki fields; it must
            contain ``"title"`` and ``"start_date"``.

    Returns:
        None. The result is delivered by rebinding the module-level
        ``wiki_imdb_df`` to a frame with the new row appended.

        * No search result close enough in year: nothing is appended.
        * The IMDb lookup raises: the row is still appended, with whatever
          IMDb fields were not filled in left as NaN, and a warning is logged.

    Note:
        The year is extracted with the module-level ``year_re`` outside the
        guarded section, so a ``start_date`` without a match still raises
        (presumably ``AttributeError`` from ``None.group()`` -- confirm
        against how ``year_re`` is defined).
    """
    global wiki_imdb_df

    # Start from the wiki fields and pre-fill every IMDb field with NaN.
    # Values are one-element lists so pd.DataFrame(d) builds a single row.
    d = dict(row)
    d.update({field: [np.nan] for field in imdb_fields})

    # Parse the year from the row's start_date.
    year = year_re.search(d["start_date"]).group()

    try:
        wiki_year = int(year)

        # Pick the first search result within 3 years of the Wikipedia year.
        match = None
        for show in i.search_movie(d["title"]):
            try:
                show_year = int(show["year"])
            except (KeyError, TypeError, ValueError):
                # A result without a usable year cannot be compared; skip it
                # instead of abandoning the remaining candidates.
                continue
            if abs(show_year - wiki_year) <= 3:
                match = show
                break

        if match is None:
            # No plausible match: leave wiki_imdb_df untouched.
            return None

        # Search results are only partially populated; fetch the full record.
        i.update(match)

        available = set(match.keys())
        for field in imdb_fields:
            if field not in available:
                continue  # keep the NaN placeholder
            if field == "genres":
                d[field] = ["|".join(match[field])]
            else:
                d[field] = [match[field]]
    except Exception as exc:
        # Best effort: a failed lookup must not lose the wiki data, but it
        # should not pass unnoticed either.
        import logging

        logging.getLogger(__name__).warning(
            "IMDb lookup failed for %r: %s", d.get("title"), exc
        )

    # DataFrame.append was removed in pandas 2.0; pd.concat is the replacement.
    wiki_imdb_df = pd.concat([wiki_imdb_df, pd.DataFrame(d)], ignore_index=True)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment