Use IMDbPY to look up information about TV shows, with the show titles gathered from Wikipedia serving as the search terms.
# Columns carried over unchanged from the Wikipedia scrape.
wiki_fields = ["title", "original_network", "running_time", "url", "start_date"]
# Keys read from each matched IMDb record.
imdb_fields = ["rating", "votes", "series years", "genres"]
# Accumulator for the merged records; starts empty with the Wikipedia
# columns first, followed by the IMDb columns.
wiki_imdb_df = pd.DataFrame(columns=wiki_fields + imdb_fields)
def populate_wiki_imdb__df(row):
    """Look up one Wikipedia show on IMDb and append the merged record.

    The Wikipedia values are copied from *row*. Every name in
    ``imdb_fields`` starts out as NaN and is overwritten with the value
    from the first IMDb search result whose ``year`` lies within three
    years of the year parsed from ``row["start_date"]``.

    Relies on module-level names defined elsewhere in the file: ``i``
    (the object providing ``search_movie`` and ``update``), ``year_re``,
    ``imdb_fields`` and the ``wiki_imdb_df`` accumulator.

    Args:
        row: Mapping-like record (anything ``dict()`` accepts) holding at
            least ``"title"`` and ``"start_date"``.

    Returns:
        None. The merged record is appended to the module-level
        ``wiki_imdb_df``. When the search succeeds but no result falls
        inside the year window, nothing is appended.
    """
    import logging

    global wiki_imdb_df

    # Start from the Wikipedia values, then give every IMDb field its own
    # single-element NaN list so the dict converts to a one-row DataFrame.
    d = dict(row)
    d.update({field: [np.nan] for field in imdb_fields})

    try:
        # Parsing happens inside the try so that a missing or malformed
        # start_date is treated like any other failed lookup (the row is
        # kept with NaN IMDb fields) instead of aborting the whole run.
        year = int(year_re.search(d["start_date"]).group())

        # Pick the first search result within 3 years of the Wikipedia year.
        match = None
        for show in i.search_movie(d["title"]):
            try:
                show_year = int(show["year"])
            except (KeyError, TypeError, ValueError):
                # A result without a usable year cannot be compared; skip
                # it rather than abandoning the remaining candidates.
                continue
            if abs(show_year - year) <= 3:
                match = show
                break

        if match is None:
            return None

        # Fetch the full record for the matched show.
        i.update(match)
        # NOTE: membership is tested against keys() on purpose; do not
        # shorten to ``field in match`` without checking how the IMDb
        # Movie class defines ``__contains__``.
        available = match.keys()
        for field in imdb_fields:
            if field not in available:
                d[field] = [np.nan]
            elif field == "genres":
                d[field] = ["|".join(match[field])]
            else:
                d[field] = [match[field]]
    except Exception:
        # Best effort: keep the Wikipedia data with whatever IMDb fields
        # were filled in, but leave a trace instead of failing silently.
        logging.getLogger(__name__).warning(
            "IMDb lookup failed for %r", d.get("title"), exc_info=True
        )

    # DataFrame.append was removed in pandas 2.0; concat is the equivalent.
    wiki_imdb_df = pd.concat([wiki_imdb_df, pd.DataFrame(d)], ignore_index=True)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment