Instantly share code, notes, and snippets.

@eparikh /get_imdb_data.py Secret
Last active Feb 19, 2017

Embed
What would you like to do?
Uses IMDbPY to look up information about TV shows on IMDb, with show titles gathered from Wikipedia as the search terms.
# Columns carried over from each Wikipedia row.
wiki_fields = [
    "title",
    "original_network",
    "running_time",
    "url",
    "start_date",
]

# Keys read from the matched IMDb entry.
imdb_fields = [
    "rating",
    "votes",
    "series years",
    "genres",
]

# Empty accumulator table: Wikipedia columns first, then the IMDb columns.
wiki_imdb_df = pd.DataFrame(columns=list(wiki_fields) + list(imdb_fields))
def populate_wiki_imdb__df(row):
    """Look up one Wikipedia show on IMDb and append the merged row to ``wiki_imdb_df``.

    The row's ``title`` is used as the IMDb search term.  The first search
    result whose ``year`` lies within 3 years of the year parsed from the
    row's ``start_date`` is taken as the match, and its ``imdb_fields`` are
    copied next to the Wikipedia fields (``genres`` is joined with ``"|"``).

    Relies on module-level names defined outside this function:
    ``imdb_fields``, ``year_re`` (object with a ``search`` method, presumably
    a compiled regex), ``i`` (presumably an IMDbPY ``IMDb()`` access object --
    confirm against the rest of the file) and the ``wiki_imdb_df`` accumulator.

    Args:
        row: Mapping-like record (anything ``dict()`` accepts) holding at
            least ``"title"`` and ``"start_date"``.

    Returns:
        None.  The result is delivered by rebinding the global
        ``wiki_imdb_df`` to a new frame with one extra row.  When the search
        yields no result within the year window, the row is skipped and
        nothing is appended.  When the lookup raises, the failure is logged
        and the row is still appended with whatever IMDb fields were filled
        (NaN for the rest).

    Raises:
        AttributeError: if ``year_re`` finds no match in ``start_date``.
    """
    import logging  # function-scope import: the file's import section is not visible here

    global wiki_imdb_df

    # Start from the Wikipedia fields; every IMDb field defaults to NaN.
    # Values are one-element lists so pd.DataFrame(d) yields a single row.
    # A dict comprehension gives each key its own list (dict.fromkeys would
    # share a single mutable list between all keys).
    d = dict(row)
    d.update({field: [np.nan] for field in imdb_fields})

    # parse year from row start_date
    year = year_re.search(d["start_date"]).group()

    try:
        # Pick the first search result within 3 years of the Wikipedia year.
        match = None
        for show in i.search_movie(d["title"]):
            # Some results carry no year; skip them rather than letting a
            # KeyError abort the whole lookup.
            if "year" not in show.keys():
                continue
            if abs(int(show["year"]) - int(year)) <= 3:
                match = show
                break

        if match is None:
            # No plausible match: leave this show out of the table entirely.
            return None

        # Search results are sparse; fetch the full record before reading fields.
        i.update(match)
        for field in imdb_fields:
            if field not in match.keys():
                continue  # keep the NaN default
            value = match[field]
            d[field] = ["|".join(value)] if field == "genres" else [value]
    except Exception as err:
        # Best-effort lookup: keep the row (with NaN for unfilled fields) but
        # report the failure.  Unlike a bare ``except``, this lets
        # KeyboardInterrupt/SystemExit stop a long scrape.
        logging.getLogger(__name__).warning(
            "IMDb lookup failed for %r: %s", d.get("title"), err
        )

    # DataFrame.append was removed in pandas 2.0; pd.concat works on old and new versions.
    wiki_imdb_df = pd.concat([wiki_imdb_df, pd.DataFrame(d)], ignore_index=True)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment