Skip to content

Instantly share code, notes, and snippets.

@eparikh
Last active February 19, 2017 14:32
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save eparikh/36e08f304cba67428c56345ea28bd93d to your computer and use it in GitHub Desktop.
Save eparikh/36e08f304cba67428c56345ea28bd93d to your computer and use it in GitHub Desktop.
Uses IMDbPY to look up information about TV shows, with show titles gathered from Wikipedia as the search terms.
# Columns carried over from the Wikipedia-derived rows.
wiki_fields = [
    "title",
    "original_network",
    "running_time",
    "url",
    "start_date",
]

# Keys read from the IMDb record for each matched show.
imdb_fields = [
    "rating",
    "votes",
    "series years",
    "genres",
]

# Empty accumulator frame: Wikipedia columns first, then the IMDb columns.
wiki_imdb_df = pd.DataFrame(columns=[*wiki_fields, *imdb_fields])
def populate_wiki_imdb__df(row, max_year_gap=3):
    """Look up one show on IMDb and append the merged record to ``wiki_imdb_df``.

    The row's own fields are copied as-is and every key in ``imdb_fields`` is
    initialised to NaN. The show title is then searched through the
    module-level IMDb client ``i``; the first result whose ``"year"`` lies
    within ``max_year_gap`` years of the year parsed from ``start_date`` is
    taken as the match, and its IMDb fields overwrite the NaN placeholders
    (``"genres"`` is stored as a ``"|"``-joined string).

    Args:
        row: Mapping-like record (anything ``dict(row)`` accepts) that
            provides at least ``"title"`` and ``"start_date"``.
        max_year_gap: Largest allowed absolute difference, in years, between
            the row's start year and an IMDb result's year. Defaults to 3.

    Returns:
        None. The result is appended to the global ``wiki_imdb_df``.
        Nothing is appended when the search completes without a result
        inside the year window.

    Raises:
        AttributeError: If ``year_re`` finds no match in ``start_date``
            (the year parse is deliberately outside the best-effort block).
    """
    global wiki_imdb_df

    # Start from the incoming fields; IMDb fields default to NaN so the row
    # is still well-formed when the lookup below fails part-way through.
    # Values are one-element lists so ``pd.DataFrame(d)`` yields one row.
    d = dict(row)
    d.update({field: [np.nan] for field in imdb_fields})

    # Parse the year out of the row's start_date.
    year = year_re.search(d["start_date"]).group()

    try:
        wiki_year = int(year)

        # Pick the first search result close enough in time to the show
        # described by the row; results without a usable year are skipped
        # instead of aborting the whole scan.
        match = None
        for show in i.search_movie(d["title"]):
            try:
                show_year = int(show["year"])
            except (KeyError, TypeError, ValueError):
                continue
            if abs(show_year - wiki_year) <= max_year_gap:
                match = show
                break

        # No acceptable candidate: leave the accumulator untouched.
        if not isinstance(match, imdb.Movie.Movie):
            return None

        # Fetch the full record for the match before reading its fields.
        i.update(match)

        for field in imdb_fields:
            if field in match.keys():
                if field == "genres":
                    d[field] = ["|".join(match[field])]
                else:
                    d[field] = [match[field]]
            else:
                d[field] = [np.nan]
    except Exception:
        # Best-effort lookup: on any failure keep the row with whatever IMDb
        # fields were filled so far (NaN otherwise). Unlike a bare
        # ``except:``, this lets KeyboardInterrupt/SystemExit propagate.
        pass

    # DataFrame.append was removed in pandas 2.0; concat is the replacement.
    wiki_imdb_df = pd.concat([wiki_imdb_df, pd.DataFrame(d)], ignore_index=True)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment