eparikh/get_imdb_data.py Secret

## get_imdb_data.py
# Fields copied from the wiki-derived input row.
wiki_fields = [
    "title",
    "original_network",
    "running_time",
    "url",
    "start_date",
]
# Fields filled in from the IMDb lookup.
imdb_fields = [
    "rating",
    "votes",
    "series years",
    "genres",
]
# Empty accumulator frame: wiki columns first, then the IMDb columns.
wiki_imdb_df = pd.DataFrame(columns=[*wiki_fields, *imdb_fields])

def populate_wiki_imdb__df(row, max_year_gap=3):
    """Look up one show on IMDb and append the merged record to ``wiki_imdb_df``.

    The record starts as ``dict(row)`` with every name in ``imdb_fields`` set
    to NaN.  The title is searched through the module-level client ``i``
    (presumably an ``imdb.IMDb()`` instance -- confirm against the rest of the
    file) and the first result whose year lies within ``max_year_gap`` years
    of the year found in ``row["start_date"]`` is used to fill the IMDb
    fields.  ``genres`` is stored as a single ``"|"``-joined string.

    Args:
        row: Anything ``dict()`` accepts that provides at least ``"title"``
            and ``"start_date"``.
        max_year_gap: Largest accepted difference, in years, between the
            IMDb year and the wiki start year.  Defaults to 3.

    Returns:
        None.  The result is the side effect on the global ``wiki_imdb_df``:
        one row is appended, except when no search result matches the year
        window, in which case nothing is appended.

    Raises:
        Whatever the year extraction raises when ``start_date`` contains no
        year -- that step is intentionally outside the best-effort ``try``.
    """
    global wiki_imdb_df

    # Function-scope import: the file's own import block is not visible here.
    import logging

    # Each IMDb field gets its own one-element list so that
    # pd.DataFrame(record) yields exactly one row.
    record = dict(row)
    record.update({field: [np.nan] for field in imdb_fields})

    # parse year from row start_date
    year = year_re.search(record["start_date"]).group()

    try:
        wiki_year = int(year)

        # Take the first search hit whose year is close enough to the wiki
        # year.  Hits without a year are skipped; previously one such hit
        # raised KeyError and aborted the whole lookup.
        match = None
        for show in i.search_movie(record["title"]):
            if "year" not in show.keys():
                continue
            if abs(int(show["year"]) - wiki_year) <= max_year_gap:
                match = show
                break

        # No acceptable hit: drop the row entirely (original behaviour).
        if not isinstance(match, imdb.Movie.Movie):
            return None

        # Search hits carry only summary data; fetch the full record.
        i.update(match)

        for field in imdb_fields:
            if field in match.keys():
                value = match[field]
                record[field] = ["|".join(value)] if field == "genres" else [value]
    except Exception:
        # Best effort: keep the row with whatever IMDb fields were gathered,
        # but leave a trace instead of failing silently.
        logging.getLogger(__name__).warning(
            "IMDb lookup failed for %r; keeping the row with the IMDb fields "
            "gathered so far",
            record.get("title"),
            exc_info=True,
        )

    # DataFrame.append was removed in pandas 2.0; concat is the replacement.
    wiki_imdb_df = pd.concat(
        [wiki_imdb_df, pd.DataFrame(record)], ignore_index=True
    )
	# Fields copied from the wiki-derived input row.
	wiki_fields = [
	    "title",
	    "original_network",
	    "running_time",
	    "url",
	    "start_date",
	]
	# Fields filled in from the IMDb lookup.
	imdb_fields = [
	    "rating",
	    "votes",
	    "series years",
	    "genres",
	]
	# Empty accumulator frame: wiki columns first, then the IMDb columns.
	wiki_imdb_df = pd.DataFrame(columns=[*wiki_fields, *imdb_fields])

	def populate_wiki_imdb__df(row, max_year_gap=3):
	    """Look up one show on IMDb and append the merged record to ``wiki_imdb_df``.

	    The record starts as ``dict(row)`` with every name in ``imdb_fields``
	    set to NaN.  The title is searched through the module-level client
	    ``i`` (presumably an ``imdb.IMDb()`` instance -- confirm against the
	    rest of the file) and the first result whose year lies within
	    ``max_year_gap`` years of the year found in ``row["start_date"]`` is
	    used to fill the IMDb fields.  ``genres`` is stored as a single
	    ``"|"``-joined string.

	    Args:
	        row: Anything ``dict()`` accepts that provides at least
	            ``"title"`` and ``"start_date"``.
	        max_year_gap: Largest accepted difference, in years, between the
	            IMDb year and the wiki start year.  Defaults to 3.

	    Returns:
	        None.  The result is the side effect on the global
	        ``wiki_imdb_df``: one row is appended, except when no search
	        result matches the year window, in which case nothing is appended.

	    Raises:
	        Whatever the year extraction raises when ``start_date`` contains
	        no year -- that step is intentionally outside the best-effort
	        ``try``.
	    """
	    global wiki_imdb_df

	    # Function-scope import: the file's own import block is not visible here.
	    import logging

	    # Each IMDb field gets its own one-element list so that
	    # pd.DataFrame(record) yields exactly one row.
	    record = dict(row)
	    record.update({field: [np.nan] for field in imdb_fields})

	    # parse year from row start_date
	    year = year_re.search(record["start_date"]).group()

	    try:
	        wiki_year = int(year)

	        # Take the first search hit whose year is close enough to the wiki
	        # year.  Hits without a year are skipped rather than letting a
	        # KeyError abort the whole lookup.
	        match = None
	        for show in i.search_movie(record["title"]):
	            if "year" not in show.keys():
	                continue
	            if abs(int(show["year"]) - wiki_year) <= max_year_gap:
	                match = show
	                break

	        # No acceptable hit: drop the row entirely.
	        if not isinstance(match, imdb.Movie.Movie):
	            return None

	        # Search hits carry only summary data; fetch the full record.
	        i.update(match)

	        for field in imdb_fields:
	            if field in match.keys():
	                value = match[field]
	                # Plain "|" separator; the backslash in this copy was a
	                # markdown-escaping artifact.
	                record[field] = ["|".join(value)] if field == "genres" else [value]
	    except Exception:
	        # Best effort: keep the row with whatever IMDb fields were
	        # gathered, but leave a trace instead of failing silently.
	        logging.getLogger(__name__).warning(
	            "IMDb lookup failed for %r; keeping the row with the IMDb "
	            "fields gathered so far",
	            record.get("title"),
	            exc_info=True,
	        )

	    # DataFrame.append was removed in pandas 2.0; concat is the replacement.
	    wiki_imdb_df = pd.concat(
	        [wiki_imdb_df, pd.DataFrame(record)], ignore_index=True
	    )