amankharwal/Title Generator Secret

## Title Generator
def category_extractor(data):
    """Build an ``{id: title}`` lookup from a category payload.

    Args:
        data: Mapping with an ``'items'`` sequence; each entry must provide
            an ``'id'`` convertible with ``int()`` and a nested
            ``['snippet']['title']`` value.

    Returns:
        dict mapping each integer id to its title. If an id occurs more than
        once, the last occurrence wins.

    Raises:
        KeyError: If ``'items'``, ``'id'``, ``'snippet'`` or ``'title'`` is
            missing.
        ValueError: If an id cannot be converted to ``int``.
    """
    # Iterate the entries directly instead of indexing by position, and build
    # the mapping in one pass; ids are converted to int for the lookup keys.
    return {
        int(item['id']): item['snippet']['title']
        for item in data['items']
    }

# Add a 'category_title' column to each frame by translating its
# 'category_id' values through the id -> title lookup of the matching payload.
for _frame, _payload in ((df1, data1), (df2, data2), (df3, data3)):
    _frame['category_title'] = _frame['category_id'].map(category_extractor(_payload))

# Stack the three frames under a fresh index, then keep a single row
# per 'video_id'.
df = pd.concat([df1, df2, df3], ignore_index=True).drop_duplicates('video_id')

# Keep only the titles of entertainment videos, as a plain Python list.
# Feel free to use any category of video that you want.
entertainment = df.loc[df['category_title'] == 'Entertainment', 'title'].tolist()


#remove punctuation, convert text to lowercase and drop non-ASCII characters
def clean_text(text):
    """Normalize a title: strip punctuation, lowercase, drop non-ASCII.

    Every character found in ``string.punctuation`` is removed, the remainder
    is lowercased, and any character that is not plain ASCII is then
    discarded (not transliterated).
    """
    kept = [ch for ch in text if ch not in string.punctuation]
    lowered = ''.join(kept).lower()
    # Non-ASCII characters become bytes >= 0x80 in UTF-8; decoding as ASCII
    # with the 'ignore' handler silently drops them.
    return lowered.encode('utf8').decode('ascii', 'ignore')

# Run every entertainment title through clean_text to form the corpus.
corpus = list(map(clean_text, entertainment))
	def category_extractor(data):
		"""Build an ``{id: title}`` lookup from a category payload.

		Args:
			data: Mapping with an ``'items'`` sequence; each entry must
				provide an ``'id'`` convertible with ``int()`` and a nested
				``['snippet']['title']`` value.

		Returns:
			dict mapping each integer id to its title. If an id occurs more
			than once, the last occurrence wins.

		Raises:
			KeyError: If ``'items'``, ``'id'``, ``'snippet'`` or ``'title'``
				is missing.
			ValueError: If an id cannot be converted to ``int``.
		"""
		# The body is nested under the def (it was flattened to the def's own
		# indent), and the mapping is built in one pass over the entries.
		return {
			int(item['id']): item['snippet']['title']
			for item in data['items']
		}

	# Add a 'category_title' column to each frame by translating its
	# 'category_id' values through the id -> title lookup of the matching payload.
	for _frame, _payload in ((df1, data1), (df2, data2), (df3, data3)):
		_frame['category_title'] = _frame['category_id'].map(category_extractor(_payload))

	# Stack the three frames under a fresh index, then keep a single row
	# per 'video_id'.
	df = pd.concat([df1, df2, df3], ignore_index=True).drop_duplicates('video_id')

	# Keep only the titles of entertainment videos, as a plain Python list.
	# Feel free to use any category of video that you want.
	entertainment = df.loc[df['category_title'] == 'Entertainment', 'title'].tolist()



	#remove punctuation, convert text to lowercase and drop non-ASCII characters
	def clean_text(text):
		"""Normalize a title: strip punctuation, lowercase, drop non-ASCII.

		Every character found in ``string.punctuation`` is removed, the
		remainder is lowercased, and any character that is not plain ASCII is
		then discarded (not transliterated).
		"""
		# The body is nested under the def; it was flattened to the def's own
		# indent, which is not valid Python.
		kept = [ch for ch in text if ch not in string.punctuation]
		lowered = ''.join(kept).lower()
		# Non-ASCII characters become bytes >= 0x80 in UTF-8; decoding as
		# ASCII with the 'ignore' handler silently drops them.
		return lowered.encode('utf8').decode('ascii', 'ignore')

	# Run every entertainment title through clean_text to form the corpus.
	corpus = list(map(clean_text, entertainment))