# Created Apr 1, 2021
from sklearn.feature_extraction import text
# Collect every transcript as a plain Python list; each element is handed to
# the vectorizers below as one document.
# NOTE(review): `data` is defined outside this section -- presumably a pandas
# DataFrame with a "transcript" column of strings; confirm there are no
# missing values, since the vectorizers expect str documents.
ted_talks = data["transcript"].tolist()

# `input` is deliberately left at its default ("content"). That parameter
# selects HOW documents are supplied ("content", "file" or "filename"); it is
# not the corpus itself. Passing the transcript list there is rejected by
# scikit-learn releases that validate constructor parameters, and was
# silently ignored by older ones. The corpus goes to fit_transform() only.

# Unigram + bigram TF-IDF features with English stop words removed.
bi_tfidf = text.TfidfVectorizer(stop_words="english", ngram_range=(1, 2))
bi_matrix = bi_tfidf.fit_transform(ted_talks)

# Unigram-only TF-IDF features with the same stop-word filtering.
uni_tfidf = text.TfidfVectorizer(stop_words="english")
uni_matrix = uni_tfidf.fit_transform(ted_talks)
from sklearn.metrics.pairwise import cosine_similarity
# Pairwise cosine similarity of every document against every other document,
# computed once per feature space. With a single argument, cosine_similarity
# compares the rows of the matrix with each other, so each result is a square
# matrix whose [i, j] entry scores document i against document j.
bi_sim, uni_sim = (
    cosine_similarity(bi_matrix),   # unigram + bigram features
    cosine_similarity(uni_matrix),  # unigram-only features
)
