# GeorgeSeif/scikit_learn_1.py

## scikit_learn_1.py
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer

def get_tf_idf(vectorizer, matrix=None):
  """Return the TF-IDF weights as a DataFrame with one column per term.

  Args:
    vectorizer: A fitted vectorizer exposing ``get_feature_names_out()``
      (scikit-learn >= 1.0) or the older ``get_feature_names()``.
    matrix: The sparse matrix produced by the vectorizer (e.g. the result
      of ``fit_transform``). When omitted, the module-level ``vectors``
      variable is used so existing one-argument calls keep working.

  Returns:
    A ``pandas.DataFrame`` whose columns are the feature names and whose
    rows are the rows of ``matrix``, in order.
  """
  if matrix is None:
    # Backward compatibility: earlier callers relied on the global `vectors`.
    matrix = vectors

  # get_feature_names() was deprecated in scikit-learn 1.0 and removed in
  # 1.2; prefer the replacement but still support older installations.
  names_getter = getattr(vectorizer, "get_feature_names_out", None)
  if names_getter is None:
    names_getter = vectorizer.get_feature_names
  feature_names = list(names_getter())

  # toarray() yields a plain ndarray that DataFrame can consume directly,
  # avoiding the intermediate np.matrix and nested Python list.
  return pd.DataFrame(matrix.toarray(), columns=feature_names)


# Three short sample documents that together form the corpus.
doc_1 = ("TF-IDF uses statistics to measure how important a word is to "
         "a particular document")
doc_2 = ("The TF-IDF is perfectly balanced, considering both local and global "
         "levels of statistics for the target word.")
doc_3 = ("Words that occur more frequently in a document are weighted higher, "
         "but only if they're more rare within the whole document.")
documents_list = [doc_1, doc_2, doc_3]

# Fit the vectorizer on the corpus and transform it in a single step.
vectorizer = TfidfVectorizer()
vectors = vectorizer.fit_transform(documents_list)

# The one-argument call relies on the module-level `vectors` assigned above.
tfidf_data = get_tf_idf(vectorizer)

# Display the TF-IDF table covering every word in every document.
print(tfidf_data)
	import pandas as pd
	from sklearn.feature_extraction.text import TfidfVectorizer

	def get_tf_idf(vectorizer, matrix=None):
		"""Return the TF-IDF weights as a DataFrame with one column per term.

		Args:
			vectorizer: A fitted vectorizer exposing ``get_feature_names_out()``
				(scikit-learn >= 1.0) or the older ``get_feature_names()``.
			matrix: The sparse matrix produced by the vectorizer (e.g. the
				result of ``fit_transform``). When omitted, the module-level
				``vectors`` variable is used so one-argument calls keep working.

		Returns:
			A ``pandas.DataFrame`` whose columns are the feature names and
			whose rows are the rows of ``matrix``, in order.
		"""
		if matrix is None:
			# Backward compatibility: earlier callers relied on the global `vectors`.
			matrix = vectors

		# get_feature_names() was deprecated in scikit-learn 1.0 and removed
		# in 1.2; prefer the replacement but still support older installations.
		names_getter = getattr(vectorizer, "get_feature_names_out", None)
		if names_getter is None:
			names_getter = vectorizer.get_feature_names
		feature_names = list(names_getter())

		# toarray() yields a plain ndarray that DataFrame can consume directly,
		# avoiding the intermediate np.matrix and nested Python list.
		return pd.DataFrame(matrix.toarray(), columns=feature_names)


	# Three short sample documents that together form the corpus.
	doc_1 = ("TF-IDF uses statistics to measure how important a word is to "
		"a particular document")
	doc_2 = ("The TF-IDF is perfectly balanced, considering both local and global "
		"levels of statistics for the target word.")
	doc_3 = ("Words that occur more frequently in a document are weighted higher, "
		"but only if they're more rare within the whole document.")
	documents_list = [doc_1, doc_2, doc_3]

	# Fit the vectorizer on the corpus and transform it in a single step.
	vectorizer = TfidfVectorizer()
	vectors = vectorizer.fit_transform(documents_list)

	# The one-argument call relies on the module-level `vectors` assigned above.
	tfidf_data = get_tf_idf(vectorizer)

	# Display the TF-IDF table covering every word in every document.
	print(tfidf_data)