# vporiz/TF_IDF_Sklearn.py

## TF_IDF_Sklearn.py
from sklearn.feature_extraction.text import TfidfVectorizer
# Four short sentences used as a toy corpus for the vectorizer.
corpus = [
    'This is the first document.',
    'This document is the second document.',
    'And this is the third one.',
    'Is this the first document?',
]

# Fit the vectorizer on the corpus and build the TF-IDF document-term
# matrix (one row per document, one column per vocabulary term).
vectorizer = TfidfVectorizer()
X = vectorizer.fit_transform(corpus)

# get_feature_names() was deprecated in scikit-learn 1.0 and removed in 1.2;
# get_feature_names_out() is its replacement. It returns a NumPy array, so
# convert it to a plain list to keep the printed form shown below.
print(vectorizer.get_feature_names_out().tolist())
# prints ['and', 'document', 'first', 'is', 'one', 'second', 'the', 'third', 'this']

print(X.shape)
# prints (4, 9)
	from sklearn.feature_extraction.text import TfidfVectorizer

	# Four short sentences used as a toy corpus for the vectorizer.
	corpus = [
		'This is the first document.',
		'This document is the second document.',
		'And this is the third one.',
		'Is this the first document?',
	]

	# Fit the vectorizer on the corpus and build the TF-IDF document-term
	# matrix (one row per document, one column per vocabulary term).
	vectorizer = TfidfVectorizer()
	X = vectorizer.fit_transform(corpus)

	# get_feature_names() was deprecated in scikit-learn 1.0 and removed in 1.2;
	# get_feature_names_out() is its replacement. It returns a NumPy array, so
	# convert it to a plain list to keep the printed form shown below.
	print(vectorizer.get_feature_names_out().tolist())
	# prints ['and', 'document', 'first', 'is', 'one', 'second', 'the', 'third', 'this']

	print(X.shape)
	# prints (4, 9)