sharma-ji/gist:a0ea008e54768b874b8004496f4450b4

## gistfile1.txt
from sklearn.feature_extraction.text import CountVectorizer

# Small corpus used to demonstrate bag-of-words vectorization.
text = ["this is test doc", "this is another test doc"]

# Create the vectorizer with its default settings.
vector = CountVectorizer()

# Tokenize the corpus and learn the vocabulary (term -> column index).
vector.fit(text)

# Print the learned vocabulary mapping.
# NOTE: the fitted object is bound to `vector`; the previous reference to
# `vectorizer` was an undefined name and raised NameError.
print(vector.vocabulary_)

# Encode the documents as a document-term count matrix
# (one row per document, one column per vocabulary term).
X_Train = vector.transform(text)

# Print the matrix dimensions and its container type.
print(X_Train.shape)
print(type(X_Train))
	from sklearn.feature_extraction.text import CountVectorizer

	# Small corpus used to demonstrate bag-of-words vectorization.
	text = ["this is test doc", "this is another test doc"]

	# Create the vectorizer with its default settings.
	vector = CountVectorizer()

	# Tokenize the corpus and learn the vocabulary (term -> column index).
	vector.fit(text)

	# Print the learned vocabulary mapping.
	# NOTE: the fitted object is bound to `vector`; the previous reference to
	# `vectorizer` was an undefined name and raised NameError.
	print(vector.vocabulary_)

	# Encode the documents as a document-term count matrix
	# (one row per document, one column per vocabulary term).
	X_Train = vector.transform(text)

	# Print the matrix dimensions and its container type.
	print(X_Train.shape)
	print(type(X_Train))