# lievcin/setting_up.py

## setting_up.py
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split

# Hold out 33% of the reviews for evaluation; the fixed random_state keeps the
# split reproducible between runs.
# NOTE(review): `data` is not defined in this file -- it must already exist,
# indexable by 'review_text' and 'sentiment', before this script runs.
X_train, X_test, y_train, y_test = train_test_split(
    data['review_text'],
    data['sentiment'],
    test_size=0.33,
    random_state=1,
)

# Bag-of-words and TF-IDF vectorizers, both dropping English stop words.
# max_df=0.7 additionally makes the TF-IDF vocabulary ignore terms that occur
# in more than 70% of the documents it is fitted on.
count_vectorizer = CountVectorizer(stop_words='english')
tfidf_vectorizer = TfidfVectorizer(stop_words="english", max_df=0.7)

# Learn each vocabulary from the training reviews only, and vectorize them.
count_train = count_vectorizer.fit_transform(X_train)
tfidf_train = tfidf_vectorizer.fit_transform(X_train)

# Vectorize the test reviews with the vocabularies fitted above (no refit),
# so nothing from the held-out split influences the learned features.
count_test = count_vectorizer.transform(X_test)
tfidf_test = tfidf_vectorizer.transform(X_test)