import matplotlib.pyplot as plt
import seaborn as sns

# Class distribution of the news labels
plt.figure(figsize=(10,7))
sns.countplot(x='label', data=df)
plt.xlabel('News Classification')
plt.ylabel('Count')
plt.show()

# Check for missing values and drop any rows containing them
df.isna().any()
df.dropna(inplace=True)
import re
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer

ps = PorterStemmer()
corpus = []

# Clean, tokenise, and stem each title to build the corpus
for i in range(0, news.shape[0]):
    title = re.sub(pattern='[^a-zA-Z]', repl=' ', string=news.title[i])
    title = title.lower()
    words = title.split()
    words = [word for word in words if word not in set(stopwords.words('english'))]
    words = [ps.stem(word) for word in words]
    title = ' '.join(words)
    corpus.append(title)
from sklearn.feature_extraction.text import CountVectorizer

# Bag-of-words features over unigrams, bigrams, and trigrams
cv = CountVectorizer(max_features=5000, ngram_range=(1,3))
X = cv.fit_transform(corpus).toarray()
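# Note: the target vector y used by train_test_split below is not defined in
# this snippet; a minimal sketch, assuming the class labels sit in the
# DataFrame's 'label' column and are already encoded as 0/1 (both assumptions):
y = df['label'].values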
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=0)
from sklearn.naive_bayes import MultinomialNB

nb_classifier = MultinomialNB()
nb_classifier.fit(X_train, y_train)
nb_y_pred = nb_classifier.predict(X_test)
from sklearn.metrics import accuracy_score, precision_score, recall_score
score1 = accuracy_score(y_test, nb_y_pred)
score2 = precision_score(y_test, nb_y_pred)
score3 = recall_score(y_test, nb_y_pred)
print("---- Scores ----")
print("Accuracy score is: {}%".format(round(score1*100,2)))
print("Precision score is: {}".format(round(score2,2)))
from sklearn.linear_model import LogisticRegression
lr_classifier = LogisticRegression(random_state=0)
lr_classifier.fit(X_train, y_train)
lr_y_pred = lr_classifier.predict(X_test)
from sklearn.metrics import accuracy_score, precision_score, recall_score
score1 = accuracy_score(y_test, lr_y_pred)
score2 = precision_score(y_test, lr_y_pred)
score3 = recall_score(y_test, lr_y_pred)
print("---- Scores ----")
print("Accuracy score is: {}%".format(round(score1*100,2)))
print("Precision score is: {}".format(round(score2,2)))
from sklearn.metrics import confusion_matrix
nb_cm = confusion_matrix(y_test, nb_y_pred)
plt.figure(figsize=(10,7))
sns.heatmap(data=nb_cm, annot=True, cmap="Blues", xticklabels=['Real', 'Fake'], yticklabels=['Real', 'Fake'])
plt.xlabel('Predicted values')
plt.ylabel('Actual values')
plt.title('Confusion Matrix for Multinomial Naive Bayes Algorithm')
plt.show()
from sklearn.metrics import confusion_matrix
lr_cm = confusion_matrix(y_test, lr_y_pred)
plt.figure(figsize=(10,7))
sns.heatmap(data=lr_cm, annot=True, cmap="Blues", xticklabels=['Real', 'Fake'], yticklabels=['Real', 'Fake'])
plt.xlabel('Predicted values')
plt.ylabel('Actual values')
plt.title('Confusion Matrix for Logistic Regression Algorithm')
plt.show()
def fake_news(sample_news):
    # Apply the same preprocessing used for the training corpus:
    # keep letters only, lowercase, remove stopwords, and stem
    sample_news = re.sub(pattern='[^a-zA-Z]', repl=' ', string=sample_news)
    sample_news = sample_news.lower()
    sample_news_words = sample_news.split()
    sample_news_words = [word for word in sample_news_words if not word in set(stopwords.words('english'))]
    ps = PorterStemmer()
    final_news = [ps.stem(word) for word in sample_news_words]
    final_news = ' '.join(final_news)
    # Vectorise with the already-fitted CountVectorizer
    temp = cv.transform([final_news]).toarray()
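# Usage sketch (hedged): as shown above the function stops at building `temp`;
# assuming it goes on to return a prediction, e.g. `return nb_classifier.predict(temp)`,
# it could be called on a raw headline, for example:
# print(fake_news('Example headline text goes here'))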