This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Bar chart with one bar per distinct value of the 'label' column in df.
plt.figure(figsize=(10, 7))
sns.countplot(x='label', data=df)
plt.xlabel('News Classification')
plt.ylabel('Count')
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Per-column check for missing values. The result is neither assigned nor
# printed here, so it is discarded unless the statement is evaluated
# interactively.
df.isna().any()
# Remove every row that contains a missing value, modifying df in place.
df.dropna(inplace=True)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Build the stop-word set and the regex once, instead of once per word/title.
stop_words = set(stopwords.words('english'))
non_letters = re.compile(r'[^a-zA-Z]')

# Clean every title and append the result to corpus:
#   1. replace anything that is not an ASCII letter with a space,
#   2. lowercase and split on whitespace,
#   3. drop English stop words and stem the remaining words with ps,
#   4. re-join the stems with single spaces.
# Iterating the column directly (rather than looking up news.title[i] for
# i in range(n)) visits rows in positional order and does not raise KeyError
# when the index has gaps, e.g. after rows were dropped without a reset.
for raw_title in news['title']:
    title = non_letters.sub(' ', raw_title).lower()
    words = [ps.stem(word) for word in title.split() if word not in stop_words]
    corpus.append(' '.join(words))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Bag-of-words vectorizer limited to 5000 features, built from n-grams of
# one to three words.
cv = CountVectorizer(max_features=5000, ngram_range=(1, 3))
# Fit on the cleaned corpus and convert the result to a dense array.
X = cv.fit_transform(corpus).toarray()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sklearn.model_selection import train_test_split

# Hold out 20% of the samples for testing; random_state fixes the shuffle so
# the split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.20, random_state=0
)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sklearn.metrics import accuracy_score, precision_score, recall_score
from sklearn.naive_bayes import MultinomialNB

# Train a Multinomial Naive Bayes classifier and predict the held-out set.
# (The import, construction and fit were fused onto one line in the original,
# which is not valid Python.)
nb_classifier = MultinomialNB()
nb_classifier.fit(X_train, y_train)
nb_y_pred = nb_classifier.predict(X_test)

# Evaluate the predictions against the true test labels.
score1 = accuracy_score(y_test, nb_y_pred)
score2 = precision_score(y_test, nb_y_pred)
score3 = recall_score(y_test, nb_y_pred)

print("---- Scores ----")
print("Accuracy score is: {}%".format(round(score1*100,2)))
print("Precision score is: {}".format(round(score2,2)))
# Recall was computed above but never reported; print it alongside the others.
print("Recall score is: {}".format(round(score3,2)))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score

# Train a Logistic Regression classifier and predict the held-out set.
# (Construction and fit were fused onto one line in the original, which is
# not valid Python.)
lr_classifier = LogisticRegression(random_state=0)
lr_classifier.fit(X_train, y_train)
lr_y_pred = lr_classifier.predict(X_test)

# Evaluate the predictions against the true test labels.
score1 = accuracy_score(y_test, lr_y_pred)
score2 = precision_score(y_test, lr_y_pred)
score3 = recall_score(y_test, lr_y_pred)

print("---- Scores ----")
print("Accuracy score is: {}%".format(round(score1*100,2)))
print("Precision score is: {}".format(round(score2,2)))
# Recall was computed above but never reported; print it alongside the others.
print("Recall score is: {}".format(round(score3,2)))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sklearn.metrics import confusion_matrix

# Confusion matrix of the Naive Bayes predictions (rows: actual, columns:
# predicted), drawn as an annotated heatmap.
nb_cm = confusion_matrix(y_test, nb_y_pred)

plt.figure(figsize=(10, 7))
# fmt='d' prints the cell counts as plain integers; the default annotation
# format switches to scientific notation for larger counts.
# NOTE(review): the tick labels assume class 0 is 'Real' and class 1 is
# 'Fake' -- confirm against the label encoding.
sns.heatmap(
    data=nb_cm,
    annot=True,
    fmt='d',
    cmap="Blues",
    xticklabels=['Real', 'Fake'],
    yticklabels=['Real', 'Fake'],
)
plt.xlabel('Predicted values')
plt.ylabel('Actual values')
plt.title('Confusion Matrix for Multinomial Naive Bayes Algorithm')
plt.show()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sklearn.metrics import confusion_matrix

# Confusion matrix of the Logistic Regression predictions (rows: actual,
# columns: predicted), drawn as an annotated heatmap.
lr_cm = confusion_matrix(y_test, lr_y_pred)

plt.figure(figsize=(10, 7))
# Plot lr_cm: the original passed nb_cm here, so the figure titled
# "Logistic Regression" actually showed the Naive Bayes matrix.
# fmt='d' prints the cell counts as plain integers; the default annotation
# format switches to scientific notation for larger counts.
# NOTE(review): the tick labels assume class 0 is 'Real' and class 1 is
# 'Fake' -- confirm against the label encoding.
sns.heatmap(
    data=lr_cm,
    annot=True,
    fmt='d',
    cmap="Blues",
    xticklabels=['Real', 'Fake'],
    yticklabels=['Real', 'Fake'],
)
plt.xlabel('Predicted values')
plt.ylabel('Actual values')
plt.title('Confusion Matrix for Logistic Regression Algorithm')
plt.show()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def fake_news(sample_news):
    """Clean a piece of news text and vectorize it with the fitted ``cv``.

    The text is cleaned in these steps: non-letters are replaced with
    spaces, the text is lowercased and split on whitespace, English stop
    words are removed, the remaining words are Porter-stemmed, and the
    stems are re-joined with single spaces. The cleaned string is then
    transformed by the module-level ``cv`` vectorizer into a dense array.

    Args:
        sample_news: The raw news text (a string).

    NOTE(review): the visible source ends after the vectorization step. No
    prediction or ``return`` statement is visible, so as written the
    function returns ``None`` -- confirm against the full file.
    """
    sample_news = re.sub(pattern=r'[^a-zA-Z]', repl=' ', string=sample_news)
    sample_news = sample_news.lower()

    # Build the stop-word set once per call rather than once per word.
    stop_words = set(stopwords.words('english'))
    ps = PorterStemmer()

    final_news = ' '.join(
        ps.stem(word) for word in sample_news.split() if word not in stop_words
    )

    temp = cv.transform([final_news]).toarray()
OlderNewer