Skip to content

Instantly share code, notes, and snippets.

@okanyenigun
Created August 15, 2022 10:57
Show Gist options
  • Save okanyenigun/1cf0b57782338f0e3b68472b8e7ac738 to your computer and use it in GitHub Desktop.
Save okanyenigun/1cf0b57782338f0e3b68472b8e7ac738 to your computer and use it in GitHub Desktop.
naive bayes
import nltk
nltk.download('stopwords')
import pandas as pd
import numpy as np
from nltk.corpus import stopwords
import string
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.preprocessing import LabelBinarizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score
# Read the raw CSV, keep only its first two columns, and give them the
# names the rest of the script relies on ('target' = label, 'text' = message).
df = pd.read_csv("spam.csv", encoding='iso8859_14')
df = df.drop(columns=df.columns[2:])
df.columns = ['target', 'text']
# Translation table that deletes every character in string.punctuation in a
# single pass over the text.
_PUNCTUATION_TABLE = str.maketrans('', '', string.punctuation)

# Lazily-filled cache for the default stop-word set (see _english_stopwords).
_ENGLISH_STOPWORDS = None


def _english_stopwords():
    """Return NLTK's English stop-word list as a frozenset, built only once."""
    global _ENGLISH_STOPWORDS
    if _ENGLISH_STOPWORDS is None:
        _ENGLISH_STOPWORDS = frozenset(stopwords.words('english'))
    return _ENGLISH_STOPWORDS


def clean_util(text, *, stop_words=None):
    """Strip punctuation, lowercase, and drop stop words from *text*.

    Processing order: punctuation characters (``string.punctuation``) are
    removed first, the result is split on whitespace, each token is
    lowercased, and tokens found in the stop-word collection are discarded.
    Because punctuation is removed before the stop-word check, a token such
    as ``"don't"`` is compared as ``"dont"``.

    Args:
        text: The string to clean.
        stop_words: Optional collection of lowercase words to discard. When
            omitted, NLTK's English stop-word list is used (requires the
            ``stopwords`` corpus to be available).

    Returns:
        The surviving lowercase tokens joined by single spaces.
    """
    if stop_words is None:
        # Look the default list up once per call instead of once per token,
        # and test membership against a set rather than a list.
        stop_words = _english_stopwords()

    without_punctuation = text.translate(_PUNCTUATION_TABLE)
    lowered_tokens = (token.lower() for token in without_punctuation.split())
    return " ".join(token for token in lowered_tokens if token not in stop_words)
# Normalise every message (punctuation and stop-word removal) before
# turning the text into features.
df['text'] = df['text'].apply(clean_util)

# Bag-of-words features: one column per vocabulary term, values are counts.
cv = CountVectorizer()
X = cv.fit_transform(df['text']).toarray()

# Encode the string labels numerically and flatten to a 1-D vector.
# NOTE(review): ravel() only yields one label per row when 'target' has
# exactly two classes - confirm against the dataset.
lb = LabelBinarizer()
y = lb.fit_transform(df['target']).ravel()

# Train Test Split
X_train, X_test, y_train, y_test = train_test_split(X, y)

# Fit on the training partition only. Fitting on the full (X, y) would let
# the model see the very rows it is evaluated on, so predictions on X_test
# would no longer measure generalisation.
clf = MultinomialNB(alpha=1)
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)

# Score the held-out predictions.
accuracy = accuracy_score(y_test, y_pred)
print(f"Test accuracy: {accuracy:.4f}")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment