# Load the Amazon reviews dataset (tab-separated file)
import pandas as pd

df = pd.read_csv('./amazonreviews.tsv', sep='\t')
df.head()
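The later gists read the text from df.review and treat column 0 as the target and column 1 as the feature, so the TSV is assumed to hold a sentiment label column followed by a review column. A quick structural check (not in the original gist):
print(df.shape)
print(df.columns)  # expected: a sentiment label column and a 'review' text column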
# Importing all the required NLP libraries
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer

# Install and import the contractions library (notebook shell command)
!pip install contractions
import contractions
# Expanding contractions (e.g. "don't" -> "do not")
def con(text):
    expand = contractions.fix(text)
    return expand

df.review = df.review.apply(con)
# Lowercase all reviews
df.review = df.review.apply(lambda x: x.lower())
import string

# Replace every punctuation character with a space
def remove_punc(text):
    for i in string.punctuation:
        text = text.replace(i, ' ')
    return text

df.review = df.review.apply(remove_punc)
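As a quick sanity check, the cleaning steps above can be chained on a single made-up sentence (the example string is illustrative, not taken from the dataset):
sample = "I can't believe how good this is!!!"
print(con(sample))                        # "I cannot believe how good this is!!!"
print(remove_punc(con(sample).lower()))   # "i cannot believe how good this is   "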
letthedataconfess / Tokenization (Sentiment Analysis, created January 27, 2021 06:48)
# Download the Punkt tokenizer models and split each review into word tokens
nltk.download('punkt')
df['review'] = df.review.apply(word_tokenize)
df['review'][0]
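For reference, NLTK's word_tokenize splits a review into individual word tokens; a minimal illustration on an invented sentence:
word_tokenize("this product works great")
# ['this', 'product', 'works', 'great']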
letthedataconfess / Lemmatization (Sentiment Analysis, created January 27, 2021 06:56)
# Download WordNet and lemmatize every token in every review
nltk.download('wordnet')
lemmatizer = WordNetLemmatizer()
df['review'] = df.review.apply(lambda x: [lemmatizer.lemmatize(word) for word in x])
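WordNetLemmatizer reduces each token to its dictionary form; by default it treats words as nouns, so verb or adjective forms only change when a part-of-speech tag is passed. A small illustration (examples chosen for this note, not from the dataset):
lemmatizer.lemmatize('products')          # 'product'
lemmatizer.lemmatize('feet')              # 'foot'
lemmatizer.lemmatize('better', pos='a')   # 'good' (adjective POS gives a different lemma)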
letthedataconfess / Lemmatization (Sentiment Analysis, created January 27, 2021 07:00)
# Convert the token lists back into plain strings so TfidfVectorizer can consume them
df.review = df.review.astype(str)
letthedataconfess / train-test split (Sentiment Analysis, created January 27, 2021 07:03)
x = df.iloc[:, 1].values  # Feature variable (the preprocessed review text)
y = df.iloc[:, 0].values  # Target variable (the sentiment label)

from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(x, y, random_state=0, test_size=0.2)
letthedataconfess / Tf-idf vectorization (Sentiment Analysis, created January 27, 2021 07:05)
from sklearn.feature_extraction.text import TfidfVectorizer

# Fit the TF-IDF vocabulary on the training reviews only, then transform both splits
tfidf = TfidfVectorizer()
x_train_tfidf = tfidf.fit_transform(x_train)
x_test_tfidf = tfidf.transform(x_test)
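The vectorizer is fit on the training reviews only, so the test set is projected onto the training vocabulary. The original gists stop at vectorization; below is a minimal sketch of how these matrices could feed a classifier, assuming scikit-learn's LogisticRegression as a stand-in for whatever model the author actually used:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

clf = LogisticRegression(max_iter=1000)   # simple linear baseline for sparse TF-IDF features
clf.fit(x_train_tfidf, y_train)
print(accuracy_score(y_test, clf.predict(x_test_tfidf)))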