Skip to content

Instantly share code, notes, and snippets.

View dee-walia20's full-sized avatar
🎯
Focusing

Deepak Walia dee-walia20

🎯
Focusing
  • Accenture
  • Bangalore
View GitHub Profile
@dee-walia20
dee-walia20 / model_test.py
Last active February 18, 2020 16:21
model_perf
#Prediction from test dataset
from sklearn.metrics import classification_report, confusion_matrix, f1_score, precision_score, recall_score
# Parallel accumulators for the per-model evaluation results.
# NOTE(review): presumably the loop over `pipelines` that follows appends one
# entry per model to each list -- its body is not visible here, confirm.
model_name = []
precision_array = []
recall_array = []
f1_array = []
test_time = []

# Banner printed once, ahead of any per-model output.
print("Classification Report\n")
print("*****************************************************")
for i, pipeline in enumerate(pipelines):
@dee-walia20
dee-walia20 / model_train.py
Created February 18, 2020 16:13
model_build
#Creating a list of Pipeline with well-known ML models
from sklearn.pipeline import make_pipeline
from sklearn.naive_bayes import MultinomialNB,ComplementNB
from sklearn.linear_model import LogisticRegression, RidgeClassifier
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.tree import DecisionTreeClassifier
@dee-walia20
dee-walia20 / Noise_word_removal.py
Last active February 18, 2020 16:10
Data Cleaning_2
# Count every whitespace-separated token across the Treated_Tweet column once
# and reuse the result; the original ran this full split/stack/count pipeline
# twice. value_counts() orders the tokens from most to least frequent.
_word_counts = df.Treated_Tweet.str.split(expand=True).stack().value_counts()

# The ten most frequent tokens.
freq_words = list(_word_counts[:10].index)

# Tokens that occur exactly once in the whole column.
rare_words = list(_word_counts[_word_counts == 1].index)
#Remove Frequent and Rare words
def remove_noise_words(text):
edited_text=text.split()
edited_text=[word for word in edited_text if word not in freq_words]
@dee-walia20
dee-walia20 / string_cleaning.py
Last active August 31, 2020 18:46
Data Cleaning
import nltk
import string
import re
from nltk.stem.snowball import SnowballStemmer
# Module-level Snowball stemmer configured for English.
snowball_stemmer = SnowballStemmer(language='english')

# English stop-word list read from NLTK's stopwords corpus.
# NOTE(review): presumably requires the corpus to be downloaded beforehand
# (nltk.download('stopwords')) -- confirm for the target environment.
stopwords = nltk.corpus.stopwords.words('english')
def treat_text(text):
edited_text=re.sub('\W'," ",text) #replace any symbol with whitespace