Skip to content

Instantly share code, notes, and snippets.

@kaish114
Created April 2, 2020 12:54
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save kaish114/dae810fd7381f648f7fc3d9c76621a2f to your computer and use it in GitHub Desktop.
Save kaish114/dae810fd7381f648f7fc3d9c76621a2f to your computer and use it in GitHub Desktop.
EDA
#first we need to import required libraries
#basic libraries
import pandas as pd
import numpy as np
#misc
import gc
import time
import warnings
#statistics
try:
    from scipy.misc import imread
except ImportError:
    # scipy.misc.imread was deprecated in SciPy 1.0 and removed in SciPy 1.2,
    # so on current SciPy the plain import aborts the whole script. Keep the
    # name available through a small Pillow-based stand-in (Pillow is what
    # the removed helper used to read images).
    def imread(name, flatten=False, mode=None):
        """Read an image file into a NumPy array.

        Minimal replacement for the removed ``scipy.misc.imread``; it is
        not a full re-implementation of the legacy helper.

        Args:
            name: Path (or open binary file object) of the image to read.
            flatten: If true, convert the image to single-channel 32-bit
                float grayscale (Pillow mode ``"F"``).
            mode: Optional Pillow mode string (e.g. ``"L"``, ``"RGB"``)
                the image is converted to before the array is built.

        Returns:
            numpy.ndarray holding the decoded pixel data.
        """
        # Imported lazily so that defining the fallback never fails on its
        # own; Pillow is only required once an image is actually read.
        from PIL import Image

        with Image.open(name) as image:
            converted = image
            if mode is not None:
                converted = converted.convert(mode)
            if flatten:
                converted = converted.convert("F")
            # np.array forces the pixel data to load before the file closes.
            return np.array(converted)
from scipy import sparse
import scipy.stats as ss
#visualization
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import seaborn as sns
from wordcloud import WordCloud ,STOPWORDS
from PIL import Image
import matplotlib_venn as venn
#natural language processing
import string
import re #for regex
import nltk
from nltk.corpus import stopwords
import spacy
from nltk import pos_tag
from nltk.stem.wordnet import WordNetLemmatizer
from nltk.tokenize import word_tokenize
# Tweet tokenizer does not split at apostrophes, which is what we want
from nltk.tokenize import TweetTokenizer
#FeatureEngineering
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer, HashingVectorizer
from sklearn.decomposition import TruncatedSVD
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.utils.validation import check_X_y, check_is_fitted
from sklearn.linear_model import LogisticRegression
from sklearn import metrics
from sklearn.metrics import log_loss
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import train_test_split
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment