# Source: GitHub Gist by Davis David (@Davisy)
# File: Davisy / blog_grid.py
# Created May 6, 2019 16:04 — forked from mneedham/blog_grid.py
# Description: Spooky Author Identification - Grid search for VotingClassifier
import pandas as pd
from sklearn import linear_model
from sklearn.ensemble import VotingClassifier
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.model_selection import GridSearchCV
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import Pipeline
# Column-name constants. Neither is referenced in the visible part of this
# file; presumably they name the label and raw-text columns of the
# author-identification dataset -- TODO confirm against the code that uses them.
Y_COLUMN = "author"
TEXT_COLUMN = "text"
# Import the modules used by the exploratory analysis below.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings

# Use a larger font size for axis labels on every figure.
plt.rcParams["axes.labelsize"] = 18

# Suppress all warnings emitted through the `warnings` module.
warnings.filterwarnings('ignore')

# `%matplotlib inline` is IPython magic syntax, not Python, and previously
# shared a line with the call above (a syntax error). Issue the same magic
# through the IPython API when running in a notebook; outside IPython
# `get_ipython` is undefined, so the step is skipped.
try:
    get_ipython().run_line_magic("matplotlib", "inline")
except NameError:
    pass
# Load the dataset from a CSV file (path is relative to the working directory).
data = pd.read_csv('../data/financial_inclusion.csv')

# Report the (rows, columns) shape of the loaded frame.
print('train data shape :', data.shape)

# Inspect the data by showing the first five rows. Printed explicitly so the
# preview is visible when this runs as a script, not only as the last
# expression of a notebook cell.
print(data.head())

# Show summary information about the dataset. DataFrame.info() writes its
# report directly to stdout and returns None, so it is not wrapped in print()
# (doing so would emit a stray "None" line).
data.info()

# Frequency table of the target variable: the count of each category in
# the 'bank_account' column.
print(data['bank_account'].value_counts())

# Explore the target distribution as a count plot.
sns.catplot(x="bank_account", kind="count", data=data)

# Explore the country distribution as a count plot (the original repeated
# this identical plot twice; it is drawn once here).
sns.catplot(x="country", kind="count", data=data)