Skip to content

Instantly share code, notes, and snippets.

@NMZivkovic
Created September 23, 2018 12:20
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save NMZivkovic/48d92ab88f492b48a2330f8862d98962 to your computer and use it in GitHub Desktop.
Save NMZivkovic/48d92ab88f492b48a2330f8862d98962 to your computer and use it in GitHub Desktop.
import numpy as np
import pandas as pd
import missingno as msno
import seaborn as sn
import matplotlib.pyplot as plt
# Load the applications table and drop the CustomerID column in one step,
# then draw missingno's matrix plot to check for missing data.
data = pd.read_csv('Credit_Card_Applications.csv').drop(columns=["CustomerID"])
msno.matrix(data, figsize=(10, 3))
# Outlier detection: one vertical box plot per continuous feature.
# NOTE(review): the original heading also mentioned class imbalance, but only
# the six columns listed below are plotted here.
continousVariableList = ["A2", "A3", "A7", "A10", "A13", "A14"]
# Select the continuous columns in one go and cast them to float32 so that
# seaborn treats every column as numeric (replaces a per-column copy loop
# whose body had lost its indentation and could not be parsed).
continiousData = data[continousVariableList].astype("float32")
fig, axes = plt.subplots(nrows=1, ncols=1)
fig.set_size_inches(10, 20)
sn.boxplot(data=continiousData, orient="v", ax=axes)
# Correlation analysis: heat map of the pairwise column correlations.
corrMatt = data.corr()
# seaborn hides every cell whose mask entry is True. Build an explicit boolean
# mask for the strict upper triangle so that only the lower triangle and the
# diagonal are drawn. The earlier version reused the correlation values as the
# mask, which left an upper-triangle cell visible whenever its correlation was
# exactly 0.0.
mask = np.triu(np.ones(corrMatt.shape, dtype=bool), k=1)
fig, ax = plt.subplots()
fig.set_size_inches(20, 10)
# Draw on the axes created above instead of relying on the implicit current axes.
sn.heatmap(corrMatt, mask=mask, vmax=.8, square=True, annot=True, ax=ax)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment