Skip to content

Instantly share code, notes, and snippets.

@wibowotangara
Created January 18, 2024 07:53
Show Gist options
  • Save wibowotangara/310bce2fff53a175b2dbc18900c2f9ab to your computer and use it in GitHub Desktop.
Save wibowotangara/310bce2fff53a175b2dbc18900c2f9ab to your computer and use it in GitHub Desktop.
import pandas as pd
# Raise the pandas display limits so wide or long frames are rendered
# without truncation (up to 1000 columns and 1000 rows).
pd.options.display.max_columns = 1000
pd.options.display.max_rows = 1000
def inspect_data(df, col=None, n_rows=5):
    """Print the shape of ``df`` and show its first rows.

    Args:
        df: DataFrame to preview.
        col: Column label, or list of labels, to show. ``None`` (the
            default) shows every column.
        n_rows: Number of leading rows to show.

    Returns:
        None. The shape and the preview are written to the output only.
    """
    print(f'data shape: {df.shape}')
    if col is None:
        col = df.columns
    preview = df[col].head(n_rows)
    # ``display`` is only defined when running under IPython/Jupyter; in a
    # plain interpreter the name does not exist, so fall back to ``print``
    # instead of failing with NameError.
    try:
        show = display
    except NameError:
        show = print
    show(preview)
def check_missing(df, cut_off=0, sort=True):
    """Summarise missing values and cardinality for every column of ``df``.

    Args:
        df: DataFrame to profile.
        cut_off: Minimum missing percentage (0-100) a column must reach to
            be kept in the result. The default of 0 keeps every column.
        sort: When true, order the rows by ``missing_frequency``, highest
            first.

    Returns:
        A new DataFrame indexed by the column labels of ``df`` with the
        columns ``missing_percentage``, ``missing_frequency``, ``types``
        (dtype), ``count_value`` (number of distinct values, counting NaN
        as a value) and ``unique_values`` (the distinct values themselves).
    """
    freq = df.isnull().sum()
    n_rows = df.shape[0]
    # A frame without rows has nothing missing; dividing by zero would turn
    # every percentage into NaN and the cut-off filter would drop all rows.
    percent = freq / n_rows * 100 if n_rows else freq * 0.0
    types = df.dtypes
    # Gather the distinct values column by column. ``df.apply(pd.unique)``
    # yields a DataFrame (not a Series) whenever all columns happen to have
    # the same number of distinct values, which cannot be used as a single
    # summary column.
    unique = pd.Series(
        [pd.unique(df.iloc[:, pos]) for pos in range(df.shape[1])],
        index=df.columns,
        dtype=object,
    )
    unique_counts = df.nunique(dropna=False)
    df_miss = pd.DataFrame({
        'missing_percentage': percent,
        'missing_frequency': freq,
        'types': types,
        'count_value': unique_counts,
        'unique_values': unique,
    })
    if sort:
        df_miss = df_miss.sort_values(by='missing_frequency', ascending=False)
    return df_miss[df_miss['missing_percentage'] >= cut_off]
# Load the dataset (replace the placeholder file name with a real CSV path)
# and preview its shape and first rows.
df = pd.read_csv(filepath_or_buffer='your_data_here.csv')
inspect_data(df=df)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment