Skip to content

Instantly share code, notes, and snippets.

@wibowotangara
Created January 18, 2024 07:17
Show Gist options
  • Save wibowotangara/89c231cd470b140031dbb98a53a68554 to your computer and use it in GitHub Desktop.
Save wibowotangara/89c231cd470b140031dbb98a53a68554 to your computer and use it in GitHub Desktop.
def check_missing(df, cut_off=0, sort=True):
    """Summarise missing values, dtypes and distinct values for each column.

    Relies on pandas being importable as ``pd`` in the enclosing module.

    Args:
        df: DataFrame to inspect. It is not modified.
        cut_off: Minimum missing percentage (0-100) a column must reach to
            be kept in the result. The default of 0 keeps every column.
        sort: When true, rows are ordered by ``missing_frequency``,
            largest first.

    Returns:
        A DataFrame indexed by the column labels of ``df`` with the columns
        ``missing_percentage``, ``missing_frequency``, ``types`` (dtype),
        ``count_value`` (number of distinct values, NaN counted as a value)
        and ``unique_values`` (array of the distinct values).
    """
    n_rows = df.shape[0]
    freq = df.isnull().sum()
    # A frame with no rows has nothing missing; report 0% instead of the NaN
    # that 0/0 would give (NaN never passes the cut_off filter below).
    percent = freq / n_rows * 100 if n_rows else freq * 0.0
    types = df.dtypes
    # Collect the unique values column by column. ``df.apply(pd.unique)``
    # expands into a DataFrame whenever every column happens to have the
    # same number of distinct values, so it cannot be relied on to return
    # one array per column.
    unique = pd.Series(
        [pd.unique(df.iloc[:, pos]) for pos in range(df.shape[1])],
        index=df.columns,
        dtype=object,
    )
    unique_counts = df.nunique(dropna=False)
    df_miss = pd.DataFrame({
        'missing_percentage': percent,
        'missing_frequency': freq,
        'types': types,
        'count_value': unique_counts,
        'unique_values': unique,
    })
    if sort:
        df_miss = df_miss.sort_values(by='missing_frequency', ascending=False)
    return df_miss[df_miss['missing_percentage'] >= cut_off]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment