Skip to content

Instantly share code, notes, and snippets.

@fuwiak
Created August 26, 2020 15:38
Show Gist options
  • Save fuwiak/96f2237cd38cb9a884f686cdf065a34b to your computer and use it in GitHub Desktop.
Save fuwiak/96f2237cd38cb9a884f686cdf065a34b to your computer and use it in GitHub Desktop.
def dataframe_metainformation(df):
    """Summarise the column dtypes and missing-value counts of a DataFrame.

    Columns are split into "categorical" and "numerical" groups.  A column is
    categorical when its dtype is ``object``, a pandas string dtype or a pandas
    ``category`` dtype; every other column is reported as numerical.

    Args:
        df: The ``pandas.DataFrame`` to describe.

    Returns:
        A dict with the following keys:

        * ``'description'``: DataFrame indexed by column name with the columns
          ``'dtype'`` and ``'NAs'`` (number of missing values per column).
        * ``'categorical_features'``: names of the categorical columns.
        * ``'categorical_features_na'``: categorical columns with at least one
          missing value.
        * ``'numerical_features'``: names of the non-categorical columns.
        * ``'numerical_features_na'``: non-categorical columns with at least
          one missing value.
        * ``'complete_features'``: columns without any missing value.
    """
    descr = pd.DataFrame({'dtype': df.dtypes, 'NAs': df.isna().sum()})

    def _is_categorical(dtype):
        # Comparing against the literal 'object' alone misses text columns
        # stored with the dedicated string dtype as well as 'category'
        # columns, which would otherwise be reported as numerical.
        return bool(
            pd.api.types.is_object_dtype(dtype)
            or isinstance(dtype, (pd.CategoricalDtype, pd.StringDtype))
        )

    # Build each boolean mask once and reuse it for all derived lists.
    is_categorical = descr['dtype'].map(_is_categorical).astype(bool)
    has_na = descr['NAs'] > 0

    def _names(mask):
        # Column names (the index of ``descr``) selected by a boolean mask.
        return descr.index[mask.to_numpy()].tolist()

    meta = {
        'description': descr,
        'categorical_features': _names(is_categorical),
        'categorical_features_na': _names(is_categorical & has_na),
        'numerical_features': _names(~is_categorical),
        'numerical_features_na': _names(~is_categorical & has_na),
        'complete_features': _names(~has_na),
    }
    return meta
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment