Last active
August 11, 2020 09:40
-
-
Save ryanbehdad/913afe20ab4459bc74d703654ec6aab1 to your computer and use it in GitHub Desktop.
Print a summary of a pandas dataframe and its columns
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# =======================================================================
# Print a summary of a pandas dataframe and its columns
# =======================================================================
def df_summary(df):
    """Print the shape of ``df`` and return a one-row-per-column summary.

    The returned frame has one row for every column of ``df`` with:

    * ``feature``      - the column label
    * ``dtype``        - the column's dtype
    * ``missing``      - number of null values in the column
    * ``uniques``      - number of distinct non-null values
    * ``first_value``  - value in the first row
    * ``second_value`` - value in the second row
    * ``final_value``  - value in the last row

    The three sample-value columns are stored with ``object`` dtype so that
    each value keeps the type it has in its own column.

    Args:
        df: The pandas DataFrame to summarise.

    Returns:
        The summary DataFrame described above, or the string
        ``"Not enough data to analyse"`` when ``df`` has fewer than two rows
        (a second row is needed for ``second_value``).
    """
    n_rows, n_cols = df.shape
    print(f'Dataframe has {n_rows:,} rows and {n_cols:,} columns')

    # Guard clause: the sample columns below index rows 0 and 1.
    if n_rows < 2:
        return "Not enough data to analyse"

    summary = df.dtypes.to_frame('dtype').reset_index()
    if df.columns.nlevels == 1:
        # reset_index() names the new column after df.columns.name when one
        # is set, so relying on the default 'index' label would silently
        # leave the column un-renamed. Name both columns explicitly instead.
        summary.columns = ['feature', 'dtype']
    else:
        # MultiIndex columns expand to one column per level; keep the
        # original best-effort rename for that case.
        summary = summary.rename(columns={'index': 'feature'})

    summary['missing'] = df.isnull().sum().values
    summary['uniques'] = df.nunique().values

    def _row_values(position):
        # Read cell by cell rather than via df.iloc[position].values: pulling
        # a whole row into one array upcasts mixed numeric columns to float,
        # which turns ints into 1.0 and corrupts large integer IDs.
        cells = [df.iloc[position, col] for col in range(n_cols)]
        return pd.Series(cells, index=summary.index, dtype=object)

    summary['first_value'] = _row_values(0)
    summary['second_value'] = _row_values(1)
    summary['final_value'] = _row_values(-1)
    return summary
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment