Last active
November 16, 2021 19:45
-
-
Save ahmedshahriar/30225c6df46529941f6a7b02f98886bc to your computer and use it in GitHub Desktop.
A simple function that reports missing values in a pandas DataFrame: counts and percentages per column
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def missing_value_describe(data):
    """Print a missing-value report for a pandas DataFrame and plot it.

    Prints the number of rows that contain at least one missing value and
    the number of columns that contain missing values. When at least one
    column has missing values, it also shows a per-column table (count and
    percentage, highest count first) and draws a bar chart of the
    per-column counts in ascending order.

    Args:
        data: pandas DataFrame to inspect. It is not modified.

    Returns:
        None. The report is printed/displayed as a side effect.
    """
    # Count missing cells per column once; every statistic derives from it.
    missing_counts = data.isna().sum()
    missing_pct = missing_counts / len(data) * 100

    # Build the table from a single index and sort it explicitly rather than
    # relying on how pd.concat orders the union of two differently sorted
    # Series. mergesort is stable, so tied columns keep their original order.
    missing_data = pd.DataFrame(
        {"Total": missing_counts, "Percent": missing_pct}
    ).sort_values("Total", ascending=False, kind="mergesort")

    # Select affected columns by condition instead of by positional slice.
    affected = missing_data[missing_data["Total"] > 0]
    missing_value_col_count = len(affected)

    print(
        "Number of rows with at least 1 missing values:",
        data.isna().any(axis=1).sum(),
    )
    print("Number of columns with missing values:", missing_value_col_count)

    if missing_value_col_count == 0:
        print("No missing data!!!")
        return

    # `display` only exists inside IPython/Jupyter sessions; fall back to
    # print so the function also works in plain scripts.
    try:
        show = display
    except NameError:
        show = print

    # Column names with their missing count and percentage.
    print("\nMissing percentage (desceding):")
    show(affected)

    # Bar chart of the per-column missing counts, smallest first.
    missing_counts[missing_counts > 0].sort_values().plot.bar()
# Example usage: run the missing-value report on the DataFrame `df`
missing_value_describe(data=df)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def remove_duplicate(data):
    """Drop duplicated rows from ``data`` in place, keeping the first of each.

    Args:
        data: pandas DataFrame to de-duplicate. It is modified in place.

    Returns:
        The fixed status string "Checked Duplicates".
    """
    status = "Checked Duplicates"
    # keep="first" retains the earliest occurrence of every duplicated row;
    # inplace=True mutates the caller's DataFrame instead of returning a copy.
    dedupe_options = {"keep": "first", "inplace": True}
    data.drop_duplicates(**dedupe_options)
    return status
# Example usage: drop duplicate rows from the `train` DataFrame, in place
remove_duplicate(data=train)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment