def drop_columns(columns, df):
    """Return a copy of ``df`` without the given columns.

    Args:
        columns: Iterable of column labels to remove.
        df: DataFrame to remove the columns from; it is not modified.

    Returns:
        A new DataFrame containing every column of ``df`` except those
        listed in ``columns``.

    Raises:
        KeyError: If any label in ``columns`` is not a column of ``df``.
    """
    # One drop call removes all labels at once instead of re-copying the
    # frame for every single column.
    return df.drop(columns=list(columns))
# Columns removed from df1 before any further processing.
drop_list = ['state', 'county', 'community', 'communityname', 'fold']
df1 = drop_columns(drop_list, df1)
def find_missing_values(df, *, marker="?", drop_count=1675,
                        row_filter_column="OtherPerCap"):
    """Remove columns and rows that contain the missing-value marker.

    Two clean-up steps are applied, in this order:

    1. Every column whose number of ``marker`` entries equals
       ``drop_count`` exactly is dropped.
    2. Every row whose ``row_filter_column`` value equals ``marker`` is
       dropped.

    Args:
        df: DataFrame to clean; it is not modified.
        marker: Value that denotes a missing entry.
        drop_count: Columns with exactly this many ``marker`` entries are
            removed.
        row_filter_column: Column whose ``marker`` rows are removed. Pass
            ``None`` to skip the row filter.

    Returns:
        A new DataFrame with the matching columns and rows removed.

    Raises:
        KeyError: If ``row_filter_column`` is not a column of the frame
            after step 1.
    """
    # Count the marker occurrences column by column.
    marker_counts = [
        (column, int((df[column] == marker).sum())) for column in df.columns
    ]
    columns_to_drop = [
        column for column, count in marker_counts if count == drop_count
    ]
    if columns_to_drop:
        df = df.drop(columns=columns_to_drop)

    if row_filter_column is not None:
        df = df[df[row_filter_column] != marker]
    return df
# Replace df1 with the frame returned by find_missing_values.
df1 = find_missing_values(df1)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment