Skip to content

Instantly share code, notes, and snippets.

@nickinack
Last active November 22, 2020 06:08
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save nickinack/d4d6bf75ceaf2b12a886567a9351d360 to your computer and use it in GitHub Desktop.
Save nickinack/d4d6bf75ceaf2b12a886567a9351d360 to your computer and use it in GitHub Desktop.
def drop_columns(columns, df):
    '''
    Return a copy of ``df`` with the given columns removed.

    All labels are removed in a single ``DataFrame.drop`` call, so the frame
    is copied once instead of once per column.

    Parameters
    ----------
    columns : iterable of column labels to remove.
    df : DataFrame to remove the columns from; it is left unmodified.

    Returns
    -------
    A new DataFrame holding every column of ``df`` except those in ``columns``.

    Raises
    ------
    KeyError
        If a label in ``columns`` is not a column of ``df``.
    '''
    # list() lets callers pass any iterable (tuple, generator, Index).
    return df.drop(columns=list(columns))
# Columns removed from df1 before any further processing.
drop_list = [
    'state',
    'county',
    'community',
    'communityname',
    'fold',
]
df1 = drop_columns(drop_list, df1)
def find_missing_values(df, *, missing_marker="?", drop_count=1675,
                        required_column="OtherPerCap"):
    '''
    Remove the columns and rows of ``df`` affected by missing values.

    A cell counts as missing when it equals ``missing_marker``. Two filters
    are applied, in this order:

    1. every column holding exactly ``drop_count`` missing cells is dropped;
    2. every row whose ``required_column`` cell is missing is dropped.

    Parameters
    ----------
    df : DataFrame to clean; it is left unmodified.
    missing_marker : value that marks a missing cell (default ``"?"``).
    drop_count : a column is dropped when its number of missing cells equals
        this value exactly (default ``1675``).
    required_column : column whose missing cells cause the row to be dropped
        (default ``"OtherPerCap"``).

    Returns
    -------
    A new DataFrame with the matching columns and rows removed.

    Raises
    ------
    KeyError
        If ``required_column`` is absent from ``df`` or was itself dropped
        in step 1.
    '''
    # Count marker cells per column; comparing a numeric column with a
    # string marker simply yields all-False, i.e. a count of 0.
    sparse_columns = [
        column
        for column in df.columns
        if (df[column] == missing_marker).sum() == drop_count
    ]
    df = df.drop(columns=sparse_columns)
    # Keep only the rows that have a real value in the required column.
    return df[df[required_column] != missing_marker]
# Rebind df1 to the frame returned by find_missing_values, which drops the
# columns containing exactly 1675 "?" entries and then the rows whose
# "OtherPerCap" value is "?".
df1 = find_missing_values(df1)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment