Skip to content

Instantly share code, notes, and snippets.

@jiobu1
Created March 3, 2021 16:11
Show Gist options
  • Save jiobu1/6eab4270afd8e09cd234008c9839fb78 to your computer and use it in GitHub Desktop.
Save jiobu1/6eab4270afd8e09cd234008c9839fb78 to your computer and use it in GitHub Desktop.
Wrangle Function
def wrangle(X):
    """Clean one raw crime-table DataFrame so it can be merged with the others.

    The frame is expected in its raw, header-less layout:

    * cell (0, 0) holds the text from which the ``State`` value is derived
      (non-letters are stripped, then it is passed to ``return_value``);
    * row 3 holds the real column names;
    * data starts at row 4, and only the first 12 columns are kept.

    Parameters
    ----------
    X : pandas.DataFrame
        Raw table as loaded from file. It is not modified; a copy is used.

    Returns
    -------
    pandas.DataFrame
        Cleaned table with a ``City`` column (renamed from ``State``), a new
        ``State`` column inserted at position 1, sparse rows removed, agency
        boilerplate and digits stripped from ``City``, and remaining NaNs
        replaced with 0.
    """
    X = X.copy()

    # The top-left cell is the key used to build the State column.
    # ``iloc[0, 0]`` is purely positional; the chained ``iloc[0][0]`` performs
    # a label lookup on the row and only falls back to position (deprecated).
    k = X.iloc[0, 0]
    # Make sure that k has no extra characters (keep ASCII letters only).
    k = re.sub(r'[^a-zA-Z]', '', k)
    print(k)

    # Promote the header row to column names.
    header_row = 3
    X.columns = X.iloc[header_row]

    # Clean column names: multi-line headers become single-line, and
    # footnoted variants are mapped onto one canonical name.
    X.columns = X.columns.str.replace("\n", " ", regex=False)
    X = X.rename(columns={'State': 'City',
                          'Rape1': 'Rape',
                          'Rape2': 'Rape',
                          'Arson2': 'Arson',
                          'Arson3': 'Arson'})

    # Keep only the data rows (everything below the header) and the first
    # 12 columns.
    X = X.iloc[header_row + 1:, 0:12]

    # Drop rows that have fewer than 4 non-null values.
    X = X.dropna(axis=0, thresh=4)

    # Create state column.
    X.insert(loc=1, column='State', value=return_value(k))

    # Remove police-department boilerplate from the city name. Order matters:
    # the longer, more specific phrases must be removed before the shorter
    # ones they contain. These are literal substrings, so regex is disabled
    # explicitly to behave the same on every pandas version.
    literal_noise = (
        ' Metropolitan Police Department',
        'Metropolitan ',
        ' Regional Police Department',
        'Metro Police Authority of ',
        ' Police Department',
        ' Sheriff',
        '3,',
    )
    city = X['City']
    for noise in literal_noise:
        city = city.str.replace(noise, '', regex=False)
    # Strip any remaining digits (footnote markers). ``regex=True`` must be
    # explicit: pandas >= 2.0 treats the pattern as a literal by default, which
    # would silently leave the digits in place.
    city = city.str.replace(r'\d+', '', regex=True)
    X['City'] = city

    # Replace remaining NaNs with 0.
    X = X.replace(np.nan, 0)
    return X
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment