Skip to content

Instantly share code, notes, and snippets.

@bibscy
Created June 1, 2020 13:27
Show Gist options
  • Save bibscy/4f5c52e878fff38989fccfafe6d85435 to your computer and use it in GitHub Desktop.
Save bibscy/4f5c52e878fff38989fccfafe6d85435 to your computer and use it in GitHub Desktop.
import csv
import pandas as pd
import numpy as np
import re
import string
# Load the raw loan-payments export; the path is relative to the working directory.
myDataFrame = pd.read_csv('Loan_payments_data_2020_unclean.csv')

# Upper-case the first character of every column name and every character
# that directly follows an underscore (e.g. 'some_name' -> 'Some_Name').
# The lookbehind `(?<![^_])` succeeds at the start of the string or right
# after '_'. re.sub is used instead of Series.str.replace because the latter
# treats the pattern as a literal (regex=False) by default since pandas 2.0
# and raises ValueError for a callable replacement in that mode.
replacedColumns = [
    re.sub(r'(?<![^_]).', lambda match: match.group().upper(), columnName)
    for columnName in myDataFrame.columns
]

# Assign the formatted names back to the columns property of myDataFrame.
myDataFrame.columns = replacedColumns
# =============================================================================
# genderList = myDataFrame.loc[:,"Gender"]
# =============================================================================
# Clean data: collapse the inconsistent gender spellings into two canonical
# labels. Series.replace with a dict matches whole cell values only, so
# values that are already 'Male'/'Female' are left untouched.
genderLabels = {
    'f': 'Female',
    'female': 'Female',
    'm': 'Male',
    'male': 'Male',
}
myDataFrame['Gender'] = myDataFrame['Gender'].replace(genderLabels)

# fillna returns a new Series rather than modifying in place, so the result
# must be assigned back; otherwise missing genders stay missing. Missing
# entries are stored as the literal text 'NaN'.
myDataFrame['Gender'] = myDataFrame['Gender'].fillna('NaN')

# Write the cleaned table. With the default options the DataFrame index is
# written as an unnamed first column.
myDataFrame.to_csv('new_Paymets_Loan.csv')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment