Created
June 1, 2020 13:27
-
-
Save bibscy/4f5c52e878fff38989fccfafe6d85435 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import csv
import re
import string

import numpy as np
import pandas as pd

# Clean the raw loan-payments CSV and write a normalised copy:
#   1. load the unclean CSV,
#   2. capitalise the column names,
#   3. normalise the values of the 'Gender' column,
#   4. write the result to a new CSV file.

myDataFrame = pd.read_csv('Loan_payments_data_2020_unclean.csv')

# Upper-case the first character of each column name and every character that
# directly follows an underscore (e.g. a column named 'loan_status' would
# become 'Loan_Status'). The lookbehind `(?<![^_])` matches at the start of
# the string or right after '_'.
# `regex=True` is passed explicitly: a callable replacement is only accepted
# in regex mode, and newer pandas versions no longer default to it.
replacedColumns = myDataFrame.columns.str.replace(
    r'(?<![^_]).',
    lambda match: match.group().upper(),
    regex=True,
)
myDataFrame.columns = list(replacedColumns)

# Map the abbreviated / lower-case spellings onto the canonical labels.
# These are exact-value matches; any other value is left untouched.
genderLabels = {
    'f': 'Female',
    'm': 'Male',
    'male': 'Male',
    'female': 'Female',
}
myDataFrame['Gender'] = myDataFrame['Gender'].replace(genderLabels)

# fillna() returns a new Series, so the result must be assigned back;
# otherwise missing genders are silently left unfilled.
myDataFrame['Gender'] = myDataFrame['Gender'].fillna('NaN')

# NOTE: the DataFrame index is written as the first (unnamed) column.
myDataFrame.to_csv('new_Paymets_Loan.csv')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment