Created
June 12, 2020 12:14
-
-
Save kaiwei/f6dc672fce24d25975b5fe6270176dbf to your computer and use it in GitHub Desktop.
Standard Chartered Bank Singapore Credit Card CSV download cleaner
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
import numpy as np | |
import sys | |
# Create a Dataframe from CSV | |
df = pd.read_csv(sys.argv[1], thousands = ',', delimiter = ',', skiprows = 4, low_memory = False, keep_default_na = False) | |
# Cleaning: (1) Drop "Unposted" (2) Remove nonsense entries at the bottom (3) Remove "SG" from all Desc | |
df.Date = df.Date.str.strip() | |
df = df[~df.DESCRIPTION.str.contains("UNPOSTED")] | |
df['DESCRIPTION'] = df['DESCRIPTION'].str.replace('Singapore SG', '', case = False).str.strip() | |
df = df[:-6] | |
# Change CR & DR to positive or negative numbers and dtype to float | |
df['sign'] = np.where(df['SGD Amount'].str.contains("CR", na = False), '', '-') | |
df['figure'] = df['SGD Amount'].str.extract('(\d*\.\d+|\d+)') | |
df['num'] = (df.sign + df.figure) | |
# Spilt Foreign Currency Amount into different columns because I don't know how to swap values in a string | |
new = df["Foreign Currency Amount"].str.split(" ", n = 1, expand = True) | |
df["ccy"]= new[0] | |
df["amt"]= new[1] | |
# Add “@@” syntax to specify the total cost in foreign currency | |
df['cost_flag'] = np.where(df['amt'].isnull(), '', ' @@') | |
# All together now, creating tag for icsv2ledger | |
df["FCY"] = df["cost_flag"] + " " + df["amt"] + " " + df["ccy"] | |
# Write to file | |
df.to_csv('clean_' + sys.argv[1], index = False, float_format = '%.2f', encoding = 'utf-8') |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment