Skip to content

Instantly share code, notes, and snippets.

@MattSegal
Last active April 27, 2023 02:32
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save MattSegal/78eeb7584af009eacc1a7cf85db830d2 to your computer and use it in GitHub Desktop.
Save MattSegal/78eeb7584af009eacc1a7cf85db830d2 to your computer and use it in GitHub Desktop.
from typing import List
from datetime import timedelta, datetime
import streamlit as st
import pandas as pd
IGNORE_DESCRIPTIONS = [
"Internal Transfer",
"International Transaction Fee",
]
RENT = [
"Woodards Norther", # Rent Carlton
# Rent Rose street
"NELS RENT",
"Rose St Rent",
"DEFT RENT",
"ASM - Direct Debit",
# House moving
"FANTASTIC SERVICES", # Cleaners
"MAN WITH A VAN",
]
HEALTH_INSURANCE = [
"DOCTORS' HEALTH",
"DOCTORS HEALTH",
"CARLTON FAMILY MEDIC",
"SCHNEIER PHARMACY",
]
BILLS = [
"AUSSIE BROADBAND",
"TPG Internet PTY",
"EnergyAustralia",
"ORIGIN ENERGY",
"GWW - ",
"AMBER ELECTRIC",
]
LIQUOR = [
"BWS LIQUOR",
"DAN MURPHY'S",
]
CHILDCARE = [
"Fitzroy ELC",
]
GROCERIES = [
"WOOLWORTHS",
"COLES",
"AMAZON MARKETPLACE",
"AMAZON AU",
"SUPA IGA",
"ALDI STORES",
"IGA EXPRESS",
"IGA FITZROY",
]
SHOPPING = [
"Uniqlo Australia",
"KMART",
"OFFICEWORKS",
"CWH ",
"BUNNINGS ",
"IKEA PTY LTD",
"IKEA RICHMOND",
]
COFFEE = [
"ASSEMBLY STORE",
"D O C ESPRESSO",
"Industry Beans Fitzroy",
"INDUSTRYBEANS",
"GOOD MEASURE",
"MARKET LANE COFFEE",
]
TAKEOUT = [
"UBER* EATS",
"UBER *EATS",
"7-ELEVEN",
"YO CHI",
"GRILLD",
"PAVLOVS DUCK",
"Dominos",
"Nasi Lemak House",
"MCDONALDS",
"Tamago Sushi",
]
TRANSPORT = [
"UBER* TRIP",
"UBER* TRIP",
"UBER *TRIP",
]
CATEGORIES = {
"rent": RENT,
"groceries": GROCERIES + SHOPPING,
"non-essentials": COFFEE + TAKEOUT + LIQUOR + TRANSPORT,
"childcare": CHILDCARE,
"bills": BILLS + HEALTH_INSURANCE,
}
CSV_FILE = "data/ing-transactions-20-03-2023.csv"
"""
sample rows
Date,Description,Credit,Debit,Balance
27/04/2023,AHOY ROY FISH N CHIPS - Visa Purchase - Receipt 107092In ALBERT PARK Date 25 Apr 2023 Card xxxxx,,-58.85,2765.92
27/04/2023,COPP PARKING - Visa Purchase - Receipt 107091In ST KILDA Date 25 Apr 2023 Card xxxxx,,-11.02,2824.77
27/04/2023,D O C ESPRESSO - Visa Purchase - Receipt 107090In CARLTON Date 25 Apr 2023 Card xxxxx,,-67.08,2835.79
"""
def main():
st.header("ING banking data")
df = pd.read_csv(CSV_FILE)
df = df.rename(
columns={
"Date": "date",
"Description": "description",
"Credit": "credit",
"Debit": "debit",
"Balance": "balance",
}
)
df.date = pd.to_datetime(df.date, format="%d/%m/%Y")
df = df[df["debit"].notna()]
df = df.drop(["credit", "balance"], axis=1)
df["debit"] = -1 * df["debit"]
desc = st.text_input("Description to match")
if desc:
df_preview = df[df["description"].str.contains(desc, case=False, regex=False)]
st.write(df_preview)
df = ignore_in_description(df, IGNORE_DESCRIPTIONS)
for colname, descriptions in CATEGORIES.items():
add_column_with_matching(df, descriptions, colname)
is_display_remainder = st.checkbox("Show unclassified rows")
if is_display_remainder:
st.write(df[df["debit"].notna()])
# Debit only
agg_df = df.resample(rule="M", on="date").agg(
{"debit": "sum", **{k: "sum" for k in CATEGORIES.keys()}}
)
st.subheader("Family spending")
cols = st.multiselect(
"Categories", options=agg_df.columns, default=list(agg_df.columns)
)
st.bar_chart(agg_df[cols])
start_date = st.date_input(
"Summary start date",
min_value=df.date.min(),
max_value=df.date.max(),
value=max(df.date.min(), df.date.max() - timedelta(days=90)),
)
start_dt = datetime.combine(start_date, datetime.min.time())
summary_series = (
df[df.date >= start_dt][cols]
.agg({"debit": "sum", **{k: "sum" for k in cols}})
.round()
)
summary_df = pd.DataFrame(
{
"spend": summary_series.values,
"percent": (100 * summary_series / summary_series.sum()).round(),
}
)
time_period = (df.date.max() - start_dt).days
st.subheader(
f"Total spend over last {time_period} days ({start_dt.date()} to {df.date.max().date()})"
)
st.write(summary_df)
total = summary_series.sum()
st.write(f"Total: ${total:0.0f}")
st.subheader(f"Monthly spend over last {time_period} days")
monthly_adjustment = (365 / time_period) / 12
summary_df.spend = (summary_df.spend * monthly_adjustment).round()
st.write(summary_df)
monthly_avg = monthly_adjustment * total
st.write(f"Monthly average: ${monthly_avg:0.0f}")
def add_column_with_matching(df: pd.DataFrame, matches: List[str], colname: str):
mask = None
for desc in matches:
desc_mask = df["description"].str.contains(desc, case=False, regex=False)
if mask is not None:
mask |= desc_mask
else:
mask = desc_mask
if mask is not None:
df[colname] = None
df[colname][mask] = df[mask].debit
df["debit"][mask] = None
def ignore_in_description(df: pd.DataFrame, ignores: List[str]):
mask = None
for desc in ignores:
desc_mask = ~df["description"].str.contains(desc, case=False, regex=False)
if mask is not None:
mask &= desc_mask
else:
mask = desc_mask
if mask is not None:
return df[mask]
else:
return df
main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment