Skip to content

Instantly share code, notes, and snippets.

@trestletech
Last active March 3, 2019 04:05
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save trestletech/4384914 to your computer and use it in GitHub Desktop.
Save trestletech/4384914 to your computer and use it in GitHub Desktop.
Categorize mint.com transactions. Transactions can be exported from Mint and reprocessed in R.
.Rproj.user
.Rhistory
.RData
transactions/
budgets/
#' CSV file of Mint.com categories.
cat <- read.csv(text="Auto & Transport,
,Auto Insurance
,Auto Payment
,Gas & Fuel
,Parking
,Public Transportation
,Service & Parts
,Tolls
Bills & Utilities,
,Home Phone
,Internet
,Mobile Phone
,Television
,Utilities
,Cable/Internet/Phone
Business Services,
,Advertising
,Legal
,Office Supplies
,Printing
,Shipping
Education,
,Books & Supplies
,Student Loan
,Tuition
Entertainment,
,Amusement
,Arts
,Movies & DVDs
,Music
,Newspapers & Magazines
Fees & Charges,
,ATM Fee
,Bank Fee
,Finance Charge
,Late Fee
,Service Fee
,Trade Commissions
Financial,
,Financial Advisor
,Life Insurance
Food & Dining,
,Alcohol & Bars
,Coffee Shops
,Fast Food
,Groceries
,Restaurants
Gifts & Donations,
,Charity
,Gift
,Charity & Gifts
,Tithe
Health & Fitness,
,Dentist
,Doctor
,Eyecare
,Gym
,Health Insurance
,Pharmacy
,Sports
Home,
,Furnishings
,Home Improvement
,Home Insurance
,Home Services
,Home Supplies
,Lawn & Garden
,Mortgage & Rent
Income,
,Bonus
,Interest Income
,Paycheck
,Reimbursement
,Rental Income
,Returned Purchase
Investments,
,Buy
,Deposit
,Dividend & Cap Gains
,Sell
,Withdrawal
Kids,
,Allowance
,Baby Supplies
,Babysitter & Daycare
,Child Support
,Kids Activities
,Toys
Loans,
,Loan Fees and Charges
,Loan Insurance
,Loan Interest
,Loan Payment
,Loan Principal
Personal Care,
,Hair
,Laundry
,Spa & Massage
,Jeff's Allowance
,Stacie's Allowance
Pets,
,Pet Food & Supplies
,Pet Grooming
,Veterinary
Shopping,
,Books
,Clothing
,Electronics & Software
,Hobbies
,Sporting Goods
Taxes,
,Federal Tax
,Local Tax
,Property Tax
,Sales Tax
,State Tax
Transfer,
,Credit Card Payment
,Transfer for Cash Spending
Travel,
,Air Travel
,Hotel
,Rental Car & Taxi
,Vacation
Uncategorized,
,Cash & ATM
,Check", header=FALSE)
#' Takes a two-column CSV in and converts it to a list. Assumes that the first column
#' is the first level of the list and the second column is the second level. Organizes
#' the list accordingly.
#' @param csv the data.frame described above
#' @return a list representing the two-column data.frame.
csvToList <- function(csv){
trim <- function (x) gsub("^\\s+|\\s+$", "", x)
categories <- as.list(as.character(cat[trim(cat[,1]) != "",1]))
names(categories) <- trim(as.character(cat[trim(cat[,1]) != "",1]))
categories <- sapply(categories, assign, NULL)
thisCat <- ""
for (i in 1:nrow(cat)){
if (trim(cat[i,1]) == ""){
categories[[which(names(categories) == thisCat)]] <- c(categories[[which(names(categories) == thisCat)]], trim(cat[i,2]))
} else{
thisCat <- trim(cat[i,1])
}
}
categories
}
#' Find the specified category out of a two-level list and return the parent element
#' containing the given element, whether it's in the top level or the child level.
#' @param categories a two-level list of parent-child categories
#' @param categoryName the name of the category to be selected
#' @return the parent element containing the specified category name.
findCategory <- function(categories, categoryName){
if (any(categoryName == names(categories))){
return(categories[categoryName])
} else{
#Find the category within the list and remove the suffixed ID.
catName <- gsub("[[:digit:]]+$", "", names(unlist(categories))[unlist(categories) == categoryName])
if(nchar(catName) == 0){
#string not found
return(NULL)
} else{
return(categories[catName])
}
}
}
categories <- csvToList(cat)
Version: 1.0
RestoreWorkspace: Yes
SaveWorkspace: Yes
AlwaysSaveHistory: Yes
EnableCodeIndexing: Yes
UseSpacesForTab: Yes
NumSpacesForTab: 2
Encoding: UTF-8
RnwWeave: Sweave
LaTeX: pdfLaTeX
library(httr)
#login
mintLogin <- function(username, password){
if (nchar(password) > 16){
warning("It seems that Mint trims passwords such as yours to only the first 16 characters. If you're unable to login, try trimming your password to the first 16 characters.")
}
response <- POST("https://wwws.mint.com/loginUserSubmit.xevent", body=list(username=username, password=password, task="L"))
#hackish solution due to non-public API. Looks like the only difference to the request is the URL.
if (response$url == "https://wwws.mint.com/app/internalError.html"){
return(TRUE)
}
return(FALSE)
}
#download
downloadMintTransactions <- function(){
response <- GET("https://wwws.mint.com/transactionDownload.event?")
if (response$url == "https://wwws.mint.com/accessDenied.html"){
stop("It looks like you're not logged in, try using mintLogin() to authenticate before trying to download your transactions.")
}
transactions <- read.csv(text=(content(response, as="text")))
transactions
}
#logout
mintLogout <- function(){
GET("https://wwws.mint.com/login.event?task=L&messageId=5&country=US")
}
# Prototype of how to read INI files to process olfactometer data
# efg, 13 June 2007
# Thanks to Gabor Grothendieck for helpful suggestions in the R-Help
# mailing list on how to parse the INI file.
parseINI <- function(INI.filename)
{
connection <- file(INI.filename)
Lines <- readLines(connection)
close(connection)
Lines <- chartr("[]", "==", Lines) # change section headers
connection <- textConnection(Lines)
d <- read.table(connection, as.is = TRUE, sep = "=", fill = TRUE, quote="")
close(connection)
L <- d$V1 == "" # location of section breaks
d <- subset(transform(d, V3 = V2[which(L)[cumsum(L)]])[1:3],
V1 != "")
ToParse <- paste("INI.list$`", d$V3, "`$", d$V1, " <- \"",
d$V2, "\"", sep="")
INI.list <- list()
eval(parse(text=ToParse))
#replace with logical values
INI.list <- rapply(INI.list, function(x){if(x=="TRUE"){return(TRUE)}; if(x=="FALSE"){return(FALSE)}; return(x)}, how="replace")
#replace numeric values
INI.list <- rapply(INI.list, function(x){if(grepl("^[[:digit:]]+$", x)){return(as.numeric(x))}; return(x)}, how="replace")
return(INI.list)
}
library(ggplot2)
library(lubridate)
library(stringr)
library(digest)
library(plyr)
trans <- read.csv("transactions/transactions.csv", header=TRUE)
trans[trans$Transaction.Type == "debit","Amount"] <- -trans[trans$Transaction.Type == "debit","Amount"]
trans$Date <- mdy(as.character(trans$Date))
#exclude anything excluded from Mint, as it's probably old or irrelevant to the budgets
trans <- trans[trans$Category != "Exclude From Mint",]
#' Plot the monthly performance of a category.
#' @param data The transaction-level data
#' @param categoryName the name(s) of the categories
#' @param budget The amount budgeted for this category
#' @param negateAmounts whether or not to negate the amounts so that negative debits
#' will be plotted as positive numbers
plotMonthlyCategory <- function(data, categoryName, budget, negateAmounts = TRUE){
if (!missing(categoryName)){
if (typeof(categoryName) == "character"){
thisCat <- data[data$Category %in% categoryName,]
friendlyCatName <- paste(categoryName, collapse="/")
}
if (typeof(categoryName) == "list"){
catNames <- c(names(categoryName), unlist(categoryName, use.names=FALSE))
thisCat <- data[data$Category %in% catNames,]
friendlyCatName <- names(categoryName)
}
} else{
thisCat <- data
}
if (negateAmounts){
thisCat$Amount <- -thisCat$Amount
}
thisCat$Month <- year(thisCat$Date) * 12 + month(thisCat$Date)
monthText <- data.frame(Date=ymd("2001-01-01"), Amount=0, Color=0)
monthText <- monthText[-1,]
allMonths <- NULL
for (month in unique(thisCat$Month)){
thisMonth <- thisCat[thisCat$Month == month,]
thisMonth <- thisMonth[order(thisMonth$Date),]
endMonth <- thisMonth[1,"Date"]
day(endMonth) <- 1
month(endMonth) <- month(endMonth)+1
thisMonthText <- list(Date=endMonth, Amount=sum(thisMonth$Amount),Color="Under")
levels(thisMonthText$Color) <- c("Under", "Over")
if (!missing(budget) & budget < sum(thisMonth$Amount)){
thisMonthText$Color <- "Over"
}
monthText <- rbind(monthText, thisMonthText)
#rbind messes up our classes, we'll have to manually recover the date column
class(monthText$Date) <- c("POSIXct", "POSIXt")
#Add the first day of the month at 0.
thisMonth <- rbind(thisMonth[1,], thisMonth)
thisMonth$Amount[1] <- 0
day(thisMonth$Date[1]) <- 1
thisMonth$Amount <- cumsum(thisMonth$Amount)
#Add the last day of the month, if it's not the last month we have
if (month != max(unique(thisCat$Month))){
thisMonth <- rbind(thisMonth, thisMonth[nrow(thisMonth),])
day(thisMonth[nrow(thisMonth),"Date"]) <- days_in_month(month(thisMonth[nrow(thisMonth),"Date"]))
}
color <- rep(1, nrow(thisMonth))
if (!missing(budget)){
color <- ifelse(budget < thisMonth$Amount, "Over", "Under")
}
thisMonth$Budget <- color
allMonths <- rbind(allMonths, thisMonth)
}
p <- ggplot(allMonths, aes(Date, ymin=0, y=Amount, ymax=Amount)) + geom_point() + geom_ribbon() +
geom_text(data=monthText, aes(Date, Amount*1.04, label=Amount)) +
geom_hline(yintercept=0)
if (!missing(budget)){
p <- p + geom_hline(yintercept=budget, col=2)
}
if (!is.null(friendlyCatName)){
p <- p + ggtitle(friendlyCatName)
}
p
}
budg <- parseINI("budgets/singleNursing.ini")
#' Get the budget of the selected category from the vector of specified budgets.
#' @param budgetList a data structure who's top-level names correspond to the budget names
#' @param category The category or categories for which we desire to get the budgets.
#' If this is a character vector, all budgets with matching titles will be extracted.
#' If this is a list, all budgets matching any element in the list will be extracted
#' and summed before returning.
getBudget <- function(budgetList, category){
if (typeof(category) == "character"){
return(budgetList[category])
}
if (typeof(category) == "list"){
innerBudget <- budgetList[unlist(category, use.names=FALSE)]
#remove empty elements
innerBudget <- innerBudget[!is.na(names(innerBudget))]
budget <- list()
budget[[names(category)[1]]] <- innerBudget
#only process frequency if all inner budgets are on the same frequency.
if(all(sapply(budget[[1]], "[[", "frequency") == budget[[1]][[1]][["frequency"]])){
budget$amount <- sum(sapply(budget[[1]], "[[", "amount"))
budget$frequency <- budget[[1]][[1]][["frequency"]]
}
return(budget)
}
}
#' Calculate the number of months in a given frequency of budget
#' @param frequency the frequency of the budget
#' @return the number of months in the specified budget
getMonths <- function(frequency=c("month", "biannual", "annual", "biennual")){
frequency <- match.arg(frequency)
switch(frequency,
"month"=1,
"biannual"=6,
"annual"=12,
"biennual"=24)
}
computeBudget <- function (budgets, transactions, startDate=min(transactions$Date), endDate=max(transactions$Date)){
transactions$month <- (year(transactions$Date)*12) + month(transactions$Date)
if (!missing(startDate)){
minMonth <- (year(startDate) * 12) + month(startDate)
} else{
minMonth <- min(transactions$month)
}
if (!missing(endDate)){
maxMonth <- (year(endDate) * 12) + month(endDate)
} else{
maxMonth <- max(transactions$month)
}
#process each month independently
for (i in minMonth:maxMonth){
#extract the transacations that occured in this month
curMonth <- i %% 12
if(curMonth == 0){
curMonth <- 12
}
curYear <- floor((i-1)/12)
#process each transaction in its category and potentially in parent category's budget
monthTrans <- transactions[month(transactions$Date) == curMonth & year(transactions$Date) == curYear,]
#process each budget separately
for (b in names(budgets)){
bud <- budgets[[b]]
if(is.null(bud$transactions)){
bud$transactions <- data.frame(list(Date=mdy(paste(curMonth, 1, curYear, sep="-"), quiet=TRUE),
Amount = bud$amount,
Transaction = "allowance"))
} else{
#only distribute allowance or reset balance on appropriate months based on
#the budget's frequency.
if ((i - minMonth) %% getMonths(bud$frequency) == 0){
if (bud$rollover){
amt <- bud$amount + bud$transactions$Amount[nrow(bud$transactions)]
} else{
amt <- bud$amount
}
bud$transactions <- rbind(bud$transactions,
data.frame(list(Date=mdy(paste(curMonth, 1, curYear, sep="-"), quiet=TRUE),
Amount = amt,
Transaction = "allowance")))
}
}
#Find this category in the list of categories so we can determine if we
# need to include the parent category.
cats <- findCategory(categories, b)
if (names(cats) == b){
#This is the parent category
cats <- unlist(cats)
}
else {
#This is a child category
cats <- c(names(cats), b)
}
thisBudTrans <- monthTrans[monthTrans$Category %in% cats,]
#order by date
thisBudTrans <- thisBudTrans[order(thisBudTrans$Date),]
bud$transactions <- rbind(bud$transactions,
data.frame(list(Date=thisBudTrans$Date,
Amount=bud$transactions$Amount[nrow(bud$transactions)] + cumsum(thisBudTrans$Amount),
Transaction=apply(thisBudTrans, 1, digest, algo="sha256"))))
budgets[[b]] <- bud
}
}
budgets
}
#' Plot a grid of the budgets specified. Defaults to all budgets
#' @param budgets the transaction-annotate list of budgets as output by the computeBudgets() function.
#' @param budgetNames the names of the budgets to include in the grid
plotBudgets <- function(budgets, budgetNames=names(budgets)){
#get all transactions into a single data.frame
buds <- lapply(budgets, "[[", "transactions")
allTrans <- NULL
for (b in budgetNames){
thisBudget <- buds[[b]]
thisBudget$Budget <- b
allTrans <- rbind(allTrans, thisBudget)
}
ggplot(allTrans, aes(Date, Amount)) + facet_wrap(~Budget, scales="free_y") +
geom_line() +
geom_point(data=allTrans[allTrans$Amount >= 0,]) +
geom_point(data=allTrans[allTrans$Amount < 0,], color="red") +
geom_hline(aes(yintercept=0))
}
#' NOTE: Only works on monthly budgets
calculateRemainder <- function(budgets,
startMonth=year(now())*12 + month(now()),
endMonth=year(now())*12 + month(now())){
#FIXME: currently only works on monthly budgets
#grab the budgets which are reset each month -- i.e. any leftover would go to savings
saveable <- names(which(!sapply(budgets,"[[","rollover") & sapply(budgets,"[[","frequency") == "month"))
remainders <- array(dim=c(endMonth-startMonth+1,
length(saveable)),
dimnames=list(
Month=as.character(startMonth:endMonth),
Budget=saveable))
for (b in saveable){
bud <- budgets[[b]]
tranDate <- year(bud$transactions$Date)*12 + month(bud$transactions$Date)
budTran <- bud$transactions[tranDate >= startMonth & tranDate <= endMonth,]
finalInd <- c(which(budTran$Transaction == "allowance")-1, nrow(budTran))
finalBal <- budTran[finalInd,"Amount"]
remainders[,b] <- finalBal
}
remainders
}
budgets <- computeBudget(budg, trans)
remainders <- calculateRemainder(budgets, startMonth=2012*12+6, endMonth=2012*12+12)
ad <- adply(remainders, 1:2)
ggplot(ad, aes(x=factor(Month), y=V1, fill=Budget)) + geom_bar(stat="identity", position="dodge")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment