Skip to content

Instantly share code, notes, and snippets.

@dirkschumacher
Last active July 29, 2016 09:46
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save dirkschumacher/b203d8fd4625707464226a5cc641fed6 to your computer and use it in GitHub Desktop.
Save dirkschumacher/b203d8fd4625707464226a5cc641fed6 to your computer and use it in GitHub Desktop.
Query BVG billing data from gmail with R
# Use this code if you use the official BVG Berlin app
# I only tested it with credit card billing
# The BVG sends you an email for every purchase you make.
# The following code tries to find all those mails and analyzes them.
library(gmailr)
library(purrr)
library(stringr)
library(lubridate)
library(ggplot2)
library(dplyr)
library(broom)
# Authenticate against Gmail with the local client credentials file.
gmail_auth("client_id.json")
# Search the mailbox for the BVG online shop order confirmations.
all_messages <- gmailr::messages("bvg-onlineshop bestellbestätigung")
# Only the first element of the search result is read (presumably the first
# result page -- TODO confirm against the gmailr version in use). Every
# listed message is fetched by id and its base64 encoded body is decoded
# into a plain character string.
bvg_raw_messages <- map(all_messages[[1]]$messages, function(msg_ref) {
  full_msg <- gmailr::message(msg_ref$id)
  rawToChar(base64enc::base64decode(full_msg$payload$body$data))
})
# extract price and time
# str_match_all() returns one character matrix per mail: column 1 holds the
# full match, the following columns hold the capture groups, and the matrix
# has zero rows when the mail does not contain the pattern.
# The mail bodies arrive as a list, stringr expects a character vector.
bvg_bodies <- as.character(unlist(bvg_raw_messages, use.names = FALSE))
price <- str_match_all(bvg_bodies, "Abbuchungsbetrag: € ([0-9]+,[0-9]+)")
# The dots of the date are escaped so they only match a literal ".".
time <- str_match_all(
  bvg_bodies,
  paste0(
    "Bestellung vom ([0-9]{1,2}\\.[0-9]{1,2}\\.[0-9]{4}) ",
    "([0-9]{1,2}:[0-9]{1,2}) Uhr"
  )
)
# join data into a data.frame
result <- map2(price, time, function(price_match, time_match) {
  # Skip mails that matched the search query but lack the amount or the
  # order date. paste0() turns zero-length input into " ", so building the
  # data.frame would otherwise fail with differing column lengths.
  if (nrow(price_match) == 0 || nrow(time_match) == 0) {
    return(NULL)
  }
  data.frame(
    datetime = parse_date_time(
      paste0(time_match[, 2], " ", time_match[, 3]),
      "d.m.Y H:M"
    ),
    # German decimal comma -> decimal point before the numeric conversion.
    price = as.numeric(str_replace(price_match[, 2], ",", "."))
  )
}) %>%
  compact() %>%
  bind_rows()
# Fail with a clear message instead of a cryptic "object not found" later on.
if (nrow(result) == 0) {
  stop("No BVG order confirmation could be parsed from the mails.",
       call. = FALSE)
}
# Sort chronologically so the running total accumulates in purchase order.
result <- result %>%
  arrange(datetime) %>%
  mutate(cumprice = cumsum(price))
# Line chart of the running total spent, plotted against the purchase time.
ggplot(result, aes(datetime, cumprice)) +
  geom_line()
# fill months without records
# One placeholder row (price = NA) per month makes sure months without any
# purchase still show up in the monthly summary. The sequence starts at the
# first day of the earliest month: stepping by "1 month" from a day that not
# every month has (29-31) can jump over a month, and a mid-month start can
# leave the current month without a placeholder.
dt_seq <- seq(
  floor_date(min(result$datetime), "month"),
  Sys.time(),
  by = "1 month"
)
empty_records <- data.frame(datetime = dt_seq, price = NA_real_)
# data by month
# months with no data filled with 0/NA
data_by_month <- bind_rows(result, empty_records) %>%
  mutate(
    year_month = paste0(
      year(datetime), "_", str_pad(month(datetime), 2, pad = "0")
    )
  ) %>%
  group_by(year_month) %>%
  summarise(
    # Count real purchases (non-NA price) instead of assuming exactly one
    # placeholder row per month. This must come before `price` is
    # overwritten by its monthly sum, because summarise() evaluates its
    # arguments in order.
    trips = sum(!is.na(price)),
    price = sum(price, na.rm = TRUE)
  ) %>%
  # Keep the original column order.
  select(year_month, price, trips) %>%
  # Months without purchases get NA so no dot is drawn for them.
  mutate(trips = ifelse(trips == 0, NA_integer_, trips)) %>%
  arrange(year_month)
# Monthly overview: bars show the money spent per month, red dots show the
# number of trips in that month.
ggplot(data_by_month, aes(year_month, price)) +
  geom_bar(stat = "identity") +
  geom_point(aes(y = trips), colour = "red") +
  labs(
    x = "Year_Month",
    y = "Money spent as bars / #trips as dots",
    title = "Berlin BVG spending"
  )
# LICENSE
# This is free and unencumbered software released into the public domain.
#
# Anyone is free to copy, modify, publish, use, compile, sell, or
# distribute this software, either in source code form or as a compiled
# binary, for any purpose, commercial or non-commercial, and by any
# means.
#
# In jurisdictions that recognize copyright laws, the author or authors
# of this software dedicate any and all copyright interest in the
# software to the public domain. We make this dedication for the benefit
# of the public at large and to the detriment of our heirs and
# successors. We intend this dedication to be an overt act of
# relinquishment in perpetuity of all present and future rights to this
# software under copyright law.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
# OTHER DEALINGS IN THE SOFTWARE.
#
# For more information, please refer to <http://unlicense.org>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment