Skip to content

Instantly share code, notes, and snippets.

@jwinternheimer
Created November 29, 2017 17:40
Show Gist options
  • Save jwinternheimer/e581d0768d8c011bd27782a39f35d0c3 to your computer and use it in GitHub Desktop.
Save jwinternheimer/e581d0768d8c011bd27782a39f35d0c3 to your computer and use it in GitHub Desktop.
Reads and transforms Google's subscription reports
# load libraries
library(dplyr)
library(buffer)
library(data.table)
# set working directory
setwd("/Users/julianwinternheimer/Downloads/subscriptions")
# list files
filenames <- list.files(pattern = "*country.csv")
# read and merge data
data <- do.call(rbind, lapply(filenames, read.csv, fileEncoding = "UTF-16LE"))
# set solumn names
colnames(data) <- safe_names(colnames(data))
# set dates
data$date <- as.Date(data$date, format = "%Y-%m-%d")
# set prices
data <- data %>%
mutate(mrr_value = ifelse(product_id == 'org.buffer.android.iap.awesome.monthly', 10 * active_subscriptions,
ifelse(product_id == 'org.buffer.android.iap.awesome.yearly', 8.5 * active_subscriptions,
ifelse(product_id == 'org.buffer.android.iap.smallbusiness.monthly', 99 * active_subscriptions, NA))))
# group by date and product_id
grouped <- data %>%
group_by(date, package_name, product_id) %>%
summarise(new_subscriptions = sum(new_subscriptions),
cancelled_subscriptions = sum(cancelled_subscriptions),
active_subscriptions = sum(active_subscriptions),
mrr_value = sum(mrr_value))
##################################
## write to redshift
##################################
# create an empty Redshift table
create_empty_table <- function(con, tn, df) {
# Build SQL query
sql <- paste0("create table \"", tn,"\" (",paste0(collapse=',','"',names(df),'" ',sapply(df[0,],postgresqlDataType)),");");
# Execute query
dbSendQuery(con,sql)
invisible()
}
# fill the empty redshift table
insertBatch <- function(con, tn, df, size = 100L) {
cnt <- (nrow(df)-1L)%/%size+1L
for (i in seq(0L,len=cnt)) {
sql <- paste0("insert into \"",tn,"\" values (",do.call(paste,c(sep=',',collapse='),(',lapply(df[seq(i*size+1L,min(nrow(df),(i+1L)*size)),],shQuote))),");");
dbSendQuery(con,sql);
}
}
# write the results to a table in Reshift
write_to_redshift <- function(df, tn) {
print("Writing to Redshift...")
# connect to Redshift
con <- redshift_connect()
# delete existing table
print("Dropping old table...")
delete_query <- paste("drop table", tn)
query_db(delete_query, con)
# insert new forecast table
print("Creating empty table...")
create_empty_table(con, tn, df)
print("Inserting data...")
insertBatch(con, tn, df)
print("Bloop! Done!")
}
write_to_redshift(grouped, "android_subscription_report")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment