Skip to content

Instantly share code, notes, and snippets.

@strefethen
Last active May 28, 2016 06:48
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save strefethen/180efcc1ecda6a02b1351418e95d0a29 to your computer and use it in GitHub Desktop.
Save strefethen/180efcc1ecda6a02b1351418e95d0a29 to your computer and use it in GitHub Desktop.
Plotting Localytics Retention data using R and Shiny
{
"results": [
{
"birth_week": "2014-09-08",
"users": 1,
"week": "2014-12-29"
},
{
"birth_week": "2014-09-29",
"users": 1640,
"week": "2014-12-29"
},
{
"birth_week": "2014-10-06",
"users": 2988,
"week": "2014-12-29"
},
{
"birth_week": "2014-10-13",
"users": 4747,
"week": "2014-12-29"
},
{
"birth_week": "2014-10-20",
"users": 2443,
"week": "2014-12-29"
},
{
"birth_week": "2014-10-27",
"users": 1972,
"week": "2014-12-29"
},
{
"birth_week": "2014-11-03",
"users": 2133,
"week": "2014-12-29"
},
{
"birth_week": "2014-11-10",
"users": 2038,
"week": "2014-12-29"
},
{
"birth_week": "2014-11-17",
"users": 2038,
"week": "2014-12-29"
},
{
"birth_week": "2014-11-24",
"users": 2490,
"week": "2014-12-29"
},
{
"birth_week": "2014-12-01",
"users": 2061,
"week": "2014-12-29"
},
{
"birth_week": "2014-12-08",
"users": 2050,
"week": "2014-12-29"
},
{
"birth_week": "2014-12-15",
"users": 2092,
"week": "2014-12-29"
},
{
"birth_week": "2014-12-22",
"users": 2676,
"week": "2014-12-29"
},
{
"birth_week": "2014-12-29",
"users": 7187,
"week": "2014-12-29"
},
{
"birth_week": "2014-09-08",
"users": 1,
"week": "2015-01-05"
},
{
"birth_week": "2014-09-29",
"users": 1891,
"week": "2015-01-05"
},
{
"birth_week": "2014-10-06",
"users": 3216,
"week": "2015-01-05"
},
{
"birth_week": "2014-10-13",
"users": 5088,
"week": "2015-01-05"
},
{
"birth_week": "2014-10-20",
"users": 2628,
"week": "2015-01-05"
},
{
"birth_week": "2014-10-27",
"users": 2271,
"week": "2015-01-05"
},
{
"birth_week": "2014-11-03",
"users": 2292,
"week": "2015-01-05"
},
{
"birth_week": "2014-11-10",
"users": 2229,
"week": "2015-01-05"
},
{
"birth_week": "2014-11-17",
"users": 2221,
"week": "2015-01-05"
},
{
"birth_week": "2014-11-24",
"users": 2664,
"week": "2015-01-05"
},
{
"birth_week": "2014-12-01",
"users": 2183,
"week": "2015-01-05"
},
{
"birth_week": "2014-12-08",
"users": 2275,
"week": "2015-01-05"
},
{
"birth_week": "2014-12-15",
"users": 2234,
"week": "2015-01-05"
},
{
"birth_week": "2014-12-22",
"users": 2752,
"week": "2015-01-05"
},
{
"birth_week": "2014-12-29",
"users": 4558,
"week": "2015-01-05"
},
{
"birth_week": "2015-01-05",
"users": 5066,
"week": "2015-01-05"
}
],
"app_id": [
"91e6f7bf1c6004b029c3082-602f5774-ae2b-11e2-0c2c-004a77f8b47f"
],
"comment": null,
"created_at": "2015-09-08T14:31:00Z",
"expires_at": "2015-09-15T16:05:50Z",
"truncated": false,
"query": {
"metrics": [
"users"
],
"dimensions": [
"week",
"birth_week"
],
"conditions": {
"day": [
"between",
"2015-01-01",
"2015-01-08"
]
},
"app_id": [
"91e6f7bf1c6004b029c3082-602f5774-ae2b-11e2-0c2c-004a77f8b47f"
],
"order": [
"+week",
"+birth_week",
"-users"
],
"limit": 50000,
"translate": {
},
"comment": null,
"sampling_ratio": 1.0
},
"_links": {
"self": {
"href": "/v1/query?&app_id%5B%5D=91e6f7bf1c6004b029c3082-602f5774-ae2b-11e2-0c2c-004a77f8b47f&conditions%5Bday%5D%5B%5D=between&conditions%5Bday%5D%5B%5D=2015-01-01&conditions%5Bday%5D%5B%5D=2015-01-08&dimensions%5B%5D=week&dimensions%5B%5D=birth_week&limit=50000&metrics%5B%5D=users&order%5B%5D=%2Bweek&order%5B%5D=%2Bbirth_week&order%5B%5D=-users&sampling_ratio=1.0"
},
"app": {
"href": "/v1/apps/91e6f7bf1c6004b029c3082-602f5774-ae2b-11e2-0c2c-004a77f8b47f"
},
"root": {
"href": "/v1"
}
}
}
library(shiny)
library(RCurl)
library(jsonlite)
library(reshape2)
library(ggplot2)
# Background: http://www.stevetrefethen.com/blog/fetching-localytics-api-data-in-r-and-working-with-the-json-result
# PROPS: This blog post was a big help: http://analyzecore.com/2014/07/03/cohort-analysis-in-r-retention-charts/
# PROPS: http://stackoverflow.com/questions/24756543/r-api-connection-localytics-with-geturl-and-rcurl-error
# Fetch a Localytics "users by week and birth_week" query result and reshape
# it into a cohort retention table with one row per weekly cohort.
#
# Args:
#   json_url: URL of the query-result JSON. Defaults to the example data from
#     http://docs.localytics.com/dev/query-api.html#query-api-example-users-by-week-and-birth_week
#
# Returns:
#   A data frame whose first column is the cohort date, whose second column is
#   the cohort's user count in its birth week, and whose remaining columns are
#   the user counts in the following weeks (NA where no data exists), e.g.:
#     Weekly.Cohort Users Week.1
#   1    2014-12-29  7187   4558
#   2    2015-01-05  5066     NA
retentionDF <- function(json_url = 'https://gist.githubusercontent.com/strefethen/180efcc1ecda6a02b1351418e95d0a29/raw/1ad93c22488e48b5e62b017dc5428765c5c3ba0f/localyticsexampledata.json') {
  localyticsExampleJSON <- getURL(json_url)
  cohort <- fromJSON(localyticsExampleJSON)
  results <- cohort$results
  weeks <- unique(results$week)
  numweeks <- length(weeks)
  if (numweeks == 0) {
    # Nothing to reshape: return an empty table with the fixed leading columns.
    return(data.frame(Weekly.Cohort = character(0), Users = numeric(0)))
  }
  # Create a matrix big enough to hold all of the data. Column 1 is a
  # placeholder that is replaced by the cohort dates after conversion.
  m <- matrix(nrow = numweeks, ncol = numweeks + 1)
  for (i in seq_along(weeks)) {
    # User counts for every reported week of this cohort, selected by column
    # name rather than position so the JSON key order does not matter.
    # NOTE(review): assumes the rows of each cohort are ordered by week, as
    # the example query's "+week" ordering suggests.
    d <- results$users[results$birth_week == weeks[i]]
    if (length(d) > 0) {
      # Skip the first column using "+ 1", which will be Weekly.Cohort (date)
      m[i, seq_along(d) + 1] <- d
    }
  }
  # Convert matrix to a dataframe
  df <- as.data.frame(m)
  # Set values of the first column to the cohort dates
  df$V1 <- weeks
  # seq_len() yields no "Week.N" names at all when there is only one week,
  # keeping the name count equal to the column count.
  colnames(df) <- c("Weekly.Cohort", "Users", paste0("Week.", seq_len(numweeks - 1)))
  df
}
# Calculate retention ratios by dividing every count column by the cohort's
# initial user count.
#
# Args:
#   reten.data: data frame whose column 2 holds each cohort's initial user
#     count and whose columns 2..N hold the per-week user counts.
#   cohort.clients.r: data frame providing the Weekly.Cohort labels, one per
#     row of reten.data.
#
# Returns:
#   A data frame with a `cohort` column followed by one ratio column per count
#   column of reten.data (column 2 divided by itself is therefore always 1).
retenCalc <- function(reten.data, cohort.clients.r) {
  stopifnot(ncol(reten.data) >= 2)
  # drop = FALSE keeps a data frame even when only one count column exists.
  counts <- reten.data[, 2:ncol(reten.data), drop = FALSE]
  cohort.size <- reten.data[[2]]
  # Divide column by column; unlike apply(), this keeps one row per cohort
  # even when the input has a single row.
  reten.r <- lapply(counts, function(users) users / cohort.size)
  data.frame(cohort = cohort.clients.r$Weekly.Cohort, as.data.frame(reten.r))
}
# Return a copy of the retention table with every row padded out to the full
# column count with zeros.
#
# The step that removed zero-valued columns from each row is disabled (see the
# commented-out line below), so each row already has the full column count and
# the padding branch never runs; the table is returned with unchanged values.
#
# Args:
#   reten.data: cohort retention data frame.
#
# Returns:
#   A data frame with the same dimensions as reten.data.
cohortdf <- function(reten.data) {
  cohort.clients.r <- reten.data # work on a copy of the input
  totcols <- ncol(cohort.clients.r) # count number of columns in data set
  # seq_len() makes this a no-op for a zero-row table instead of iterating
  # over c(1, 0).
  for (i in seq_len(nrow(cohort.clients.r))) {
    # drop = FALSE keeps the row as a data frame even for a one-column table
    df <- cohort.clients.r[i, , drop = FALSE]
    #df <- df[ , !df[]==0] #remove columns with zeros
    partcols <- ncol(df) # count number of columns in row (w/o zeros)
    # fill columns after values by zeros
    if (partcols < totcols) {
      df[, (partcols + 1):totcols] <- 0
    }
    cohort.clients.r[i, ] <- df # replace initial row by new one
  }
  cohort.clients.r
}
# Build the retention-percentage table: fetch the cohort counts, then divide
# each count column by the cohort's initial user count.
#
# Returns the data frame produced by retenCalc(). The "Users" ratio column is
# kept in the result; callers that do not want it must drop it themselves.
fetchRetentionTable <- function() {
  cohort.counts <- retentionDF()
  retenCalc(cohort.counts, cohortdf(cohort.counts))
}
# Server logic: renders the raw cohort table, the retention-percentage table
# and the weekly cohort retention chart.
shinyServer(function(input, output) {
  # Compute the retention percentages once and share the cached result between
  # the percentage table and the plot instead of refetching for each output.
  retentionTable <- reactive({
    fetchRetentionTable()
  })

  output$table <- DT::renderDataTable(
    retentionDF()
  )

  output$percentTable <- DT::renderDataTable({
    # Skip the Users column here since it's always 1 (or 100%), and format
    # every remaining week column, however many there are.
    percents <- retentionTable()
    percents <- percents[, names(percents) != "Users", drop = FALSE]
    weekcols <- setdiff(names(percents), "cohort")
    # DT functions are namespace-qualified so this file does not depend on
    # another file having attached DT.
    widget <- DT::datatable(percents)
    if (length(weekcols) > 0) {
      widget <- DT::formatPercentage(widget, weekcols)
    }
    widget
  })

  output$retentionPlot <- renderPlot({
    withProgress(message="Building Chart", {
      # Long format: one row per (cohort, week) pair for ggplot
      cohort.chart1 <- melt(retentionTable(), id.vars = 'cohort')
      colnames(cohort.chart1) <- c('cohort', 'week', 'retention')
      p <- ggplot(cohort.chart1, aes(x=week, y=retention, group=cohort, colour=cohort))
      p + geom_line(size=2, alpha=1/2) +
        geom_point(size=3, alpha=1) +
        geom_smooth(aes(group=1), method = 'loess', size=2, colour='red', se=FALSE) +
        labs(title="Weekly Cohort Retention")
    })
  })
})
library(shiny)
library(DT)
# UI definition: the application title, the cohort retention plot and, beneath
# it, the raw cohort table and the retention-percentage table side by side.
shinyUI(
  fluidPage(
    titlePanel(title = "Localytics Retention Chart"),
    mainPanel(
      # Retention chart across the full page width
      plotOutput(outputId = "retentionPlot"),
      # Two equal-width columns holding the data tables
      fluidRow(
        column(6, DT::dataTableOutput(outputId = "table")),
        column(6, DT::dataTableOutput(outputId = "percentTable"))
      ),
      width = 12
    )
  )
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment