Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Plotting Localytics Retention data using R and Shiny
{
"results": [
{
"birth_week": "2014-09-08",
"users": 1,
"week": "2014-12-29"
},
{
"birth_week": "2014-09-29",
"users": 1640,
"week": "2014-12-29"
},
{
"birth_week": "2014-10-06",
"users": 2988,
"week": "2014-12-29"
},
{
"birth_week": "2014-10-13",
"users": 4747,
"week": "2014-12-29"
},
{
"birth_week": "2014-10-20",
"users": 2443,
"week": "2014-12-29"
},
{
"birth_week": "2014-10-27",
"users": 1972,
"week": "2014-12-29"
},
{
"birth_week": "2014-11-03",
"users": 2133,
"week": "2014-12-29"
},
{
"birth_week": "2014-11-10",
"users": 2038,
"week": "2014-12-29"
},
{
"birth_week": "2014-11-17",
"users": 2038,
"week": "2014-12-29"
},
{
"birth_week": "2014-11-24",
"users": 2490,
"week": "2014-12-29"
},
{
"birth_week": "2014-12-01",
"users": 2061,
"week": "2014-12-29"
},
{
"birth_week": "2014-12-08",
"users": 2050,
"week": "2014-12-29"
},
{
"birth_week": "2014-12-15",
"users": 2092,
"week": "2014-12-29"
},
{
"birth_week": "2014-12-22",
"users": 2676,
"week": "2014-12-29"
},
{
"birth_week": "2014-12-29",
"users": 7187,
"week": "2014-12-29"
},
{
"birth_week": "2014-09-08",
"users": 1,
"week": "2015-01-05"
},
{
"birth_week": "2014-09-29",
"users": 1891,
"week": "2015-01-05"
},
{
"birth_week": "2014-10-06",
"users": 3216,
"week": "2015-01-05"
},
{
"birth_week": "2014-10-13",
"users": 5088,
"week": "2015-01-05"
},
{
"birth_week": "2014-10-20",
"users": 2628,
"week": "2015-01-05"
},
{
"birth_week": "2014-10-27",
"users": 2271,
"week": "2015-01-05"
},
{
"birth_week": "2014-11-03",
"users": 2292,
"week": "2015-01-05"
},
{
"birth_week": "2014-11-10",
"users": 2229,
"week": "2015-01-05"
},
{
"birth_week": "2014-11-17",
"users": 2221,
"week": "2015-01-05"
},
{
"birth_week": "2014-11-24",
"users": 2664,
"week": "2015-01-05"
},
{
"birth_week": "2014-12-01",
"users": 2183,
"week": "2015-01-05"
},
{
"birth_week": "2014-12-08",
"users": 2275,
"week": "2015-01-05"
},
{
"birth_week": "2014-12-15",
"users": 2234,
"week": "2015-01-05"
},
{
"birth_week": "2014-12-22",
"users": 2752,
"week": "2015-01-05"
},
{
"birth_week": "2014-12-29",
"users": 4558,
"week": "2015-01-05"
},
{
"birth_week": "2015-01-05",
"users": 5066,
"week": "2015-01-05"
}
],
"app_id": [
"91e6f7bf1c6004b029c3082-602f5774-ae2b-11e2-0c2c-004a77f8b47f"
],
"comment": null,
"created_at": "2015-09-08T14:31:00Z",
"expires_at": "2015-09-15T16:05:50Z",
"truncated": false,
"query": {
"metrics": [
"users"
],
"dimensions": [
"week",
"birth_week"
],
"conditions": {
"day": [
"between",
"2015-01-01",
"2015-01-08"
]
},
"app_id": [
"91e6f7bf1c6004b029c3082-602f5774-ae2b-11e2-0c2c-004a77f8b47f"
],
"order": [
"+week",
"+birth_week",
"-users"
],
"limit": 50000,
"translate": {
},
"comment": null,
"sampling_ratio": 1.0
},
"_links": {
"self": {
"href": "/v1/query?&app_id%5B%5D=91e6f7bf1c6004b029c3082-602f5774-ae2b-11e2-0c2c-004a77f8b47f&conditions%5Bday%5D%5B%5D=between&conditions%5Bday%5D%5B%5D=2015-01-01&conditions%5Bday%5D%5B%5D=2015-01-08&dimensions%5B%5D=week&dimensions%5B%5D=birth_week&limit=50000&metrics%5B%5D=users&order%5B%5D=%2Bweek&order%5B%5D=%2Bbirth_week&order%5B%5D=-users&sampling_ratio=1.0"
},
"app": {
"href": "/v1/apps/91e6f7bf1c6004b029c3082-602f5774-ae2b-11e2-0c2c-004a77f8b47f"
},
"root": {
"href": "/v1"
}
}
}
library(shiny)
library(RCurl)
library(jsonlite)
library(reshape2)
library(ggplot2)
# Background: http://www.stevetrefethen.com/blog/fetching-localytics-api-data-in-r-and-working-with-the-json-result
# PROPS: This blog post was a big help: http://analyzecore.com/2014/07/03/cohort-analysis-in-r-retention-charts/
# PROPS: http://stackoverflow.com/questions/24756543/r-api-connection-localytics-with-geturl-and-rcurl-error
# Build the weekly cohort retention table from the Localytics example data.
#
# Downloads the example query result (user counts broken down by `week` and
# `birth_week`) and lays the counts out with one row per cohort:
#
#   Weekly.Cohort Users Week.1
# 1    2014-12-29  7187   4558
# 2    2015-01-05  5066     NA
#
# Returns a data.frame with the columns Weekly.Cohort, Users and
# Week.1 .. Week.(numweeks - 1). Cells without an observation are NA.
# Stops with an error when the response contains no results.
retentionDF <- function() {
  # Example data from: http://docs.localytics.com/dev/query-api.html#query-api-example-users-by-week-and-birth_week
  localyticsExampleJSON <- getURL('https://gist.githubusercontent.com/strefethen/180efcc1ecda6a02b1351418e95d0a29/raw/1ad93c22488e48b5e62b017dc5428765c5c3ba0f/localyticsexampledata.json')
  cohort <- fromJSON(localyticsExampleJSON)
  results <- cohort$results
  weeks <- unique(results$week)
  numweeks <- length(weeks)
  if (numweeks == 0) {
    stop("Localytics response contains no results", call. = FALSE)
  }
  # Numeric matrix big enough to hold all of the data. Column 1 is a
  # placeholder that is overwritten with the cohort dates further down.
  m <- matrix(NA_real_, nrow = numweeks, ncol = numweeks + 1)
  for (i in seq_along(weeks)) {
    # User counts for every observed week of this cohort, selected by column
    # name so the result does not depend on the key order of the JSON.
    # The counts are taken in the order the rows appear in the response.
    d <- results$users[results$birth_week == weeks[i]]
    # "+ 1" skips the first column (Weekly.Cohort). seq_along() makes this a
    # no-op for a week in which no cohort was born, leaving the row as NA.
    m[i, seq_along(d) + 1] <- d
  }
  # Convert matrix to a dataframe
  df <- as.data.frame(m)
  # Set values of the first column to the cohort dates
  df$V1 <- weeks
  # seq_len() yields no Week.N names at all when there is only a single week.
  colnames(df) <- c("Weekly.Cohort", "Users", paste0("Week.", seq_len(numweeks - 1)))
  return(df)
}
# Calculate retention ratios based on # of users for each week.
#
# reten.data:       data.frame whose first column is the cohort label, whose
#                   second column is the cohort size and whose remaining
#                   columns are the user counts of the following weeks.
# cohort.clients.r: data.frame supplying the `Weekly.Cohort` labels that are
#                   used for the `cohort` column of the result.
#
# Returns a data.frame with a `cohort` column followed by every column of
# reten.data from the second one onwards, each divided row-wise by column 2
# (so the second column itself is always 1).
retenCalc <- function(reten.data, cohort.clients.r) {
  # drop = FALSE keeps a data.frame even when there is just one count column.
  counts <- reten.data[, seq_len(ncol(reten.data))[-1], drop = FALSE]
  cohortSize <- reten.data[[2]]
  # Dividing the data.frame directly recycles cohortSize down every column
  # and, unlike apply(), keeps the tabular shape for a single-row input.
  reten.r <- counts / cohortSize
  return(data.frame(cohort = cohort.clients.r$Weekly.Cohort, reten.r))
}
# Pad each cohort row with zeros up to the full column count.
#
# reten.data: data.frame of cohort counts, one row per cohort.
#
# Returns a data.frame of the same shape. The step that removes zero columns
# from a row is disabled, so every row already has all columns, the padding
# branch never fires and the input comes back unchanged.
cohortdf <- function(reten.data) {
  cohort.clients.r <- reten.data #create new data frame
  totcols <- ncol(cohort.clients.r) #count number of columns in data set
  # seq_len() iterates zero times for an empty frame; 1:nrow() would visit
  # rows 1 and 0 and fabricate an NA row.
  for (i in seq_len(nrow(cohort.clients.r))) { #for loop for shifting each row
    # drop = FALSE keeps the row a data.frame even for a one-column input.
    df <- cohort.clients.r[i, , drop = FALSE] #select row from data frame
    #df <- df[ , !df[]==0] #remove columns with zeros
    partcols <- ncol(df) #count number of columns in row (w/o zeros)
    #fill columns after values by zeros
    if (partcols < totcols) df[, c((partcols + 1):totcols)] <- 0
    cohort.clients.r[i, ] <- df #replace initial row by new one
  }
  return(cohort.clients.r)
}
# Fetch the cohort counts and turn them into a table of retention ratios.
#
# Returns the data.frame produced by retenCalc(): a `cohort` column followed
# by the Users column and the weekly columns as ratios of the cohort size.
fetchRetentionTable <- function() {
  counts <- retentionDF()
  padded <- cohortdf(counts)
  # The Users column is kept here even though it is always 100%; callers
  # decide whether to show it.
  retenCalc(counts, padded)
}
# Server logic: two data tables (raw cohort counts and retention percentages)
# plus a line chart of retention per weekly cohort.
shinyServer(function(input, output) {
  # Raw user counts per cohort and week.
  output$table <- DT::renderDataTable(
    retentionDF()
  )
  # Retention ratios shown as percentages.
  output$percentTable <- DT::renderDataTable({
    retention <- fetchRetentionTable()
    # Skip the Users column since it's always 1 (or 100%) and keep every
    # weekly column, however many the data contains.
    weekCols <- setdiff(colnames(retention), c("cohort", "Users"))
    # DT functions are namespace-qualified because this file does not attach
    # DT itself.
    percentTable <- DT::datatable(retention[, c("cohort", weekCols), drop = FALSE])
    if (length(weekCols) > 0) {
      percentTable <- DT::formatPercentage(percentTable, weekCols)
    }
    percentTable
  })
  # One line per cohort, plus a red loess trend across all cohorts.
  output$retentionPlot <- renderPlot({
    withProgress(message = "Building Chart", {
      # Long format: one row per (cohort, week) pair.
      cohort.chart1 <- melt(fetchRetentionTable(), id.vars = 'cohort')
      colnames(cohort.chart1) <- c('cohort', 'week', 'retention')
      p <- ggplot(cohort.chart1, aes(x=week, y=retention, group=cohort, colour=cohort))
      p + geom_line(size=2, alpha=1/2) +
        geom_point(size=3, alpha=1) +
        geom_smooth(aes(group=1), method = 'loess', size=2, colour='red', se=FALSE) +
        labs(title="Weekly Cohort Retention")
    })
  })
})
library(shiny)
library(DT)
# UI definition: a title, the retention chart across the full page width and,
# below it, the raw counts table and the percentage table side by side.
shinyUI(
  fluidPage(
    # Application title
    titlePanel(title = "Localytics Retention Chart"),
    mainPanel(
      # Retention chart rendered by the server as output$retentionPlot
      plotOutput(outputId = "retentionPlot"),
      fluidRow(
        # Left half: raw cohort counts
        column(6, DT::dataTableOutput(outputId = "table")),
        # Right half: retention percentages
        column(6, DT::dataTableOutput(outputId = "percentTable"))
      ),
      width = 12
    )
  )
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.
You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session.