Skip to content

Instantly share code, notes, and snippets.

@githoov
Last active September 1, 2016 17:22
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save githoov/820635927da0a16966fd to your computer and use it in GitHub Desktop.
Save githoov/820635927da0a16966fd to your computer and use it in GitHub Desktop.
R Script to Create a Survival Plot and to Generate a Sample Data Set
# preliminaires
library("ggplot2")
library("zoo")
set.seed(111)
# generate plot of survival curve
x <- sort(dexp(seq(0, 1, 0.01)), decreasing = TRUE)
ggplot(data.frame(x = c(0, 5)), aes(x)) + stat_function(fun = dexp, args = list(rate = 1)) + scale_x_continuous(labels=c(expression(t["0"], t["1"], t["2"], t["3"], t["4"], t["5"]))) + labs(x = "Time", y = expression(y = P(T > t["i"])), title = "Survival Function")
# simulate subscription data
# preliminaries
n_users = 10000
start_date = "2014-06-01"
# helper functions
generate_users <- function(users = NULL) {
return(seq(1, users))
}
generate_signup_dates <- function(number_of_dates = NULL, start_date = NULL, end_date = Sys.Date()) {
all_dates <- seq(as.Date(start_date), as.Date(end_date), by = "days")
signup_dates <- c()
for (i in 1:number_of_dates){
signup_dates[i] <- sample(all_dates, 1, replace = TRUE, prob = sort(1/1:length(all_dates)))
}
return(as.Date(signup_dates, origin = "1970-01-01"))
}
generate_effective_lifetime <- function(signup = NULL) {
potential_lifetime <- (as.yearmon(Sys.Date()) - as.yearmon(signup)) * 12
return(sample(0:potential_lifetime, 1))
}
generate_lifetime <- function(signup = NULL) {
potential_lifetime <- (as.yearmon(Sys.Date()) - as.yearmon(signup)) * 12
return(potential_lifetime)
}
generate_order_amounts <- function(tenure = NULL) {
monthly_fee <- rep(sample(c(25, 30, 50), 1, prob = c(0.75, 0.125, 0.125)), tenure)
return(monthly_fee)
}
generate_order_months <- function(signup_dates = NULL, tenures = NULL) {
order_months <- list()
for (i in 1:length(signup_date)) {
if(tenure[i] > 0) {
order_months[[i]] <- seq(as.Date(signup_date[i]), by = "month", length = tenure[i])
} else {
order_months[[i]] <- NULL
}
}
return(order_months)
}
# create users table
user_id <- generate_users(n_users)
signup_channel <- sample(c("google", "facebook", "bing"), size = n_users, replace = TRUE, prob = c(0.3, 0.5, 0.2))
signup_date <- sort(generate_signup_dates(number_of_dates = length(generate_users(n_users)), start_date = start_date))
# useful for other things
months_since_signup <- sapply(signup_date, generate_lifetime)
tenure <- sapply(signup_date, generate_effective_lifetime)
# create orders table
orders.subscription_amount <- unlist(lapply(tenure, generate_order_amounts))
orders.user_id <- rep(user_id, tenure)
orders.created_month <- as.Date(unlist(generate_order_months(signup_dates = signup_date, tenures = tenure)))
users <- data.frame(user_id, signup_date, signup_channel)
orders <- data.frame(orders.user_id, orders.created_month, orders.subscription_amount)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment