Skip to content

Instantly share code, notes, and snippets.

@chelsyx
Created June 22, 2016 02:43
Show Gist options
  • Save chelsyx/22c1d65ac7538e84849da9d3e8b5df6b to your computer and use it in GitHub Desktop.
Save chelsyx/22c1d65ac7538e84849da9d3e8b5df6b to your computer and use it in GitHub Desktop.
Hero Drivers in Gotham
#####
# Goals:
# 1) Suggest a cutoff (number of trips) and a bonus amount ($)
# 2) Estimate the expected number of additional trips
# 3) Estimate the total expenditure of the promotion
# 4) Identify other metrics to pay attention to
####
library(dplyr)
library(ggplot2)
# Load the raw trip records and keep only trips with a positive fare.
# which() drops rows whose fare is NA; plain logical indexing would instead
# turn each of them into an all-NA row.
data0 <- read.csv("go_data_challenge.csv")
data0 <- data0[which(data0$fare > 0), ]

# Parse the request time and store it as POSIXct. strptime() on its own
# returns POSIXlt, a list-based class that is poorly suited to being a
# data-frame column.
data0$request_timestamp <- as.POSIXct(
  strptime(data0$request_timestamp, "%Y-%m-%d %H:%M:%S")
)

# Week of the year as a two-digit string ("%W": weeks start on Monday).
data0$week <- strftime(data0$request_timestamp, format = "%W")
range(data0$request_timestamp) # "2015-12-31 16:00:26 PST" "2016-04-07 09:46:36 PDT"
table(data0$week)
n_driver <- length(unique(data0$driver_id)) # 2401 drivers

# Part of each fare above fare / surge_multiplier (presumably the un-surged
# base fare - TODO confirm); it is 0 when the multiplier is exactly 1.
data0$extra_fare <- (1 - 1 / data0$surge_multiplier) * data0$fare
# TRUE for trips taken at a surge multiplier above 1.
data0$surge_trip <- data0$surge_multiplier > 1
# Per-driver, per-week activity summary:
#   n_trip     - number of trips (rows) in that driver-week
#   extra_rev  - summed extra_fare (surge-attributable fare)
#   extra_trip - number of trips flagged as surge_trip
# Column 2 is dropped by position before grouping; presumably this is the
# parsed timestamp column - TODO confirm against the CSV layout.
# n() counts rows directly, so the summary does not depend on an `X` column
# (presumably the auto-named row-index column from read.csv) being present.
trip_count <- data0[, -2] %>%
  group_by(driver_id, week) %>%
  summarise(
    n_trip = n(),
    extra_rev = sum(extra_fare),
    extra_trip = sum(surge_trip)
  )
# Convert back to a plain data.frame for the base-R indexing used later.
trip_count <- as.data.frame(trip_count)
# Exclude weeks "00", "52" and "14" from the analysis.
# Reasons given: 1) holiday, 2) not a 7-day week.
week_mask <- match(trip_count$week, c("00", "52", "14"), nomatch = 0L) > 0L
trip_count1 <- trip_count[which(!week_mask), ]

# Distribution of weekly trip counts: highly skewed to zero, long tail.
plot1 <- qplot(x = trip_count1$n_trip, xlab = "Number of Trips per Week")

# Surge trips plotted against surge revenue, one point per driver-week.
plot2 <- qplot(
  x = trip_count1$extra_rev,
  y = trip_count1$extra_trip,
  xlab = "Extra Revenue",
  ylab = "Number of Surge Trips(Extra Trips)"
)
##########
# Linear fit of surge-trip count (yy) on the (1 - uber_perc) share of surge
# revenue (xx), using only driver-weeks with positive surge revenue.
uber_perc <- 0.25 # presumably the platform's commission rate - TODO confirm
xxmask <- 0 < trip_count1$extra_rev
yy <- trip_count1[xxmask, "extra_trip"]
xx <- trip_count1[xxmask, "extra_rev"] * (1 - uber_perc)
fit_lm <- lm(formula = yy ~ xx)
summary(fit_lm)

# Evaluate the fitted line at xx = bonus: the bonus amount is plugged in where
# the model was fitted on a revenue amount.
bonus <- 100
extra_trips <- coef(fit_lm)[1] + bonus * coef(fit_lm)[2]
# Rows of trip_count1 per week. Each row is one (driver, week) pair, so this
# is the number of drivers with at least one trip in that week.
driver_count <- summarise(group_by(trip_count1, week), n_driver = n())
# num of driver is increasing
# Average weekly driver count, used below to scale per-driver estimates.
avg_n_driver <- mean(driver_count[["n_driver"]])
##########
# Find the smallest weekly-trip cutoff at which the current `bonus` pays for
# itself, i.e. the promotion's cost does not exceed the extra revenue from the
# additional trips. `least_cutoff` stays NA when no cutoff in the searched
# range breaks even.
uber_perc <- 0.25
least_cutoff <- NA_integer_

# Loop invariants, computed once.
n_driver_weeks <- nrow(trip_count1)
avg_fare <- mean(data0$fare)
first_cutoff <- ceiling(extra_trips)
last_cutoff <- max(trip_count1$n_trip)

# Guard the range: first_cutoff:last_cutoff would silently count DOWN if the
# predicted extra trips exceeded the largest observed weekly trip count.
if (first_cutoff <= last_cutoff) {
  for (cutoff in first_cutoff:last_cutoff) {
    # Share of driver-weeks currently below the cutoff (those the bonus could
    # motivate), and the additional trips they are expected to add.
    mot_perc <- sum(trip_count1$n_trip < cutoff) / n_driver_weeks
    add_trip <- extra_trips * avg_n_driver * mot_perc
    total_extra_rev <- avg_fare * add_trip * uber_perc

    # Share of driver-weeks that would exceed the cutoff once extra_trips are
    # added, and the bonus paid out to them.
    driver_mask <- trip_count1$n_trip > (cutoff - extra_trips)
    driver_perc <- sum(driver_mask) / n_driver_weeks
    to_expense <- avg_n_driver * driver_perc * bonus

    if (to_expense <= total_extra_rev) {
      least_cutoff <- cutoff
      break
    }
  }
}
##########
# Repeat the break-even cutoff search over a grid of bonus amounts.
# For each bonus, `results` records the smallest cutoff at which expenditure
# does not exceed extra revenue, together with the expected additional trips
# and the expenditure at that cutoff. Rows stay NA when no cutoff in the
# searched range breaks even.
uber_perc <- 0.25
bonus_list <- seq(25, 250, by = 25)
results <- data.frame(
  Bonus = bonus_list,
  Least_Cutoff = NA_real_,
  Add_Trips = NA_real_,
  Expenditure = NA_real_
)

# Loop invariants, computed once.
n_driver_weeks <- nrow(trip_count1)
avg_fare <- mean(data0$fare)
last_cutoff <- max(trip_count1$n_trip)

for (i in seq_along(bonus_list)) {
  bonus <- bonus_list[i]
  # Evaluate the fitted line at xx = bonus.
  extra_trips <- coef(fit_lm)[1] + coef(fit_lm)[2] * bonus
  first_cutoff <- ceiling(extra_trips)

  # Skip this bonus instead of letting first_cutoff:last_cutoff count
  # downward when the predicted extra trips exceed the observed maximum.
  if (first_cutoff > last_cutoff) {
    next
  }

  for (cutoff in first_cutoff:last_cutoff) {
    # Share of driver-weeks below the cutoff and the trips they would add.
    mot_perc <- sum(trip_count1$n_trip < cutoff) / n_driver_weeks
    add_trip <- extra_trips * avg_n_driver * mot_perc
    total_extra_rev <- avg_fare * add_trip * uber_perc

    # Share of driver-weeks that would exceed the cutoff after adding
    # extra_trips, and the bonus paid out to them.
    driver_mask <- trip_count1$n_trip > (cutoff - extra_trips)
    driver_perc <- sum(driver_mask) / n_driver_weeks
    to_expense <- avg_n_driver * driver_perc * bonus

    if (to_expense <= total_extra_rev) {
      results[i, "Least_Cutoff"] <- cutoff
      results[i, "Add_Trips"] <- add_trip
      results[i, "Expenditure"] <- to_expense
      break
    }
  }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment