# chelsyx/hero_driver.R

## hero_driver.R
#####
# Goal:
# 1) suggest cutoff(no. of trips), and bonus($)
# 2) Expected no of additional trips
# 3) Total expenditure of the promotion
# 4) Other metrics to pay attention to
####

library(dplyr)
library(ggplot2)

# Load the trip-level data and keep only trips with a strictly positive fare.
# which() drops rows whose fare is NA; plain logical indexing would turn them
# into all-NA rows that then leak into the per-driver aggregates below.
data0 <- read.csv("go_data_challenge.csv")
data0 <- data0[which(data0$fare > 0), ]

# Parse the request time. No tz is given, so the session time zone is used.
# "%W" is the week of the year ("00"-"53") with Monday as the first weekday.
data0$request_timestamp <- strptime(data0$request_timestamp, "%Y-%m-%d %H:%M:%S")
data0$week <- strftime(data0$request_timestamp, format = "%W")

range(data0$request_timestamp) # "2015-12-31 16:00:26 PST" "2016-04-07 09:46:36 PDT"
table(data0$week)
n_driver <- length(unique(data0$driver_id)) # 2401 drivers

# Part of each fare in excess of fare / surge_multiplier, and a flag marking
# trips that had a surge multiplier above 1.
data0$extra_fare <- (1 - 1 / data0$surge_multiplier) * data0$fare
data0$surge_trip <- data0$surge_multiplier > 1

# One row per (driver, week): trip count, summed surge revenue, surge trips.
# Only the columns that are needed are selected, by name, so the result does
# not depend on column positions and the POSIXlt timestamp column (which
# strptime() produces) never reaches dplyr.
trip_count <- data0[, c("driver_id", "week", "extra_fare", "surge_trip")] %>%
  group_by(driver_id, week) %>%
  summarise(
    n_trip = n(),
    extra_rev = sum(extra_fare),
    extra_trip = sum(surge_trip)
  )
trip_count <- as.data.frame(trip_count)

# Drop weeks 00, 52 and 14. Reasons: 1) holiday, 2) not a full 7-day week.
week_mask <- trip_count$week %in% c("00", "52", "14")
trip_count1 <- trip_count[!week_mask, ]

# Histogram of weekly trips: highly skewed to zero, long tail.
plot1 <- ggplot(trip_count1, aes(x = n_trip)) +
  geom_histogram() +
  xlab("Number of Trips per Week")
# Scatter of weekly surge revenue against weekly surge trips.
plot2 <- ggplot(trip_count1, aes(x = extra_rev, y = extra_trip)) +
  geom_point() +
  xlab("Extra Revenue") +
  ylab("Number of Surge Trips(Extra Trips)")

##########

# Platform's share of each fare; the driver keeps 1 - uber_perc.
uber_perc <- 0.25

# Regress the weekly surge-trip count (yy) on the driver's cut of the weekly
# surge revenue (xx), using only driver-weeks with positive surge revenue.
xxmask <- trip_count1$extra_rev > 0
xx <- trip_count1[xxmask, "extra_rev"] * (1 - uber_perc)
yy <- trip_count1[xxmask, "extra_trip"]
fit_lm <- lm(yy ~ xx)
summary(fit_lm)


# Read the fitted line at xx = bonus to get the number of extra trips
# associated with that many dollars.
bonus <- 100
extra_trips <- coef(fit_lm)[1] + bonus * coef(fit_lm)[2]

# Rows per week in trip_count1, i.e. drivers with at least one trip that week.
driver_count <- summarise(group_by(trip_count1, week), n_driver = n())
# num of driver is increasing
avg_n_driver <- mean(driver_count[["n_driver"]])

##########

# Share of each fare kept by the platform.
uber_perc <- 0.25

# Search for the smallest weekly-trip cutoff at which the promotion pays for
# itself: the platform's cut of the additional trips must cover the bonuses
# paid out. Uses `bonus` and `extra_trips` as set earlier in the script.

# Loop invariants, computed once.
n_driver_weeks <- nrow(trip_count1)
max_n_trip <- max(trip_count1$n_trip)
avg_fare <- mean(data0$fare)
first_cutoff <- ceiling(extra_trips)

# Stays NA when no cutoff in the searched range breaks even, instead of being
# left undefined (or stale from an earlier run).
least_cutoff <- NA

# `a:b` counts downwards when a > b; use an empty range in that case so that
# cutoffs outside [first_cutoff, max_n_trip] are never evaluated.
cutoff_range <- if (first_cutoff <= max_n_trip) {
  first_cutoff:max_n_trip
} else {
  integer(0)
}

for (cutoff in cutoff_range) {
  # Fraction of driver-weeks currently below the cutoff.
  mot_perc <- sum(trip_count1$n_trip < cutoff) / n_driver_weeks
  # Additional trips per week, and the platform's revenue from them.
  add_trip <- extra_trips * avg_n_driver * mot_perc
  total_extra_rev <- avg_fare * add_trip * uber_perc

  # Driver-weeks whose trip count exceeds cutoff - extra_trips.
  # NOTE(review): strict `>` excludes n_trip + extra_trips == cutoff; confirm
  # whether hitting the cutoff exactly should earn the bonus.
  driver_mask <- trip_count1$n_trip > (cutoff - extra_trips)
  driver_perc <- sum(driver_mask) / n_driver_weeks

  # Weekly bonus bill; stop at the first cutoff where revenue covers it.
  to_expense <- avg_n_driver * driver_perc * bonus
  if (to_expense <= total_extra_rev) {
    least_cutoff <- cutoff
    break
  }
}


##########

# Share of each fare kept by the platform.
uber_perc <- 0.25
# Candidate bonus amounts ($) to evaluate.
bonus_list <- c(25, 50, 75, 100, 125, 150, 175, 200, 225, 250)

# One row per bonus. Columns start as numeric NA and stay NA for any bonus
# that never breaks even within the searched cutoffs.
results <- data.frame(
  Bonus = bonus_list,
  Least_Cutoff = rep(NA_real_, length(bonus_list)),
  Add_Trips = rep(NA_real_, length(bonus_list)),
  Expenditure = rep(NA_real_, length(bonus_list))
)

# Loop invariants, computed once.
n_driver_weeks <- nrow(trip_count1)
max_n_trip <- max(trip_count1$n_trip)
avg_fare <- mean(data0$fare)

# seq_along() is safe for an empty bonus_list (1:0 would iterate twice).
for (i in seq_along(bonus_list)) {
  bonus <- bonus_list[i]
  # Extra trips read off the fitted line at xx = bonus.
  extra_trips <- coef(fit_lm)[1] + coef(fit_lm)[2] * bonus
  first_cutoff <- ceiling(extra_trips)

  # `a:b` counts downwards when a > b; use an empty range in that case.
  cutoff_range <- if (first_cutoff <= max_n_trip) {
    first_cutoff:max_n_trip
  } else {
    integer(0)
  }

  for (cutoff in cutoff_range) {
    # Fraction of driver-weeks currently below the cutoff.
    mot_perc <- sum(trip_count1$n_trip < cutoff) / n_driver_weeks
    # Additional trips per week, and the platform's revenue from them.
    add_trip <- extra_trips * avg_n_driver * mot_perc
    total_extra_rev <- avg_fare * add_trip * uber_perc

    # Driver-weeks whose trip count exceeds cutoff - extra_trips.
    # NOTE(review): strict `>` excludes n_trip + extra_trips == cutoff.
    driver_mask <- trip_count1$n_trip > (cutoff - extra_trips)
    driver_perc <- sum(driver_mask) / n_driver_weeks

    # Weekly bonus bill; record the first cutoff where revenue covers it.
    to_expense <- avg_n_driver * driver_perc * bonus
    if (to_expense <= total_extra_rev) {
      results[i, "Least_Cutoff"] <- cutoff
      results[i, "Add_Trips"] <- add_trip
      results[i, "Expenditure"] <- to_expense
      break
    }
  }
}
	#####
	# Goal:
	# 1) suggest cutoff(no. of trips), and bonus($)
	# 2) Expected no of additional trips
	# 3) Total expenditure of the promotion
	# 4) Other metrics to pay attention to
	####

	library(dplyr)
	library(ggplot2)

	# Load the trip-level data and keep only trips with a strictly positive fare.
	# which() drops rows whose fare is NA; plain logical indexing would turn them
	# into all-NA rows that then leak into the per-driver aggregates below.
	data0 <- read.csv("go_data_challenge.csv")
	data0 <- data0[which(data0$fare > 0), ]

	# Parse the request time. No tz is given, so the session time zone is used.
	# "%W" is the week of the year ("00"-"53") with Monday as the first weekday.
	data0$request_timestamp <- strptime(data0$request_timestamp, "%Y-%m-%d %H:%M:%S")
	data0$week <- strftime(data0$request_timestamp, format = "%W")

	range(data0$request_timestamp) # "2015-12-31 16:00:26 PST" "2016-04-07 09:46:36 PDT"
	table(data0$week)
	n_driver <- length(unique(data0$driver_id)) # 2401 drivers

	# Part of each fare in excess of fare / surge_multiplier, and a flag marking
	# trips that had a surge multiplier above 1.
	data0$extra_fare <- (1 - 1 / data0$surge_multiplier) * data0$fare
	data0$surge_trip <- data0$surge_multiplier > 1

	# One row per (driver, week): trip count, summed surge revenue, surge trips.
	# Only the columns that are needed are selected, by name, so the result does
	# not depend on column positions and the POSIXlt timestamp column (which
	# strptime() produces) never reaches dplyr.
	trip_count <- data0[, c("driver_id", "week", "extra_fare", "surge_trip")] %>%
	  group_by(driver_id, week) %>%
	  summarise(
	    n_trip = n(),
	    extra_rev = sum(extra_fare),
	    extra_trip = sum(surge_trip)
	  )
	trip_count <- as.data.frame(trip_count)

	# Drop weeks 00, 52 and 14. Reasons: 1) holiday, 2) not a full 7-day week.
	week_mask <- trip_count$week %in% c("00", "52", "14")
	trip_count1 <- trip_count[!week_mask, ]

	# Histogram of weekly trips: highly skewed to zero, long tail.
	plot1 <- ggplot(trip_count1, aes(x = n_trip)) +
	  geom_histogram() +
	  xlab("Number of Trips per Week")
	# Scatter of weekly surge revenue against weekly surge trips.
	plot2 <- ggplot(trip_count1, aes(x = extra_rev, y = extra_trip)) +
	  geom_point() +
	  xlab("Extra Revenue") +
	  ylab("Number of Surge Trips(Extra Trips)")

	##########

	# Platform's share of each fare; the driver keeps 1 - uber_perc.
	uber_perc <- 0.25

	# Regress the weekly surge-trip count (yy) on the driver's cut of the weekly
	# surge revenue (xx), using only driver-weeks with positive surge revenue.
	xxmask <- trip_count1$extra_rev > 0
	xx <- trip_count1[xxmask, "extra_rev"] * (1 - uber_perc)
	yy <- trip_count1[xxmask, "extra_trip"]
	fit_lm <- lm(yy ~ xx)
	summary(fit_lm)


	# Read the fitted line at xx = bonus to get the number of extra trips
	# associated with that many dollars.
	bonus <- 100
	extra_trips <- coef(fit_lm)[1] + bonus * coef(fit_lm)[2]

	# Rows per week in trip_count1, i.e. drivers with at least one trip that week.
	driver_count <- summarise(group_by(trip_count1, week), n_driver = n())
	# num of driver is increasing
	avg_n_driver <- mean(driver_count[["n_driver"]])

	##########

	# Share of each fare kept by the platform.
	uber_perc <- 0.25

	# Search for the smallest weekly-trip cutoff at which the promotion pays for
	# itself: the platform's cut of the additional trips must cover the bonuses
	# paid out. Uses `bonus` and `extra_trips` as set earlier in the script.

	# Loop invariants, computed once.
	n_driver_weeks <- nrow(trip_count1)
	max_n_trip <- max(trip_count1$n_trip)
	avg_fare <- mean(data0$fare)
	first_cutoff <- ceiling(extra_trips)

	# Stays NA when no cutoff in the searched range breaks even, instead of being
	# left undefined (or stale from an earlier run).
	least_cutoff <- NA

	# `a:b` counts downwards when a > b; use an empty range in that case so that
	# cutoffs outside [first_cutoff, max_n_trip] are never evaluated.
	cutoff_range <- if (first_cutoff <= max_n_trip) {
	  first_cutoff:max_n_trip
	} else {
	  integer(0)
	}

	for (cutoff in cutoff_range) {
	  # Fraction of driver-weeks currently below the cutoff.
	  mot_perc <- sum(trip_count1$n_trip < cutoff) / n_driver_weeks
	  # Additional trips per week, and the platform's revenue from them.
	  add_trip <- extra_trips * avg_n_driver * mot_perc
	  total_extra_rev <- avg_fare * add_trip * uber_perc

	  # Driver-weeks whose trip count exceeds cutoff - extra_trips.
	  # NOTE(review): strict `>` excludes n_trip + extra_trips == cutoff; confirm
	  # whether hitting the cutoff exactly should earn the bonus.
	  driver_mask <- trip_count1$n_trip > (cutoff - extra_trips)
	  driver_perc <- sum(driver_mask) / n_driver_weeks

	  # Weekly bonus bill; stop at the first cutoff where revenue covers it.
	  to_expense <- avg_n_driver * driver_perc * bonus
	  if (to_expense <= total_extra_rev) {
	    least_cutoff <- cutoff
	    break
	  }
	}


	##########

	# Share of each fare kept by the platform.
	uber_perc <- 0.25
	# Candidate bonus amounts ($) to evaluate.
	bonus_list <- c(25, 50, 75, 100, 125, 150, 175, 200, 225, 250)

	# One row per bonus. Columns start as numeric NA and stay NA for any bonus
	# that never breaks even within the searched cutoffs.
	results <- data.frame(
	  Bonus = bonus_list,
	  Least_Cutoff = rep(NA_real_, length(bonus_list)),
	  Add_Trips = rep(NA_real_, length(bonus_list)),
	  Expenditure = rep(NA_real_, length(bonus_list))
	)

	# Loop invariants, computed once.
	n_driver_weeks <- nrow(trip_count1)
	max_n_trip <- max(trip_count1$n_trip)
	avg_fare <- mean(data0$fare)

	# seq_along() is safe for an empty bonus_list (1:0 would iterate twice).
	for (i in seq_along(bonus_list)) {
	  bonus <- bonus_list[i]
	  # Extra trips read off the fitted line at xx = bonus.
	  extra_trips <- coef(fit_lm)[1] + coef(fit_lm)[2] * bonus
	  first_cutoff <- ceiling(extra_trips)

	  # `a:b` counts downwards when a > b; use an empty range in that case.
	  cutoff_range <- if (first_cutoff <= max_n_trip) {
	    first_cutoff:max_n_trip
	  } else {
	    integer(0)
	  }

	  for (cutoff in cutoff_range) {
	    # Fraction of driver-weeks currently below the cutoff.
	    mot_perc <- sum(trip_count1$n_trip < cutoff) / n_driver_weeks
	    # Additional trips per week, and the platform's revenue from them.
	    add_trip <- extra_trips * avg_n_driver * mot_perc
	    total_extra_rev <- avg_fare * add_trip * uber_perc

	    # Driver-weeks whose trip count exceeds cutoff - extra_trips.
	    # NOTE(review): strict `>` excludes n_trip + extra_trips == cutoff.
	    driver_mask <- trip_count1$n_trip > (cutoff - extra_trips)
	    driver_perc <- sum(driver_mask) / n_driver_weeks

	    # Weekly bonus bill; record the first cutoff where revenue covers it.
	    to_expense <- avg_n_driver * driver_perc * bonus
	    if (to_expense <= total_extra_rev) {
	      results[i, "Least_Cutoff"] <- cutoff
	      results[i, "Add_Trips"] <- add_trip
	      results[i, "Expenditure"] <- to_expense
	      break
	    }
	  }
	}