Skip to content

Instantly share code, notes, and snippets.

@marcovivero
Last active January 21, 2016 14:59
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save marcovivero/0968807923b70c79862e to your computer and use it in GitHub Desktop.
Save marcovivero/0968807923b70c79862e to your computer and use it in GitHub Desktop.
# 1. Load packages. Install any packages if necessary.
library(ggplot2)
library(MCMCpack)
library(rootSolve)
# 2. Extract data.
cv = read.csv("<FILE_PATH>/customerValue.csv", header = F)[, 1]
pcv = cv[which(cv > 0)]
pcvDF = data.frame(pcv)
names(pcvDF) = c("pcv")
### Data histogram.
ggplot(data = pcvDF) +
labs(
title = "Positive CV Score Histogram",
x ="CV Score (Dollars)",
y = NULL) +
theme(plot.title = element_text(face = "bold", size = 25)) +
theme_bw() +
geom_histogram(
aes(x = pcv),
fill = "orange",
alpha = 1,
colour = "grey",
lwd = 0.5,
binwidth = 25) +
coord_cartesian(xlim = c(0, 1000))
# 3. From blog post, we set p to be 0.9
p = 0.9
# 4. Computing CV Cutoff using empirical quantiles.
### Function for computing pth quantile of
computeQuantile = function(x) {
quantile(x, c(p))[1]
}
### Compute the pth quantile of positive CV score data: 514.938
pcvQuantile = computeQuantile(pcv)
### Function for getting sample of size n with replacement.
getSample = function(n) {
sample(pcv, n, replace = T)
}
### Function for computing boostrap sample for a numerical statistic.
getBootstrapEstimates = function(data, B, n, func) {
samples = numeric(B)
for (i in 1:B) {
bootstrapSample = getSample(n)
samples[i] = func(bootstrapSample)
}
return(samples)
}
### Get bootstrap sample for pth quantile estimates using B = 2500, n = 5000.
B = 5000
n = 7500
### Generate bootstrap quantile estimates.
quantileEstimates = getBootstrapEstimates(pcv, B, n, computeQuantile)
### Sorted estimates with actual positive CV score quantile.
sortedEstimatesWithPCVQuantile = sort(c(quantileEstimates, pcvQuantile))
### Get estimate mean quantile q with respect to rest of bootstrap estimates.
q = which(sortedEstimatesWithPCVQuantile == pcvQuantile)[1]/ (B + 1)
upperQuantile = min(q + (0.95 / 2), 1)
lowerQuantile = max(q - (0.95 / 2), 0)
confidenceInterval = quantile(quantileEstimates, c(lowerQuantile, upperQuantile))
### Confidence interval plot.
quantileEstimatesDF = data.frame(quantileEstimates)
names(quantileEstimatesDF)[1] = "x"
confidenceDF = data.frame(
c(
rep(confidenceInterval[1], 2),
rep(confidenceInterval[2], 2),
confidenceInterval[1]),
c(0, 1200, 1200, 0, 0))
names(confidenceDF) = c("x", "y")
### Create bootstrap estimate plot with shaded confidence region.
ggplot() +
labs(
title = "Empirical Quantile Bootsrap Estimates With Shaded Confidence Set",
x ="90% Quantile Estimate",
y = NULL) +
theme(plot.title = element_text(face = "bold", size = 25)) +
theme_bw() +
geom_histogram(
aes(x = x),
data = quantileEstimatesDF,
fill = "orange",
alpha = 1,
colour = "grey",
lwd = 0.2,
binwidth = 3) +
geom_polygon(
aes(x = x, y = y),
data = confidenceDF,
fill = "red",
lwd = 0,
alpha = 0.5) +
geom_vline(xintercept = pcvQuantile, colour = "blue") +
coord_cartesian(xlim = c(400, 550))
### Compute quantile using kernel density estimates.
densityQuantile = ewcdf(pcv, c(p))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment