Skip to content

Instantly share code, notes, and snippets.

@ccagrawal
Created August 4, 2013 22:23
Show Gist options
  • Save ccagrawal/6152192 to your computer and use it in GitHub Desktop.
Save ccagrawal/6152192 to your computer and use it in GitHub Desktop.
WordPress Views and MCMC
# For each day, find the appropriate Poisson Parameter and use it to calculate the likelihood of the actual view count
# Likelihood = P(D|H)
Calc.Likelihood <- function(changes, poissons, stats, periods) {
likelihood <- 0
for (day in 1:nrow(stats)) {
proceed <- FALSE
period <- 1
while (!proceed) {
if (day < changes[period]) {
likelihood <- likelihood + dpois(stats[day, 2], poissons[period], log = TRUE) # Add log likelihoods
proceed <- TRUE
}
period <- period + 1
}
}
return(likelihood)
}
api.key <- "blah blah" # Insert your own API Key
blog.id <- 696969 # Insert your own blog ID
table <- "views" # View counts
days <- -1 # Unlimited number of days
url <- paste("http://stats.wordpress.com/csv.php?api_key=", api.key,
"&blog_id=", blog.id, "&table=", table, "&days=", days, sep = "")
stats <- read.csv(url, header = TRUE)
stats$date <- as.Date(stats$date)
# Stats does not include days with 0 views, manually add those by creating a new data frame with all dates in between
adj.stats <- as.data.frame(seq(from = min(stats$date), to = max(stats$date), by = 1))
colnames(adj.stats) <- "date"
stats <- merge(stats, adj.stats, by = "date", all = TRUE)
stats[is.na(stats)] <- 0
thin <- 10 # To reduce autocorrelation, use thinning to only keep 1 of every 10 samples
burn <- 2000 # Use a "burn" period since we're starting with vague priors
sample <- 10000
periods <- 3 # My view count seems to be divided in 3 general periods
posteriors <- matrix(data = 0, nrow = burn + thin * sample, ncol = periods * 2)
# Assume each "period" consists of at least 30 days
# Thus, randomly sample starting from day 30 to day (end - 30)
# Make last "change day" equal to last blog day
# Highest 10 day avg views was about 21, so randomly sample poisson parameters from 0 to 22
changes <- c(sort(sample(30:nrow(stats) - 30, periods - 1, replace = TRUE)), nrow(stats) + 1)
poissons <- sample(0:22, periods, replace = TRUE)
posteriors[1, ] <- c(changes, poissons)
old.prob <- Calc.Likelihood(changes, poissons, stats, periods)
for (iter in 2:nrow(posteriors)) {
parameter <- iter %% (periods * 2) # Only change one parameter at a time
if (parameter < periods) {
changes <- posteriors[iter - 1, c(1:periods)]
if ((parameter + 1) != periods) { # Don't change the last day
changes[parameter + 1] <- changes[parameter + 1] + round(rnorm(1, 0, 15))
}
changes[changes < 30] <- 30 # Minimum change day is 30
changes <- sort(changes)
for (change in periods:2) {
if (changes[change] - changes[change - 1] < 30) # Make sure gaps are at least 30 days
changes[change - 1] = changes[change] - 30
}
}
else {
poissons <- posteriors[iter - 1, c((periods + 1):(2 * periods))]
poissons[parameter - periods + 1] <- poissons[parameter - periods + 1] + round(rnorm(1, 0, 2))
poissons[poissons < 0] <- 0 # Poisson distributions cannot have negative parameters
}
new.prob <- Calc.Likelihood(changes, poissons, stats, periods)
switch.prob <- min(1, exp(new.prob - old.prob))
random <- runif(1, min = 0, max = 1)
if (random < switch.prob) {
posteriors[iter, ] <- c(changes, poissons)
old.prob <- new.prob
}
else {
posteriors[iter, ] <- posteriors[iter - 1, ]
}
}
posteriors <- posteriors[seq(from = burn + 1, to = burn + thin * sample, by = thin), ]
# For each day, average the number of views from each posterior
expected.views <- numeric(nrow(stats))
for (day in 1:nrow(stats)) {
sum <- 0
for (posterior in 1:nrow(posteriors)) {
proceed <- FALSE
period <- 1
while (!proceed) {
if (day < posteriors[posterior, period]) {
sum <- sum + posteriors[posterior, period + periods]
proceed <- TRUE
}
period <- period + 1
}
}
expected.views[day] <- sum/nrow(posteriors)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment