Skip to content

Instantly share code, notes, and snippets.

@mrdwab
Created March 25, 2013 08:27
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mrdwab/5235666 to your computer and use it in GitHub Desktop.
Save mrdwab/5235666 to your computer and use it in GitHub Desktop.
Copy the plain-text report shown at stackoverflow.com/reputation into a file and pass that file to parse_so_rep_page() to get a data.frame with one row per reputation event, ready for further analysis.
#' Parse a saved Stack Overflow reputation report into a data frame
#'
#' Reads the text report, discards its first line, and turns every remaining
#' event line (`action_id question_id rep_change`) into one row. Lines that
#' start with "-" are treated as per-day summary lines: each one closes the
#' run of event lines that precede it and supplies their date. Everything
#' from the last such line to the end of the file is ignored.
#'
#' @param rep_file Passed straight to `readLines()`; typically the path of a
#'   text file holding the copied report.
#' @return A data.frame with the columns `action_id` (factor), `question_id`,
#'   `rep_change` (numeric, brackets/parentheses stripped), `date` (Date) and
#'   `actions` (factor with descriptive labels). Action ids outside the known
#'   set become `NA` in both factor columns.
parse_so_rep_page <- function(rep_file) {
  # Authors: Paul Hiemstra, Ananda Mahto
  all_data <- readLines(rep_file)
  # The first line is not an event row (presumably a header -- confirm
  # against the live page), so it is dropped unconditionally.
  all_data <- all_data[-1]
  # read.table() skips blank lines; remove them here as well so the per-day
  # counts derived from line positions stay aligned with the parsed rows.
  all_data <- all_data[!grepl("^\\s*$", all_data)]

  ## Deal with bonuses: rewrite the "-- bonuses <rep>" line as an ordinary
  ## event row (action id 99, no question id) so it is not taken for a date.
  all_data <- gsub("-- bonuses\\s+(.*)", " 99 NA \\1", all_data)

  date_entries <- grep("^-", all_data)
  if (length(date_entries) == 0L) {
    stop("no date summary lines (starting with \"-\") found in input",
         call. = FALSE)
  }
  # Number of event lines sitting above each date line.
  actions_per_day <- c(date_entries[1], diff(date_entries)) - 1

  # Keep only event lines: drop every date line plus the trailing block that
  # starts at the last date line.
  last_date <- date_entries[length(date_entries)]
  event_lines <- all_data[-c(date_entries, last_date:length(all_data))]
  if (length(event_lines) == 0L) {
    stop("no reputation event lines found before the last date line",
         call. = FALSE)
  }

  dat <- read.table(text = event_lines)
  names(dat) <- c("action_id", "question_id", "rep_change")
  # rep_change arrives wrapped in () or []; strip the wrappers before
  # converting to a number.
  dat$rep_change <- as.numeric(gsub("\\(|\\)|\\[|\\]", "", dat$rep_change))
  dat$date <- rep(all_data[date_entries], times = actions_per_day)
  dat$date <- as.Date(gsub("-- (.*) rep.*", "\\1", dat$date))

  # Action ids 1 and 3 are ambiguous on their own; the size of the rep change
  # tells which side of the accept / downvote the user was on.
  actions <- as.character(dat$action_id)
  actions[dat$action_id == 1] <- with(
    dat[dat$action_id == 1, ],
    ifelse(rep_change == 15, "YourAnswerAccepted", "AnswerAcceptedByYou"))
  actions[dat$action_id == 3] <- with(
    dat[dat$action_id == 3, ],
    ifelse(rep_change == -1, "YouDownvoted", "YouWereDownvoted"))

  dat$actions <- factor(
    actions,
    levels = c("AnswerAcceptedByYou", "YourAnswerAccepted", "2",
               "YouDownvoted", "YouWereDownvoted",
               "4", "8", "9", "12", "16", "99"),
    labels = c("AnswerAcceptedByYou", "YourAnswerAccepted", "Upvote",
               "YouDownvoted", "YouWereDownvoted", "Penalty-Offensive",
               "BountyOffered", "BountyReceived", "Penalty-Spam",
               "EditApproved", "Bonus"))
  dat$action_id <- factor(dat$action_id, c(1:4, 8, 9, 12, 16, 99))
  dat
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment