Instantly share code, notes, and snippets.

Embed
What would you like to do?
# Libraries and options
# RMySQL supplies the "MySQL" driver used by dbConnect() in npp(), ggplot2 the
# density plots and ggsave(), and data.table the as.data.table()/merge() calls.
library(RMySQL)
library(ggplot2)
library(data.table)
# scipen = 500 biases number formatting strongly towards fixed notation, so
# values are not written in scientific notation.
# NOTE(review): "quit" does not appear to be a standard R option -- presumably
# it was meant to avoid saving the workspace on exit; confirm it has any effect.
options(quit = "no", scipen = 500)
# Summarise page-curation ("pagetriage-curation") review activity on enwiki.
#
# Queries the `logging` table (joined to `user`) for 'reviewed' and
# 'unreviewed' actions with log_timestamp >= '20140520162601', pairs every
# 'reviewed' row with any 'unreviewed' rows for the same page, and then, for
# four slices of that table, saves a plain and a log10 density plot of the
# reviewing user's edit count and prints the edit-count quantiles:
#   1. unique reviewers                 2. all review actions
#   3. unique reviewers who were later  4. all review actions that were later
#      unreviewed                          unreviewed
#
# Takes no arguments. Writes eight PNG files to the working directory.
# Returns (invisibly) the quantiles of slice 4, which is the value the
# function has always returned.
npp <- function() {

  # Run the query in its own frame so that on.exit() releases the connection
  # as soon as the query finishes -- or fails -- instead of leaking it.
  fetch_patrol_log <- function() {
    con <- dbConnect(drv = "MySQL",
                     host = "analytics-store.eqiad.wmnet",
                     dbname = "enwiki")
    on.exit(dbDisconnect(con), add = TRUE)
    dbGetQuery(con,
               statement = "SELECT
log_action AS action,
log_user AS user,
log_title AS page,
LEFT(user_registration,6) AS registration,
user_editcount AS edits
FROM logging INNER JOIN user
ON log_user = user_id
WHERE log_type = 'pagetriage-curation'
AND log_action IN('reviewed','unreviewed')
AND log_timestamp >= '20140520162601';")
  }

  # Keep only the first row seen for each reviewing user.
  first_row_per_user <- function(rows) {
    rows[!duplicated(rows$user.x), ]
  }

  # Draw one density plot of the reviewer's edit count and save it to disk.
  save_density <- function(rows, title, filename, log_scale = FALSE) {
    mapping <- if (log_scale) aes(log10(edits.x)) else aes(edits.x)
    density_plot <- ggplot(data = rows, mapping) +
      geom_density(colour = "blue", fill = "blue") +
      labs(title = title,
           x = if (log_scale) "edits (log10)" else "edits")
    ggsave(filename, density_plot)
  }

  # Save the plain + log10 plots for one slice, then print and return its
  # edit-count quantiles. Printing is explicit because a bare quantile() call
  # inside a function is evaluated and then thrown away.
  report_slice <- function(rows, label, title, log_title,
                           filename, log_filename) {
    save_density(rows, title, filename)
    save_density(rows, log_title, log_filename, log_scale = TRUE)
    edit_quantiles <- quantile(rows$edits.x)
    cat("Edit count quantiles,", label, "\n")
    print(edit_quantiles)
    edit_quantiles
  }

  # Retrieve and parse data
  log_rows <- as.data.table(fetch_patrol_log())

  # One row per 'reviewed' action, with columns suffixed .x; the .y columns are
  # filled from 'unreviewed' actions on the same page and are NA when there is
  # none (all.x = TRUE). A page can match several times, hence allow.cartesian.
  patrols <- merge(x = log_rows[log_rows$action == "reviewed", ],
                   y = log_rows[log_rows$action == "unreviewed", ],
                   by = "page", all.x = TRUE,
                   allow.cartesian = TRUE)

  # Editor plots and quantiles
  # Previously recorded output:
  #   0%     25%     50%      75%     100%
  # 11.0   470.0  2500.0  11428.5 331811.0
  report_slice(
    first_row_per_user(patrols),
    label = "unique page patrollers",
    title = "distribution of unique page patrollers, by edit count",
    log_title = "distribution of unique page patrollers, by edit count, log10",
    filename = "editor_density.png",
    log_filename = "log10_editor_density.png"
  )

  # Patrol plots and quantiles
  # Previously recorded output:
  # 0%  25%  50%   75%   100%
  # 11 1017 3350 19131 331811
  report_slice(
    patrols,
    label = "patrol actions",
    title = "distribution of patrol actions, by patroller edit count",
    log_title = "distribution of patrol actions, by patroller edit count, log10",
    filename = "patrol_density.png",
    log_filename = "log10_patrol_density.png"
  )

  # Unreviewed patrols: review actions with a matching 'unreviewed' row
  unreviewed <- patrols[!is.na(patrols$action.y), ]

  # Editor plots and quantiles
  # The "unreviwed" spelling in the file names is kept on purpose so that
  # anything consuming the existing output files keeps working.
  # Previously recorded output:
  #    0%    25%     50%     75%      100%
  # 19.00 712.25 2289.50 9172.25 151824.00
  report_slice(
    first_row_per_user(unreviewed),
    label = "unique reverted page patrollers",
    title = "distribution of unique reverted page patrollers, by edit count",
    log_title = "distribution of unique reverted page patrollers, by edit count, log10",
    filename = "unreviwed_editor_density.png",
    log_filename = "log10_unreviwed_editor_density.png"
  )

  # Patrol plots and quantiles
  # Previously recorded output:
  # 0% 25%  50%   75%   100%
  # 19 880 2818 10046 151824
  unreviewed_patrol_quantiles <- report_slice(
    unreviewed,
    label = "reverted patrol actions",
    title = "distribution of reverted patrol actions, by patroller edit count",
    log_title = "distribution of reverted patrol actions, by patroller edit count, log10",
    filename = "unreviewed_patrol_density.png",
    log_filename = "unreviewed_log10_patrol_density.png"
  )

  # Already printed above; return invisibly so a top-level call does not
  # print the same quantiles a second time.
  invisible(unreviewed_patrol_quantiles)
}
# Execute the analysis, then end the R session without saving the workspace
npp()
quit(save = "no")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment