Created
August 20, 2014 23:03
-
-
Save Ironholds/82db1544a59cbe12158a to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#Libraries and options | |
library(RMySQL) | |
library(ggplot2) | |
library(data.table) | |
options(quit = "no", scipen = 500) | |
#Analyse page-curation patrol activity on enwiki by patroller edit count.
#
#Queries the logging table (joined to user) for 'reviewed' and 'unreviewed'
#actions of log_type 'pagetriage-curation' with
#log_timestamp >= '20140520162601', then pairs every 'reviewed' row with any
#'unreviewed' rows recorded for the same page. For each of four populations
#(unique patrollers, all patrol rows, unique patrollers with a matching
#'unreviewed' row, all rows with a matching 'unreviewed' row) it saves a
#linear and a log10 density plot of edit count and reports the quantiles.
#
#Takes no arguments.
#Side effects: opens a MySQL connection and always closes it again; writes
#eight PNG files through ggsave() using relative file names; prints the first
#three quantile tables.
#Returns: quantile() of edits.x over all rows that have a matching
#'unreviewed' action (the last table), visibly, exactly as before.
npp <- function() {

  #Run the log query and hand back the rows as a data.frame. This lives in its
  #own function so that on.exit() releases the connection as soon as the query
  #finishes - including when the query itself throws an error.
  fetch_patrol_log <- function() {
    #MySQL() is the driver object exported by RMySQL; passing the driver as a
    #bare string depends on DBI's deprecated character-driver lookup.
    con <- dbConnect(drv = MySQL(),
                     host = "analytics-store.eqiad.wmnet",
                     dbname = "enwiki")
    on.exit(dbDisconnect(con), add = TRUE)
    dbGetQuery(con,
               statement = "SELECT
                 log_action AS action,
                 log_user AS user,
                 log_title AS page,
                 LEFT(user_registration,6) AS registration,
                 user_editcount AS edits
               FROM logging INNER JOIN user
               ON log_user = user_id
               WHERE log_type = 'pagetriage-curation'
               AND log_action IN('reviewed','unreviewed')
               AND log_timestamp >= '20140520162601';")
  }

  #Save two density plots of edits.x for `rows`: one on the raw scale (written
  #to `file`) and one on a log10 scale (written to `log10_file`). The log10
  #plot's title is `title` with ", log10" appended.
  save_density_pair <- function(rows, title, file, log10_file) {
    linear_plot <- ggplot(data = rows, aes(edits.x)) +
      geom_density(colour = "blue", fill = "blue") +
      labs(title = title,
           x = "edits")
    ggsave(file, plot = linear_plot)

    log10_plot <- ggplot(data = rows, aes(log10(edits.x))) +
      geom_density(colour = "blue", fill = "blue") +
      labs(title = paste0(title, ", log10"),
           x = "edits (log10)")
    ggsave(log10_file, plot = log10_plot)
  }

  #Retrieve and parse data
  log_rows <- as.data.table(fetch_patrol_log())

  #Left-join 'reviewed' rows to 'unreviewed' rows on page. Columns from the
  #reviewed side get the suffix .x, columns from the unreviewed side get .y;
  #reviewed rows without a matching unreviewed row have NA in the .y columns.
  #allow.cartesian = TRUE lets a page that appears several times on both
  #sides produce every pairing.
  patrols <- merge(x = log_rows[log_rows$action == "reviewed", ],
                   y = log_rows[log_rows$action == "unreviewed", ],
                   by = "page", all.x = TRUE,
                   allow.cartesian = TRUE)

  #Editor plots: one row per patroller (first occurrence of each user.x)
  unique_patrollers <- patrols[!duplicated(patrols$user.x), ]
  save_density_pair(unique_patrollers,
                    title = "distribution of unique page patrollers, by edit count",
                    file = "editor_density.png",
                    log10_file = "log10_editor_density.png")

  #Quantiles. A bare quantile() call in the middle of a function body is
  #evaluated and thrown away, so print it explicitly.
  print(quantile(unique_patrollers$edits.x))
  #Output recorded in the original script:
  # 0% 25% 50% 75% 100%
  # 11.0 470.0 2500.0 11428.5 331811.0

  #Patrol plots and quantiles: every row of the merged table
  save_density_pair(patrols,
                    title = "distribution of patrol actions, by patroller edit count",
                    file = "patrol_density.png",
                    log10_file = "log10_patrol_density.png")

  #Quantiles
  print(quantile(patrols$edits.x))
  #Output recorded in the original script:
  #0% 25% 50% 75% 100%
  #11 1017 3350 19131 331811

  #unreviewed patrols: rows where the join found a matching 'unreviewed' action
  unreviewed <- patrols[!is.na(patrols$action.y), ]

  #Plot editors
  #NOTE: "unreviwed" is misspelled in these two file names. They are kept
  #byte-for-byte so anything that already reads the output still finds it.
  unreviewed_patrollers <- unreviewed[!duplicated(unreviewed$user.x), ]
  save_density_pair(unreviewed_patrollers,
                    title = "distribution of unique reverted page patrollers, by edit count",
                    file = "unreviwed_editor_density.png",
                    log10_file = "log10_unreviwed_editor_density.png")

  #Quantiles
  print(quantile(unreviewed_patrollers$edits.x))
  #Output recorded in the original script:
  #0% 25% 50% 75% 100%
  #19.00 712.25 2289.50 9172.25 151824.00

  #Patrol plots and quantiles
  #(the linear plot's title used to read "revertedpatrol"; it now matches the
  #log10 title)
  save_density_pair(unreviewed,
                    title = "distribution of reverted patrol actions, by patroller edit count",
                    file = "unreviewed_patrol_density.png",
                    log10_file = "unreviewed_log10_patrol_density.png")

  #Quantiles. Last expression, so this is the function's return value and is
  #auto-printed by the top-level npp() call.
  #Output recorded in the original script:
  #0% 25% 50% 75% 100%
  #19 880 2818 10046 151824
  quantile(unreviewed$edits.x)
}
#Execute the analysis (its return value is auto-printed at top level), then
#end the R session without saving the workspace
npp()
quit(save = "no")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment