Created
October 21, 2016 04:19
-
-
Save jonocarroll/14eb2083713e4818369424967533dfde to your computer and use it in GitHub Desktop.
Analysis of my own Stack Overflow Answers via the Kaggle dataset
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## Process the Stack Overflow Answers data for my own account
## jonathan-carroll -- 4168169
##
## Jonathan Carroll
## 21 October, 2016
##
## https://twitter.com/carroll_jono/status/789319774773714946
## load required packages
library(dplyr)
library(anytime)
library(emojifont)
library(extrafont)
library(ggplot2)

## allow the FontAwesome stack-overflow glyph
## NOTE(review): device = "win" targets the Windows graphics device; on other
## platforms a different device is presumably needed -- confirm before running
loadfonts(device = "win")
## `fa` holds the glyph text that the plots below draw as a label
fa <- fontawesome("fa-stack-overflow")
## process the incoming data -- note, it appears as if every row is duplicated
## so these are filtered out in each plot
##
## One-off preparation, kept for reference (run once, then load the cached
## .rds file below):
# ans <- read.csv("Answers.csv", header = TRUE)
# ans$CreationDate <- anytime(ans$CreationDate)
## the weekly bin index is subtracted from 423 so that `week` counts backwards
## (larger value = further in the past)
## NOTE(review): 423 is presumably the number of weekly bins in the data -- confirm
# ans$week <- 423 - cut(ans$CreationDate, "week", labels = FALSE)
## save for easy loading
# saveRDS(ans, file = "Answers.rds")

## load the cached, pre-processed answers (expects Answers.rds in the
## current working directory)
ans <- readRDS("Answers.rds")
## how many net answer upvotes I have?
## (duplicated rows are collapsed to one per answer Id before summing Score;
## the one-row result is printed when run at top level)
ans %>%
  filter(OwnerUserId == 4168169) %>%
  distinct(Id, .keep_all = TRUE) %>%
  summarise(myRep = sum(Score))
## plot my weekly scores against how many weeks ago
##
## Pipeline: keep only my answers, collapse the duplicated rows to one per
## answer Id, total the Score per week, then draw one bar per week.
ans %>%
  filter(OwnerUserId == 4168169) %>%
  distinct(Id, .keep_all = TRUE) %>%
  group_by(week) %>%
  summarise(WeeklyRep = sum(Score)) %>%
  ggplot() +
  geom_col(aes(x = week, y = WeeklyRep), fill = "orange") +
  ## x is "weeks ago", so reverse the axis to put the largest value on the left
  scale_x_reverse() +
  ## NOTE: limits set on the scale drop (rather than clip) bars outside 0-10
  scale_y_continuous(breaks = 0:20, limits = c(0, 10)) +
  labs(
    title = "Weekly Stack Overflow Answer Scores",
    subtitle = "Jonathan Carroll [4168169]",
    caption = "Data from https://www.kaggle.com/stackoverflow/rquestions/downloads/Answers.csv.zip",
    x = "Weeks Ago\n",
    y = "Weekly Scores on Stack Overflow r Tagged Answers\n"
  ) +
  ## the complete theme must come first: applied after theme(), it would
  ## reset the axis-text rotation
  theme_bw() +
  theme(axis.text.x = element_text(angle = 90)) +
  ## annotate() draws the glyph once, instead of once per row of the data
  annotate("text", x = 1, y = 9, label = fa, family = "FontAwesome",
           size = 15, colour = "orange")
## plot my number of weekly answers against how many weeks ago
##
## Pipeline: keep only my answers, collapse the duplicated rows to one per
## answer Id, count the answers per week, then draw one bar per week.
ans %>%
  filter(OwnerUserId == 4168169) %>%
  distinct(Id, .keep_all = TRUE) %>%
  group_by(week) %>%
  summarise(WeeklyAnswers = n()) %>%
  ggplot() +
  geom_col(aes(x = week, y = WeeklyAnswers), fill = "orange") +
  ## x is "weeks ago", so reverse the axis to put the largest value on the left
  scale_x_reverse() +
  ## NOTE: limits set on the scale drop (rather than clip) bars outside 0-7
  scale_y_continuous(breaks = 0:20, limits = c(0, 7)) +
  labs(
    title = "Weekly Stack Overflow Answers",
    subtitle = "Jonathan Carroll [4168169]",
    caption = "Data from https://www.kaggle.com/stackoverflow/rquestions/downloads/Answers.csv.zip",
    x = "Weeks Ago\n",
    y = "Number of Weekly Answers on Stack Overflow r Tagged Answers\n"
  ) +
  ## the complete theme must come first: applied after theme(), it would
  ## reset the axis-text rotation
  theme_bw() +
  theme(axis.text.x = element_text(angle = 90)) +
  ## annotate() draws the glyph once, instead of once per row of the data
  annotate("text", x = 1, y = 6, label = fa, family = "FontAwesome",
           size = 15, colour = "orange")
## plot my average weekly scores against how many weeks ago
##
## Pipeline: keep only my answers, collapse the duplicated rows to one per
## answer Id, compute total Score divided by number of answers for each week,
## then draw one bar per week.
ans %>%
  filter(OwnerUserId == 4168169) %>%
  distinct(Id, .keep_all = TRUE) %>%
  group_by(week) %>%
  summarise(WeeklyRatio = sum(Score) / n()) %>%
  ggplot() +
  geom_col(aes(x = week, y = WeeklyRatio), fill = "orange") +
  ## x is "weeks ago", so reverse the axis to put the largest value on the left
  scale_x_reverse() +
  ## NOTE: limits set on the scale drop (rather than clip) bars outside 0-4
  scale_y_continuous(breaks = 0:20, limits = c(0, 4)) +
  labs(
    title = "Weekly Stack Overflow Average Scores",
    subtitle = "Jonathan Carroll [4168169]",
    caption = "Data from https://www.kaggle.com/stackoverflow/rquestions/downloads/Answers.csv.zip",
    x = "Weeks Ago\n",
    y = "Average Weekly Scores on Stack Overflow r Tagged Answers\n"
  ) +
  ## the complete theme must come first: applied after theme(), it would
  ## reset the axis-text rotation
  theme_bw() +
  theme(axis.text.x = element_text(angle = 90)) +
  ## annotate() draws the glyph once, instead of once per row of the data
  annotate("text", x = 1, y = 3.75, label = fa, family = "FontAwesome",
           size = 15, colour = "orange")
Author
jonocarroll
commented
Oct 21, 2016
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment