Skip to content

Instantly share code, notes, and snippets.

@khakieconomics
Last active October 9, 2019 21:08
Show Gist options
  • Save khakieconomics/63c143ad7c037d4c39d41364b28c4e68 to your computer and use it in GitHub Desktop.
Save khakieconomics/63c143ad7c037d4c39d41364b28c4e68 to your computer and use it in GitHub Desktop.
File to ping Github's API, get the commit numbers for applicants to our predoc program, and run a simple DiD on their daily commits before and after the program.
# A simple script to grab GitHub commits for a list of users and plot
# group-average commits per day for two groups
# To use this script, you'll need to set up a yaml file with credentials
# for github, and set up a google sheet with columns `Github handle` and
#`Attended` (with values "Yes" or "No")
# Author: Jim Savage, Schmidt Futures
# Load libraries
library(tidyverse); library(httr); library(yaml);library(jsonlite);
library(googlesheets); library(RCurl); library(lubridate); library(ggthemes)
library(lfe)
# Read the credentials file and authorise access to Google Sheets
creds <- read_yaml("creds.yaml")
gs_auth()
# Register the applicants' sheet by URL and read its first worksheet
the_sheet <- gs_read(gs_url(x = creds$googlesheets$usernames), ws = 1)
# Keep only the applicants for whom a GitHub handle was recorded
only_GH_members <- filter(the_sheet, !is.na(`Github handle`))
# Function to get commit data for one user.
#
# Requests the user's profile from the GitHub API, follows its `repos_url` to
# list the user's repositories, then requests the commit list of every
# repository and keeps the commits whose GitHub committer login equals `user`.
#
# Arguments:
#   user  GitHub handle (a single character string).
# Returns:
#   A tibble with one row per retained commit and the columns `author`
#   (committer name), `repo`, `Github handle`, `date` (author date, POSIXct in
#   UTC) and `comment` (commit message). An empty tibble when the profile
#   cannot be found or no commit is retained.
#
# Uses the global `creds` list (creds$github$username, creds$github$pw) for
# basic authentication.
# NOTE(review): no paging parameters are sent, so each listing contains only
# what the API returns for a single request -- confirm against the GitHub REST
# documentation whether this truncates the data for active users.
get_commits <- function(user) {
  base_api <- "https://api.github.com/"
  gh_auth <- authenticate(creds$github$username, creds$github$pw)

  # GET `url` with the shared credentials; return the JSON body as nested lists
  fetch_json <- function(url) {
    response <- GET(url, gh_auth)
    parse_json(httr::content(response, as = "text", encoding = "UTF-8"))
  }

  profile <- fetch_json(paste0(base_api, "users/", user))
  if (is.null(profile$repos_url)) {
    # Without a repos_url there is nothing to follow (e.g. a mistyped handle).
    # Warn and skip so that one bad handle does not abort the whole run.
    warning(
      "No repository listing available for GitHub handle '", user,
      "'; returning no commits.",
      call. = FALSE
    )
    return(tibble())
  }
  repos <- fetch_json(profile$repos_url)

  commits <- lapply(repos, function(y) {
    commit_info <- fetch_json(
      paste0(base_api, "repos/", user, "/", y$name, "/commits")
    )
    # A payload that flattens to two values or fewer cannot be a commit list;
    # presumably this is how an API error message shows up -- TODO confirm.
    if (length(unlist(commit_info)) <= 2) {
      return(tibble())
    }
    # One TRUE/FALSE per commit. isTRUE() maps a missing committer login to
    # FALSE instead of dropping the element, which would shorten the mask and
    # make it select the wrong commits.
    is_own <- vapply(
      commit_info,
      function(x) is.list(x) && isTRUE(x$committer$login == user),
      logical(1)
    )
    lapply(commit_info[is_own], function(x) {
      tibble(
        author = x$commit$committer$name,
        repo = y$name,
        `Github handle` = user,
        # Parse the ISO-8601 timestamp explicitly as UTC so that a later
        # as.Date() gives the same calendar day on every machine.
        date = as.POSIXct(
          x$commit$author$date,
          format = "%Y-%m-%dT%H:%M:%OS",
          tz = "UTC"
        ),
        comment = x$commit$message
      )
    }) %>%
      bind_rows()
  }) %>%
    bind_rows()

  commits
}
# Fetch the commit history of every applicant that has a GitHub account
get_all_commits <- only_GH_members$`Github handle` %>%
  lapply(get_commits)
# Stack the per-user tibbles and attach each applicant's columns from the sheet
class_commits <- get_all_commits %>%
  bind_rows() %>%
  left_join(only_GH_members)
# Build a balanced handle-by-day panel of commit counts.
#
# 1. Count commits per handle and calendar day. The day column is called
#    `Month` for historical reasons but holds a Date at daily resolution.
# 2. Right-join onto every (handle, day) combination from 2019-01-01 to today,
#    so days without commits become rows. Only handles that have at least one
#    commit in `class_commits` enter the grid.
# 3. Per handle, fill `Attended` from its first non-missing value (falling
#    back to "No"), turn missing counts into 0 and label each day with the
#    programme period it belongs to.
# The result is returned ungrouped.
daily_commits <- class_commits %>%
  group_by(`Github handle`, Month = as.Date(date), Attended) %>%
  summarise(n = n()) %>%
  ungroup() %>%
  right_join(
    crossing(
      `Github handle` = unique(.$`Github handle`),
      Month = seq(from = as.Date("2019-01-01"), to = Sys.Date(), by = "day")
    ),
    by = c("Github handle", "Month")
  ) %>%
  group_by(`Github handle`) %>%
  mutate(
    # Rows created by the join carry no Attended value; copy the handle's own
    Attended = first(Attended[!is.na(Attended)]),
    Attended = ifelse(is.na(Attended), "No", Attended),
    n = ifelse(is.na(n), 0, n),
    # "Pre" runs through 2019-09-19, "Post" starts after 2019-09-22 and the
    # days in between are "During"
    period = case_when(
      Month >= as.Date("2019-01-01") & Month <= as.Date("2019-09-19") ~ "Pre",
      Month > as.Date("2019-09-22") ~ "Post",
      TRUE ~ "During"
    )
  ) %>%
  ungroup()
# Line chart of the mean number of commits per day, one line per value of
# `Attended`. The first day of the panel (2019-01-01) is left out.
daily_commits %>%
  filter(Month > as.Date("2019-01-01")) %>%
  group_by(Month, Attended) %>%
  summarise(`Average commits` = mean(n)) %>%
  ggplot(aes(x = Month, y = `Average commits`, colour = Attended)) +
  geom_line() +
  theme_hc() +
  labs(
    title = "Github commits per day",
    subtitle = "NYU predoc attendees and non-attendee applicants",
    x = "Date",
    y = "Average daily commits"
  )
# Point-and-interval plot of commits before and after the programme.
# Daily counts are centred on each handle's own mean over the Pre and Post
# days, then averaged by period and attendance; the bars span +/- 1.96
# standard errors of that average.
daily_commits %>%
  filter(period %in% c("Pre", "Post")) %>%
  group_by(`Github handle`) %>%
  mutate(demeaned_commits = n - mean(n)) %>%
  group_by(period, Attended) %>%
  summarise(
    m = mean(demeaned_commits),
    se = sd(demeaned_commits) / sqrt(n()),
    n = n()
  ) %>%
  ungroup() %>%
  mutate(
    # Put "Pre" first so it is drawn to the left of "Post"
    period = relevel(as.factor(period), "Pre"),
    lower = m - 1.96 * se,
    upper = m + 1.96 * se
  ) %>%
  ggplot(aes(x = period, y = m, colour = Attended)) +
  geom_linerange(aes(ymin = lower, ymax = upper)) +
  geom_point() +
  theme_hc()
# Simple regression analysis. Fixed effects linear regression on daily commit
# counts (`n`) with two dummies: attendee-days during the training and
# attendee-days after it. `Github handle` enters the second formula part, so
# each user gets their own fixed effect; the remaining two parts are `0`,
# i.e. left empty (in lfe these are the instrument and cluster specifications
# -- confirm against the felm documentation).
# NOTE(review): the formula contains no day or period effects, and both dummies
# are always FALSE for non-attendees, so the coefficients look like a
# before/after comparison within attendees rather than a
# difference-in-differences -- confirm this is intended.
# NOTE(review): with the cluster part empty the standard errors presumably do
# not account for correlation within a user over time -- TODO confirm.
linear_fit <- felm(n ~ I(period=="During" & Attended == "Yes")+
I(period=="Post" & Attended == "Yes") |
`Github handle` | 0 | 0, data = daily_commits)
# Print the coefficient table for the fitted model
summary(linear_fit)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment