Skip to content

Instantly share code, notes, and snippets.

View jamesdunham's full-sized avatar

James Dunham jamesdunham

  • Center for Security and Emerging Technology
  • New York
View GitHub Profile
@jamesdunham
jamesdunham / smoother.R
Last active August 12, 2016 22:15
Smooth interpolation of missing data with loess
smooth_years <- function(d, varnames = NULL) {
# Smooth variables over years using a loess interpolation
#
# param d: a dataframe
# param varnames: the names of the variables to smooth over;
# if NULL names will be guessed.
#
# Returns a data.frame 2.5x larger than the input,
# containing the original values and predicted values from a
# loess fit. Assumes that a variable called 'year' exists.
plot_rhats <- function(dgirt_out) {
# Save to disk plots of split R-hats for theta-bars (from a particular model)
#
# This function produces two plots showing the average R-hat in each year by
# 1) state and 2) party, each averaging over the other. It assumes that
# particular variable names exist in dgirt_out as the grouping variable (D_pid3),
# time variable (D_year), and geographic variable (D_abb).
# From the stanfit object extract R-hats and effective sample sizes
tb_sum <- summary(dgirt_out, par = "theta_bar", verbose = TRUE)$summary
@jamesdunham
jamesdunham / scrape_congressional_session-years.r
Created November 14, 2016 15:17
scrape Congressional session-years from history.house.gov
library(rvest)
library(data.table)
library(lubridate)
# Download and parse the HTML page at the URL below.
doc = read_html("http://history.house.gov/Institution/Session-Dates/All/")
# Select every <table> that is a direct child of a <div> whose class attribute
# contains 'manual-table', parse the selection with html_table(), and coerce
# the result to a data.frame.
# NOTE(review): if more than one table matches the XPath, as.data.frame()
# receives several parsed tables at once -- confirm a single match is expected.
sessions = doc %>%
html_nodes(xpath = "//div[contains(@class, 'manual-table')]/table") %>%
html_table() %>%
as.data.frame()
@jamesdunham
jamesdunham / mre.R
Last active February 16, 2017 19:34
library(rstan)
stan_model <- readLines("https://raw.githubusercontent.com/jamesdunham/dgo/master/exec/2017_01_04.stan")
stan_data <- structure(list(NNl2=structure(c(0L,0L,0L,0L),.Dim=c(2L,2L,1L)),SSl2=structure(c(0L,0L,0L,0L),.Dim=c(2L,2L,1L)),XX=structure(c(0,1,0,1,0,1,0,0,1,1,0,0,0,0,0,0,1,1),.Dim=c(6L,3L),.Dimnames=list(c("SC__black","VA__black","SC__other","VA__other","SC__white","VA__white"),c("VA","race3other","race3white"))),ZZ=structure(c(0,0,0,0,0,0),.Dim=c(2L,3L,1L),.Dimnames=list(c("2009","2010"),c("VA","race3other","race3white"),"")),ZZ_prior=structure(c(0,0,0,0,0,0),.Dim=c(2L,3L,1L),.Dimnames=list(c("2009","2010"),c("VA","race3other","race3white"),"")),G=6L,Q=2L,T=2L,N=24L,P=3L,S=1,H=1L,D=1L,Hprior=1L,WT=structure(c(1,1,1,1,1,1,1,1,1,1,1,1),.Dim=c(2L,1L,6L)),l2_only=structure(c(0L,0L,0L,0L),.Dim=c(2L,2L)),G_hier=1L,constant_item=TRUE,n_vec=structure(c(20,36,11,13,102,190,0,0,0,0,0,0,31,40,5,15,95,150,31,40,5,15,94,150),.Names=c("2009__SC__black__affirmative_action_gt1","2009__VA__black__affirmati
@jamesdunham
jamesdunham / ps.R
Last active February 28, 2017 13:48
poststratifying the posterior with dgo (workaround)
rm(list = ls())
library(dgo)
library(dplyr)
data(toy_dgirtfit)
data(targets)
targets = targets %>%
group_by(year, state, race3) %>%
summarize(proportion = sum(proportion)) %>%
ungroup() %>%
require(dataverse)
require(readr)
read_presidential <- function() {
  # Fetch the file "1976-2016-president.tab" from the Dataverse dataset
  # doi:10.7910/DVN/42MVDX on dataverse.harvard.edu and parse it as CSV.
  #
  # Takes no arguments. Returns whatever readr::read_csv() produces for the
  # downloaded content.
  downloaded <- dataverse::get_file(
    "1976-2016-president.tab",
    "doi:10.7910/DVN/42MVDX",
    server = "dataverse.harvard.edu"
  )
  # readBin(..., "character") turns the downloaded bytes into a string that
  # read_csv() then parses as literal data.
  # NOTE(review): presumably get_file() returns a raw vector -- confirm.
  readr::read_csv(readBin(downloaded, "character"))
}
from copy import copy
import spacy
from spacy.tokens import Doc, Span
class Template(object):
"""Create synthetic NER training data from a template document.
Provide a template NER-annotated spacy Doc when instantiating the class. Passing text to the `render` method
@jamesdunham
jamesdunham / review-ny-counties.R
Created June 17, 2018 17:53
Review New York county returns
library(data.table)
library(stringr)
OE_PATH = '~/medsl/openelections'
`%=%` = function(string, pattern) {
  # Infix operator: does `string` match the regular expression `pattern`,
  # ignoring case? Returns the result of stringr::str_detect().
  case_insensitive = stringr::regex(pattern, ignore_case = TRUE)
  str_detect(string, case_insensitive)
}
`%-%` = function(string, pattern) {
# Define a minimal Ctrl object
# S4 class "Ctrl" with seven slots; the generator returned by setClass() is
# kept under the same name so instances can be built with Ctrl(...).
Ctrl <- setClass(
  "Ctrl",
  slots = c(
    item_data = "data.frame",
    item_names = "ANY",
    time_name = "character",
    geo_name = "character",
    group_names = "ANY",
    weight_name = "ANY",
    time_filter = "numeric"
  )
)
# Create minimal test data
test_data = data.table(
abortion = c(0, 0, 1, 2),
year = 2018,
state = 'MA',
library(testthat)
devtools::load_all()
names_to_levels = function(item_names, gt_names) {
  # For each item name, count the entries of `gt_names` of the form
  # "<item_name>_gt<digits>".
  #
  # param item_names: character vector of item names
  # param gt_names: character vector of names to search
  #
  # Returns an integer vector with one count per element of `item_names`.
  count_matches = function(item_name) {
    # The item name is pasted into the regular expression as-is and the
    # pattern is anchored at both ends.
    level_pattern = paste0('^', item_name, '_gt[0-9]+$')
    sum(grepl(level_pattern, gt_names))
  }
  vapply(item_names, count_matches, integer(1))
}
test_that('names_to_levels recovers number of choices per item given item names', {
expect_equal(names_to_levels('abortion', 'abortion_gt1'), c('abortion' = 1))