James Dunham jamesdunham

## smoother.R
smooth_years <- function(d, varnames = NULL) {
  # Smooth variables over years using a loess interpolation
  #
  # param d: a dataframe
  # param varnames: the names of the variables to smooth over;
  #   if NULL names will be guessed.
  #
  # Returns a data.frame 2.5x larger than the input,
  # containing the original values and predicted values from a
  # loess fit. Assumes that a variable called 'year' exists.

## plot_rhats.R
plot_rhats <- function(dgirt_out) {
  # Save to disk plots of split R-hats for theta-bars (from a particular model)
  #
  # This function produces two plots showing the average R-hat in each year by
  # 1) state and 2) party, each averaging over the other. It assumes that
  # particular variable names exist in dgirt_out as the grouping variable (D_pid3),
  # time variable (D_year), and geographic variable (D_abb).

  # From the stanfit object extract R-hats and effective sample sizes
  tb_sum <- summary(dgirt_out, par = "theta_bar", verbose = TRUE)$summary

## scrape_congressional_session-years.r
library(rvest)
library(data.table)
library(lubridate)

# Fetch the history.house.gov Session-Dates page, select every <table> that is
# a direct child of a div whose class attribute contains 'manual-table', parse
# the selection with html_table(), and coerce the result to a data.frame.
doc <- read_html("http://history.house.gov/Institution/Session-Dates/All/")

sessions <- as.data.frame(
  html_table(
    html_nodes(doc, xpath = "//div[contains(@class, 'manual-table')]/table")
  )
)

## mre.R
library(rstan)
stan_model <- readLines("https://raw.githubusercontent.com/jamesdunham/dgo/master/exec/2017_01_04.stan")
stan_data <- structure(list(NNl2=structure(c(0L,0L,0L,0L),.Dim=c(2L,2L,1L)),SSl2=structure(c(0L,0L,0L,0L),.Dim=c(2L,2L,1L)),XX=structure(c(0,1,0,1,0,1,0,0,1,1,0,0,0,0,0,0,1,1),.Dim=c(6L,3L),.Dimnames=list(c("SC__black","VA__black","SC__other","VA__other","SC__white","VA__white"),c("VA","race3other","race3white"))),ZZ=structure(c(0,0,0,0,0,0),.Dim=c(2L,3L,1L),.Dimnames=list(c("2009","2010"),c("VA","race3other","race3white"),"")),ZZ_prior=structure(c(0,0,0,0,0,0),.Dim=c(2L,3L,1L),.Dimnames=list(c("2009","2010"),c("VA","race3other","race3white"),"")),G=6L,Q=2L,T=2L,N=24L,P=3L,S=1,H=1L,D=1L,Hprior=1L,WT=structure(c(1,1,1,1,1,1,1,1,1,1,1,1),.Dim=c(2L,1L,6L)),l2_only=structure(c(0L,0L,0L,0L),.Dim=c(2L,2L)),G_hier=1L,constant_item=TRUE,n_vec=structure(c(20,36,11,13,102,190,0,0,0,0,0,0,31,40,5,15,95,150,31,40,5,15,94,150),.Names=c("2009__SC__black__affirmative_action_gt1","2009__VA__black__affirmati

## ps.R
rm(list = ls())
library(dgo)
library(dplyr)

data(toy_dgirtfit)
data(targets)
targets = targets %>%
  group_by(year, state, race3) %>%
  summarize(proportion = sum(proportion)) %>%
  ungroup() %>%

## medsl-example.R
require(dataverse)
require(readr)

read_presidential <- function() {
  # Retrieve "1976-2016-president.tab" (dataset doi:10.7910/DVN/42MVDX) from
  # the dataverse.harvard.edu server and parse it with readr::read_csv().
  #
  # Takes no arguments.
  #
  # Returns whatever readr::read_csv() produces for the downloaded text.
  payload <- dataverse::get_file(
    "1976-2016-president.tab",
    "doi:10.7910/DVN/42MVDX",
    server = "dataverse.harvard.edu"
  )
  # The download is read as a single character string before being handed to
  # the CSV parser.
  readr::read_csv(readBin(payload, "character"))
}

## template.py
from copy import copy

import spacy
from spacy.tokens import Doc, Span


class Template(object):
    """Create synthetic NER training data from a template document.

    Provide a template NER-annotated spacy Doc when instantiating the class. Passing text to the `render` method

## review-ny-counties.R
library(data.table)
library(stringr)

OE_PATH = '~/medsl/openelections'

`%=%` <- function(string, pattern) {
  # Infix matcher: passes `string` to str_detect() with `pattern` wrapped in
  # stringr::regex(ignore_case = TRUE), so the match ignores letter case.
  #
  # param string: input passed to str_detect() as the text to search
  # param pattern: regular expression to look for
  #
  # Returns the result of str_detect().
  case_blind <- stringr::regex(pattern, ignore_case = TRUE)
  str_detect(string, case_blind)
}

`%-%` = function(string, pattern) {

## test-group-counts.R
# Minimal S4 "Ctrl" class for the tests: declares only the slots listed below.
# setClass() returns a generator, stored under the same name.
Ctrl <- setClass(
  "Ctrl",
  slots = c(
    item_data = "data.frame",
    item_names = "ANY",
    time_name = "character",
    geo_name = "character",
    group_names = "ANY",
    weight_name = "ANY",
    time_filter = "numeric"
  )
)

# Create minimal test data
test_data = data.table(
  abortion = c(0, 0, 1, 2),
  year = 2018,
  state = 'MA',

## item_weights.R
library(testthat)
devtools::load_all()

names_to_levels <- function(item_names, gt_names) {
  # Count, for each item name, how many elements of gt_names have the form
  # "<item_name>_gt<digits>".
  #
  # param item_names: item names to count levels for
  # param gt_names: candidate names to search
  #
  # Returns an integer vector with one count per element of item_names
  # (vapply() names the result by item_names when it is an unnamed character
  # vector).
  #
  # The item name is compared literally rather than being pasted into a
  # regular expression, so names containing regex metacharacters (e.g. "q.1"
  # or "a(b") are neither over-matched nor a source of regex errors.
  gt_names <- as.character(gt_names)
  vapply(item_names, function(item_name) {
    prefix <- paste0(item_name, "_gt")
    # Whatever follows the literal prefix must consist of digits only.
    suffix <- substring(gt_names, nchar(prefix) + 1L)
    sum(startsWith(gt_names, prefix) & grepl("^[0-9]+$", suffix))
  }, integer(1))
}
test_that('names_to_levels recovers number of choices per item given item names', {
  expect_equal(names_to_levels('abortion', 'abortion_gt1'), c('abortion' = 1))
	smooth_years <- function(d, varnames = NULL) {
	# Smooth variables over years using a loess interpolation
	#
	# param d: a dataframe
	# param varnames: the names of the variables to smooth over;
	# if NULL names will be guessed.
	#
	# Returns a data.frame 2.5x larger than the input,
	# containing the original values and predicted values from a
	# loess fit. Assumes that a variable called 'year' exists.
	plot_rhats <- function(dgirt_out) {
	# Save to disk plots of split R-hats for theta-bars (from a particular model)
	#
	# This function produces two plots showing the average R-hat in each year by
	# 1) state and 2) party, each averaging over the other. It assumes that
	# particular variable names exist in dgirt_out as the grouping variable (D_pid3),
	# time variable (D_year), and geographic variable (D_abb).

	# From the stanfit object extract R-hats and effective sample sizes
	tb_sum <- summary(dgirt_out, par = "theta_bar", verbose = TRUE)$summary
	library(rvest)
	library(data.table)
	library(lubridate)

	# Fetch the history.house.gov Session-Dates page, select every <table> that
	# is a direct child of a div whose class attribute contains 'manual-table',
	# parse the selection with html_table(), and coerce it to a data.frame.
	doc <- read_html("http://history.house.gov/Institution/Session-Dates/All/")

	sessions <- as.data.frame(
	  html_table(
	    html_nodes(doc, xpath = "//div[contains(@class, 'manual-table')]/table")
	  )
	)
	library(rstan)
	stan_model <- readLines("https://raw.githubusercontent.com/jamesdunham/dgo/master/exec/2017_01_04.stan")
	stan_data <- structure(list(NNl2=structure(c(0L,0L,0L,0L),.Dim=c(2L,2L,1L)),SSl2=structure(c(0L,0L,0L,0L),.Dim=c(2L,2L,1L)),XX=structure(c(0,1,0,1,0,1,0,0,1,1,0,0,0,0,0,0,1,1),.Dim=c(6L,3L),.Dimnames=list(c("SC__black","VA__black","SC__other","VA__other","SC__white","VA__white"),c("VA","race3other","race3white"))),ZZ=structure(c(0,0,0,0,0,0),.Dim=c(2L,3L,1L),.Dimnames=list(c("2009","2010"),c("VA","race3other","race3white"),"")),ZZ_prior=structure(c(0,0,0,0,0,0),.Dim=c(2L,3L,1L),.Dimnames=list(c("2009","2010"),c("VA","race3other","race3white"),"")),G=6L,Q=2L,T=2L,N=24L,P=3L,S=1,H=1L,D=1L,Hprior=1L,WT=structure(c(1,1,1,1,1,1,1,1,1,1,1,1),.Dim=c(2L,1L,6L)),l2_only=structure(c(0L,0L,0L,0L),.Dim=c(2L,2L)),G_hier=1L,constant_item=TRUE,n_vec=structure(c(20,36,11,13,102,190,0,0,0,0,0,0,31,40,5,15,95,150,31,40,5,15,94,150),.Names=c("2009__SC__black__affirmative_action_gt1","2009__VA__black__affirmati
	rm(list = ls())
	library(dgo)
	library(dplyr)

	data(toy_dgirtfit)
	data(targets)
	targets = targets %>%
	group_by(year, state, race3) %>%
	summarize(proportion = sum(proportion)) %>%
	ungroup() %>%
	require(dataverse)
	require(readr)

	read_presidential <- function() {
	  # Retrieve "1976-2016-president.tab" (dataset doi:10.7910/DVN/42MVDX) from
	  # the dataverse.harvard.edu server and parse it with readr::read_csv().
	  #
	  # Takes no arguments.
	  #
	  # Returns whatever readr::read_csv() produces for the downloaded text.
	  payload <- dataverse::get_file(
	    "1976-2016-president.tab",
	    "doi:10.7910/DVN/42MVDX",
	    server = "dataverse.harvard.edu"
	  )
	  # The download is read as a single character string before being handed
	  # to the CSV parser.
	  readr::read_csv(readBin(payload, "character"))
	}
	from copy import copy

	import spacy
	from spacy.tokens import Doc, Span


	class Template(object):
	"""Create synthetic NER training data from a template document.

	Provide a template NER-annotated spacy Doc when instantiating the class. Passing text to the `render` method
	library(data.table)
	library(stringr)

	OE_PATH = '~/medsl/openelections'

	`%=%` <- function(string, pattern) {
	  # Infix matcher: passes `string` to str_detect() with `pattern` wrapped in
	  # stringr::regex(ignore_case = TRUE), so the match ignores letter case.
	  #
	  # param string: input passed to str_detect() as the text to search
	  # param pattern: regular expression to look for
	  #
	  # Returns the result of str_detect().
	  case_blind <- stringr::regex(pattern, ignore_case = TRUE)
	  str_detect(string, case_blind)
	}

	`%-%` = function(string, pattern) {
	# Minimal S4 "Ctrl" class for the tests: declares only the slots listed
	# below. setClass() returns a generator, stored under the same name.
	Ctrl <- setClass(
	  "Ctrl",
	  slots = c(
	    item_data = "data.frame",
	    item_names = "ANY",
	    time_name = "character",
	    geo_name = "character",
	    group_names = "ANY",
	    weight_name = "ANY",
	    time_filter = "numeric"
	  )
	)

	# Create minimal test data
	test_data = data.table(
	abortion = c(0, 0, 1, 2),
	year = 2018,
	state = 'MA',
	library(testthat)
	devtools::load_all()

	names_to_levels <- function(item_names, gt_names) {
	  # Count, for each item name, how many elements of gt_names have the form
	  # "<item_name>_gt<digits>".
	  #
	  # param item_names: item names to count levels for
	  # param gt_names: candidate names to search
	  #
	  # Returns an integer vector with one count per element of item_names
	  # (vapply() names the result by item_names when it is an unnamed
	  # character vector).
	  #
	  # The item name is compared literally rather than being pasted into a
	  # regular expression, so names containing regex metacharacters (e.g.
	  # "q.1" or "a(b") are neither over-matched nor a source of regex errors.
	  gt_names <- as.character(gt_names)
	  vapply(item_names, function(item_name) {
	    prefix <- paste0(item_name, "_gt")
	    # Whatever follows the literal prefix must consist of digits only.
	    suffix <- substring(gt_names, nchar(prefix) + 1L)
	    sum(startsWith(gt_names, prefix) & grepl("^[0-9]+$", suffix))
	  }, integer(1))
	}
	test_that('names_to_levels recovers number of choices per item given item names', {
	expect_equal(names_to_levels('abortion', 'abortion_gt1'), c('abortion' = 1))