Skip to content

Instantly share code, notes, and snippets.

View jamesdunham's full-sized avatar

James Dunham jamesdunham

  • Center for Security and Emerging Technology
  • New York
View GitHub Profile
@jamesdunham
jamesdunham / copy_from.py
Created August 6, 2019 13:33
Load a DataFrame into Postgres
from io import StringIO
import psycopg2
from tqdm import tqdm
def copy_from(df: pd.DataFrame,
table: str,
connection: psycopg2.extensions.connection,
chunk_size: int = 10000):
"""
Deduplicate organization names.
"""
import json
import logging
import pickle
import random
from io import StringIO
from itertools import combinations

Keybase proof

I hereby claim:

  • I am jamesdunham on github.
  • I am jdunham (https://keybase.io/jdunham) on keybase.
  • I have a public key ASBfo4fghAbkFJnfASnep_tSPk_gKGjT1Gx-PnY8SxgcfQo

To claim this, I am signing this object:

library(testthat)
devtools::load_all()
# Count, for each item name, how many entries of `gt_names` have the form
# "<item_name>_gt<digits>".
#
# item_names: character vector of item names; each is pasted into a regular
#   expression as-is.
# gt_names: character vector of names to search.
#
# Returns an integer vector with one count per element of `item_names`
# (named by `item_names` when it is a character vector).
names_to_levels <- function(item_names, gt_names) {
  count_matches <- function(item_name) {
    # Anchored at both ends so only exact "<item>_gt<digits>" names count.
    gt_pattern <- paste0('^', item_name, '_gt[0-9]+$')
    is_match <- grepl(gt_pattern, gt_names)
    sum(is_match)
  }
  vapply(item_names, count_matches, integer(1))
}
test_that('names_to_levels recovers number of choices per item given item names', {
expect_equal(names_to_levels('abortion', 'abortion_gt1'), c('abortion' = 1))
# Define a minimal Ctrl object
Ctrl <- setClass("Ctrl", slots = c(item_data = "data.frame", item_names = "ANY",
time_name = "character", geo_name = "character", group_names = "ANY",
weight_name = "ANY", time_filter='numeric'))
# Create minimal test data
test_data = data.table(
abortion = c(0, 0, 1, 2),
year = 2018,
state = 'MA',
@jamesdunham
jamesdunham / review-ny-counties.R
Created June 17, 2018 17:53
Review New York county returns
library(data.table)
library(stringr)
# Filesystem path under the user's home directory (`~`), so it is specific to
# the machine this script runs on.
# NOTE(review): presumably the root of a local copy of the OpenElections data;
# its uses are not visible in this excerpt — confirm.
OE_PATH = '~/medsl/openelections'
# Infix operator: does `string` match the regular expression `pattern`,
# ignoring case?
#
# string: input passed to stringr::str_detect().
# pattern: regular expression, wrapped with stringr::regex() so that case
#   is ignored.
#
# Returns the result of stringr::str_detect().
`%=%` <- function(string, pattern) {
  case_insensitive <- stringr::regex(pattern, ignore_case = TRUE)
  stringr::str_detect(string, case_insensitive)
}
`%-%` = function(string, pattern) {
from copy import copy
import spacy
from spacy.tokens import Doc, Span
class Template(object):
"""Create synthetic NER training data from a template document.
Provide a template NER-annotated spacy Doc when instantiating the class. Passing text to the `render` method
require(dataverse)
require(readr)
# Download the file "1976-2016-president.tab" from the Harvard Dataverse
# dataset doi:10.7910/DVN/42MVDX and parse it with readr::read_csv().
#
# Takes no arguments; requires network access to dataverse.harvard.edu.
# Returns the object produced by readr::read_csv().
read_presidential <- function() {
  dataset_doi <- "doi:10.7910/DVN/42MVDX"
  payload <- dataverse::get_file(
    "1976-2016-president.tab",
    dataset_doi,
    server = "dataverse.harvard.edu"
  )
  # Reinterpret the downloaded contents as a character string before handing
  # them to the CSV parser.
  readr::read_csv(readBin(payload, "character"))
}
@jamesdunham
jamesdunham / ps.R
Last active February 28, 2017 13:48
poststratifying the posterior with dgo (workaround)
# NOTE(review): removes every object that ls() lists in the current
# environment (the global workspace when run as a script), so sourcing this
# from an interactive session discards that session's variables.
rm(list = ls())
library(dgo)
library(dplyr)
# Load the `toy_dgirtfit` and `targets` datasets into the workspace.
# NOTE(review): presumably both ship with dgo — confirm.
data(toy_dgirtfit)
data(targets)
targets = targets %>%
group_by(year, state, race3) %>%
summarize(proportion = sum(proportion)) %>%
ungroup() %>%
@jamesdunham
jamesdunham / mre.R
Last active February 16, 2017 19:34
library(rstan)
# Read the Stan program text from the URL below; readLines() returns it as a
# character vector with one element per line.
# NOTE(review): the URL points at the `master` branch and is fetched over the
# network on every run, so the content is not pinned — confirm this is intended.
stan_model <- readLines("https://raw.githubusercontent.com/jamesdunham/dgo/master/exec/2017_01_04.stan")
stan_data <- structure(list(NNl2=structure(c(0L,0L,0L,0L),.Dim=c(2L,2L,1L)),SSl2=structure(c(0L,0L,0L,0L),.Dim=c(2L,2L,1L)),XX=structure(c(0,1,0,1,0,1,0,0,1,1,0,0,0,0,0,0,1,1),.Dim=c(6L,3L),.Dimnames=list(c("SC__black","VA__black","SC__other","VA__other","SC__white","VA__white"),c("VA","race3other","race3white"))),ZZ=structure(c(0,0,0,0,0,0),.Dim=c(2L,3L,1L),.Dimnames=list(c("2009","2010"),c("VA","race3other","race3white"),"")),ZZ_prior=structure(c(0,0,0,0,0,0),.Dim=c(2L,3L,1L),.Dimnames=list(c("2009","2010"),c("VA","race3other","race3white"),"")),G=6L,Q=2L,T=2L,N=24L,P=3L,S=1,H=1L,D=1L,Hprior=1L,WT=structure(c(1,1,1,1,1,1,1,1,1,1,1,1),.Dim=c(2L,1L,6L)),l2_only=structure(c(0L,0L,0L,0L),.Dim=c(2L,2L)),G_hier=1L,constant_item=TRUE,n_vec=structure(c(20,36,11,13,102,190,0,0,0,0,0,0,31,40,5,15,95,150,31,40,5,15,94,150),.Names=c("2009__SC__black__affirmative_action_gt1","2009__VA__black__affirmati