Paul Goldsmith-Pinkham paulgp

## example.py
import itertools
import hashlib
from collections import Counter
from multiprocessing import cpu_count
from concurrent.futures import ProcessPoolExecutor

def get_username(topic_id, ip):
    """
    Returns a username generated from the given topic ID and IP address.

## sort
n <- 100
m <- 1000
coef <- rep(0,m)
coef2 <- rep(0,m)
beta <- 0

for (j in seq(1,m) ){
  x <- rnorm(n, 0, 1)
  y <- rnorm(n, x* beta, 1)


## gist:7e0c0ad9dee76c4ab8e475e1165d493f
	* Fixed-effects regression of the outcome held in local x on the
	* yearT_bk_2001 ... yearT_bk_2006 terms plus the regressors listed in local post.
	* Observations are analytic-weighted by the variable named in local weight and
	* limited to years strictly between the globals min_year and max_year.
	* county_id and i.yq are absorbed; standard errors are clustered on state_id.
	reghdfe `x' yearT_bk_2001 yearT_bk_2002 yearT_bk_2003 yearT_bk_2004 yearT_bk_2005 yearT_bk_2006  `post' [aweight=`weight' ] if year < $max_year & year > $min_year , vce(cluster state_id ) absorb(county_id i.yq ) dofadjustments(clusters)
	* Residual degrees of freedom, reused below for the p-value.
	local df_r = e(df_r)
	* Store formatted results in locals whose names are built from the outcome (x)
	* and the specification counter (spec).
	* NOTE(review): spec is read here but only assigned at the end of this fragment;
	* presumably it is initialised earlier in the file - confirm.
	* Sample size, formatted with thousands separators.
	local n`spec' = string(e(N), "%15.0fc")
	* Coefficient and standard error of the term named in local post2, both scaled
	* by local multiplier and formatted to three decimals.
	* NOTE(review): post2 is not listed explicitly in the reghdfe call above;
	* presumably it is one of the variables contained in local post - confirm.
	local b`x'`spec' =  string(`multiplier'*_b[`post2'], "%15.3fc")
	local se`x'`spec' =  string(`multiplier'*_se[`post2'], "%12.3fc")
	* Two-sided p-value of the t statistic b/se using df_r degrees of freedom.
	local p = 2*ttail(`df_r',abs(_b[`post2']/_se[`post2']))
	* make_stars is defined outside this fragment; the next line reads its r(star)
	* result, presumably a significance marker for the p-value - TODO confirm.
	make_stars `p'
	local star`x'`spec' `r(star)'

	* Set the specification counter to 2 for the code that follows.
	local spec = 2

## year
actyear,weightAK,weightAL,weightAR,weightAZ,weightCA,weightCO,weightCT,weightDC,weightDE,weightFL,weightGA,weightHI,weightIA,weightID,weightIL,weightIN,weightKS,weightKY,weightLA,weightMA,weightMD,weightME,weightMI,weightMN,weightMO,weightMS,weightMT,weightNC,weightND,weightNE,weightNH,weightNJ,weightNM,weightNV,weightNY,weightOH,weightOK,weightOR,weightPA,weightRI,weightSC,weightSD,weightTN,weightTX,weightUT,weightVA,weightVT,weightWA,weightWI,weightWV,weightWY
1981,.03755028,.02990938,.20703676,0,.04086193,.01945291,.10767708,.06262338,.06350493,.03920498,.00479458,.06658893,.06084494,.06284221,.02269132,.04047319,.08070602,.02967702,.06200945,.0411989,.05829636,.01381558,.04262264,.02045847,.06681184,.04752667,.03490627,.03436573,.04141355,.00596215,.03996021,.0605478,.02732919,.06319549,.01641251,.05705033,.04074663,.14937994,.07915642,.01641021,.15789983,.04026218,.03547503,.0317673,.08546715,.09883956,.0492923,.09792817,.02944991,.07367196,.04073161
1982,.6684013,.02493662,.0422279,0,.30996058,.07911115

## nlf.R
# Build a one-row-per-game table from 2016 play-by-play data, holding play counts,
# final scores and summed play time for two teams per game, plus a flag for
# whether team 1 outscored team 2.
# NOTE(review): uses read_csv (readr), %>% / group_by / summarize / mutate /
# inner_join (dplyr) and spread (tidyr); no library() call is visible in this
# snippet, so these are presumably attached elsewhere - confirm.

# Load the play-by-play file from a hard-coded path in the user's Downloads folder.
pbp_2016 <- read_csv("~/Downloads/pbp_2016.csv")
# Per (GameID, posteam): top = sum of PlayTimeDiff, end_score = largest
# PosTeamScore (NAs ignored), numplays = number of rows.
# Rows whose posteam is the string "NA" are dropped; filter() also drops rows
# where the comparison itself evaluates to NA.
# teamNum numbers the remaining teams within each game by row order (1, 2, ...).
a <- pbp_2016 %>% group_by(GameID, posteam) %>% summarize(top = sum(PlayTimeDiff), end_score = max(PosTeamScore, na.rm=TRUE), numplays = n()) %>% filter(posteam != "NA") %>% group_by(GameID) %>% mutate(teamNum = row_number()) %>% select(GameID, top, numplays, teamNum, end_score)

# Reshape each measure from long (one row per game-team) to wide (one row per
# game, one column per teamNum value).
score <- a %>% select(GameID, teamNum, end_score) %>% spread(teamNum, end_score, sep="_")
numplays <- a %>% select(GameID, teamNum, numplays) %>% spread(teamNum, numplays, sep="_")
top <- a %>% select(GameID, teamNum, top) %>% spread(teamNum, top, sep="_")
# Rename the spread columns to measure-specific names.
# Assumes every game has exactly two teamNum values, so each wide table has
# exactly three columns - TODO confirm against the data.
colnames(top) <- c("GameID", "top_1", "top_2")
colnames(score) <- c("GameID", "score_1", "score_2")
colnames(numplays) <- c("GameID", "numplays_1", "numplays_2")
# Combine the three wide tables on GameID. winner_1 is 1 when team 1's final
# score is strictly greater than team 2's and 0 otherwise, so a tie yields 0.
final <- inner_join(numplays,score, by="GameID") %>% inner_join(top, by="GameID")  %>% mutate(winner_1 = as.integer(score_1 > score_2))
	import itertools
	import hashlib
	from collections import Counter
	from multiprocessing import cpu_count
	from concurrent.futures import ProcessPoolExecutor

	def get_username(topic_id, ip):
	"""
	Returns a username generated from the given topic ID and IP address.
	n <- 100
	m <- 1000
	coef <- rep(0,m)
	coef2 <- rep(0,m)
	beta <- 0

	for (j in seq(1,m) ){
	x <- rnorm(n, 0, 1)
	y <- rnorm(n, x* beta, 1)
	reghdfe `x' yearT_bk_2001 yearT_bk_2002 yearT_bk_2003 yearT_bk_2004 yearT_bk_2005 yearT_bk_2006 `post' [aweight=`weight' ] if year < $max_year & year > $min_year , vce(cluster state_id ) absorb(county_id i.yq ) dofadjustments(clusters)
	local df_r = e(df_r)
	local n`spec' = string(e(N), "%15.0fc")
	local b`x'`spec' = string(`multiplier'*_b[`post2'], "%15.3fc")
	local se`x'`spec' = string(`multiplier'*_se[`post2'], "%12.3fc")
	local p = 2*ttail(`df_r',abs(_b[`post2']/_se[`post2']))
	make_stars `p'
	local star`x'`spec' `r(star)'

	local spec = 2
	actyear,weightAK,weightAL,weightAR,weightAZ,weightCA,weightCO,weightCT,weightDC,weightDE,weightFL,weightGA,weightHI,weightIA,weightID,weightIL,weightIN,weightKS,weightKY,weightLA,weightMA,weightMD,weightME,weightMI,weightMN,weightMO,weightMS,weightMT,weightNC,weightND,weightNE,weightNH,weightNJ,weightNM,weightNV,weightNY,weightOH,weightOK,weightOR,weightPA,weightRI,weightSC,weightSD,weightTN,weightTX,weightUT,weightVA,weightVT,weightWA,weightWI,weightWV,weightWY
	1981,.03755028,.02990938,.20703676,0,.04086193,.01945291,.10767708,.06262338,.06350493,.03920498,.00479458,.06658893,.06084494,.06284221,.02269132,.04047319,.08070602,.02967702,.06200945,.0411989,.05829636,.01381558,.04262264,.02045847,.06681184,.04752667,.03490627,.03436573,.04141355,.00596215,.03996021,.0605478,.02732919,.06319549,.01641251,.05705033,.04074663,.14937994,.07915642,.01641021,.15789983,.04026218,.03547503,.0317673,.08546715,.09883956,.0492923,.09792817,.02944991,.07367196,.04073161
	1982,.6684013,.02493662,.0422279,0,.30996058,.07911115
	pbp_2016 <- read_csv("~/Downloads/pbp_2016.csv")
	a <- pbp_2016 %>% group_by(GameID, posteam) %>% summarize(top = sum(PlayTimeDiff), end_score = max(PosTeamScore, na.rm=TRUE), numplays = n()) %>% filter(posteam != "NA") %>% group_by(GameID) %>% mutate(teamNum = row_number()) %>% select(GameID, top, numplays, teamNum, end_score)

	score <- a %>% select(GameID, teamNum, end_score) %>% spread(teamNum, end_score, sep="_")
	numplays <- a %>% select(GameID, teamNum, numplays) %>% spread(teamNum, numplays, sep="_")
	top <- a %>% select(GameID, teamNum, top) %>% spread(teamNum, top, sep="_")
	colnames(top) <- c("GameID", "top_1", "top_2")
	colnames(score) <- c("GameID", "score_1", "score_2")
	colnames(numplays) <- c("GameID", "numplays_1", "numplays_2")
	final <- inner_join(numplays,score, by="GameID") %>% inner_join(top, by="GameID") %>% mutate(winner_1 = as.integer(score_1 > score_2))