Skip to content

Instantly share code, notes, and snippets.

@benzipperer
Last active October 28, 2020 13:48
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save benzipperer/3c14189a8b3f49d0b289426e58b52925 to your computer and use it in GitHub Desktop.
Save benzipperer/3c14189a8b3f49d0b289426e58b52925 to your computer and use it in GitHub Desktop.
analysis of pivot vs non-pivot counties and QCEW wage growth
library(tidyverse)
library(haven)
library(data.table)
clean_qcew_annual <- function(x) {
# QCEW data from https://www.bls.gov/cew/downloadable-data-files.htm
fread(cmd = paste0("unzip -p ", x, "_annual_singlefile.zip")) %>%
# keep only county-level private sector
filter(agglvl_code == 71 & own_code == 5) %>%
# drop PR and VI
filter(as.numeric(str_sub(area_fips, 1, 2)) < 72) %>%
transmute(
fips = as.numeric(area_fips),
year,
qcew_pay_avg = avg_annual_pay,
qcew_emp_avg = annual_avg_emplvl
)
}
map_dfr(2013:2019, clean_qcew_annual) %>%
write_dta("qcew_2013_2019.dta")
year cpi_u_x1 cpi_u cpi_u_core cpiurs cpiurs_core
1947 24.200001 22.299999 37.4
1948 26.200001 24 40.5
1949 25.9 23.799999 40.1
1950 26.200001 24.1 40.5
1951 28.299999 26 43.8
1952 28.799999 26.6 44.5
1953 29 26.799999 44.90000000000001
1954 29.200001 26.799999 45.2
1955 29.1 26.799999 45
1956 29.6 27.200001 45.8
1957 30.5 28.1 28.9 47.2
1958 31.4 28.9 29.6 48.6
1959 31.6 29.1 30.2 48.90000000000001
1960 32.200001 29.6 30.6 49.8
1961 32.5 29.9 31 50.3
1962 32.799999 30.200001 31.4 50.7
1963 33.299999 30.6 31.8 51.5
1964 33.700001 31 32.3 52.1
1965 34.200001 31.5 32.7 52.90000000000001
1966 35.200001 32.5 33.5 54.40000000000001
1967 36.299999 33.400002 34.7 56.1
1968 37.700001 34.799999 36.3 58.3
1969 39.400002 36.700001 38.4 60.90000000000001
1970 41.299999 38.799999 40.8 63.90000000000001
1971 43.099998 40.5 42.7 66.7
1972 44.400002 41.799999 44 68.7
1973 47.200001 44.400002 45.6 73
1974 51.900002 49.299999 49.3 80.30000000000001
1975 56.200001 53.799999 53.9 86.90000000000001
1976 59.400002 56.900002 57.4 91.90000000000001
1977 63.200001 60.599998 61 97.7
1978 67.5 65.199997 65.5 104.4 103.6
1979 74 72.599998 71.90000000000001 114.3 111
1980 82.300003 82.400002 80.8 127.1 120.9
1981 90.099998 90.900002 89.2 139.1 132.2
1982 95.599998 96.5 95.8 147.5 142.4
1983 99.599998 99.59999999999999 153.8 150.4
1984 103.9 104.5 160.2 157.9
1985 107.6 109.1 165.7 164.8
1986 109.6 113.5 168.6 171.4
1987 113.6 118.2 174.4 178.1
1988 118.3 123.4 180.7 185.2
1989 124 129 188.6 192.6
1990 130.7 135.5 197.9 201.4
1991 136.2 142.1 205.1 209.9
1992 140.3 147.3 210.2 216.4
1993 144.5 152.2 215.5 222.5
1994 148.2 156.5 220 227.7
1995 152.39999 161.2 225.3 233.4
1996 156.8 165.6 231.3 239.1
1997 160.5 169.5 236.3 244.4
1998 163 173.4 239.5 249.6
1999 166.60001 177 244.6 254.6
2000 172.2 181.3 252.9 260.9
2001 177.10001 186.1 260.1 267.9
2002 179.89999 190.4 264.2 274.1
2003 184 193.2 270.2 278.1
2004 188.89999 196.6 277.5 283.1
2005 195.3 200.9 286.9 289.2
2006 201.60001 205.9 296.2 296.5
2007 207.3 210.7 304.6 303.4
2008 215.3 215.6 316.3 310.3
2009 214.5 219.2 315.2 315.6
2010 218.10001 221.3 320.4 318.7
2011 224.89999 225 330.5 324
2012 229.60001 229.8 337.5 331
2013 233 233.8 342.5 337
2014 236.7 237.9 348.3 343.1
2015 237 242.2 348.9 349.7
2016 240 247.6 353.4 357.5
2017 245.10001 252.2 361 364.2
2018 251.10001 257.6 369.8 372
2019 255.7 263.2 376.5 380.2
set more off
clear all
* cpi data
import delimited cpi.csv, clear
keep year cpiurs
keep if cpiurs != .
sum cpiurs if year == 2019
local cpiurs2019 = r(mean)
tempfile cpiurs
save `cpiurs'
* clean QCEW data
global Rterm_path "/usr/bin/R"
* use R script to convert raw QCEW to Stata format,
* because Stata is maddeningly slow
* rsource using clean_qcew.R, roptions(--no-save)
* deal with some QCEW county issues
use qcew_2013_2019.dta, clear
tostring fips, gen(fips_string) format(%05.0f)
* problem: won't be able to match "Unknown Or Undefined" QCEW counties to electoral returns
* solution: drop them from QCEW data
drop if substr(fips_string, 3, 5) == "999"
* problem: Bedford city (51515) added to Bedford County (FIPS 51019) in 2013
* solution: drop entries for Bedford City in 2013
drop if fips == 51515 & year == 2013
* problem: mid-year Shannon County (46113) renamed to Oglala Lakota County (46102) in mid-2015
* solution: drop the QCEW entries for 2015 Shannon, keep 2015 Oglala
* then rename Oglala to Shannon to match electoral returns
drop if fips == 46113 & year == 2015
replace fips = 46113 if fips == 46102
* problem: can't match many Alaska returns and QCEW data
* solution: drop Alaska
drop if substr(fips_string, 1, 2) == "02"
drop fips_string
tempfile qcewdata
save `qcewdata'
* clean electoral data
* from https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi:10.7910/DVN/VOQCHQ
import delimited countypres_2000-2016.csv, clear
replace candidatevotes = "" if candidatevotes == "NA"
destring candidatevotes, replace
replace fips = "" if fips == "NA"
destring fips, replace
encode candidate, gen(candidate_code)
rename state_po state_abb
rename county county_name
rename totalvotes total_votes
rename candidatevotes candidate_votes
keep year state_abb fips candidate_code candidate_votes total_votes county_name
* deal with electoral county issues
* remove entries with no fips
drop if fips == .
* problem: Kansas city entry in electoral (FIPS 36000) not a county in QCEW
* weird that it has FIPS of 36000 when 36 is the FIPS code for NY
* solution: going to drop it because there is no clean way to allocate
* Kansas City votes to other counties
drop if fips == 36000
* deal with mismatches
tostring fips, gen(fips_string) format(%05.0f)
* problem: can't match many Alaska returns and QCEW data
* solution: drop Alaska
drop if substr(fips_string, 1, 2) == "02"
drop fips_string
* problem: Bedford city (51515) added to Bedford County (51019) in 2013
* but returns have both
* solution: add all of Bedford city returns to Bedford County
gen new_fips = fips
replace new_fips = 51019 if fips == 51515
assert county_name == "Bedford" if new_fips == 51019
collapse (sum) candidate_votes total_votes (first) state_abb county_name, by(new_fips year candidate_code)
rename new_fips fips
* create Obama => Obama => Trump indicator
keep if year >= 2008
gen vote_share = candidate_votes/total_votes
egen rank = rank(vote_share), field by(year fips)
* check to make sure there is really only one winner in year X fips
duplicates report year fips if rank == 1
assert r(unique_value) == r(N)
* determine who won county
gen obama = candidate_code == 2 & rank == 1
gen trump = candidate_code == 3 & rank == 1
gen mccain = candidate_code == 7 & rank == 1
gen romney = candidate_code == 8 & rank == 1
gen clinton = candidate_code == 5 & rank == 1
collapse (max) obama trump mccain romney clinton (first) county_name total_votes, by(year state_abb fips)
* identify swing counties
reshape wide obama trump mccain romney clinton total_votes, i(fips) j(year)
gen pivot_to_trump = obama2008 == 1 & obama2012 == 1 & trump2016 == 1
gen pivot_to_clinton = mccain2008 == 1 & romney2012 == 1 & clinton2016 == 1
tab pivot_to_trump pivot_to_clinton, m
egen mean_total_votes = rowmean(total_votes*)
keep fips county_name state_abb pivot* mean_total_votes trump2016 clinton2016
* merge QCEW data
merge 1:m fips using `qcewdata', assert(3) nogenerate
* confirm this is a balanced panel
tsset fips year
assert r(balanced) == "strongly balanced"
* some counties have no QCEW employment; drop counties with any zeros
tab fips, nofreq
local before_count = r(r)
tab fips if pivot_to_trump == 1, nofreq
local before_count_pivot = r(r)
gen _zeroemp = qcew_emp_avg == 0
egen zeroemp = max(_zeroemp), by(fips)
drop if zeroemp == 1
drop *zeroemp
tab fips, nofreq
local after_count = r(r)
tab fips if pivot_to_trump == 1, nofreq
local after_count_pivot = r(r)
di "Removing zero emp counties dropped"
di `before_count_pivot' - `after_count_pivot' " pivot-to-Trump counties"
di `before_count' - `after_count' " counties overall"
merge m:1 year using `cpiurs', keep(3) nogenerate
gen real_avg_pay = qcew_pay_avg * `cpiurs2019' / cpiurs
rename qcew_pay_avg nom_avg_pay
rename qcew_emp_avg emp
drop cpiurs
keep if year == 2013 | year == 2016 | year == 2019
egen period = group(year)
lab def period 2 "2013 - 2016" 3 "2016 - 2019"
lab val period period
tsset fips period
foreach var of varlist *avg_pay emp {
gen Dpct_`var' = `var' / L.`var' - 1
}
* analysis of pivot vs others
keep if period > 1
keep fips period Dpct* mean_total_votes pivot_to_trump
reshape wide Dpct_real_avg_pay Dpct_nom_avg_pay Dpct_emp, i(fips) j(period)
foreach x in real_avg_pay nom_avg_pay emp {
gen faster_`x' = Dpct_`x'3 > Dpct_`x'2
}
table pivot_to_trump, c(mean faster_nom_avg_pay mean faster_real_avg_pay)
collapse (mean) Dpct_real_avg_pay2 Dpct_real_avg_pay3 Dpct_nom_avg_pay2 Dpct_nom_avg_pay3 [aw=mean_total_votes], by(pivot_to_trump)
foreach x of varlist Dpct* {
gen `x'_string = string(round(`x' * 100, 0.1), "%3.1f") + "%"
drop `x'
rename `x'_string `x'
}
lab var Dpct_real_avg_pay2 "2013-2016"
lab var Dpct_real_avg_pay3 "2016-2019"
lab var Dpct_nom_avg_pay2 "2013-2016"
lab var Dpct_nom_avg_pay3 "2016-2019"
lab def pivot_to_trump 0 "Other counties" 1 "Pivot counties"
lab val pivot_to_trump pivot_to_trump
list
keep pivot_to_trump Dpct_real_avg_pay*
export excel using average_growth.xlsx, firstrow(varlabels) replace
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment