analysis of pivot vs non-pivot counties and QCEW wage growth
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(tidyverse) | |
library(haven) | |
library(data.table) | |
clean_qcew_annual <- function(x) { | |
# QCEW data from https://www.bls.gov/cew/downloadable-data-files.htm | |
fread(cmd = paste0("unzip -p ", x, "_annual_singlefile.zip")) %>% | |
# keep only county-level private sector | |
filter(agglvl_code == 71 & own_code == 5) %>% | |
# drop PR and VI | |
filter(as.numeric(str_sub(area_fips, 1, 2)) < 72) %>% | |
transmute( | |
fips = as.numeric(area_fips), | |
year, | |
qcew_pay_avg = avg_annual_pay, | |
qcew_emp_avg = annual_avg_emplvl | |
) | |
} | |
map_dfr(2013:2019, clean_qcew_annual) %>% | |
write_dta("qcew_2013_2019.dta") |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
year | cpi_u_x1 | cpi_u | cpi_u_core | cpiurs | cpiurs_core | |
---|---|---|---|---|---|---|
1947 | 24.200001 | 22.299999 | 37.4 | |||
1948 | 26.200001 | 24 | 40.5 | |||
1949 | 25.9 | 23.799999 | 40.1 | |||
1950 | 26.200001 | 24.1 | 40.5 | |||
1951 | 28.299999 | 26 | 43.8 | |||
1952 | 28.799999 | 26.6 | 44.5 | |||
1953 | 29 | 26.799999 | 44.90000000000001 | |||
1954 | 29.200001 | 26.799999 | 45.2 | |||
1955 | 29.1 | 26.799999 | 45 | |||
1956 | 29.6 | 27.200001 | 45.8 | |||
1957 | 30.5 | 28.1 | 28.9 | 47.2 | ||
1958 | 31.4 | 28.9 | 29.6 | 48.6 | ||
1959 | 31.6 | 29.1 | 30.2 | 48.90000000000001 | ||
1960 | 32.200001 | 29.6 | 30.6 | 49.8 | ||
1961 | 32.5 | 29.9 | 31 | 50.3 | ||
1962 | 32.799999 | 30.200001 | 31.4 | 50.7 | ||
1963 | 33.299999 | 30.6 | 31.8 | 51.5 | ||
1964 | 33.700001 | 31 | 32.3 | 52.1 | ||
1965 | 34.200001 | 31.5 | 32.7 | 52.90000000000001 | ||
1966 | 35.200001 | 32.5 | 33.5 | 54.40000000000001 | ||
1967 | 36.299999 | 33.400002 | 34.7 | 56.1 | ||
1968 | 37.700001 | 34.799999 | 36.3 | 58.3 | ||
1969 | 39.400002 | 36.700001 | 38.4 | 60.90000000000001 | ||
1970 | 41.299999 | 38.799999 | 40.8 | 63.90000000000001 | ||
1971 | 43.099998 | 40.5 | 42.7 | 66.7 | ||
1972 | 44.400002 | 41.799999 | 44 | 68.7 | ||
1973 | 47.200001 | 44.400002 | 45.6 | 73 | ||
1974 | 51.900002 | 49.299999 | 49.3 | 80.30000000000001 | ||
1975 | 56.200001 | 53.799999 | 53.9 | 86.90000000000001 | ||
1976 | 59.400002 | 56.900002 | 57.4 | 91.90000000000001 | ||
1977 | 63.200001 | 60.599998 | 61 | 97.7 | ||
1978 | 67.5 | 65.199997 | 65.5 | 104.4 | 103.6 | |
1979 | 74 | 72.599998 | 71.90000000000001 | 114.3 | 111 | |
1980 | 82.300003 | 82.400002 | 80.8 | 127.1 | 120.9 | |
1981 | 90.099998 | 90.900002 | 89.2 | 139.1 | 132.2 | |
1982 | 95.599998 | 96.5 | 95.8 | 147.5 | 142.4 | |
1983 | 99.599998 | 99.59999999999999 | 153.8 | 150.4 | ||
1984 | 103.9 | 104.5 | 160.2 | 157.9 | ||
1985 | 107.6 | 109.1 | 165.7 | 164.8 | ||
1986 | 109.6 | 113.5 | 168.6 | 171.4 | ||
1987 | 113.6 | 118.2 | 174.4 | 178.1 | ||
1988 | 118.3 | 123.4 | 180.7 | 185.2 | ||
1989 | 124 | 129 | 188.6 | 192.6 | ||
1990 | 130.7 | 135.5 | 197.9 | 201.4 | ||
1991 | 136.2 | 142.1 | 205.1 | 209.9 | ||
1992 | 140.3 | 147.3 | 210.2 | 216.4 | ||
1993 | 144.5 | 152.2 | 215.5 | 222.5 | ||
1994 | 148.2 | 156.5 | 220 | 227.7 | ||
1995 | 152.39999 | 161.2 | 225.3 | 233.4 | ||
1996 | 156.8 | 165.6 | 231.3 | 239.1 | ||
1997 | 160.5 | 169.5 | 236.3 | 244.4 | ||
1998 | 163 | 173.4 | 239.5 | 249.6 | ||
1999 | 166.60001 | 177 | 244.6 | 254.6 | ||
2000 | 172.2 | 181.3 | 252.9 | 260.9 | ||
2001 | 177.10001 | 186.1 | 260.1 | 267.9 | ||
2002 | 179.89999 | 190.4 | 264.2 | 274.1 | ||
2003 | 184 | 193.2 | 270.2 | 278.1 | ||
2004 | 188.89999 | 196.6 | 277.5 | 283.1 | ||
2005 | 195.3 | 200.9 | 286.9 | 289.2 | ||
2006 | 201.60001 | 205.9 | 296.2 | 296.5 | ||
2007 | 207.3 | 210.7 | 304.6 | 303.4 | ||
2008 | 215.3 | 215.6 | 316.3 | 310.3 | ||
2009 | 214.5 | 219.2 | 315.2 | 315.6 | ||
2010 | 218.10001 | 221.3 | 320.4 | 318.7 | ||
2011 | 224.89999 | 225 | 330.5 | 324 | ||
2012 | 229.60001 | 229.8 | 337.5 | 331 | ||
2013 | 233 | 233.8 | 342.5 | 337 | ||
2014 | 236.7 | 237.9 | 348.3 | 343.1 | ||
2015 | 237 | 242.2 | 348.9 | 349.7 | ||
2016 | 240 | 247.6 | 353.4 | 357.5 | ||
2017 | 245.10001 | 252.2 | 361 | 364.2 | ||
2018 | 251.10001 | 257.6 | 369.8 | 372 | ||
2019 | 255.7 | 263.2 | 376.5 | 380.2 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
set more off | |
clear all | |
* cpi data | |
import delimited cpi.csv, clear | |
keep year cpiurs | |
keep if cpiurs != . | |
sum cpiurs if year == 2019 | |
local cpiurs2019 = r(mean) | |
tempfile cpiurs | |
save `cpiurs' | |
* clean QCEW data | |
global Rterm_path "/usr/bin/R" | |
* use R script to convert raw QCEW to Stata format, | |
* because Stata is maddeningly slow | |
* rsource using clean_qcew.R, roptions(--no-save) | |
* deal with some QCEW county issues | |
use qcew_2013_2019.dta, clear | |
tostring fips, gen(fips_string) format(%05.0f) | |
* problem: won't be able to match "Unknown Or Undefined" QCEW counties to electoral returns | |
* solution: drop them from QCEW data | |
drop if substr(fips_string, 3, 5) == "999" | |
* problem: Bedford city (51515) added to Bedford County (FIPS 51019) in 2013 | |
* solution: drop entries for Bedford City in 2013 | |
drop if fips == 51515 & year == 2013 | |
* problem: mid-year Shannon County (46113) renamed to Oglala Lakota County (46102) in mid-2015 | |
* solution: drop the QCEW entries for 2015 Shannon, keep 2015 Oglala | |
* then rename Oglala to Shannon to match electoral returns | |
drop if fips == 46113 & year == 2015 | |
replace fips = 46113 if fips == 46102 | |
* problem: can't match many Alaska returns and QCEW data | |
* solution: drop Alaska | |
drop if substr(fips_string, 1, 2) == "02" | |
drop fips_string | |
tempfile qcewdata | |
save `qcewdata' | |
* clean electoral data | |
* from https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi:10.7910/DVN/VOQCHQ | |
import delimited countypres_2000-2016.csv, clear | |
replace candidatevotes = "" if candidatevotes == "NA" | |
destring candidatevotes, replace | |
replace fips = "" if fips == "NA" | |
destring fips, replace | |
encode candidate, gen(candidate_code) | |
rename state_po state_abb | |
rename county county_name | |
rename totalvotes total_votes | |
rename candidatevotes candidate_votes | |
keep year state_abb fips candidate_code candidate_votes total_votes county_name | |
* deal with electoral county issues | |
* remove entries with no fips | |
drop if fips == . | |
* problem: Kansas city entry in electoral (FIPS 36000) not a county in QCEW | |
* weird that it has FIPS of 36000 when 36 is the FIPS code for NY | |
* solution: going to drop it because there is no clean way to allocate | |
* Kansas City votes to other counties | |
drop if fips == 36000 | |
* deal with mismatches | |
tostring fips, gen(fips_string) format(%05.0f) | |
* problem: can't match many Alaska returns and QCEW data | |
* solution: drop Alaska | |
drop if substr(fips_string, 1, 2) == "02" | |
drop fips_string | |
* problem: Bedford city (51515) added to Bedford County (51019) in 2013 | |
* but returns have both | |
* solution: add all of Bedford city returns to Bedford County | |
gen new_fips = fips | |
replace new_fips = 51019 if fips == 51515 | |
assert county_name == "Bedford" if new_fips == 51019 | |
collapse (sum) candidate_votes total_votes (first) state_abb county_name, by(new_fips year candidate_code) | |
rename new_fips fips | |
* create Obama => Obama => Trump indicator | |
keep if year >= 2008 | |
gen vote_share = candidate_votes/total_votes | |
egen rank = rank(vote_share), field by(year fips) | |
* check to make sure there is really only one winner in year X fips | |
duplicates report year fips if rank == 1 | |
assert r(unique_value) == r(N) | |
* determine who won county | |
gen obama = candidate_code == 2 & rank == 1 | |
gen trump = candidate_code == 3 & rank == 1 | |
gen mccain = candidate_code == 7 & rank == 1 | |
gen romney = candidate_code == 8 & rank == 1 | |
gen clinton = candidate_code == 5 & rank == 1 | |
collapse (max) obama trump mccain romney clinton (first) county_name total_votes, by(year state_abb fips) | |
* identify swing counties | |
reshape wide obama trump mccain romney clinton total_votes, i(fips) j(year) | |
gen pivot_to_trump = obama2008 == 1 & obama2012 == 1 & trump2016 == 1 | |
gen pivot_to_clinton = mccain2008 == 1 & romney2012 == 1 & clinton2016 == 1 | |
tab pivot_to_trump pivot_to_clinton, m | |
egen mean_total_votes = rowmean(total_votes*) | |
keep fips county_name state_abb pivot* mean_total_votes trump2016 clinton2016 | |
* merge QCEW data | |
merge 1:m fips using `qcewdata', assert(3) nogenerate | |
* confirm this is a balanced panel | |
tsset fips year | |
assert r(balanced) == "strongly balanced" | |
* some counties have no QCEW employment; drop counties with any zeros | |
tab fips, nofreq | |
local before_count = r(r) | |
tab fips if pivot_to_trump == 1, nofreq | |
local before_count_pivot = r(r) | |
gen _zeroemp = qcew_emp_avg == 0 | |
egen zeroemp = max(_zeroemp), by(fips) | |
drop if zeroemp == 1 | |
drop *zeroemp | |
tab fips, nofreq | |
local after_count = r(r) | |
tab fips if pivot_to_trump == 1, nofreq | |
local after_count_pivot = r(r) | |
di "Removing zero emp counties dropped" | |
di `before_count_pivot' - `after_count_pivot' " pivot-to-Trump counties" | |
di `before_count' - `after_count' " counties overall" | |
merge m:1 year using `cpiurs', keep(3) nogenerate | |
gen real_avg_pay = qcew_pay_avg * `cpiurs2019' / cpiurs | |
rename qcew_pay_avg nom_avg_pay | |
rename qcew_emp_avg emp | |
drop cpiurs | |
keep if year == 2013 | year == 2016 | year == 2019 | |
egen period = group(year) | |
lab def period 2 "2013 - 2016" 3 "2016 - 2019" | |
lab val period period | |
tsset fips period | |
foreach var of varlist *avg_pay emp { | |
gen Dpct_`var' = `var' / L.`var' - 1 | |
} | |
* analysis of pivot vs others | |
keep if period > 1 | |
keep fips period Dpct* mean_total_votes pivot_to_trump | |
reshape wide Dpct_real_avg_pay Dpct_nom_avg_pay Dpct_emp, i(fips) j(period) | |
foreach x in real_avg_pay nom_avg_pay emp { | |
gen faster_`x' = Dpct_`x'3 > Dpct_`x'2 | |
} | |
table pivot_to_trump, c(mean faster_nom_avg_pay mean faster_real_avg_pay) | |
collapse (mean) Dpct_real_avg_pay2 Dpct_real_avg_pay3 Dpct_nom_avg_pay2 Dpct_nom_avg_pay3 [aw=mean_total_votes], by(pivot_to_trump) | |
foreach x of varlist Dpct* { | |
gen `x'_string = string(round(`x' * 100, 0.1), "%3.1f") + "%" | |
drop `x' | |
rename `x'_string `x' | |
} | |
lab var Dpct_real_avg_pay2 "2013-2016" | |
lab var Dpct_real_avg_pay3 "2016-2019" | |
lab var Dpct_nom_avg_pay2 "2013-2016" | |
lab var Dpct_nom_avg_pay3 "2016-2019" | |
lab def pivot_to_trump 0 "Other counties" 1 "Pivot counties" | |
lab val pivot_to_trump pivot_to_trump | |
list | |
keep pivot_to_trump Dpct_real_avg_pay* | |
export excel using average_growth.xlsx, firstrow(varlabels) replace |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment