Skip to content

Instantly share code, notes, and snippets.

@a8dx
Created March 28, 2018 19:51
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save a8dx/7e9d5af24101fc66aafa739577713b59 to your computer and use it in GitHub Desktop.
Save a8dx/7e9d5af24101fc66aafa739577713b59 to your computer and use it in GitHub Desktop.
Create a better ZIP5-County lookup table
/*
Filename: ZIP5_County_Lookup.do
Author: Anthony D'Agostino (ald at satanford dot edu)
Date Created: 02/09/2018
Last Edited: 03/27/2018
Purpose: Generate a merged zip5-county crosswalk that leverages the strengths of both the US Census and HUD ZCTA crosswalks.
*/
clear
clear all
clear matrix
set more off
set maxvar 25000, permanently
loc basePath "<your path here>"
// read in county codes, downloaded from: https://www.census.gov/geo/reference/codes/cou.html
import delimited using "`basePath'/national_county.txt", delim(",") clear stringcols(_all) varnames(nonames)
ren v1 state
ren v2 state_fips
ren v3 county_fips
ren v4 county_name
ren v5 fipsclasscode
gen county = state_fips + county_fips
tempfile counties
save `counties'
*import excel using "`basePath'/ZIP_COUNTY_032011.xlsx", firstrow clear
import excel using "`basePath'/ZIP_COUNTY_122017.xlsx", firstrow clear
// 39455 unique entries before any transformations
merge m:1 county using `counties', update replace
tab _merge
drop if _merge == 2
drop _merge
gsort zip -tot_ratio
bys zip: gen totOrder = _n
gsort zip -res_ratio
bys zip: gen resOrder = _n
tw (hist res_ratio if resOrder == 1), xtitle("Residential Address Percent") graphregion(color(white) lwidth(large))
keep if resOrder == 1
// some basic cleaning
replace county_name = "Oglala Lakota, SD" if county == "46102"
replace state = "SD" if county == "46102"
replace state_fips = "46" if county == "46102"
replace county_fips = "102" if county == "46102"
// see http://www.nws.noaa.gov/om/notification/scn17-57kusilvak_ak.htm
replace county_name = "Kusilvak Census Area" if county == "02158"
replace state = "AK" if county == "02158"
replace state_fips = "02" if county == "02158"
replace county_fips = "158" if county == "02158"
drop resOrder totOrder
ren zip zip5
destring, replace
tempfile zip5_county
save `zip5_county'
import delimited using "`basePath'/zcta_county_rel_10.txt", delim(",") clear stringcols(_all) // varnames(nonames)
** 33120 unique ZCTA5
ren state state_fips_rel
ren county county_fips_rel
gen county = state_fips_rel + county_fips_rel
merge m:1 county using `counties', update replace
tab _merge
drop if _merge == 2
drop _merge
destring, replace
ren state state_rel
ren county county_rel
ren county_name county_name_rel
gen popabove50 = cond(zpoppct >= 50, 1, 0)
// apportionment: keep county matches with largest pop ZCTA5 share
gsort zcta5 -poppt
bys zcta5: gen popOrder = _n
keep if popOrder == 1
ren zpop pop_zip5
ren zcta5 zip5
keep zip5 pop_zip5 zpoppct county_rel state_rel county_name_rel state_fips_rel county_fips_rel
merge 1:1 zip5 using `zip5_county', update replace
tab _merge
drop _merge
// prioritize Census values, replace with HUD where missing
foreach x in "county" "state_fips" "county_fips" {
gen `x'_final = `x'_rel
replace `x'_final = `x' if `x'_final == .
drop `x'
ren `x'_final `x'
}
foreach x in "county_name" "state" {
gen `x'_final = `x'_rel
replace `x'_final = `x' if `x'_final == ""
drop `x'
ren `x'_final `x'
}
keep zip5 county county_fips county_name state state_fips
save "`basePath'/ZIP5_County_Crosswalk", replace
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment