Created
May 29, 2024 21:40
-
-
Save cbgoodman/3e248e387001b9045ba4769c1604e361 to your computer and use it in GitHub Desktop.
R script to download, extract, and build the single-year-of-age SEER U.S. County Population Data dataset
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(tidyverse)
library(data.table)

# Download, decompress, and parse a SEER single-year-of-age county
# population file into one row per record with named columns.
#
# This is for IL; change the URL to whichever version you want/need from:
# https://seer.cancer.gov/popdata/download.html

# Lookup from the two-character age code ("00" .. "85") to its label.
# Code "85" is the open-ended top group, hence the "+" in its label.
# Codes outside this table (or missing) map to NA.
age_labels <- setNames(
  c(str_c(0:84, " years"), "85+ years"),
  sprintf("%02d", 0:85)
)

# `cmd =` makes it explicit that the string is a shell pipeline (curl fetches
# the gzip file, funzip decompresses it to stdout) rather than a file path.
# Each record is expected to arrive as a single character column, V1, which
# is then sliced by fixed character positions.
il_singleage <- fread(
  cmd = "curl https://seer.cancer.gov/popdata/yr1969_2022.singleages/il.1969_2022.singleages.txt.gz | funzip",
  colClasses = "character",
  header = FALSE
) |>
  mutate(
    yr = str_sub(V1, 1, 4),
    state_abbv = str_sub(V1, 5, 6),
    state_fips = str_sub(V1, 7, 8),
    county_fips = str_sub(V1, 9, 11),
    registry = str_sub(V1, 12, 13),
    race = str_sub(V1, 14, 14),
    origin = str_sub(V1, 15, 15),
    sex = str_sub(V1, 16, 16),
    age = str_sub(V1, 17, 18),
    # str_sub() stops at the end of the string, so an end index past the
    # record length is harmless.
    # NOTE(review): confirm the population field width against the SEER
    # data dictionary.
    pop = as.numeric(str_sub(V1, 19, 27)),
    # Vectorised lookup instead of one case_when() branch per age.
    age_group = unname(age_labels[age])
  ) |>
  # Combined state + county code; the component columns are kept.
  unite(fips, state_fips:county_fips, sep = "", remove = FALSE) |>
  # The raw fixed-width record is no longer needed once parsed.
  select(-V1)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment