Skip to content

Instantly share code, notes, and snippets.

@smach
Created October 3, 2018 22:15
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save smach/b0157e917476923c11deb314896325c9 to your computer and use it in GitHub Desktop.
Save smach/b0157e917476923c11deb314896325c9 to your computer and use it in GitHub Desktop.
Code for importing ASCII survey data at https://www.cdc.gov/brfss/annual_data/annual_2017.html
# Survey data file downloaded manually from the CDC https://www.cdc.gov/brfss/annual_data/annual_2017.html
surveydatafile <- "LLCP2017.ASC"
# I used datapasta package's df_paste() function to create the data frame below,
# after copying the metadata table into my clipboard. Metadata is here:
# https://www.cdc.gov/brfss/annual_data/2017/llcp_varlayout_17_onecolumn.html
column_meta_data <- data.frame(stringsAsFactors=FALSE,
Starting.Column = c(1L, 17L, 19L, 19L, 21L, 23L, 32L, 36L, 36L, 63L, 64L,
65L, 66L, 67L, 68L, 69L, 71L, 73L, 75L, 76L, 77L, 78L,
79L, 80L, 81L, 84L, 85L, 90L, 91L, 93L, 95L, 97L, 98L, 99L,
100L, 101L, 102L, 103L, 104L, 105L, 106L, 107L, 108L,
109L, 110L, 111L, 112L, 113L, 114L, 115L, 116L, 117L, 118L,
120L, 121L, 122L, 123L, 125L, 162L, 163L, 164L, 173L, 174L,
175L, 176L, 177L, 178L, 180L, 182L, 183L, 187L, 191L,
192L, 193L, 194L, 195L, 196L, 197L, 198L, 199L, 200L, 201L,
203L, 204L, 205L, 206L, 209L, 211L, 213L, 215L, 218L, 221L,
224L, 227L, 230L, 233L, 234L, 236L, 239L, 242L, 244L,
247L, 250L, 253L, 254L, 255L, 261L, 262L, 263L, 264L, 270L,
290L, 291L, 292L, 293L, 296L, 299L, 301L, 303L, 305L, 306L,
307L, 308L, 309L, 310L, 311L, 312L, 314L, 315L, 316L,
317L, 318L, 319L, 320L, 321L, 322L, 323L, 324L, 325L, 326L,
327L, 328L, 329L, 330L, 331L, 332L, 333L, 334L, 335L, 337L,
338L, 340L, 342L, 344L, 347L, 348L, 349L, 350L, 351L,
353L, 355L, 357L, 359L, 361L, 363L, 365L, 366L, 367L, 368L,
370L, 371L, 396L, 397L, 398L, 400L, 401L, 402L, 403L, 404L,
405L, 406L, 407L, 408L, 409L, 411L, 413L, 414L, 416L,
417L, 418L, 419L, 420L, 421L, 422L, 423L, 424L, 427L, 430L,
431L, 432L, 434L, 435L, 436L, 437L, 439L, 441L, 443L, 444L,
446L, 447L, 450L, 453L, 456L, 457L, 458L, 460L, 461L,
462L, 464L, 465L, 466L, 467L, 468L, 469L, 470L, 471L, 472L,
473L, 474L, 475L, 476L, 477L, 479L, 480L, 481L, 482L, 483L,
684L, 685L, 686L, 687L, 688L, 695L, 730L, 731L, 732L,
737L, 739L, 1457L, 1458L, 1464L, 1494L, 1504L, 1519L, 1530L,
1587L, 1589L, 1632L, 1729L, 1730L, 1740L, 1798L, 1946L,
1947L, 1948L, 1949L, 1950L, 1951L, 1952L, 1953L, 1954L,
1955L, 1956L, 1957L, 1958L, 1959L, 1960L, 2017L, 2019L, 2023L,
2024L, 2025L, 2026L, 2027L, 2028L, 2030L, 2031L, 2033L,
2034L, 2037L, 2040L, 2045L, 2049L, 2050L, 2051L, 2052L,
2053L, 2054L, 2055L, 2056L, 2057L, 2058L, 2059L, 2062L,
2063L, 2068L, 2069L, 2073L, 2077L, 2081L, 2085L, 2089L, 2093L,
2094L, 2095L, 2096L, 2097L, 2103L, 2109L, 2110L, 2111L,
2112L, 2113L, 2114L, 2115L, 2116L, 2119L, 2122L, 2127L,
2132L, 2133L, 2134L, 2137L, 2140L, 2145L, 2150L, 2155L, 2160L,
2165L, 2166L, 2171L, 2176L, 2181L, 2186L, 2191L, 2196L,
2197L, 2198L, 2199L, 2200L, 2201L, 2202L, 2203L, 2204L,
2205L, 2206L, 2207L, 2208L),
Variable.Name = c("_STATE", "FMONTH", "IDATE", "IMONTH", "IDAY", "IYEAR",
"DISPCODE", "SEQNO", "_PSU", "CTELENM1", "PVTRESD1",
"COLGHOUS", "STATERE1", "CELLFON4", "LADULT", "NUMADULT",
"NUMMEN", "NUMWOMEN", "SAFETIME", "CTELNUM1", "CELLFON5",
"CADULT", "PVTRESD3", "CCLGHOUS", "CSTATE1", "LANDLINE",
"HHADULT", "GENHLTH", "PHYSHLTH", "MENTHLTH", "POORHLTH",
"HLTHPLN1", "PERSDOC2", "MEDCOST", "CHECKUP1", "BPHIGH4",
"BPMEDS", "CHOLCHK1", "TOLDHI2", "CHOLMED1", "CVDINFR4",
"CVDCRHD4", "CVDSTRK3", "ASTHMA3", "ASTHNOW", "CHCSCNCR",
"CHCOCNCR", "CHCCOPD1", "HAVARTH3", "ADDEPEV2", "CHCKIDNY",
"DIABETE3", "DIABAGE2", "LMTJOIN3", "ARTHDIS2", "ARTHSOCL",
"JOINPAI1", "SEX", "MARITAL", "EDUCA", "RENTHOM1",
"NUMHHOL2", "NUMPHON2", "CPDEMO1A", "VETERAN3", "EMPLOY1",
"CHILDREN", "INCOME2", "INTERNET", "WEIGHT2", "HEIGHT3",
"PREGNANT", "DEAF", "BLIND", "DECIDE", "DIFFWALK", "DIFFDRES",
"DIFFALON", "SMOKE100", "SMOKDAY2", "STOPSMK2", "LASTSMK2",
"USENOW3", "ECIGARET", "ECIGNOW", "ALCDAY5", "AVEDRNK2",
"DRNK3GE5", "MAXDRNKS", "FRUIT2", "FRUITJU2", "FVGREEN1",
"FRENCHF1", "POTATOE1", "VEGETAB2", "EXERANY2", "EXRACT11",
"EXEROFT1", "EXERHMM1", "EXRACT21", "EXEROFT2",
"EXERHMM2", "STRENGTH", "SEATBELT", "FLUSHOT6", "FLSHTMY2",
"PNEUVAC3", "SHINGLE2", "HIVTST6", "HIVTSTD3", "HIVRISK5",
"PDIABTST", "PREDIAB1", "INSULIN", "BLDSUGAR", "FEETCHK2",
"DOCTDIAB", "CHKHEMO3", "FEETCHK", "EYEEXAM", "DIABEYE",
"DIABEDU", "COPDCOGH", "COPDFLEM", "COPDBRTH", "COPDBTST",
"COPDSMOK", "HAREHAB1", "STREHAB1", "CVDASPRN", "ASPUNSAF",
"RLIVPAIN", "RDUCHART", "RDUCSTRK", "BPEATHBT", "BPSALT",
"BPALCHOL", "BPEXER", "BPEATADV", "BPSLTADV", "BPALCADV",
"BPEXRADV", "BPMEDADV", "BPHI2MR", "ARTTODAY", "ARTHWGT",
"ARTHEXER", "ARTHEDU", "ASTHMAGE", "ASATTACK", "ASERVIST",
"ASDRVIST", "ASRCHKUP", "ASACTLIM", "ASYMPTOM", "ASNOSLEP",
"ASTHMED3", "ASINHALR", "PAINACT2", "QLMENTL2", "QLSTRES2",
"QLHLTH2", "SLEPTIM1", "ADSLEEP", "SLEPDAY1", "SLEPSNO2",
"SLEPBRTH", "MEDICARE", "HLTHCVR1", "DELAYMED",
"DLYOTHER", "NOCOV121", "LSTCOVRG", "DRVISITS", "MEDSCOS1",
"CARERCVD", "MEDBILL1", "ASBIALCH", "ASBIDRNK", "ASBIBING",
"ASBIADVC", "ASBIRDUC", "CNCRDIFF", "CNCRAGE", "CNCRTYP1",
"CSRVTRT2", "CSRVDOC1", "CSRVSUM", "CSRVRTRN", "CSRVINST",
"CSRVINSR", "CSRVDEIN", "CSRVCLIN", "CSRVPAIN", "CSRVCTL1",
"SSBSUGR2", "SSBFRUT3", "WTCHSALT", "DRADVISE", "MARIJANA",
"USEMRJN1", "RSNMRJNA", "PFPPRVN2", "TYPCNTR7",
"NOBCUSE6", "IMFVPLAC", "HPVADVC2", "HPVADSHT", "TETANUS",
"LCSFIRST", "LCSLAST", "LCSNUMCG", "LCSCTSCN", "CAREGIV1",
"CRGVREL2", "CRGVLNG1", "CRGVHRS1", "CRGVPRB2", "CRGVPERS",
"CRGVHOUS", "CRGVMST2", "CRGVEXPT", "CIMEMLOS", "CDHOUSE",
"CDASSIST", "CDHELP", "CDSOCIAL", "CDDISCUS", "EMTSUPRT",
"LSATISFY", "SDHBILLS", "SDHMOVE", "HOWSAFE1", "SDHFOOD",
"SDHMEALS", "SDHMONEY", "SDHSTRES", "SXORIENT", "TRNSGNDR",
"FIREARM4", "GUNLOAD", "LOADULK2", "RCSGENDR", "RCSRLTN2",
"CASTHDX2", "CASTHNO2", "QSTVER", "QSTLANG", "MSCODE",
"_STSTR", "_STRWT", "_RAWRAKE", "_WT2RAKE", "_IMPRACE",
"_CHISPNC", "_CRACE1", "_CPRACE", "_CLLCPWT", "_DUALUSE",
"_DUALCOR", "_LLCPWT2", "_LLCPWT", "_RFHLTH", "_PHYS14D",
"_MENT14D", "_HCVU651", "_RFHYPE5", "_CHOLCH1", "_RFCHOL1",
"_MICHD", "_LTASTH1", "_CASTHM1", "_ASTHMS1", "_DRDXAR1",
"_LMTACT1", "_LMTWRK1", "_LMTSCL1", "_PRACE1", "_MRACE1",
"_HISPANC", "_RACE", "_RACEG21", "_RACEGR3", "_RACE_G1",
"_AGEG5YR", "_AGE65YR", "_AGE80", "_AGE_G", "HTIN4", "HTM4",
"WTKG3", "_BMI5", "_BMI5CAT", "_RFBMI5", "_CHLDCNT",
"_EDUCAG", "_INCOMG", "_SMOKER3", "_RFSMOK3", "_ECIGSTS",
"_CURECIG", "DRNKANY5", "DROCDY3_", "_RFBING5", "_DRNKWEK",
"_RFDRHV5", "FTJUDA2_", "FRUTDA2_", "GRENDA1_", "FRNCHDA_",
"POTADA1_", "VEGEDA2_", "_MISFRT1", "_MISVEG1", "_FRTRES1",
"_VEGRES1", "_FRUTSU1", "_VEGESU1", "_FRTLT1A", "_VEGLT1A",
"_FRT16A", "_VEG23A", "_FRUITE1", "_VEGETE1", "_TOTINDA",
"METVL11_", "METVL21_", "MAXVO2_", "FC60_", "ACTIN11_",
"ACTIN21_", "PADUR1_", "PADUR2_", "PAFREQ1_", "PAFREQ2_",
"_MINAC11", "_MINAC21", "STRFREQ_", "PAMISS1_", "PAMIN11_",
"PAMIN21_", "PA1MIN_", "PAVIG11_", "PAVIG21_",
"PA1VIGM_", "_PACAT1", "_PAINDX1", "_PA150R2", "_PA300R2",
"_PA30021", "_PASTRNG", "_PAREC1", "_PASTAE1", "_RFSEAT2",
"_RFSEAT3", "_FLSHOT6", "_PNEUMO2", "_AIDTST3"),
Field.Length = c(2L, 2L, 8L, 2L, 2L, 4L, 4L, 10L, 10L, 1L, 1L, 1L, 1L,
1L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L,
1L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L,
2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 4L,
4L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L,
3L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 1L, 2L, 3L, 3L,
2L, 3L, 3L, 3L, 1L, 1L, 6L, 1L, 1L, 1L, 6L, 1L, 1L, 1L, 1L,
3L, 3L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 2L, 2L, 3L, 1L, 1L, 1L,
1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 1L, 25L, 1L,
1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 1L,
2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 3L, 1L, 1L, 2L, 1L,
1L, 1L, 2L, 2L, 2L, 1L, 2L, 1L, 3L, 3L, 3L, 1L, 1L, 2L,
1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 2L, 2L, 1L, 6L, 10L, 10L, 10L, 2L, 1L, 2L, 2L, 10L,
1L, 10L, 10L, 10L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 1L,
2L, 1L, 3L, 3L, 5L, 4L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 3L, 1L, 5L, 1L, 4L, 4L, 4L, 4L, 4L, 4L, 1L, 1L, 1L,
1L, 6L, 6L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 3L, 5L, 5L,
1L, 1L, 3L, 3L, 5L, 5L, 5L, 5L, 5L, 1L, 5L, 5L, 5L, 5L, 5L,
5L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L)
)
column_positions <- column_meta_data$Field.Length
column_names <- column_meta_data$Variable.Name
surveydatawidths <- readr::fwf_widths(column_positions, col_names = column_names)
surveydata <- readr::read_fwf(surveydatafile, surveydatawidths)
# Suggest using the fst package to save this in a compressed format that's very quick to read
fst::write.fst(surveydata, "surveydata.fst", compress = 100 )
# To read it in the future:
surveydata2 <- fst::read.fst("surveydata.fst")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment