Created
September 10, 2021 01:24
-
-
Save markdanese/d053aab591483e82dd73a16e336b33ad to your computer and use it in GitHub Desktop.
NIS 2017 load script
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# copied from original "load_data.R" program created for 2016 data
# primary change was that there are more data fields in 2017
library(data.table) | |
library(magrittr) | |
library(readr) | |
library(fst) | |
# load core data --------------------------------------------------------------------
# Read the fixed-width 2017 core file using the column layout in the spec CSV,
# convert the sentinel codes to NA, and store the result as an fst file.

# One spec row per field; the code below relies on the columns `start`, `end`,
# `varname` and `type`.
nis_specs <- fread("./docs/nis_specs_core_2017.csv")
nis_specs[, width := end - start + 1]
nis_specs[, varname := tolower(varname)]
# Map spec types to readr's one-letter column codes:
# "int" -> "i" (integer); "byte"/"double"/"long" -> "d" (double); anything else -> "c" (character).
nis_specs[, type := ifelse(type == "int", "i", ifelse(type %in% c("byte", "double", "long"), "d", "c"))]

# Strings that read_fwf() should turn into NA (the special "missing" codes listed
# in the recode notes at the bottom of this file).
# They are written as literal strings on purpose: the earlier
# as.character(quote(c(...))) form also produced the function name "c" as its
# first element, so a field containing only "c" would have been read as NA.
missing_values <- c(
  "-99", "-88", "-66",
  "-99.9999999", "-88.8888888", "-66.6666666",
  "-9", "-8", "-6", "-5",
  "-9999", "-8888", "-6666",
  "-999999999", "-888888888", "-666666666",
  "-999", "-888", "-666"
)

# NOTE(review): fwf_widths() lays fields out back to back from column 1, so this
# assumes the spec has no gaps between `end` and the next `start` -- confirm.
dt <-
  read_fwf(
    "~/Dropbox (Outcomes Insights)/Data repository/HCUP/NIS_2017/NIS_2017_Core.ASC",
    col_positions = fwf_widths(nis_specs$width),
    col_types = paste0(nis_specs$type, collapse = ""),
    trim_ws = TRUE,
    # n_max = 1000,
    na = missing_values
  )

# Convert to data.table by reference and apply the lower-cased spec names.
setDT(dt)
setnames(dt, nis_specs$varname)

write_fst(dt, "./data/analysis/2017/core.fst", compress = 100)
# load hospital data ----------------------------------------------------------------
# Read the fixed-width 2017 hospital file using the column layout in the spec CSV,
# convert the sentinel codes to NA, and store the result as an fst file.
# specs unchanged from 2016

# One spec row per field; the code below relies on the columns `start`, `end`,
# `varname` and `type`.
nis_specs <- fread("./docs/nis_specs_hospital.csv")
nis_specs[, width := end - start + 1]
nis_specs[, varname := tolower(varname)]
# Map spec types to readr's one-letter column codes:
# "int" -> "i" (integer); "byte"/"double"/"long" -> "d" (double); anything else -> "c" (character).
nis_specs[, type := ifelse(type == "int", "i", ifelse(type %in% c("byte", "double", "long"), "d", "c"))]

# Strings that read_fwf() should turn into NA (the special "missing" codes).
# They are written as literal strings on purpose: the earlier
# as.character(quote(c(...))) form also produced the function name "c" as its
# first element, so a field containing only "c" would have been read as NA.
missing_values <- c(
  "-99", "-88", "-66",
  "-99.9999999", "-88.8888888", "-66.6666666",
  "-9", "-8", "-6", "-5",
  "-9999", "-8888", "-6666",
  "-999999999", "-888888888", "-666666666",
  "-999", "-888", "-666"
)

# NOTE(review): fwf_widths() lays fields out back to back from column 1, so this
# assumes the spec has no gaps between `end` and the next `start` -- confirm.
dt <-
  read_fwf(
    "~/Dropbox (Outcomes Insights)/Data repository/HCUP/NIS_2017/NIS_2017_Hospital.ASC",
    col_positions = fwf_widths(nis_specs$width),
    col_types = paste0(nis_specs$type, collapse = ""),
    trim_ws = TRUE,
    # n_max = 1000,
    na = missing_values
  )

# Convert to data.table by reference and apply the lower-cased spec names.
setDT(dt)
setnames(dt, nis_specs$varname)

write_fst(dt, "./data/analysis/2017/hospital.fst", compress = 100)
# load severity data ----------------------------------------------------------------
# Read the fixed-width 2017 severity file using the column layout in the spec CSV,
# convert the sentinel codes to NA, and store the result as an fst file.
# specs unchanged from 2016

# One spec row per field; the code below relies on the columns `start`, `end`,
# `varname` and `type`.
nis_specs <- fread("./docs/nis_specs_severity.csv")
nis_specs[, width := end - start + 1]
nis_specs[, varname := tolower(varname)]
# Map spec types to readr's one-letter column codes:
# "int" -> "i" (integer); "byte"/"double"/"long" -> "d" (double); anything else -> "c" (character).
nis_specs[, type := ifelse(type == "int", "i", ifelse(type %in% c("byte", "double", "long"), "d", "c"))]

# Strings that read_fwf() should turn into NA (the special "missing" codes).
# They are written as literal strings on purpose: the earlier
# as.character(quote(c(...))) form also produced the function name "c" as its
# first element, so a field containing only "c" would have been read as NA.
missing_values <- c(
  "-99", "-88", "-66",
  "-99.9999999", "-88.8888888", "-66.6666666",
  "-9", "-8", "-6", "-5",
  "-9999", "-8888", "-6666",
  "-999999999", "-888888888", "-666666666",
  "-999", "-888", "-666"
)

# NOTE(review): fwf_widths() lays fields out back to back from column 1, so this
# assumes the spec has no gaps between `end` and the next `start` -- confirm.
dt <-
  read_fwf(
    "~/Dropbox (Outcomes Insights)/Data repository/HCUP/NIS_2017/NIS_2017_Severity.ASC",
    col_positions = fwf_widths(nis_specs$width),
    col_types = paste0(nis_specs$type, collapse = ""),
    trim_ws = TRUE,
    # n_max = 1000,
    na = missing_values
  )

# Convert to data.table by reference and apply the lower-cased spec names.
setDT(dt)
setnames(dt, nis_specs$varname)

write_fst(dt, "./data/analysis/2017/severity.fst", compress = 100)
# Read the cc2017NIS CSV (fields are quoted with single quotes), lower-case its
# column names in place, and save it next to the other 2017 fst files.
cc2017 <- fread(
  "./data/analysis/2017/cc2017NIS.csv",
  quote = "'"
)
setnames(cc2017, tolower(names(cc2017)))
write_fst(
  x = cc2017,
  path = "./data/analysis/2017/cc2017.fst",
  compress = 100
)
# storing variable labels here ------------------------------------------------------ | |
# core file | |
# label var AGE "Age in years at admission" | |
# label var AGE_NEONATE "Neonatal age (first 28 days after birth) indicator" | |
# label var AMONTH "Admission month" | |
# label var AWEEKEND "Admission day is a weekend" | |
# label var DIED "Died during hospitalization" | |
# label var DISCWT "NIS discharge weight" | |
# label var DISPUNIFORM "Disposition of patient (uniform)" | |
# label var DQTR "Discharge quarter" | |
# label var DRG "DRG in effect on discharge date" | |
# label var DRGVER "DRG grouper version used on discharge date" | |
# label var DRG_NoPOA "DRG in use on discharge date, calculated without POA" | |
# label var DXVER "Diagnosis Version" | |
# label var ELECTIVE "Elective versus non-elective admission" | |
# label var FEMALE "Indicator of sex" | |
# label var HCUP_ED "HCUP Emergency Department service indicator" | |
# label var HOSP_DIVISION "Census Division of hospital" | |
# label var HOSP_NIS "NIS hospital number" | |
# label var I10_DX1 "ICD-10-CM Diagnosis 1" | |
# label var I10_DX2 "ICD-10-CM Diagnosis 2" | |
# label var I10_DX3 "ICD-10-CM Diagnosis 3" | |
# label var I10_DX4 "ICD-10-CM Diagnosis 4" | |
# label var I10_DX5 "ICD-10-CM Diagnosis 5" | |
# label var I10_DX6 "ICD-10-CM Diagnosis 6" | |
# label var I10_DX7 "ICD-10-CM Diagnosis 7" | |
# label var I10_DX8 "ICD-10-CM Diagnosis 8" | |
# label var I10_DX9 "ICD-10-CM Diagnosis 9" | |
# label var I10_DX10 "ICD-10-CM Diagnosis 10" | |
# label var I10_DX11 "ICD-10-CM Diagnosis 11" | |
# label var I10_DX12 "ICD-10-CM Diagnosis 12" | |
# label var I10_DX13 "ICD-10-CM Diagnosis 13" | |
# label var I10_DX14 "ICD-10-CM Diagnosis 14" | |
# label var I10_DX15 "ICD-10-CM Diagnosis 15" | |
# label var I10_DX16 "ICD-10-CM Diagnosis 16" | |
# label var I10_DX17 "ICD-10-CM Diagnosis 17" | |
# label var I10_DX18 "ICD-10-CM Diagnosis 18" | |
# label var I10_DX19 "ICD-10-CM Diagnosis 19" | |
# label var I10_DX20 "ICD-10-CM Diagnosis 20" | |
# label var I10_DX21 "ICD-10-CM Diagnosis 21" | |
# label var I10_DX22 "ICD-10-CM Diagnosis 22" | |
# label var I10_DX23 "ICD-10-CM Diagnosis 23" | |
# label var I10_DX24 "ICD-10-CM Diagnosis 24" | |
# label var I10_DX25 "ICD-10-CM Diagnosis 25" | |
# label var I10_DX26 "ICD-10-CM Diagnosis 26" | |
# label var I10_DX27 "ICD-10-CM Diagnosis 27" | |
# label var I10_DX28 "ICD-10-CM Diagnosis 28" | |
# label var I10_DX29 "ICD-10-CM Diagnosis 29" | |
# label var I10_DX30 "ICD-10-CM Diagnosis 30" | |
# label var I10_DX31 "ICD-10-CM Diagnosis 31" | |
# label var I10_DX32 "ICD-10-CM Diagnosis 32" | |
# label var I10_DX33 "ICD-10-CM Diagnosis 33" | |
# label var I10_DX34 "ICD-10-CM Diagnosis 34" | |
# label var I10_DX35 "ICD-10-CM Diagnosis 35" | |
# label var I10_DX36 "ICD-10-CM Diagnosis 36" | |
# label var I10_DX37 "ICD-10-CM Diagnosis 37" | |
# label var I10_DX38 "ICD-10-CM Diagnosis 38" | |
# label var I10_DX39 "ICD-10-CM Diagnosis 39" | |
# label var I10_DX40 "ICD-10-CM Diagnosis 40" | |
# label var I10_NDX "ICD-10-CM Number of diagnoses on this record" | |
# label var I10_NPR "ICD-10-PCS Number of procedures on this record" | |
# label var I10_PR1 "ICD-10-PCS Procedure 1" | |
# label var I10_PR2 "ICD-10-PCS Procedure 2" | |
# label var I10_PR3 "ICD-10-PCS Procedure 3" | |
# label var I10_PR4 "ICD-10-PCS Procedure 4" | |
# label var I10_PR5 "ICD-10-PCS Procedure 5" | |
# label var I10_PR6 "ICD-10-PCS Procedure 6" | |
# label var I10_PR7 "ICD-10-PCS Procedure 7" | |
# label var I10_PR8 "ICD-10-PCS Procedure 8" | |
# label var I10_PR9 "ICD-10-PCS Procedure 9" | |
# label var I10_PR10 "ICD-10-PCS Procedure 10" | |
# label var I10_PR11 "ICD-10-PCS Procedure 11" | |
# label var I10_PR12 "ICD-10-PCS Procedure 12" | |
# label var I10_PR13 "ICD-10-PCS Procedure 13" | |
# label var I10_PR14 "ICD-10-PCS Procedure 14" | |
# label var I10_PR15 "ICD-10-PCS Procedure 15" | |
# label var I10_PR16 "ICD-10-PCS Procedure 16" | |
# label var I10_PR17 "ICD-10-PCS Procedure 17" | |
# label var I10_PR18 "ICD-10-PCS Procedure 18" | |
# label var I10_PR19 "ICD-10-PCS Procedure 19" | |
# label var I10_PR20 "ICD-10-PCS Procedure 20" | |
# label var I10_PR21 "ICD-10-PCS Procedure 21" | |
# label var I10_PR22 "ICD-10-PCS Procedure 22" | |
# label var I10_PR23 "ICD-10-PCS Procedure 23" | |
# label var I10_PR24 "ICD-10-PCS Procedure 24" | |
# label var I10_PR25 "ICD-10-PCS Procedure 25" | |
# label var KEY_NIS "NIS record number" | |
# label var LOS "Length of stay (cleaned)" | |
# label var MDC "MDC in effect on discharge date" | |
# label var MDC_NoPOA "MDC in use on discharge date, calculated without POA" | |
# label var NIS_STRATUM "NIS hospital stratum" | |
# label var PAY1 "Primary expected payer (uniform)" | |
# label var PL_NCHS "Patient Location: NCHS Urban-Rural Code" | |
# label var PRDAY1 "Number of days from admission to I10_PR1" | |
# label var PRDAY2 "Number of days from admission to I10_PR2" | |
# label var PRDAY3 "Number of days from admission to I10_PR3" | |
# label var PRDAY4 "Number of days from admission to I10_PR4" | |
# label var PRDAY5 "Number of days from admission to I10_PR5" | |
# label var PRDAY6 "Number of days from admission to I10_PR6" | |
# label var PRDAY7 "Number of days from admission to I10_PR7" | |
# label var PRDAY8 "Number of days from admission to I10_PR8" | |
# label var PRDAY9 "Number of days from admission to I10_PR9" | |
# label var PRDAY10 "Number of days from admission to I10_PR10" | |
# label var PRDAY11 "Number of days from admission to I10_PR11" | |
# label var PRDAY12 "Number of days from admission to I10_PR12" | |
# label var PRDAY13 "Number of days from admission to I10_PR13" | |
# label var PRDAY14 "Number of days from admission to I10_PR14" | |
# label var PRDAY15 "Number of days from admission to I10_PR15" | |
# label var PRDAY16 "Number of days from admission to I10_PR16" | |
# label var PRDAY17 "Number of days from admission to I10_PR17" | |
# label var PRDAY18 "Number of days from admission to I10_PR18" | |
# label var PRDAY19 "Number of days from admission to I10_PR19" | |
# label var PRDAY20 "Number of days from admission to I10_PR20" | |
# label var PRDAY21 "Number of days from admission to I10_PR21" | |
# label var PRDAY22 "Number of days from admission to I10_PR22" | |
# label var PRDAY23 "Number of days from admission to I10_PR23" | |
# label var PRDAY24 "Number of days from admission to I10_PR24" | |
# label var PRDAY25 "Number of days from admission to I10_PR25" | |
# label var PRVER "Procedure Version" | |
# label var RACE "Race (uniform)" | |
# label var TOTCHG "Total charges (cleaned)" | |
# label var TRAN_IN "Transfer in indicator" | |
# label var TRAN_OUT "Transfer out indicator" | |
# label var YEAR "Calendar year" | |
# label var ZIPINC_QRTL "Median household income national quartile for patient ZIP Code" | |
# | |
# *** Convert special values to missing values *** | |
# recode AGE (-99 -88 -66=.) | |
# recode AGE_NEONATE (-9 -8 -6 -5=.) | |
# recode AMONTH (-9 -8 -6 -5=.) | |
# recode AWEEKEND (-9 -8 -6 -5=.) | |
# recode DIED (-9 -8 -6 -5=.) | |
# recode DISCWT (-99.9999999 -88.8888888 -66.6666666=.) | |
# recode DISPUNIFORM (-9 -8 -6 -5=.) | |
# recode DQTR (-9 -8 -6 -5=.) | |
# recode DRG (-99 -88 -66=.) | |
# recode DRGVER (-9 -8 -6 -5=.) | |
# recode DRG_NoPOA (-99 -88 -66=.) | |
# recode DXVER (-9 -8 -6 -5=.) | |
# recode ELECTIVE (-9 -8 -6 -5=.) | |
# recode FEMALE (-9 -8 -6 -5=.) | |
# recode HCUP_ED (-99 -88 -66=.) | |
# recode HOSP_DIVISION (-9 -8 -6 -5=.) | |
# recode HOSP_NIS (-9999 -8888 -6666=.) | |
# recode I10_NDX (-9 -8 -6 -5=.) | |
# recode I10_NPR (-9 -8 -6 -5=.) | |
# recode KEY_NIS (-999999999 -888888888 -666666666=.) | |
# recode LOS (-9999 -8888 -6666=.) | |
# recode MDC (-9 -8 -6 -5=.) | |
# recode MDC_NoPOA (-9 -8 -6 -5=.) | |
# recode NIS_STRATUM (-999 -888 -666=.) | |
# recode PAY1 (-9 -8 -6 -5=.) | |
# recode PL_NCHS (-99 -88 -66=.) | |
# recode PRDAY1 (-99 -88 -66=.) | |
# recode PRDAY2 (-99 -88 -66=.) | |
# recode PRDAY3 (-99 -88 -66=.) | |
# recode PRDAY4 (-99 -88 -66=.) | |
# recode PRDAY5 (-99 -88 -66=.) | |
# recode PRDAY6 (-99 -88 -66=.) | |
# recode PRDAY7 (-99 -88 -66=.) | |
# recode PRDAY8 (-99 -88 -66=.) | |
# recode PRDAY9 (-99 -88 -66=.) | |
# recode PRDAY10 (-99 -88 -66=.) | |
# recode PRDAY11 (-99 -88 -66=.) | |
# recode PRDAY12 (-99 -88 -66=.) | |
# recode PRDAY13 (-99 -88 -66=.) | |
# recode PRDAY14 (-99 -88 -66=.) | |
# recode PRDAY15 (-99 -88 -66=.) | |
# recode PRDAY16 (-99 -88 -66=.) | |
# recode PRDAY17 (-99 -88 -66=.) | |
# recode PRDAY18 (-99 -88 -66=.) | |
# recode PRDAY19 (-99 -88 -66=.) | |
# recode PRDAY20 (-99 -88 -66=.) | |
# recode PRDAY21 (-99 -88 -66=.) | |
# recode PRDAY22 (-99 -88 -66=.) | |
# recode PRDAY23 (-99 -88 -66=.) | |
# recode PRDAY24 (-99 -88 -66=.) | |
# recode PRDAY25 (-99 -88 -66=.) | |
# recode PRVER (-9 -8 -6 -5=.) | |
# recode RACE (-9 -8 -6 -5=.) | |
# recode TOTCHG (-999999999 -888888888 -666666666=.) | |
# recode TRAN_IN (-9 -8 -6 -5=.) | |
# recode TRAN_OUT (-9 -8 -6 -5=.) | |
# recode YEAR (-999 -888 -666=.) | |
# recode ZIPINC_QRTL (-9 -8 -6 -5=.) | |
# hospital file | |
# label var DISCWT "NIS discharge weight" | |
# label var HOSP_BEDSIZE "Bed size of hospital (STRATA)" | |
# label var HOSP_DIVISION "Census Division of hospital (STRATA)" | |
# label var HOSP_LOCTEACH "Location/teaching status of hospital (STRATA)" | |
# label var HOSP_NIS "NIS hospital number" | |
# label var HOSP_REGION "Region of hospital" | |
# label var H_CONTRL "Control/ownership of hospital (STRATA)" | |
# label var NIS_STRATUM "NIS hospital stratum" | |
# label var N_DISC_U "Number of universe discharges in the stratum" | |
# label var N_HOSP_U "Number of universe hospitals in the stratum" | |
# label var S_DISC_U "Number of sample discharges in the stratum" | |
# label var S_HOSP_U "Number of sample hospitals in the stratum" | |
# label var TOTAL_DISC "Total number of discharges from this hospital in the NIS" | |
# label var YEAR "Calendar year" |
I have not looked at any of the newer data, so I don't have any updates.
I thought you might have the updated code given you posted the above for 2017 (e.g. the change in I10_DX** up to 40)? Just saves me some extra editing if you did.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Do you happen to have the updated code for the nis_specs csv to include the additional data fields (like you did for 2016)?