Skip to content

Instantly share code, notes, and snippets.

@markdanese
Created September 10, 2021 01:24
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save markdanese/d053aab591483e82dd73a16e336b33ad to your computer and use it in GitHub Desktop.
Save markdanese/d053aab591483e82dd73a16e336b33ad to your computer and use it in GitHub Desktop.
NIS 2017 load script
# copied from original "load_data.R" program created for 2016 data
# primary change was that there are more data fields in 2017
library(data.table)
library(magrittr)
library(readr)
library(fst)
# load core data --------------------------------------------------------------------
# Reads the fixed-width 2017 core file using the layout in the spec CSV and
# stores the result as an fst file. The spec must provide `varname`, `type`,
# `start`, and `end` columns.
nis_specs <- fread("./docs/nis_specs_core_2017.csv")
nis_specs[, width := end - start + 1]
nis_specs[, varname := tolower(varname)]
# Map the spec's storage types to readr's compact column codes:
# "int" -> "i" (integer); "byte"/"double"/"long" -> "d" (double);
# anything else -> "c" (character).
nis_specs[, type := ifelse(
  type == "int", "i",
  ifelse(type %in% c("byte", "double", "long"), "d", "c")
)]
# fwf_widths() lays fields out back to back, so the widths only line up with
# the file when each field starts right after the previous one ends. Fail
# loudly rather than silently reading shifted columns.
# NOTE(review): this also assumes the first field begins at the first
# character of each line -- confirm against the spec.
if (!isTRUE(all(nis_specs$start[-1] == nis_specs$end[-nrow(nis_specs)] + 1))) {
  stop("nis_specs fields must be contiguous and listed in file order", call. = FALSE)
}
# Special codes that are converted to NA on read (see the `recode` notes at
# the bottom of this file). They are spelled as strings so they match the raw
# field text exactly. The earlier `as.character(quote(c(...)))` form also
# produced the function name "c" as its first element, which made a field
# containing just "c" read as NA.
missing_values <- c(
  "-99", "-88", "-66",
  "-99.9999999", "-88.8888888", "-66.6666666",
  "-9", "-8", "-6", "-5",
  "-9999", "-8888", "-6666",
  "-999999999", "-888888888", "-666666666",
  "-999", "-888", "-666"
)
dt <-
  read_fwf(
    "~/Dropbox (Outcomes Insights)/Data repository/HCUP/NIS_2017/NIS_2017_Core.ASC",
    col_positions = fwf_widths(nis_specs$width),
    col_types = paste0(nis_specs$type, collapse = ""),
    trim_ws = TRUE,
    # n_max = 1000,  # uncomment to read only the first rows while testing
    na = missing_values
  )
setDT(dt)
# Replace readr's default column names with the spec's lower-cased names.
setnames(dt, nis_specs$varname)
write_fst(dt, "./data/analysis/2017/core.fst", compress = 100)
# load hospital data ----------------------------------------------------------------
# specs unchanged from 2016
# Reads the fixed-width 2017 hospital file using the layout in the spec CSV
# and stores the result as an fst file. The spec must provide `varname`,
# `type`, `start`, and `end` columns.
nis_specs <- fread("./docs/nis_specs_hospital.csv")
nis_specs[, width := end - start + 1]
nis_specs[, varname := tolower(varname)]
# Map the spec's storage types to readr's compact column codes:
# "int" -> "i" (integer); "byte"/"double"/"long" -> "d" (double);
# anything else -> "c" (character).
nis_specs[, type := ifelse(
  type == "int", "i",
  ifelse(type %in% c("byte", "double", "long"), "d", "c")
)]
# fwf_widths() lays fields out back to back, so the widths only line up with
# the file when each field starts right after the previous one ends. Fail
# loudly rather than silently reading shifted columns.
# NOTE(review): this also assumes the first field begins at the first
# character of each line -- confirm against the spec.
if (!isTRUE(all(nis_specs$start[-1] == nis_specs$end[-nrow(nis_specs)] + 1))) {
  stop("nis_specs fields must be contiguous and listed in file order", call. = FALSE)
}
# Special codes that are converted to NA on read. They are spelled as strings
# so they match the raw field text exactly. The earlier
# `as.character(quote(c(...)))` form also produced the function name "c" as
# its first element, which made a field containing just "c" read as NA.
missing_values <- c(
  "-99", "-88", "-66",
  "-99.9999999", "-88.8888888", "-66.6666666",
  "-9", "-8", "-6", "-5",
  "-9999", "-8888", "-6666",
  "-999999999", "-888888888", "-666666666",
  "-999", "-888", "-666"
)
dt <-
  read_fwf(
    "~/Dropbox (Outcomes Insights)/Data repository/HCUP/NIS_2017/NIS_2017_Hospital.ASC",
    col_positions = fwf_widths(nis_specs$width),
    col_types = paste0(nis_specs$type, collapse = ""),
    trim_ws = TRUE,
    # n_max = 1000,  # uncomment to read only the first rows while testing
    na = missing_values
  )
setDT(dt)
# Replace readr's default column names with the spec's lower-cased names.
setnames(dt, nis_specs$varname)
write_fst(dt, "./data/analysis/2017/hospital.fst", compress = 100)
# load severity data ----------------------------------------------------------------
# specs unchanged from 2016
# Reads the fixed-width 2017 severity file using the layout in the spec CSV
# and stores the result as an fst file. The spec must provide `varname`,
# `type`, `start`, and `end` columns.
nis_specs <- fread("./docs/nis_specs_severity.csv")
nis_specs[, width := end - start + 1]
nis_specs[, varname := tolower(varname)]
# Map the spec's storage types to readr's compact column codes:
# "int" -> "i" (integer); "byte"/"double"/"long" -> "d" (double);
# anything else -> "c" (character).
nis_specs[, type := ifelse(
  type == "int", "i",
  ifelse(type %in% c("byte", "double", "long"), "d", "c")
)]
# fwf_widths() lays fields out back to back, so the widths only line up with
# the file when each field starts right after the previous one ends. Fail
# loudly rather than silently reading shifted columns.
# NOTE(review): this also assumes the first field begins at the first
# character of each line -- confirm against the spec.
if (!isTRUE(all(nis_specs$start[-1] == nis_specs$end[-nrow(nis_specs)] + 1))) {
  stop("nis_specs fields must be contiguous and listed in file order", call. = FALSE)
}
# Special codes that are converted to NA on read. They are spelled as strings
# so they match the raw field text exactly. The earlier
# `as.character(quote(c(...)))` form also produced the function name "c" as
# its first element, which made a field containing just "c" read as NA.
missing_values <- c(
  "-99", "-88", "-66",
  "-99.9999999", "-88.8888888", "-66.6666666",
  "-9", "-8", "-6", "-5",
  "-9999", "-8888", "-6666",
  "-999999999", "-888888888", "-666666666",
  "-999", "-888", "-666"
)
dt <-
  read_fwf(
    "~/Dropbox (Outcomes Insights)/Data repository/HCUP/NIS_2017/NIS_2017_Severity.ASC",
    col_positions = fwf_widths(nis_specs$width),
    col_types = paste0(nis_specs$type, collapse = ""),
    trim_ws = TRUE,
    # n_max = 1000,  # uncomment to read only the first rows while testing
    na = missing_values
  )
setDT(dt)
# Replace readr's default column names with the spec's lower-cased names.
setnames(dt, nis_specs$varname)
write_fst(dt, "./data/analysis/2017/severity.fst", compress = 100)
# Convert the cc2017 CSV to fst with lower-cased column names.
# NOTE(review): presumably the HCUP cost-to-charge ratio file for the 2017
# NIS -- confirm.
# quote = "'" tells fread to treat the single quote as the field-quoting
# character.
cc2017 <- fread(
  "./data/analysis/2017/cc2017NIS.csv",
  quote = "'"
)
# Rename every column, in place, to its lower-case form.
setnames(cc2017, tolower(names(cc2017)))
write_fst(
  x = cc2017,
  path = "./data/analysis/2017/cc2017.fst",
  compress = 100
)
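# storing variable labels here ------------------------------------------------------
# Everything below is commented-out reference material written in Stata-style
# `label var` / `recode` syntax; none of it is executed by R.
# core file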
# label var AGE "Age in years at admission"
# label var AGE_NEONATE "Neonatal age (first 28 days after birth) indicator"
# label var AMONTH "Admission month"
# label var AWEEKEND "Admission day is a weekend"
# label var DIED "Died during hospitalization"
# label var DISCWT "NIS discharge weight"
# label var DISPUNIFORM "Disposition of patient (uniform)"
# label var DQTR "Discharge quarter"
# label var DRG "DRG in effect on discharge date"
# label var DRGVER "DRG grouper version used on discharge date"
# label var DRG_NoPOA "DRG in use on discharge date, calculated without POA"
# label var DXVER "Diagnosis Version"
# label var ELECTIVE "Elective versus non-elective admission"
# label var FEMALE "Indicator of sex"
# label var HCUP_ED "HCUP Emergency Department service indicator"
# label var HOSP_DIVISION "Census Division of hospital"
# label var HOSP_NIS "NIS hospital number"
# label var I10_DX1 "ICD-10-CM Diagnosis 1"
# label var I10_DX2 "ICD-10-CM Diagnosis 2"
# label var I10_DX3 "ICD-10-CM Diagnosis 3"
# label var I10_DX4 "ICD-10-CM Diagnosis 4"
# label var I10_DX5 "ICD-10-CM Diagnosis 5"
# label var I10_DX6 "ICD-10-CM Diagnosis 6"
# label var I10_DX7 "ICD-10-CM Diagnosis 7"
# label var I10_DX8 "ICD-10-CM Diagnosis 8"
# label var I10_DX9 "ICD-10-CM Diagnosis 9"
# label var I10_DX10 "ICD-10-CM Diagnosis 10"
# label var I10_DX11 "ICD-10-CM Diagnosis 11"
# label var I10_DX12 "ICD-10-CM Diagnosis 12"
# label var I10_DX13 "ICD-10-CM Diagnosis 13"
# label var I10_DX14 "ICD-10-CM Diagnosis 14"
# label var I10_DX15 "ICD-10-CM Diagnosis 15"
# label var I10_DX16 "ICD-10-CM Diagnosis 16"
# label var I10_DX17 "ICD-10-CM Diagnosis 17"
# label var I10_DX18 "ICD-10-CM Diagnosis 18"
# label var I10_DX19 "ICD-10-CM Diagnosis 19"
# label var I10_DX20 "ICD-10-CM Diagnosis 20"
# label var I10_DX21 "ICD-10-CM Diagnosis 21"
# label var I10_DX22 "ICD-10-CM Diagnosis 22"
# label var I10_DX23 "ICD-10-CM Diagnosis 23"
# label var I10_DX24 "ICD-10-CM Diagnosis 24"
# label var I10_DX25 "ICD-10-CM Diagnosis 25"
# label var I10_DX26 "ICD-10-CM Diagnosis 26"
# label var I10_DX27 "ICD-10-CM Diagnosis 27"
# label var I10_DX28 "ICD-10-CM Diagnosis 28"
# label var I10_DX29 "ICD-10-CM Diagnosis 29"
# label var I10_DX30 "ICD-10-CM Diagnosis 30"
# label var I10_DX31 "ICD-10-CM Diagnosis 31"
# label var I10_DX32 "ICD-10-CM Diagnosis 32"
# label var I10_DX33 "ICD-10-CM Diagnosis 33"
# label var I10_DX34 "ICD-10-CM Diagnosis 34"
# label var I10_DX35 "ICD-10-CM Diagnosis 35"
# label var I10_DX36 "ICD-10-CM Diagnosis 36"
# label var I10_DX37 "ICD-10-CM Diagnosis 37"
# label var I10_DX38 "ICD-10-CM Diagnosis 38"
# label var I10_DX39 "ICD-10-CM Diagnosis 39"
# label var I10_DX40 "ICD-10-CM Diagnosis 40"
# label var I10_NDX "ICD-10-CM Number of diagnoses on this record"
# label var I10_NPR "ICD-10-PCS Number of procedures on this record"
# label var I10_PR1 "ICD-10-PCS Procedure 1"
# label var I10_PR2 "ICD-10-PCS Procedure 2"
# label var I10_PR3 "ICD-10-PCS Procedure 3"
# label var I10_PR4 "ICD-10-PCS Procedure 4"
# label var I10_PR5 "ICD-10-PCS Procedure 5"
# label var I10_PR6 "ICD-10-PCS Procedure 6"
# label var I10_PR7 "ICD-10-PCS Procedure 7"
# label var I10_PR8 "ICD-10-PCS Procedure 8"
# label var I10_PR9 "ICD-10-PCS Procedure 9"
# label var I10_PR10 "ICD-10-PCS Procedure 10"
# label var I10_PR11 "ICD-10-PCS Procedure 11"
# label var I10_PR12 "ICD-10-PCS Procedure 12"
# label var I10_PR13 "ICD-10-PCS Procedure 13"
# label var I10_PR14 "ICD-10-PCS Procedure 14"
# label var I10_PR15 "ICD-10-PCS Procedure 15"
# label var I10_PR16 "ICD-10-PCS Procedure 16"
# label var I10_PR17 "ICD-10-PCS Procedure 17"
# label var I10_PR18 "ICD-10-PCS Procedure 18"
# label var I10_PR19 "ICD-10-PCS Procedure 19"
# label var I10_PR20 "ICD-10-PCS Procedure 20"
# label var I10_PR21 "ICD-10-PCS Procedure 21"
# label var I10_PR22 "ICD-10-PCS Procedure 22"
# label var I10_PR23 "ICD-10-PCS Procedure 23"
# label var I10_PR24 "ICD-10-PCS Procedure 24"
# label var I10_PR25 "ICD-10-PCS Procedure 25"
# label var KEY_NIS "NIS record number"
# label var LOS "Length of stay (cleaned)"
# label var MDC "MDC in effect on discharge date"
# label var MDC_NoPOA "MDC in use on discharge date, calculated without POA"
# label var NIS_STRATUM "NIS hospital stratum"
# label var PAY1 "Primary expected payer (uniform)"
# label var PL_NCHS "Patient Location: NCHS Urban-Rural Code"
# label var PRDAY1 "Number of days from admission to I10_PR1"
# label var PRDAY2 "Number of days from admission to I10_PR2"
# label var PRDAY3 "Number of days from admission to I10_PR3"
# label var PRDAY4 "Number of days from admission to I10_PR4"
# label var PRDAY5 "Number of days from admission to I10_PR5"
# label var PRDAY6 "Number of days from admission to I10_PR6"
# label var PRDAY7 "Number of days from admission to I10_PR7"
# label var PRDAY8 "Number of days from admission to I10_PR8"
# label var PRDAY9 "Number of days from admission to I10_PR9"
# label var PRDAY10 "Number of days from admission to I10_PR10"
# label var PRDAY11 "Number of days from admission to I10_PR11"
# label var PRDAY12 "Number of days from admission to I10_PR12"
# label var PRDAY13 "Number of days from admission to I10_PR13"
# label var PRDAY14 "Number of days from admission to I10_PR14"
# label var PRDAY15 "Number of days from admission to I10_PR15"
# label var PRDAY16 "Number of days from admission to I10_PR16"
# label var PRDAY17 "Number of days from admission to I10_PR17"
# label var PRDAY18 "Number of days from admission to I10_PR18"
# label var PRDAY19 "Number of days from admission to I10_PR19"
# label var PRDAY20 "Number of days from admission to I10_PR20"
# label var PRDAY21 "Number of days from admission to I10_PR21"
# label var PRDAY22 "Number of days from admission to I10_PR22"
# label var PRDAY23 "Number of days from admission to I10_PR23"
# label var PRDAY24 "Number of days from admission to I10_PR24"
# label var PRDAY25 "Number of days from admission to I10_PR25"
# label var PRVER "Procedure Version"
# label var RACE "Race (uniform)"
# label var TOTCHG "Total charges (cleaned)"
# label var TRAN_IN "Transfer in indicator"
# label var TRAN_OUT "Transfer out indicator"
# label var YEAR "Calendar year"
# label var ZIPINC_QRTL "Median household income national quartile for patient ZIP Code"
#
# *** Convert special values to missing values ***
# recode AGE (-99 -88 -66=.)
# recode AGE_NEONATE (-9 -8 -6 -5=.)
# recode AMONTH (-9 -8 -6 -5=.)
# recode AWEEKEND (-9 -8 -6 -5=.)
# recode DIED (-9 -8 -6 -5=.)
# recode DISCWT (-99.9999999 -88.8888888 -66.6666666=.)
# recode DISPUNIFORM (-9 -8 -6 -5=.)
# recode DQTR (-9 -8 -6 -5=.)
# recode DRG (-99 -88 -66=.)
# recode DRGVER (-9 -8 -6 -5=.)
# recode DRG_NoPOA (-99 -88 -66=.)
# recode DXVER (-9 -8 -6 -5=.)
# recode ELECTIVE (-9 -8 -6 -5=.)
# recode FEMALE (-9 -8 -6 -5=.)
# recode HCUP_ED (-99 -88 -66=.)
# recode HOSP_DIVISION (-9 -8 -6 -5=.)
# recode HOSP_NIS (-9999 -8888 -6666=.)
# recode I10_NDX (-9 -8 -6 -5=.)
# recode I10_NPR (-9 -8 -6 -5=.)
# recode KEY_NIS (-999999999 -888888888 -666666666=.)
# recode LOS (-9999 -8888 -6666=.)
# recode MDC (-9 -8 -6 -5=.)
# recode MDC_NoPOA (-9 -8 -6 -5=.)
# recode NIS_STRATUM (-999 -888 -666=.)
# recode PAY1 (-9 -8 -6 -5=.)
# recode PL_NCHS (-99 -88 -66=.)
# recode PRDAY1 (-99 -88 -66=.)
# recode PRDAY2 (-99 -88 -66=.)
# recode PRDAY3 (-99 -88 -66=.)
# recode PRDAY4 (-99 -88 -66=.)
# recode PRDAY5 (-99 -88 -66=.)
# recode PRDAY6 (-99 -88 -66=.)
# recode PRDAY7 (-99 -88 -66=.)
# recode PRDAY8 (-99 -88 -66=.)
# recode PRDAY9 (-99 -88 -66=.)
# recode PRDAY10 (-99 -88 -66=.)
# recode PRDAY11 (-99 -88 -66=.)
# recode PRDAY12 (-99 -88 -66=.)
# recode PRDAY13 (-99 -88 -66=.)
# recode PRDAY14 (-99 -88 -66=.)
# recode PRDAY15 (-99 -88 -66=.)
# recode PRDAY16 (-99 -88 -66=.)
# recode PRDAY17 (-99 -88 -66=.)
# recode PRDAY18 (-99 -88 -66=.)
# recode PRDAY19 (-99 -88 -66=.)
# recode PRDAY20 (-99 -88 -66=.)
# recode PRDAY21 (-99 -88 -66=.)
# recode PRDAY22 (-99 -88 -66=.)
# recode PRDAY23 (-99 -88 -66=.)
# recode PRDAY24 (-99 -88 -66=.)
# recode PRDAY25 (-99 -88 -66=.)
# recode PRVER (-9 -8 -6 -5=.)
# recode RACE (-9 -8 -6 -5=.)
# recode TOTCHG (-999999999 -888888888 -666666666=.)
# recode TRAN_IN (-9 -8 -6 -5=.)
# recode TRAN_OUT (-9 -8 -6 -5=.)
# recode YEAR (-999 -888 -666=.)
# recode ZIPINC_QRTL (-9 -8 -6 -5=.)
# hospital file
# label var DISCWT "NIS discharge weight"
# label var HOSP_BEDSIZE "Bed size of hospital (STRATA)"
# label var HOSP_DIVISION "Census Division of hospital (STRATA)"
# label var HOSP_LOCTEACH "Location/teaching status of hospital (STRATA)"
# label var HOSP_NIS "NIS hospital number"
# label var HOSP_REGION "Region of hospital"
# label var H_CONTRL "Control/ownership of hospital (STRATA)"
# label var NIS_STRATUM "NIS hospital stratum"
# label var N_DISC_U "Number of universe discharges in the stratum"
# label var N_HOSP_U "Number of universe hospitals in the stratum"
# label var S_DISC_U "Number of sample discharges in the stratum"
# label var S_HOSP_U "Number of sample hospitals in the stratum"
# label var TOTAL_DISC "Total number of discharges from this hospital in the NIS"
# label var YEAR "Calendar year"
@jenkinsamj
Copy link

Do you happen to have the updated code for the nis_specs csv to include the additional data fields (like you did for 2016)?

@markdanese
Copy link
Author

I have not looked at any of the newer data, so I don't have any updates.

@jenkinsamj
Copy link

I thought you might have the updated code given you posted the above for 2017 (e.g. the change in I10_DX** up to 40)? Just saves me some extra editing if you did.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment