# Last active Jul 30, 2018
# Clean missing values (NA) in R in these easy steps
# install tidyverse (one-time setup; uncomment if it is not installed yet)
# install.packages("tidyverse")
# load it: tidyr provides drop_na() / replace_na(), ggplot2 provides msleep
library(tidyverse)

# step 1 -- review example dataset 'msleep' in tidyverse package
# count the missing values in each column
colSums(is.na(msleep))

# step 2.1 -- remove all rows with missing values
clean_msleep <- drop_na(msleep)

# step 2.2 -- impute NA with mean or median values
# Compute both statistics from the observed values BEFORE replacing anything:
# once the NAs are filled in, there is nothing left for a second replace_na()
# to do, and a median taken afterwards would include the imputed means.
mean_sleep_rem <- mean(msleep$sleep_rem, na.rm = TRUE)
median_sleep_rem <- median(msleep$sleep_rem, na.rm = TRUE)

# median imputation -- kept as a separate vector so that it is built from the
# original NAs rather than from the mean-imputed column created below
sleep_rem_median_imputed <- replace_na(msleep$sleep_rem, median_sleep_rem)

# mean imputation -- overwrites the column in our working copy of msleep
msleep$sleep_rem <- replace_na(msleep$sleep_rem, mean_sleep_rem)

# export clean dataframe to our working directory
# (write.csv's default row.names = TRUE also writes the row numbers as an
# unnamed first column)
write.csv(clean_msleep, "clean_msleep.csv")
# finished !!
