Skip to content

Instantly share code, notes, and snippets.

@jilmun
Created February 22, 2016 15:09
Show Gist options
  • Save jilmun/e3f06655044c8d549f32 to your computer and use it in GitHub Desktop.
Save jilmun/e3f06655044c8d549f32 to your computer and use it in GitHub Desktop.
# Four ways to impute missing entries of a categorical vector `var`
# (expected to be defined before this script runs). An entry counts as
# missing when it is NA or the empty string "".
#
# Results:
#   var1 - missing entries replaced by the most frequent category (mode)
#   var2 - each missing entry replaced by an independent draw from the
#          observed values (follows the sample distribution)
#   var3 - each missing entry replaced by an independent uniform draw
#          from the set of categories
#   var4 - missing entries assigned to their own category, "-1"

# Factors are converted to their labels up front; mixing a factor with
# character replacements would otherwise expose its integer level codes.
var.values <- if (is.factor(var)) as.character(var) else var
var.missing <- is.na(var.values) | var.values == ""
n.missing <- sum(var.missing)

# Frequency of every category, with the empty-string "category" dropped.
var.freq <- table(var)
var.freq <- var.freq[rownames(var.freq) != ""]

# impute with mode, most frequent category
# which.max() returns a single category even when several are tied, so
# every missing entry receives the same value. NA when nothing is observed.
var.mode <- if (any(var.freq > 0)) {
  names(var.freq)[which.max(var.freq)]
} else {
  NA_character_
}
var1 <- var.values
var1[var.missing] <- var.mode

# impute with random category using sample distribution
# One draw per missing entry (with replacement) so the imputed values
# follow the observed distribution instead of all sharing a single draw.
var.filled <- var.values[!var.missing]
var.random <- if (length(var.filled) > 0) {
  var.filled[sample.int(length(var.filled), n.missing, replace = TRUE)]
} else {
  rep(NA, n.missing)
}
var2 <- var.values
var2[var.missing] <- var.random

# impute with random category if actual distr is not representative
# Every category is equally likely, regardless of how often it occurs.
var.categories <- rownames(var.freq)
var.random <- if (length(var.categories) > 0) {
  var.categories[sample.int(length(var.categories), n.missing, replace = TRUE)]
} else {
  rep(NA, n.missing)
}
var3 <- var.values
var3[var.missing] <- var.random

# impute with its own category
var4 <- var.values
var4[var.missing] <- "-1"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment