Skip to content

Instantly share code, notes, and snippets.

@yabyzq
Created October 10, 2016 13:32
Show Gist options
  • Save yabyzq/546618c44dc2a4c6016a8c4e9388edd4 to your computer and use it in GitHub Desktop.
Save yabyzq/546618c44dc2a4c6016a8c4e9388edd4 to your computer and use it in GitHub Desktop.
library(mice)
library(missForest)
library(VIM)
library(Amelia)
library(Hmisc)
library(mi)
# Generate missing values using prodNA() from missForest.
# Fix the RNG seed first: prodNA() removes cells at random, so without a
# seed the missingness pattern -- and every error figure computed from it
# further down -- would differ on each run.
set.seed(2016)
data <- iris # NOTE(review): not used in the visible script; kept in case later code relies on it
summary(iris)
# Replace 10% of all cells with NA.
iris.mis <- prodNA(iris, noNA = 0.1)
summary(iris.mis)
# Visualise the missingness.
# VIM::aggr(): amount of missing values per variable and per combination
# of variables.
mice_plot <- aggr(iris.mis, col = c("grey", "yellow"),
                  numbers = TRUE, sortVars = TRUE,
                  labels = names(iris.mis), cex.axis = .7, gap = 3)
# Amelia::missmap(): map of where the missing cells are.
# No y.labels argument here: y.labels annotates rows (observations) and
# needs a matching y.at, so the variable names do not belong in it.
missmap(iris.mis, col = c("yellow", "grey"))
md.pattern(iris.mis) # ugly way to visualise
# Hmisc - Statistical/Predictive - linear

# --- Single-value imputation -------------------------------------------
imputed.Hmiscmean <- iris.mis
# Numeric columns: replace every NA by the column mean
# (impute() also accepts other summaries, e.g. "random", min, max).
numeric.cols <- c("Sepal.Length", "Sepal.Width", "Petal.Length", "Petal.Width")
imputed.Hmiscmean[numeric.cols] <- lapply(
  iris.mis[numeric.cols],
  function(column) as.vector(impute(column, mean))
)
# Factor column: with its default settings impute() fills a factor with
# its most frequent level. Rebuild the factor with the original level set
# so that no level is dropped or reordered.
imputed.Hmiscmean$Species <- factor(
  as.vector(impute(iris.mis$Species)),
  levels = levels(iris.mis$Species)
)

# --- Predictive imputation with aregImpute() ---------------------------
# x = TRUE keeps the data matrix of the final imputation in `$x`.
imputed.Hmiscpred <- data.frame(
  aregImpute(
    ~ Sepal.Length + Sepal.Width + Petal.Length + Petal.Width + Species,
    data = iris.mis, n.impute = 5, x = TRUE
  )$x
)
# Species comes back as integer codes; map code i to the i-th original
# level explicitly, so the mapping stays correct even if some code does
# not occur in the imputed column.
imputed.Hmiscpred$Species <- factor(
  imputed.Hmiscpred$Species,
  levels = seq_along(levels(iris.mis$Species)),
  labels = levels(iris.mis$Species)
)
# missForest - imputes using a random forest for each variable
imputed.mf <- missForest(xmis = iris.mis)
# Show the completed data set.
imputed.mf[["ximp"]]
# mi - multiple imputation on a missing_data.frame
mdf <- missing_data.frame(iris.mis) # warnings about missingness patterns
show(mdf)
image(mdf)
hist(mdf)
imputed.mi <- mi(mdf)
plot(imputed.mi)
# Call complete() with an explicit namespace: both mi and mice provide a
# function of that name, so an unqualified call depends on the order of
# the library() calls.
# Only the first five columns are kept -- presumably the original iris
# variables, with extra columns added by mi after them (TODO confirm).
mi::complete(imputed.mi, m = 1)[, 1:5]
# MICE - assumes data are missing at random
# m = number of imputed data sets, maxit = number of iterations
imputed.mice <- mice(iris.mis, m = 5, method = "pmm", maxit = 20,
                     printFlag = FALSE)
imputed.mice$imp$Species # look at the imputed values
# mice::complete() must be named explicitly: mi is attached after mice,
# so a bare complete() would dispatch to mi's version, which does not
# handle a mice result.
mice::complete(imputed.mice, 1) # using first set
# Amelia - missing at random; needs normally distributed data.
# Species is declared as a nominal variable via `noms`.
imputed.Amelia <- amelia(
  x = iris.mis,
  m = 5,
  noms = "Species",
  parallel = "multicore"
)
# Show the first of the imputed data sets.
imputed.Amelia$imputations[[1L]]
# Testing performance against the true data:
# NRMSE = error on continuous variables, PFC = error on categorical ones.
# The completed data sets are collected in a named list and scored in one
# pass, so the result is a numeric table (one row per method) rather than
# separate character vectors.
imputed.sets <- list(
  "HMISC Mean"     = imputed.Hmiscmean,
  "HMISC Pred"     = imputed.Hmiscpred,
  "MI"             = mi::complete(imputed.mi, m = 1)[, 1:5],
  "Missing Forest" = imputed.mf$ximp,
  "Mice"           = mice::complete(imputed.mice, 1),
  "Amelia"         = imputed.Amelia$imputations[[1]]
)
error.table <- do.call(
  rbind,
  lapply(imputed.sets, function(ximp) mixError(ximp, iris.mis, iris))
)
error.table
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment