Created
October 10, 2016 13:32
-
-
Save yabyzq/546618c44dc2a4c6016a8c4e9388edd4 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Packages compared in this script. Attach order matters: mice and mi both
# provide complete(), and the package attached last masks the earlier one.
library(mice)
library(missForest)
library(VIM)
library(Amelia)
library(Hmisc)
library(mi)

# Generate missing values using prodNA() from missForest.
data <- iris
summary(iris)
# noNA is the fraction of entries replaced by NA. prodNA() picks the entries
# at random, so results differ between runs unless a seed is set beforehand.
iris.mis <- prodNA(iris, noNA = 0.1)
summary(iris.mis)
# Visualise the missingness.
# VIM::aggr() plots the amount of missing data per variable and per
# combination of variables.
mice_plot <- aggr(iris.mis, col = c("grey", "yellow"),
                  numbers = TRUE, sortVars = TRUE,
                  labels = names(iris.mis), cex.axis = 0.7, gap = 3)
# Amelia::missmap() draws a map of missing vs. observed cells. Its y.labels
# argument is for row labels, so the column names are not passed to it.
missmap(iris.mis, col = c("yellow", "grey"))
md.pattern(iris.mis)  # mice's tabular view of the patterns (ugly but compact)
# Hmisc - statistical / predictive imputation.

# Single-value imputation: fill the NAs of each numeric column with the mean
# of its observed values. Other fills accepted by impute() include "random",
# min and max. as.vector() strips the "impute" class and attributes.
imputed.Hmiscmean <- iris.mis
imputed.Hmiscmean$Sepal.Length <-
  with(iris.mis, as.vector(Hmisc::impute(Sepal.Length, mean)))
imputed.Hmiscmean$Sepal.Width <-
  with(iris.mis, as.vector(Hmisc::impute(Sepal.Width, mean)))
imputed.Hmiscmean$Petal.Length <-
  with(iris.mis, as.vector(Hmisc::impute(Petal.Length, mean)))
imputed.Hmiscmean$Petal.Width <-
  with(iris.mis, as.vector(Hmisc::impute(Petal.Width, mean)))
# NOTE(review): for a factor, impute() is understood to fill with the most
# frequent level rather than applying the supplied function - confirm against
# the Hmisc documentation.
imputed.Hmiscmean$Species <-
  with(iris.mis, as.factor(as.vector(Hmisc::impute(Species, median))))

# Predictive imputation with aregImpute(); x = TRUE keeps the imputed data
# matrix in the result so it can be extracted with $x.
imputed.Hmiscpred <- data.frame(
  Hmisc::aregImpute(
    ~ Sepal.Length + Sepal.Width + Petal.Length + Petal.Width + Species,
    data = iris.mis, n.impute = 5, x = TRUE
  )$x
)
# Species presumably comes back as numeric codes; turn it into a factor and
# restore the original level labels.
imputed.Hmiscpred$Species <- factor(imputed.Hmiscpred$Species)
levels(imputed.Hmiscpred$Species) <- c("setosa", "versicolor", "virginica")
# missForest - imputes each variable using a random forest.
imputed.mf <- missForest(iris.mis)
imputed.mf$ximp  # the imputed data set
# mi
mdf <- missing_data.frame(iris.mis)  # warnings about missingness patterns
show(mdf)
image(mdf)
hist(mdf)
imputed.mi <- mi(mdf)
plot(imputed.mi)
# Qualified because mice also provides complete(). Only the first five
# columns are kept; presumably the rest are columns added by mi - TODO confirm.
mi::complete(imputed.mi, m = 1)[, 1:5]
# MICE - assumes the data are missing at random.
# m = number of imputed data sets, maxit = number of iterations,
# method = "pmm" selects predictive mean matching.
imputed.mice <- mice(iris.mis, m = 5, method = "pmm", maxit = 20,
                     printFlag = FALSE)
imputed.mice$imp$Species  # look at the imputed values for Species
# Qualified with mice:: because mi is attached after mice, so a bare
# complete() would resolve to mi's version instead.
mice::complete(imputed.mice, 1)  # use the first imputed data set
# Amelia - assumes missing at random and (roughly) normally distributed data.
# noms marks Species as a nominal variable; m = number of imputed data sets.
imputed.Amelia <- amelia(iris.mis, m = 5, parallel = "multicore",
                         noms = "Species")
imputed.Amelia$imputations[[1]]  # first imputed data set
# Testing performance with missForest::mixError(imputed, with-NAs, truth):
# NRMSE = error on continuous variables, PFC = error on categorical variables.
# For the multiple-imputation methods only the first imputed set is scored.
c("HMISC Mean ", mixError(imputed.Hmiscmean, iris.mis, iris))
c("HMISC Pred ", mixError(imputed.Hmiscpred, iris.mis, iris))
c("MI ", mixError(mi::complete(imputed.mi, m = 1)[, 1:5], iris.mis, iris))
c("Missing Forest: ", mixError(imputed.mf$ximp, iris.mis, iris))
c("Mice: ", mixError(mice::complete(imputed.mice, 1), iris.mis, iris))
c("Amelia: ", mixError(imputed.Amelia$imputations[[1]], iris.mis, iris))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment