Created
October 24, 2016 02:42
-
-
Save PallCreaker/3952a58d8b7cafb5ac7c0994ac8a11b8 to your computer and use it in GitHub Desktop.
R(統計) 変数毎に基本統計量(summary)を算出する。int,num,factorに対応。 ref: http://qiita.com/PallCreaker/items/17008c97ad4536f4d512
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
> output <- mySummary(iris)
> output
colnames class inform sum sd mean Min Median Max NA.s
2 Sepal.Length numeric <NA> 876.5 0.8281 5.8433 4.3 5.8 7.9 0
3 Sepal.Width numeric <NA> 458.6 0.4359 3.0573 2 3 4.4 0
4 Petal.Length numeric <NA> 563.7 1.7653 3.758 1 4.35 6.9 0
5 Petal.Width numeric <NA> 179.9 0.7622 1.1993 0.1 1.3 2.5 0
6 Species factor setosa:50, versicolor:50, virginica:50 <NA> <NA> <NA> <NA> <NA> <NA> 0
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#' Compute basic summary statistics for every column of a data frame
#'
#' Produces one output row per column of `df`:
#' * numeric (integer or double) columns get sum, standard deviation, mean,
#'   minimum, median and maximum, all computed with missing values removed;
#'   `sd` and `mean` are rounded to 4 decimal places;
#' * factor and logical columns get their `summary()` output collapsed into a
#'   single "name:value, name:value" string in `inform`;
#' * columns of any other type only get their name, class and NA count.
#'
#' @param df A data frame.
#' @return `NA` when `df` is not a data frame. Otherwise a data frame with one
#'   row per column of `df` and the character columns `colnames`, `class`,
#'   `inform`, `sum`, `sd`, `mean`, `Min`, `Median`, `Max` and `NA.s`. Cells
#'   that do not apply to a column are `NA`. A column with several classes
#'   (e.g. an ordered factor) has them joined with "/" in `class`. Row names
#'   start at 2.
mySummary <- function(df) {
  # Non-data-frame input is answered with a bare NA rather than an error.
  if (!is.data.frame(df)) return(NA)

  out.names <- c(
    "colnames", "class", "inform", "sum", "sd",
    "mean", "Min", "Median", "Max", "NA.s"
  )

  # Build the ten output cells for column `i` as one character vector.
  summarise.column <- function(i) {
    # `[[` always yields the column itself, whereas `df[, i]` depends on the
    # data frame flavour's `drop` behaviour.
    x <- df[[i]]

    # Every statistic starts out missing and is filled in only by the branch
    # that applies, so nothing carries over from another column or from a
    # same-named variable in an enclosing environment.
    cal.summary <- NA_character_
    cal.sum <- cal.sd <- cal.mean <- NA_real_
    cal.min <- cal.median <- cal.max <- NA_real_

    if (is.numeric(x)) {
      # Without any observed value min()/max() would warn and return
      # Inf/-Inf and mean() would return NaN; leave the statistics NA.
      if (!all(is.na(x))) {
        # as.numeric() keeps the sum of a large integer column from
        # overflowing to NA.
        cal.sum <- sum(as.numeric(x), na.rm = TRUE)
        cal.mean <- mean(x, na.rm = TRUE)
        cal.sd <- stats::sd(x, na.rm = TRUE)
        cal.min <- min(x, na.rm = TRUE)
        cal.median <- stats::median(x, na.rm = TRUE)
        cal.max <- max(x, na.rm = TRUE)
      }
    } else if (is.factor(x) || is.logical(x)) {
      counts <- summary(x)
      labelled <- paste(names(counts), counts, sep = ":")
      cal.summary <- paste(labelled, collapse = ", ")
    }

    # c() coerces every cell to character because the first one is a string.
    c(
      names(df)[i],
      # class() may return several strings; collapse them so the row always
      # has exactly one cell per output column.
      paste(class(x), collapse = "/"),
      cal.summary,
      cal.sum,
      round(cal.sd, 4),
      round(cal.mean, 4),
      cal.min,
      cal.median,
      cal.max,
      sum(is.na(x))
    )
  }

  # One column of `cells` per input column; seq_along() copes with a data
  # frame that has no columns at all.
  cells <- vapply(
    seq_along(df), summarise.column, character(length(out.names))
  )

  new.df <- as.data.frame(t(cells), stringsAsFactors = FALSE)
  names(new.df) <- out.names
  # Row names start at 2 to stay compatible with earlier output of this
  # function, which was built around a placeholder first row that got dropped.
  rownames(new.df) <- seq_len(nrow(new.df)) + 1L
  new.df
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment