Last active
February 23, 2018 21:56
-
-
Save gomesfellipe/f22e54b6f8e9c82908ed30a11eaed514 to your computer and use it in GitHub Desktop.
Funções para análise multivariada
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Bartlett's test of sphericity.
#
# Null hypothesis: the correlation matrix is an identity matrix (|R| = 1),
# i.e. every off-diagonal correlation is zero. A significant result indicates
# that some relationships exist between the variables.
#
# Args:
#   x: data frame or matrix with one column per variable; it is passed to
#      cor(), so the columns are expected to be numeric. Rows containing any
#      missing value are dropped before the statistic is computed.
#
# Returns:
#   An object of class "htest" holding the chi-squared statistic, its degrees
#   of freedom, the upper-tail p-value, the method label and the data name.
Bartlett.sphericity.test <- function(x)
{
  method <- "Teste de esfericidade de Bartlett"
  data.name <- deparse(substitute(x))
  # Index rows directly instead of calling subset(): subset() evaluates its
  # condition inside the data frame, so a column literally named `x` would
  # shadow the argument and only that column would be checked for NAs.
  x <- x[complete.cases(x), , drop = FALSE]
  n <- nrow(x)
  p <- ncol(x)
  # Take log|R| from determinant() rather than log(det(.)), which underflows
  # to -Inf when the determinant is very small.
  log.det <- as.numeric(determinant(cor(x), logarithm = TRUE)$modulus)
  chisq <- (1 - n + (2 * p + 5) / 6) * log.det
  df <- p * (p - 1) / 2
  # Computed before the names are attached so the p-value stays unnamed.
  p.value <- pchisq(chisq, df, lower.tail = FALSE)
  names(chisq) <- "X-squared"
  names(df) <- "df"
  return(structure(list(statistic = chisq, parameter = df, p.value = p.value,
                        method = method, data.name = data.name),
                   class = "htest"))
}
# Example call. NOTE(review): `dados` is not defined anywhere in the visible
# source; it must already exist in the session before this line is run.
Bartlett.sphericity.test(dados)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# source: https://github.com/vqv/ggbiplot/blob/master/R/ggscreeplot.r | |
# ggscreeplot.r | |
# | |
# Copyright 2011 Vincent Q. Vu. | |
# | |
# This program is free software; you can redistribute it and/or | |
# modify it under the terms of the GNU General Public License | |
# as published by the Free Software Foundation; either version 2 | |
# of the License, or (at your option) any later version. | |
# | |
# This program is distributed in the hope that it will be useful, | |
# but WITHOUT ANY WARRANTY; without even the implied warranty of | |
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
# GNU General Public License for more details. | |
# | |
# You should have received a copy of the GNU General Public License | |
# along with this program; if not, write to the Free Software | |
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. | |
# | |
#' Screeplot for Principal Components
#'
#' @param pcobj an object returned by prcomp() or princomp(); only its
#'   `sdev` component is read
#' @param type the type of scree plot. 'pev' corresponds to the proportion of
#'   explained variance, i.e. the eigenvalues divided by the trace. 'cev'
#'   corresponds to the cumulative proportion of explained variance, i.e. the
#'   partial sum of the first k eigenvalues divided by the trace.
#' @return the plot object built by ggplot() with a point layer and a path
#'   layer, one position per principal component
#' @export
#' @examples
#'   data(wine)
#'   wine.pca <- prcomp(wine, scale. = TRUE)
#'   print(ggscreeplot(wine.pca))
#'
ggscreeplot <- function(pcobj, type = c('pev', 'cev'))
{
  type <- match.arg(type)

  # Component variances (squared standard deviations)
  d <- pcobj$sdev^2

  yvar <- switch(type,
                 pev = d / sum(d),
                 cev = cumsum(d) / sum(d))

  yvar.lab <- switch(type,
                     pev = 'proportion of explained variance',
                     cev = 'cumulative proportion of explained variance')

  # seq_along() instead of 1:length(d): the latter yields c(1, 0) when `d` is
  # empty, which makes data.frame() fail on mismatched column lengths.
  df <- data.frame(PC = seq_along(d), yvar = yvar)

  ggplot(data = df, aes(x = PC, y = yvar)) +
    xlab('principal component number') + ylab(yvar.lab) +
    geom_point() + geom_path()
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Kaiser-Meyer-Olkin (KMO) test - assesses sampling adequacy.
# The value ranges from 0 to 1: zero indicates the data are unsuitable for
# factor analysis, above 0.5 is acceptable, above 0.8 is recommended.
#
# Args:
#   x: data frame or matrix with one column per variable; it is passed to
#      cor(), so the columns are expected to be numeric. Rows containing any
#      missing value are dropped first.
#
# Returns:
#   A list with `KMO` (the overall measure) and `MSA` (one measure per
#   variable, computed column-wise).
kmo <- function(x)
{
  # Index rows directly instead of calling subset(): subset() evaluates its
  # condition inside the data frame, so a column literally named `x` would
  # shadow the argument and only that column would be checked for NAs.
  x <- x[complete.cases(x), , drop = FALSE]
  r <- cor(x)                       # correlation matrix
  r2 <- r^2                         # squared correlations
  i <- solve(r)                     # inverse of the correlation matrix
  d <- diag(i)                      # diagonal of the inverse
  p2 <- (-i / sqrt(outer(d, d)))^2  # squared partial correlations
  # Only off-diagonal entries enter the sums below.
  diag(r2) <- 0
  diag(p2) <- 0
  KMO <- sum(r2) / (sum(r2) + sum(p2))
  MSA <- colSums(r2) / (colSums(r2) + colSums(p2))
  return(list(KMO = KMO, MSA = MSA))
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Plot a k-means clustering on the first two principal components.
#
# Args:
#   df: data passed to both kmeans() and prcomp(). At least two columns are
#       needed because the first two principal components are plotted.
#   cluster: value forwarded to kmeans() as `centers` (default 3).
#
# Returns:
#   The plot object built by ggplot(): points coloured by cluster, each with
#   a black text label taken from the row names of `df`.
plot_kmeans <- function(df, cluster = 3) {
  # Cluster assignment, run on the data exactly as supplied.
  km_fit <- kmeans(df, centers = cluster, nstart = 100)

  # PCA with scaling; `scale.` is spelled out rather than relying on partial
  # argument matching of `scale`.
  pca_fit <- prcomp(df, scale. = TRUE)

  # A matrix without row names gives NULL here, which would silently drop the
  # label column; fall back to the row index.
  point_labels <- rownames(df)
  if (is.null(point_labels)) {
    point_labels <- as.character(seq_len(nrow(df)))
  }

  # Collect everything the plot needs in one data frame.
  plot_data <- data.frame(
    cluster = as.factor(km_fit$cluster),
    fact_1 = as.numeric(pca_fit$x[, 1]),
    fact_2 = as.numeric(pca_fit$x[, 2]),
    label = point_labels,
    row.names = NULL,
    stringsAsFactors = FALSE
  )

  ggplot(plot_data, aes(fact_1, fact_2, color = cluster)) +
    geom_point() +
    geom_text(aes(label = label),
              size = 3,
              vjust = 1,
              color = "black")
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment