Skip to content

Instantly share code, notes, and snippets.

@gomesfellipe
Last active February 23, 2018 21:56
Show Gist options
  • Save gomesfellipe/f22e54b6f8e9c82908ed30a11eaed514 to your computer and use it in GitHub Desktop.
Save gomesfellipe/f22e54b6f8e9c82908ed30a11eaed514 to your computer and use it in GitHub Desktop.
Funções para análise multivariada
# Bartlett's test of sphericity. The null hypothesis is that the correlation
# matrix is an identity matrix ($|R| = 1$), i.e. that every entry off the main
# diagonal is zero. A significant result indicates that some of the variables
# are related to each other.
#
# Argument:
#   x  data whose columns are the variables; rows containing missing values
#      are dropped before the correlation matrix is computed.
# Returns an object of class "htest" with the statistic ("X-squared"), its
# degrees of freedom ("df"), the upper-tail chi-squared p-value, the method
# label and the deparsed expression that was passed as `x`.
Bartlett.sphericity.test <- function(x) {
  # Capture the caller's expression for `x` for the printed output.
  input_label <- deparse(substitute(x))

  # Keep only the rows without missing values.
  complete_rows <- subset(x, complete.cases(x))
  n_obs <- nrow(complete_rows)
  n_vars <- ncol(complete_rows)

  # Statistic: (1 - n + (2p + 5) / 6) * log|R|, with p(p - 1) / 2 degrees of
  # freedom.
  log_det_corr <- log(det(cor(complete_rows)))
  stat_value <- (1 - n_obs + (2 * n_vars + 5) / 6) * log_det_corr
  dof <- n_vars * (n_vars - 1) / 2

  # The p-value is computed from the unnamed values so it carries no names.
  upper_tail <- pchisq(stat_value, dof, lower.tail = FALSE)

  result <- list(
    statistic = setNames(stat_value, "X-squared"),
    parameter = setNames(dof, "df"),
    p.value = upper_tail,
    method = "Teste de esfericidade de Bartlett",
    data.name = input_label
  )
  class(result) <- "htest"
  result
}
# Example call: runs the sphericity test on `dados`. `dados` is not defined in
# the visible part of this file, so it must already exist in the calling
# environment -- TODO confirm where it is created.
Bartlett.sphericity.test(dados)
# source: https://github.com/vqv/ggbiplot/blob/master/R/ggscreeplot.r
# ggscreeplot.r
#
# Copyright 2011 Vincent Q. Vu.
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
#
#' Screeplot for Principal Components
#'
#' @param pcobj an object returned by prcomp() or princomp(); only its
#'   \code{sdev} component is read
#' @param type the type of scree plot. 'pev' corresponds to the proportion of
#'   explained variance, i.e. the eigenvalues divided by the trace. 'cev'
#'   corresponds to the cumulative proportion of explained variance, i.e. the
#'   partial sum of the first k eigenvalues divided by the trace.
#' @return a ggplot object with the component number on the x axis and the
#'   selected proportion on the y axis, drawn as points joined by a path
#' @export
#' @examples
#' data(wine)
#' wine.pca <- prcomp(wine, scale. = TRUE)
#' print(ggscreeplot(wine.pca))
#'
ggscreeplot <- function(pcobj, type = c('pev', 'cev'))
{
  type <- match.arg(type)

  # Fail early with a clear message instead of the row-count mismatch that
  # data.frame() would raise for a missing or empty `sdev`.
  sdev <- pcobj$sdev
  if (!is.numeric(sdev) || length(sdev) == 0L) {
    stop("`pcobj` must have a non-empty numeric `sdev` component",
         call. = FALSE)
  }

  # Squared standard deviations are the component variances (eigenvalues).
  d <- sdev^2

  yvar <- switch(type,
                 pev = d / sum(d),
                 cev = cumsum(d) / sum(d))

  yvar.lab <- switch(type,
                     pev = 'proportion of explained variance',
                     cev = 'cumulative proportion of explained variance')

  # seq_along() stays correct for any length, unlike 1:length(d).
  df <- data.frame(PC = seq_along(d), yvar = yvar)

  # ggplot2 is referenced by namespace so the function does not depend on the
  # package having been attached by the caller.
  ggplot2::ggplot(data = df, ggplot2::aes(x = PC, y = yvar)) +
    ggplot2::xlab('principal component number') +
    ggplot2::ylab(yvar.lab) +
    ggplot2::geom_point() +
    ggplot2::geom_path()
}
# KMO (Kaiser-Meyer-Olkin) test - assesses sampling adequacy.
# It ranges from 0 to 1: zero means the data are unsuitable for factor analysis,
# values above 0.5 are acceptable and values above 0.8 are recommended.
#
# Argument:
#   x  data whose columns are the variables; rows containing missing values
#      are dropped first.
# Returns a list with `KMO` (the overall measure) and `MSA` (one measure per
# variable, computed column-wise).
kmo <- function(x) {
  # Keep only the rows without missing values.
  complete_rows <- subset(x, complete.cases(x))

  corr <- cor(complete_rows)
  inv_corr <- solve(corr)
  inv_diag <- diag(inv_corr)

  # Squared partial correlations, from the inverse correlation matrix scaled
  # by its own diagonal.
  partial_sq <- (-inv_corr / sqrt(outer(inv_diag, inv_diag)))^2
  corr_sq <- corr^2

  # Only the off-diagonal entries enter the sums below.
  diag(partial_sq) <- 0
  diag(corr_sq) <- 0

  overall <- sum(corr_sq) / (sum(corr_sq) + sum(partial_sq))
  per_variable <- colSums(corr_sq) / (colSums(corr_sq) + colSums(partial_sq))

  list(KMO = overall, MSA = per_variable)
}
# Plot a k-means clustering of `df` on its first two principal components.
#
# Arguments:
#   df       numeric data frame or matrix with at least two columns; its row
#            names (or, when absent, the row numbers) label the points.
#   cluster  passed to kmeans() as `centers` (default 3).
# Returns a ggplot object: one point per row at its scores on the first two
# principal components, coloured by cluster and annotated with the row label.
# Note that kmeans() runs on `df` as given, while the PCA uses scaled columns.
plot_kmeans <- function(df, cluster = 3) {
  # Two principal components are plotted, so two columns are the minimum.
  if (is.null(dim(df)) || ncol(df) < 2) {
    stop("`df` must be a data frame or matrix with at least two columns",
         call. = FALSE)
  }

  # Clustering, with many random starts.
  km_fit <- kmeans(df, centers = cluster, nstart = 100)

  # PCA on standardised columns; `scale.` is spelled out in full rather than
  # relying on partial argument matching.
  pca_fit <- prcomp(df, scale. = TRUE)

  # A matrix may have no row names; fall back to row numbers so the label
  # column always exists.
  point_labels <- rownames(df)
  if (is.null(point_labels)) {
    point_labels <- as.character(seq_len(nrow(df)))
  }

  # row.names = NULL keeps automatic row names even though the cluster factor
  # carries names.
  plot_data <- data.frame(
    cluster = as.factor(km_fit$cluster),
    fact_1 = as.numeric(pca_fit$x[, 1]),
    fact_2 = as.numeric(pca_fit$x[, 2]),
    label = point_labels,
    row.names = NULL,
    stringsAsFactors = FALSE
  )

  # ggplot2 is referenced by namespace so the function does not depend on the
  # package having been attached by the caller.
  ggplot2::ggplot(plot_data,
                  ggplot2::aes(fact_1, fact_2, color = cluster)) +
    ggplot2::geom_point() +
    ggplot2::geom_text(ggplot2::aes(label = label),
                       size = 3,
                       vjust = 1,
                       color = "black")
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment