Last active
September 8, 2017 17:50
-
-
Save rparatodxs/d1e2bd4a7d47205d1e57aadb936e1662 to your computer and use it in GitHub Desktop.
Clasifica los informantes de una encuestra y busca perfiles en base a una matriz de respuesta/No-respuesta
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#--------08-09-2017----------- | |
#----------------------------- | |
#-Estoy feliz de compartir este código, pero cuenteme cuando lo utilice. | |
#-Ayuda a la indentificación de grupos de informantes en una base de datos o encuesta | |
#----------------------------- | |
library(vegan) | |
library(dendextend) | |
library(colorspace) | |
library(circlize) | |
library(gplots) | |
mm <- matrix(0, 100, 100) | |
respuesta<-apply(mm, c(1, 2), function(x) sample(c(0, 1), 1)) | |
respuesta<-as.data.frame(respuesta) | |
dist.mat<-vegdist(respuesta,method="jaccard") #distance matrix based on Jaccard distance | |
clust.res<-hclust(dist.mat) #agglomerative clustering using complete linkage | |
plot(clust.res) | |
fit <- cmdscale(dist.mat,eig=TRUE, k=5) # k is the number of dim | |
#fit # view results | |
# plot solution | |
x <- fit$points[,1] | |
y <- fit$points[,2] | |
plot(x, y, xlab="Cordenada 1", ylab="Cordenada 2", | |
main="Metric MDS", type="n") | |
text(x, y, labels = row.names(respuesta), cex=.7) | |
clust.res<-hclust(dist.mat^2, method="ward.D2") #agglomerative clustering using complete linkage | |
plot(clust.res) | |
grupos <- cutree(clust.res, k=5) # cut tree into 5 clusters | |
respuesta$grupos <- cutree(clust.res, k=5) # cut tree into 5 clusters | |
# draw dendogram with red borders around the 5 clusters | |
rect.hclust(clust.res, k=5, border="red") | |
clust.res<-hclust(dist.mat^2, method="ward.D2") #agglomerative clustering using complete linkage | |
plot(clust.res) | |
dend <- as.dendrogram(clust.res) | |
# order it the closest we can to the order of the observations: | |
dend <- rotate(dend, 1:max(dim(respuesta)[1])) | |
# Colorea las ramas del arbol de agrupamiento: | |
dend <- color_branches(dend, k=5) #, groupLabels= c("Grupo 1","Grupo 2","Grupo 3","Grupo 4")) | |
# Manualmente junta las etiquetas, tanto como sea posible a la clasificación real: | |
labels_colors(dend) <- | |
rainbow_hcl(5)[sort_levels_values( | |
as.numeric(respuesta$grupos)[order.dendrogram(dend)] | |
)] | |
# Debemos agregar el tipo de observación a las etiquetas: | |
labels(dend) <- paste(as.character(respuesta$grupos)[order.dendrogram(dend)], | |
"(",labels(dend),")", | |
sep = "") | |
# Se poda un poco el dendrograma: | |
dend <- hang.dendrogram(dend,hang_height=0.1) | |
# se redice el tamaño de las etiquetas: | |
# dend <- assign_values_to_leaves_nodePar(dend, 0.5, "lab.cex") | |
dend <- set(dend, "labels_cex", 0.2) | |
# And plot: | |
par(mar = c(3,3,3,7)) | |
plot(dend, | |
main = "Agrupamiento BD Cualquierda o Encuesta | |
(Distancia de Jaccard)", | |
horiz = TRUE, nodePar = list(cex = .007)) | |
legend("topleft", legend = c("Grupo 1","Grupo 2","Grupo 3","Grupo 4","Grupo 5"), fill = rainbow_hcl(5)) | |
#### BTW, notice that: | |
# labels(hc_iris) # no labels, because "iris" has no row names | |
# is.integer(labels(dend)) # this could cause problems... | |
# is.character(labels(dend)) # labels are no longer "integer" | |
# Requires that the circlize package will be installed | |
#par(mar = rep(0,4)) | |
#circlize_dendrogram(dend) | |
## Loading required namespace: circlize | |
some_col_func <- function(n) rev(colorspace::heat_hcl(n, c = c(80, 30), l = c(30, 90), power = c(1/5, 1.5))) | |
scaled_respuesta<- respuesta %>% as.matrix %>% scale | |
respuesta_Singrupos<-respuesta[, !names(respuesta) %in% c("grupos")] | |
gplots::heatmap.2(as.matrix(respuesta_Singrupos), | |
main = "NO respuesta en una encuesta cualquiera", | |
srtCol = 90, | |
dendrogram = "row", | |
Rowv = dend, | |
Colv = "NA", # this to make sure the columns are not ordered | |
trace="none", | |
margins =c(5,0.1), | |
key.xlab = "Distancia Jaccard", | |
denscol = "black", | |
density.info = "density", | |
RowSideColors = rev(labels_colors(dend)), # to add nice colored strips | |
col = some_col_func) | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment