Skip to content

Instantly share code, notes, and snippets.

@ibartomeus
Created April 16, 2015 21:00
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
Star You must be signed in to star a gist
Save ibartomeus/321f2b3dd354141cd781 to your computer and use it in GitHub Desktop.
#Explore CIS data
#load data----
load("barometro_enero.RData")
head(barometro)
str(barometro)
head(nombres_etiquetas)
nombres_etiquetas
#select some variables related to personal well being and recode
dat <- barometro[, c("P901", "P902","P903","P904","P905","P906","P907","P908","P10")]
head(dat)
levels(dat$P901) <- c(1, 0, -1, NA, NA, NA)
levels(dat$P902) <- c(1, 0, -1, NA, NA, NA)
levels(dat$P903) <- c(1, 0, -1, NA, NA, NA)
levels(dat$P904) <- c(1, 0, -1, NA, NA, NA)
levels(dat$P905) <- c(1, 0, -1, NA, NA, NA)
levels(dat$P906) <- c(1, 0, -1, NA, NA, NA)
levels(dat$P907) <- c(1, 0, -1, NA, NA, NA)
levels(dat$P908) <- c(1, 0, -1, NA, NA, NA)
levels(dat$P10)[13:14] <- c(NA, NA)
#load a modified function to do the star plot----
#this function is borrowed from diamondplot{plotrix} but it normalize by rows in the data.frame,
#that is, by axes, making each axe maximum equal to 1. Hence, axes are not comparable among them,
#but groups (columns) are comparable within axes. Former function normalized by columns,
#making comparisions among axes meaningful for a given group, but groups within axes show completely misleading rankings.
diamondplot2 <- function (x, bg = gray(0.6), col = rainbow, name = "", ...)
{
if (!is.data.frame(x))
stop("Argument has to be a data frame")
opar <- par(no.readonly = TRUE)
par(bg = bg)
prop <- 0.8
scale <- 5
deg2rad <- function(alfa) {
(alfa * pi)/180
}
#IB do not normalize the data here
#normalizza <- function(x) {
# x/max(x)
#}
coord <- function(x, prop = 17) {
n <- length(x)
alfa <- rep(NA, n)
for (i in 1:n) {
alfa[i] <- 360/n * i
}
c.x <- sin(deg2rad(alfa)) * x * prop
c.y <- cos(deg2rad(alfa)) * x * prop
list(x = c.x, y = c.y)
}
segmenti <- function(n, prop = 1, labels = NULL, scale = 5) {
plot(0, 0, axes = FALSE, xlab = "", ylab = "", main = name,...)
for (i in 1:n) {
alfa <- 360/n * i
x1 <- c(0, sin(deg2rad(alfa)) * prop)
y1 <- c(0, cos(deg2rad(alfa)) * prop)
polygon(x1, y1)
text(sin(deg2rad(alfa)), cos(deg2rad(alfa)), labels[i],
cex = 0.8)
}
for (i in 1:n) {
alfa <- 360/n * i
for (j in 1:scale) {
xa <- (sin(deg2rad(alfa)) * (1/scale) * j * prop)
ya <- (cos(deg2rad(alfa)) * (1/scale) * j * prop)
points(xa, ya, pch = 19)
et <- round((1/scale) * j * 17)
#IB: remove lab axes
#text(sin(deg2rad(0)) + 0.1, cos(deg2rad(0)) *
#(1/scale) * j * prop, et, cex = 0.8)
}
}
}
n <- dim(x)[[1]]
k <- dim(x)[[2]]
segmenti(n, prop = prop, labels = rownames(x))
xx <- x
for (j in 1:n) {
#IB normalize the data by rows
xx[j,] <- x[j, ]/max(x[j,])
}
for (j in 1:k) {
#IB uses normalized data here
polygon(coord(xx[,j], prop = prop), lty = j,
border = col(k)[j])
}
#IB make legend smaller
#legend(-1, 1, legend = dimnames(x)[[2]], lty = 1:k,
# col = col(k), cex = 0.4)
par(opar)
}
#calculate means per political group----
library(reshape)
names(dat)
head(dat)
#make levels numerical
dat$P901 <- as.numeric(as.character(dat$P901))
dat$P902 <- as.numeric(as.character(dat$P902))
dat$P903 <- as.numeric(as.character(dat$P903))
dat$P904 <- as.numeric(as.character(dat$P904))
dat$P905 <- as.numeric(as.character(dat$P905))
dat$P906 <- as.numeric(as.character(dat$P906))
dat$P907 <- as.numeric(as.character(dat$P907))
dat$P908 <- as.numeric(as.character(dat$P908))
#do the means (that is done in a dirty way)
d1 <- cast(dat, P10 ~ ., #+ P902 + P903 + P904 + P905 + P906 + P907 + P908,
fun.aggregate = mean, na.rm = TRUE,
value = "P901")
head(d1)
d1 <- cast(dat, P10 ~ ., #+ P902 + P903 + P904 + P905 + P906 + P907 + P908,
fun.aggregate = mean, na.rm = TRUE,
value = "P901")
d2 <- cast(dat, P10 ~ ., #+ P902 + P903 + P904 + P905 + P906 + P907 + P908,
fun.aggregate = mean, na.rm = TRUE,
value = "P902")
d3 <- cast(dat, P10 ~ ., #+ P902 + P903 + P904 + P905 + P906 + P907 + P908,
fun.aggregate = mean, na.rm = TRUE,
value = "P903")
d4 <- cast(dat, P10 ~ ., #+ P902 + P903 + P904 + P905 + P906 + P907 + P908,
fun.aggregate = mean, na.rm = TRUE,
value = "P904")
d5 <- cast(dat, P10 ~ ., #+ P902 + P903 + P904 + P905 + P906 + P907 + P908,
fun.aggregate = mean, na.rm = TRUE,
value = "P905")
d6 <- cast(dat, P10 ~ ., #+ P902 + P903 + P904 + P905 + P906 + P907 + P908,
fun.aggregate = mean, na.rm = TRUE,
value = "P906")
d7 <- cast(dat, P10 ~ ., #+ P902 + P903 + P904 + P905 + P906 + P907 + P908,
fun.aggregate = mean, na.rm = TRUE,
value = "P907")
d8 <- cast(dat, P10 ~ ., #+ P902 + P903 + P904 + P905 + P906 + P907 + P908,
fun.aggregate = mean, na.rm = TRUE,
value = "P908")
d <- data.frame(P10 = d1$P10,
P901 = d1$'(all)',
P902 = d2$'(all)',
P903 = d3$'(all)',
P904 = d4$'(all)',
P905 = d5$'(all)',
P906 = d6$'(all)',
P907 = d7$'(all)',
P908 = d8$'(all)')
d
#filter data and add more intuitive col and row names-----
data <- as.data.frame(t(d[,2:9]))
colnames(data) <- d$P10
rownames(data) <- c("trabajo", "familia", "economia", "ocio", "vivienda", "salud", "educación", "amor")
data2 <- data + 0.2
data3 <- data2[,c(-11,-13)]
#plot it----
par(xpd=TRUE)
diamondplot2(data3)
legend(-1.6, 1.6, legend = dimnames(data3)[[2]], lty = 1:11,
col = rainbow(11), cex = 0.4)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment