Skip to content

Instantly share code, notes, and snippets.

@kzfm
Created August 24, 2013 01:43
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save kzfm/6325547 to your computer and use it in GitHub Desktop.
Save kzfm/6325547 to your computer and use it in GitHub Desktop.
入門機械学習読書会9章 MDS
set.seed(851982)
ex.matrix <- matrix(sample(c(-1, 0, 1), 24, replace = TRUE),
nrow = 4,
ncol = 6)
row.names(ex.matrix) <- c('A', 'B', 'C', 'D')
colnames(ex.matrix) <- c('P1', 'P2', 'P3', 'P4', 'P5', 'P6')
ex.matrix
ex.mult <- ex.matrix %*% t(ex.matrix)
ex.mult
ex.dist <- dist(ex.mult)
ex.dist
ex.mds <- cmdscale(ex.dist)
plot(ex.mds, type = 'n')
text(ex.mds, c('A', 'B', 'C', 'D'))
library('foreign')
library('ggplot2')
library(gridExtra)
setwd("/Users/kzfm/lang/rcode/ML_for_Hackers/09-MDS/")
data.dir <- file.path("data", "roll_call")
data.files <- list.files(data.dir)
data.files
rollcall.data <- lapply(data.files,
function(f)
{
read.dta(file.path(data.dir, f), convert.factors = FALSE)
})
dim(rollcall.data[[1]])
rollcall.simplified <- function(df)
{
no.pres <- subset(df, state < 99)
for(i in 10:ncol(no.pres))
{
no.pres[,i] <- ifelse(no.pres[,i] > 6, 0, no.pres[,i])
no.pres[,i] <- ifelse(no.pres[,i] > 0 & no.pres[,i] < 4, 1, no.pres[,i])
no.pres[,i] <- ifelse(no.pres[,i] > 1, -1, no.pres[,i])
}
return(as.matrix(no.pres[,10:ncol(no.pres)]))
}
rollcall.simple <- lapply(rollcall.data, rollcall.simplified)
rollcall.dist <- lapply(rollcall.simple, function(m) dist(m %*% t(m)))
rollcall.mds <- lapply(rollcall.dist,
function(d) as.data.frame((cmdscale(d, k = 2)) * -1))
congresses <- 101:111
for(i in 1:length(rollcall.mds))
{
names(rollcall.mds[[i]]) <- c("x", "y")
congress <- subset(rollcall.data[[i]], state < 99)
congress.names <- sapply(as.character(congress$name),
function(n) strsplit(n, "[, ]")[[1]][1])
rollcall.mds[[i]] <- transform(rollcall.mds[[i]],
name = congress.names,
party = as.factor(congress$party),
congress = congresses[i])
}
cong.110 <- rollcall.mds[[9]]
base.110 <- ggplot(cong.110, aes(x = x, y = y)) +
scale_size(range = c(2,2), guide = "none") +
scale_alpha(guide = "none") + theme_bw() +
labs(title = "Roll Call Vote MDS Clustering for 110th U.S. Senate") +
theme(axis.ticks = element_blank(),
axis.text.x = element_blank(),
axis.text.y = element_blank(),
panel.grid.major = element_blank()) +
xlab("") +
ylab("") +
scale_shape(name = "Party", breaks = c("100", "200", "328"),
labels = c("Dem.", "Rep.", "Ind."), solid = FALSE) +
scale_color_manual(name = "Party", values = c("100" = "black",
"200" = "dimgray",
"328"="grey"),
breaks = c("100", "200", "328"),
labels = c("Dem.", "Rep.", "Ind."))
g1 <- base.110 + geom_point(aes(shape = party, alpha = 0.75, size = 2))
g2 <- base.110 + geom_text(aes(shape = party, alpha = 0.75, size = 2, label = cong.110$name))
grid.arrange(g1, g2, ncol=2, nrow=1)
all.mds <- do.call(rbind, rollcall.mds)
all.plot <- ggplot(all.mds, aes(x = x, y = y)) +
geom_point(aes(shape = party, alpha = 0.75, size = 2)) +
scale_size(range = c(2, 2), guide = "none") +
scale_alpha(guide = "none") +
theme_bw() +
labs(title="Roll Call Vote MDS Clustering for U.S. Senate (101st - 111th Congress)") +
theme(axis.ticks = element_blank(),
axis.text.x = element_blank(),
axis.text.y = element_blank(),
panel.grid.major = element_blank()) +
xlab("") +
ylab("") +
scale_shape(name = "Party",
breaks = c("100", "200", "328"),
labels = c("Dem.", "Rep.", "Ind."),
solid = FALSE) +
facet_wrap(~ congress)
print(all.plot)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment