Last active
April 2, 2019 14:41
-
-
Save benmarwick/4523018 to your computer and use it in GitHub Desktop.
ggbattleship - battleship curves with R and ggplot2, and some other methods
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# From http://cainarchaeology.weebly.com/r-package-for-seriation-via-ca.html
# Walk-through of the CAseriation package on its bundled sample dataset.
library(CAseriation)

# Load the sample dataset.
data("perfect_seriation")

# Plot the Correspondence Analysis scatterplot of the first 2 dimensions in
# order to inspect the data structure (e.g., looking for the horseshoe effect).
check.ca.plot(perfect_seriation, 1, 2)

# Sort the input contingency table according to the scores of the row and
# column categories on the 1st CA dimension; two seriation plots and a
# 'battleship' plot for the sorted table are also produced.
sort.table(perfect_seriation, 1)

# Display the CA scatterplot for row categories, with different clusters of
# points being given different colors.
plot.clusters.rows(perfect_seriation, 1, 2)

# NOTE(review): presumably the column-category counterpart of the plot above;
# confirm against the CAseriation documentation.
plot.clusters.cols(perfect_seriation, 1, 2)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# https://stat.ethz.ch/pipermail/r-help//2012-November/339973.html
# Battleship plot and kite chart of random example counts using plotrix.
library(plotrix)

# Labels shared by both plots: one matrix row per vessel, one column per
# passenger-count bin.
vessel_names <- c("Barnacle", "Maelstrom", "Poopdeck", "Seasick", "Wallower")
passenger_bins <- c("0", "1-10", "10-100", "100-1000")

# Random example counts, filled column by column (5 vessels x 4 bins).
# The original named the pieces inside c(); matrix() discards those names,
# so they are dropped here and the labels are passed to the plots explicitly.
vessels <- matrix(
  c(
    sample(1:10, 5),
    sample(5:20, 5),
    sample(15:36, 5),
    sample(10:16, 5)
  ),
  ncol = 4
)

battleship.plot(
  vessels,
  xlab = "Number of passengers",
  yaxlab = vessel_names,
  xaxlab = passenger_bins
)

kiteChart(
  vessels,
  xlab = "Number of passengers",
  ylab = "Vessel",
  varlabels = vessel_names,
  timelabels = passenger_bins
)
# Expanding on the idea of the battleship.plot, you can draw rectangles of
# the right width with ggplot2 if you want.
library(ggplot2)

# Original data: one count per passenger-number bin.
data2 <- read.csv(text =
"count,bin
7,0
11,1-10
6,11-100
13,101-1000
7,1001-10000
3,10001-100000
2,100001-1000000")

# Make the bins an ordered factor so they plot in numeric rather than
# alphabetical order.
data2$bin <- ordered(
  data2$bin,
  levels = c(
    "0", "1-10", "11-100", "101-1000",
    "1001-10000", "10001-100000", "100001-1000000"
  )
)

# Define the lower and upper reaches of each bin:
data2$low <- c(0, 1, 11, 101, 1001, 10001, 100001)
data2$high <- c(0, 10, 100, 1000, 10000, 100000, 1000000)

# And make multiple ones for different vessels (or whatever grouping):
data3 <- rbind(data2, data2, data2, data2)
data3$vessel <- rep(
  c("Barnacle", "Maelstrom", "Poopdeck", "Seasick"),
  each = nrow(data2)
)

# Jitter the stacked counts by a random integer in -5..5; abs() keeps the
# resulting counts non-negative.
data3$count <- abs(
  data3$count + sample(-5:5, nrow(data3), replace = TRUE)
)

# With each bin taking the same size, regardless of its extent.
# geom_blank() comes first so the y axis is set up from the discrete bin
# labels; the rectangles are then centred on each bin's integer position
# and on x = 0.
ggplot(data3) +
  geom_blank(aes(x = count / 2, y = bin)) +
  geom_rect(aes(
    ymin = as.numeric(bin) - 0.5, ymax = as.numeric(bin) + 0.5,
    xmin = -count / 2, xmax = count / 2
  )) +
  facet_grid(~vessel)

# Width (height, really) of rectangles is based on range. Since
# logarithmic scale and exponential binning, rectangles are same height
# (with some gaps due to discrete nature). Since log scale, still problems
# with 0.
ggplot(data3) +
  geom_rect(aes(
    ymin = low, ymax = high,
    xmin = -count / 2, xmax = count / 2
  )) +
  facet_grid(~vessel) +
  scale_y_log10()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Generic battleship plot for a user-supplied data frame `j`: one facet per
# column of `j`, one horizontal bar per row, bar width proportional to value.
library(reshape2)
library(ggplot2)

# j must be a data frame with row names and col names; it is not defined in
# this script and has to exist before running it.
stopifnot(is.data.frame(j))

i <- melt(j) # long format: one row per (variable, value) pair

# Number of rows per variable and number of variables, derived from the
# melted table exactly as before, but computed only once.
n_rows <- as.integer(table(i[, 1])[1])
n_vars <- length(unique(i[, 1]))

# Vertical position of each bar: 1..n_rows, repeated for every variable.
# (Previously stored in a variable named `cat`, which shadowed base::cat.)
bar_pos <- rep(seq_len(n_rows), n_vars)

# Half-height of each bar; positions are 1 apart, so 0.475 leaves a gap of
# 0.05 between vertically adjacent bars.
ysep <- 0.475

# Three evenly spaced x breaks from 0 up to half the largest value, rounded
# to the nearest 10. Bars span -value/2..value/2, so the labels show twice
# the break position (the full bar width); the label at 0 is left blank.
breaks <- seq(0, round(max(i$value / 2), digits = -1), length.out = 3)
xlabels <- c("", breaks[-1] * 2)
ylabels <- row.names(j)

# The constant `group = 'variable'` setting from the original call was
# dropped: it was a fixed string outside aes() and did not affect the plot.
ggplot(i) +
  geom_rect(aes(
    ymin = bar_pos - ysep, ymax = bar_pos + ysep,
    xmin = -value / 2, xmax = value / 2
  )) +
  facet_grid(. ~ variable) +
  theme(panel.grid = element_blank()) +
  scale_y_reverse(breaks = seq_len(n_rows), labels = ylabels) +
  scale_x_continuous(breaks = breaks, labels = xlabels)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Non-ggplot2 method for a battleship plot, using the seriation package,
# which provides ordering functions.
library("seriation")
data("Irish")

# Replace every column of x by the ranks of its values.
scale_by_rank <- function(x) apply(x, 2, rank)

# Rank-scale the Irish data with its 6th column left out.
x <- scale_by_rank(Irish[, -6])

## Use the sum of absolute rank differences (Minkowski distance with p = 1)
## to seriate the rows and, via the transpose, the columns.
## (Named `ser_order` rather than `order` to avoid shadowing base::order.)
ser_order <- c(
  seriate(dist(x, "minkowski", p = 1)),
  seriate(dist(t(x), "minkowski", p = 1))
)

## Plot
bertinplot(x, ser_order, options = list(panel = panel.squares))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# See also plotBar in the tabula package:
# https://www.rdocumentation.org/packages/tabula/versions/1.2.0/topics/plotBar
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment