Skip to content

Instantly share code, notes, and snippets.

@stevenworthington
Created July 25, 2012 19:50
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save stevenworthington/5205d0dbf20ba4c9d2b8 to your computer and use it in GitHub Desktop.
Save stevenworthington/5205d0dbf20ba4c9d2b8 to your computer and use it in GitHub Desktop.
Permutation test for group differences using 3D coordinate data
# ===============================================================================
# Name : centroid_perm
# Original author : Steven Worthington (sworthington@iq.harvard.edu)
# Affiliation : IQSS, Harvard University
# Date (mm/dd/yyyy) : 06/14/2012
# Version : v0.8
# Aim : exact permutation test for group differences
# ===============================================================================
# Goal:
# To demonstrate that the points (i.e., the trabecular orientation)
# of each species are occupying (or not) a different area on the sphere.
# values in both datasets are in radians (required for the Haversine formula)
# A = chimps, B = humans, C = fossils
# dataset #1
pts1 <- data.frame(
Long = c(1.0727,1.5966,0.8448,1.1049,0.7834,0.8565,1.8754,1.4727,1.6216,
1.5203,0.7419,0.3658,1.6343,1.3046,1.5075),
Lat = c(1.3990,1.2087,1.4028,1.3570,1.3266,1.3806,1.4814,1.3943,1.4372,
1.3573,1.4939,1.4475,1.4156,1.2779,1.4175),
lvl = c( rep('A', 6), rep('B', 6), rep('C', 3) )
)
# dataset #1 with humans and fossils lumped together
pts11 <- data.frame(
Long = c(1.0727,1.5966,0.8448,1.1049,0.7834,0.8565,1.8754,1.4727,1.6216,
1.5203,0.7419,0.3658,1.6343,1.3046,1.5075),
Lat = c(1.3990,1.2087,1.4028,1.3570,1.3266,1.3806,1.4814,1.3943,1.4372,
1.3573,1.4939,1.4475,1.4156,1.2779,1.4175),
lvl = c( rep('A', 6), rep('B', 6), rep('B', 3) )
)
# dataset #2
pts2 <- data.frame(
Long = c(2.5952,1.3934,1.8102,1.7794,5.0217,1.4840,6.1798,1.1547,1.6422,
5.1650,0.4375,0.5192,0.8254,6.027,1.9699),
Lat = c(1.4742,1.2668,1.2611,1.4101,1.5240,1.3420,1.5194,1.4619,1.5554,
1.5452,1.4370,1.4864,1.5296,1.4265,1.4940),
lvl = c( rep('A', 6), rep('B', 6), rep('C', 3))
)
# dataset #2 with humans and fossils lumped together
pts22 <- data.frame(
Long = c(2.5952,1.3934,1.8102,1.7794,5.0217,1.4840,6.1798,1.1547,1.6422,
5.1650,0.4375,0.5192,0.8254,6.027,1.9699),
Lat = c(1.4742,1.2668,1.2611,1.4101,1.5240,1.3420,1.5194,1.4619,1.5554,
1.5452,1.4370,1.4864,1.5296,1.4265,1.4940),
lvl = c( rep('A', 6), rep('B', 6), rep('B', 3))
)
# utility functions
# ---------------------------------------------------
# rad <- function(x) x * (pi / 180) # convert degrees into radians
# deg <- function(x) x * (180 / pi) # convert radians into degrees
# ---------------------------------------------------
long_antepodes <- function(points) {
# convert longitudinal radians larger than pi to their antepodal values
points$Long <- sapply(points$Long, function(x) { if (x > pi) { x - pi } else { x } })
return(points)
}
# ---------------------------------------------------
LatLong2cartesian <- function(long, lat) {
# convert lat/long to cartesian (x,y,z) coordinates
r <- 1
x <- r * cos(lat) * cos(long)
y <- r * cos(lat) * sin(long)
z <- r * sin(lat)
return(data.frame(x = x, y = y, z = z))
}
# ---------------------------------------------------
xyz_surface <- function(coords_ave) {
# project centroids onto the surface of the sphere
x_ave <- coords_ave[1]
y_ave <- coords_ave[2]
z_ave <- coords_ave[3]
L <- sqrt( x_ave^2 + y_ave^2 + z_ave^2)
x <- x_ave / L
y <- y_ave / L
z <- z_ave / L
return(data.frame(x = x, y = y, z = z))
}
# ---------------------------------------------------
cartesian2LatLong <- function(coordsSum) {
# convert cartesian (x,y,z) coordinates to lat/long
X <- coordsSum[1]
Y <- coordsSum[2]
Z <- coordsSum[3]
Long <- atan2(Y, X)
Hyp <- sqrt(X^2 + Y^2)
Lat <- atan2(Z, Hyp)
# r <- sqrt(X^2 + Y^2 + Z^2)
# Lat2 <- asin(Z / r)
return(c(Long, Lat))
}
# ---------------------------------------------------
hf <- function(longs, lats, cent_long, cent_lat) {
# function to calculate distance between 2 points on a sphere
r <- 1 # sphere radius
delta.long <- (longs - cent_long)
delta.lat <- (lats - cent_lat)
b <- sin(delta.lat/2)^2 + cos(lats) * cos(cent_lat) * sin(delta.long/2)^2
p <- 2 * asin(pmin(1, sqrt(b)))
dis <- r * p
return(dis)
}
# ---------------------------------------------------
upermn <- function(x) {
# function to calculate number of unique permutations
duplicates <- as.numeric(table(x))
factorial(length(x)) / prod(factorial(duplicates))
}
# ---------------------------------------------------
uniqueperm <- function(group1, group2, group1n, group2n) {
# function to calculate all unique permutations of pts$lvl
totaln <- sum(group1n, group2n)
x <- rep(group1, totaln) # vector of pts$lvl for group 1
position <- combn(totaln, group2n) # all possible ways to place group2 lvls among group1 lvls
perms <- as.data.frame( apply(position, 2, function(p) replace(x, p, group2)) ) # all perms
return(perms) # returns a df of factors, with each column a unique permutation of lvls
}
# ---------------------------------------------------
dens <- function (nullDist, obs) {
# function to plot kernal density
d <- density(nullDist, na.rm = TRUE)
r <- dist( range(nullDist) )
plot(d, type = "n", main = "Null distribution and observed value",
xlim = c( min(nullDist)-(r/10),
pmax( obs+(obs/10), max(nullDist)+(r/10) ) ) )
polygon(d, col = "cornflowerblue", border = NA)
rug(nullDist, col = "darkred", lwd = 0.7)
}
# ---------------------------------------------------
# -----------------------------------------------------------------------
latlong_centroids <- function(LL, print = FALSE){
# function to calculate centroids of lat/long coordinates
# convert longitudinal radians larger than pi to their antepodal values
LL <- long_antepodes(LL)
# rows are x,y,z, and each column corresponds to a row in pts
coords <- mapply(LatLong2cartesian, long = LL$Long, lat = LL$Lat)
coords <- matrix(unlist(coords), nrow = nrow(coords), ncol = ncol(coords))
if (print == TRUE) {
cat("\n")
cat("x, y, z Cartesian coordinates:", "\n")
print(coords)
}
# calculate centroids as row means
coordsCentroids <- data.frame(A = rowMeans(coords[, LL$lvl == "A"]),
B = rowMeans(coords[, LL$lvl == "B"]),
C = rowMeans(coords[, LL$lvl == "C"]) )
if (print == TRUE) {
cat("\n")
cat("x, y, z Cartesian centroids within sphere:", "\n")
print(coordsCentroids)
}
surfaceCentroids <- sapply(coordsCentroids, xyz_surface)
surfaceCentroids <- as.data.frame( matrix(unlist(surfaceCentroids),
nrow = nrow(surfaceCentroids), ncol = ncol(surfaceCentroids)) )
if (print == TRUE) {
cat("\n")
cat("x, y, z Cartesian centroids on sphere surface:", "\n")
print(surfaceCentroids)
}
centroids <- t( sapply(surfaceCentroids, cartesian2LatLong) )
centroids <- data.frame(Long = centroids[, 1], Lat = centroids[, 2],
lvl = colnames(coordsCentroids))
if (print == TRUE) {
cat("\n")
cat("Lat/Long centroids:", "\n")
print(centroids)
}
return(centroids)
}
# -----------------------------------------------------------------------
# -----------------------------------------------------------------------
centroid_perm <- function(pt, comparison = c("AB", "AC", "BC")) {
# function for performing permutation test on group centroid differences
if (!is.data.frame(pt))
stop(deparse(substitute(pt)), " is not a data frame.")
if (any(pt$Long | pt$Lat) > pi*2)
stop(deparse(substitute(pt)), " does not contain Lat/Long in radians.")
comparison <- match.arg(comparison)
if (comparison == "AB") {
points <- pt[pt$lvl %in% c("A", "B"), ] # subset pt by groups of interest
calc.diff <- function(c){
centDistAB <- hf(c$Long[c$lvl == "A"], c$Lat[c$lvl == "A"],
c$Long[c$lvl == "B"], c$Lat[c$lvl == "B"])
return(centDistAB)
}
}
else if (comparison == "AC") {
points <- pt[pt$lvl %in% c("A", "C"), ] # subset pt by groups of interest
calc.diff <- function(c){
centDistAC <- hf(c$Long[c$lvl == "A"], c$Lat[c$lvl == "A"],
c$Long[c$lvl == "C"], c$Lat[c$lvl == "C"])
return(centDistAC)
}
}
else if (comparison == "BC") {
points <- pt[pt$lvl %in% c("B", "C"), ] # subset pt by groups of interest
calc.diff <- function(c){
centDistBC <- hf(c$Long[c$lvl == "B"], c$Lat[c$lvl == "B"],
c$Long[c$lvl == "C"], c$Lat[c$lvl == "C"])
return(centDistBC)
}
}
# for observed (unpermuted) groups
centroids <- latlong_centroids(points, print = TRUE) # calculate Lat/Long centroids
t.obs <- calc.diff(centroids) # calculate distance between centroids
# calculate all unique permutations for null distribution
lvl_points <- droplevels(points) # drop redundent factor level
perms <- uniqueperm(levels(lvl_points$lvl)[1], levels(lvl_points$lvl)[2],
table(lvl_points$lvl)[1], table(lvl_points$lvl)[2])
# permute group assignments accross coordinates
perm.vals <- vector(mode = "numeric", length = ncol(perms))
for (i in seq_along(perms)){
points$lvl <- perms[, i]
centroids <- latlong_centroids(points) # recalculate centroids for each group assignment
perm.vals[i] <- calc.diff(centroids)
}
perm.ecdf <- ecdf(perm.vals) # cumulative distribution function of null hypothesis
p.val <- 1 - perm.ecdf(t.obs) # p-value
# pdf plot
dens(perm.vals, t.obs)
arrows(t.obs, 4, t.obs, 0, col = "darkred", lwd = 1.5)
text(t.obs, 6.5, labels = paste("Data set:", deparse(substitute(pt))), cex = 0.8)
text(t.obs, 6, labels = paste("Comparison:", comparison), cex = 0.8)
text(t.obs, 5.5, labels = paste("Permutations:", upermn(lvl_points$lvl)), cex = 0.8)
text(t.obs, 5, labels = paste("P-value =", round(p.val, 4)), cex = 0.8)
# the p-value and other info
cat("\n")
cat("Data set:", deparse(substitute(pt)), "\n")
cat("Comparison:", comparison, "\n")
cat("Permutations:", upermn(lvl_points$lvl), "\n")
cat("P-value =", p.val, "\n")
return(p.val)
}
# ------------------------------------------------------------------
# ------------------------------------------------------------------
# function calls:
pts1_ab <- centroid_perm(pts1, "AB")
pts1_ac <- centroid_perm(pts1, "AC")
pts1_bc <- centroid_perm(pts1, "BC")
#
pts2_ab <- centroid_perm(pts2, "AB")
pts2_ac <- centroid_perm(pts2, "AC")
pts2_bc <- centroid_perm(pts2, "BC")
#
pts11_ab <- centroid_perm(pts11, "AB") # humans and fossils merged
pts22_ab <- centroid_perm(pts22, "AB") # humans and fossils merged
p_values <- data.frame(comparison = c("AB", "AC", "BC", "new_AB"),
pts1 = c(pts1_ab, pts1_ac, pts1_bc, pts11_ab),
pts2 = c(pts2_ab, pts2_ac, pts2_bc, pts22_ab))
p_values # ~2 minutes
# adjust p-values for multiple comparisons
p_values$pts1_adj <- p.adjust(p = p_values$pts1, method = "holm") # sequential Bonferroni
p_values$pts2_adj <- p.adjust(p = p_values$pts2, method = "holm") # sequential Bonferroni
# ------------------------------------------------------------------
# ------------------------------------------------------------------
dens_bw<- function (nullDist, obs, p) {
# function to plot kernal density in black and white
d <- density(nullDist, na.rm = TRUE)
r <- dist( range(nullDist) )
plot(d, main = "Null distribution and observed value",
xlim = c( min(nullDist)-(r/10),
pmax( obs+(obs/10), max(nullDist)+(r/10) ) ) )
rug(nullDist, col = "gray30", lwd = 0.1)
arrows(obs, 4, obs, 0, lwd = 1.2)
text(obs, 5, labels = paste("P-value =", round(p, 4)), cex = 0.8)
}
# need to run the analysis outside of the centroid_perm() function
# to get values for perm.vals and t.obs
dens_bw(nullDist = perm.vals, obs = t.obs, p = 0.0043956)
# ------------------------------------------------------------------
# ===================================================================
# Plotting the points in 3D
# ===================================================================
# data for pts2
# ------------------------------------------
# Cartesian coordinates
coords <- structure(c(-0.0824040452325179, 0.05011425118923, 0.995338201395415,
0.0528229766618636, 0.294638046695378, 0.954147868297211, -0.0722679466402008,
0.296077263247749, 0.952426164107293, -0.033136207288388, 0.15653684561824,
0.987116106509479, -0.01423971677629, 0.0445592704815626, 0.998905251703224,
0.0196611658175792, 0.22595158795114, 0.973940100037498, -0.0510993918414077,
0.00530182936886244, 0.99867949951863, 0.0439281880452844, 0.0994078687663381,
0.994076651935047, -0.00109837695599064, 0.0153564877827651,
0.999881478901895, -0.0111924388220814, 0.0230164764392326, 0.999672431912342,
0.120833273964134, 0.0565173503067401, 0.991062616093158, 0.0731873209047366,
0.0418265579026938, 0.996440743402637, 0.0279340451173512, 0.030263287830029,
0.99915155133398, -0.139103121978486, 0.0364368128034391, 0.989607336335259,
-0.0298131604990723, 0.0706913445215261, 0.997052611084689), .Dim = c(3L,
15L))
# Cartesian centroids
coordsCentroids <- structure(list(A = c(-0.0215939622429923, 0.177979544197217,
0.97697894867502), B = c(0.0290930958824458, 0.0402377617611054,
0.996635570293951), C = c(-0.0469940791200691, 0.0457971483849981,
0.995270499584642)), .Names = c("A", "B", "C"), row.names = c(NA,
-3L), class = "data.frame")
# Cartesian centroids projected to sphere surface
surfaceCentroids <- structure(list(V1 = c(-0.0217397719917556, 0.179181322376338,
0.983575841521747), V2 = c(0.0291551465469009, 0.0403235821173697,
0.998761225796763), V3 = c(-0.0471151038906367, 0.0459150906764468,
0.997833639157123)), .Names = c("V1", "V2", "V3"), row.names = c(NA,
-3L), class = "data.frame")
# lat/long coordinates for dataset #2
pts2 <- data.frame(
Long = c(2.5952,1.3934,1.8102,1.7794,5.0217,1.4840,6.1798,1.1547,1.6422,
5.1650,0.4375,0.5192,0.8254,6.027,1.9699),
Lat = c(1.4742,1.2668,1.2611,1.4101,1.5240,1.3420,1.5194,1.4619,1.5554,
1.5452,1.4370,1.4864,1.5296,1.4265,1.4940),
lvl = c( rep('A', 6), rep('B', 6), rep('C', 3))
)
# lat/long centroids
LatLong_centroids <- structure(list(Long = c(1.69153452750487, 0.944780830082602,
2.36909295471935), Lat = c(1.38930629806737, 1.52101620931788,
1.50496102499399), lvl = structure(1:3, .Label = c("A", "B",
"C"), class = "factor")), .Names = c("Long", "Lat", "lvl"), row.names = c("V1",
"V2", "V3"), class = "data.frame")
# 3D plots
# ------------------------------------------
library(rgl)
# rgl.open()
# Cartesian coordinates
open3d()
plot3d(coords[1, ], coords[2, ], coords[3, ], col = as.numeric(pts2$lvl), size = 10)
text3d(coordsCentroids[1, ], coordsCentroids[2, ], coordsCentroids[3, ],
texts = c("A", "B", "C"), col = c(1, 2, 3), adj = 0)
text3d(surfaceCentroids[1, ], surfaceCentroids[2, ], surfaceCentroids[3, ],
texts = c("a", "b", "c"), col = c(1, 2, 3), adj = 0)
# Lat/Long plot on sphere
library(sm)
sm.sphere(lat = pts2$Lat * (180 / pi), long = pts2$Long * (180 / pi),
theta = 30, phi = 120, pch = as.numeric(pts2$lvl), col = as.numeric(pts2$lvl))
sm.sphere(lat = LatLong_centroids$Lat * (180 / pi), long = LatLong_centroids$Long * (180 / pi),
theta = 30, phi = 120, pch = c(1,2,3), col = c(1,2,3), add = TRUE)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment