Skip to content

Instantly share code, notes, and snippets.

@semenoffalex
Created June 2, 2022 16:24
Show Gist options
  • Save semenoffalex/c6e1bf5687e28ca55af9ebbdbd04ec38 to your computer and use it in GitHub Desktop.
Save semenoffalex/c6e1bf5687e28ca55af9ebbdbd04ec38 to your computer and use it in GitHub Desktop.
Ben Lind's script for backbone projection applied to Marvel Universe Social Network
# The following script was written by Benjamin Lind (@benjaminlind)
# It implements methods, described in the paper:
# Neal, Zachary. "The backbone of bipartite projections: Inferring relationships from co-authorship, co-sponsorship, co-attendance and other co-behaviors." Social Networks 39 (2014): 84-97.
# https://www.sciencedirect.com/science/article/abs/pii/S0378873314000343
#
# The Marvel Universe Social Network
# Dataset created by Cesc Rosselló, Ricardo Alberich, and Joe Miro
# The network data is bipartite. There are characters and the books they appear within.
# Information on the data: http://bioinfo.uib.es/~joemiro/marvel.html
# Primary source: http://www.chronologyproject.com/
setwd("C:/Users/Alexander/Dropbox/Events/TMCA")
library(igraph); library(Rcpp); library(compiler)
marvnet <- read.graph("http://bioinfo.uib.es/~joemiro/marvel/porgat.txt", format = "pajek")
save(marvnet, file="marvnet.Rdata")
typeof(marvnet)
# Count the number of characters and books
n.books <- sum(get.vertex.attribute(marvnet, "type"))
n.chars <- sum(get.vertex.attribute(marvnet, "type")==FALSE)
# How many books are each character in?
V(marvnet)$n.books <- rep(NA, vcount(marvnet))
V(marvnet)$n.books[which(V(marvnet)$type==FALSE)] <- degree(marvnet, which(V(marvnet)$type==FALSE))
# Create a one-mode projection for relationships between characters
cppFunction('IntegerMatrix all_pairs_Cpp(IntegerVector y){
int ylen = y.size();
int outrows = ylen * (ylen-1) / 2;
IntegerMatrix out(outrows, 2);
int outrowcounter = 0;
for(int i = 0; i < (ylen - 1); ++i){
for(int j = (i+1); j < ylen; ++j){
out(outrowcounter, 0) = y[i];
out(outrowcounter, 1) = y[j];
outrowcounter++;
}
}
return out;
}')
all_pairs <- function(x){
a <- 1:length(x)
outmat <- all_pairs_Cpp(a)
outmat <- data.frame(outmat)
outmat[,1] <- x[outmat[,1]]
outmat[,2] <- x[outmat[,2]]
return(outmat)
}
all_pairs <- cmpfun(all_pairs)
charel <- all_pairs(V(marvnet)$id[which(V(marvnet)$type==FALSE)])
charel <- data.frame(From = charel[, 1], To = charel[, 2], stringsAsFactors=FALSE)
charel <- charel[-which(charel$From == charel$To),]
rm(all_pairs, all_pairs_Cpp)
# How many interactions would we expect by chance?
cppFunction('NumericVector expected_interaction(IntegerVector x, IntegerVector y, int totalinteraction){
int n = x.size();
NumericVector out(n);
for(int i = 0; i < n; ++i){
out[i] = x[i] * y[i] / totalinteraction;
}
return out;
}')
charel[,"Prob"] <- expected_interaction(V(marvnet)$n.books[match(charel$From, V(marvnet)$id)], V(marvnet)$n.books[match(charel$To, V(marvnet)$id)], n.books)
rm(expected_interaction)
# How many interactions do we observe?
marvneigh <- neighborhood(marvnet, order = 1, nodes = which(V(marvnet)$type==FALSE))
marvneigh <- lapply(marvneigh, function(x) return(as.integer(x[-1])))
names(marvneigh) <- V(marvnet)$id[which(V(marvnet)$type==FALSE)]
marvneighsize <- sapply(marvneigh, length)
charel[,"FromInt"] <- match(charel$From, names(marvneigh))
charel[,"ToInt"] <- match(charel$To, names(marvneigh))
cppFunction('IntegerVector n_common_elements(IntegerVector ind1, IntegerVector ind2, List masterlist){
int n = ind1.size();
IntegerVector out(n);
for(int i = 0; i < n; ++i){
int aind = ind1[i]-1;
int bind = ind2[i]-1;
IntegerVector a = masterlist[aind];
IntegerVector b = masterlist[bind];
int countcommon = 0;
for(int j = 0; j < a.size(); ++j){
if(is_true(any(b == a[j]))){
countcommon++;
}
}
out[i] = countcommon;
}
return out;
}')
# This line might take a couple minutes.
charel[,"Obs"] <- n_common_elements(charel$FromInt, charel$ToInt, marvneigh)
rm(marvneigh, marvneighsize, n_common_elements) # Delete unnecessary objects
charel <- charel[, -c(4, 5)] # Drop unnecessary columns
cppFunction('NumericVector z_score_fun(IntegerVector obs, NumericVector expected){
int n = obs.size();
NumericVector out(n);
double avgexp = mean(expected);
double sdobs;
NumericVector obsminusexpsq(n);
for(int i = 0; i < n; ++i){
obsminusexpsq[i] = pow(obs[i] - avgexp, 2);
}
sdobs = sqrt(sum(obsminusexpsq) / n);
for(int j = 0; j < n; ++j){
out[j] = (obs[j] - expected[j]) / sdobs;
}
return out;
}')
charel[, "ObsZ"] <- z_score_fun(charel$Obs, charel$Prob)
rm(z_score_fun)
cppFunction('NumericVector log_transform(NumericVector x){
int n = x.size();
NumericVector out(n);
for(int i = 0; i < n; ++i){
if(x[i] == 0){
out[i] = 0;
}
if(x[i] > 0){
out[i] = log(x[i]);
}
if(x[i] < 0){
out[i] = (-1) * log(abs(x[i]));
}
}
return out;
}')
alpha = .00001; criticalvalue <- qnorm(alpha, lower.tail = FALSE)
charel$ObsZlog <- log_transform(charel$ObsZ)
charel[, "Keep"] <- (abs(charel$ObsZ) >= criticalvalue) & (abs(charel$ObsZlog) >= criticalvalue)
rm(alpha, criticalvalue, log_transform)
charelbb <- charel[which(charel$Keep == TRUE), ]
charelbb <- charelbb[, c("From", "To", "ObsZlog")]
charelbb <- graph.data.frame(charelbb, directed = FALSE)
charelbb <- simplify(charelbb)
cleanupnames <- function(y){
y <- strsplit(y, "/", fixed = TRUE)[[1]][1]
y <- strsplit(y, " [", fixed = TRUE)[[1]][1]
y <- gsub(".", "", y, fixed = TRUE)
ylen <- nchar(y)
lastchar <- substr(y, ylen, ylen)
y <- ifelse(lastchar == " ", substr(y, 1, ylen - 1), y)
return(y)
}
cleanupnames <- cmpfun(cleanupnames)
V(charelbb)$name <- sapply(1:vcount(charelbb), function(i) return(cleanupnames(V(charelbb)$name[i])))
rm(cleanupnames)
lo.charelbb <- layout.fruchterman.reingold(charelbb, params = list(niter = 5000, area = vcount(charelbb)^3))
plot(charelbb, vertex.size = 2 + 3 * betweenness(charelbb)/900, vertex.label.cex = .5, layout = lo.charelbb, vertex.label.family = "sans", vertex.color = "#F297A0", vertex.frame.color = "#6F95A2", vertex.label.color = "#000000", edge.color = "#B6BFBE")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment