Skip to content

Instantly share code, notes, and snippets.

@Gooseus
Last active June 14, 2018 21:56
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Gooseus/19fcd75b47cd13cb83fe892d9f9515c9 to your computer and use it in GitHub Desktop.
Save Gooseus/19fcd75b47cd13cb83fe892d9f9515c9 to your computer and use it in GitHub Desktop.
Updated, Adapted and Commented Simulation Code for "Homophily and Contagion Are Generically Confounded in Observational Social Network Studies"
# Adapted from http://www.stat.cmu.edu/~cshalizi/homophily-confounding/asymmetry-act.R
# for R version 3.4.4 (2018-03-15)
# Simulate the asymmetric issue.
asymmetry.sim <- function (num.nodes=400,
scale=3,
offset=0,
y.noise=0.02,
friend.noms=1,
response="linear",
nominate.by="distance",
time.trend=0.3
) {
#num.nodes=400; scale=3; offset=0; y.noise=0.02; response="not-linear"; nominate.by="not-distance";friend.noms=1; time.trend=0.3
#covariates.
# define a logit^-1 function
invlogit <- function(cc) exp(cc)/(1+exp(cc))
# get a vector of as many random values as we have nodes
xx <- runif(num.nodes)
# create a distance matrix from our random vector
distances <- as.matrix(dist(xx, diag=TRUE, upper=TRUE))
#potential friends.
adj.probs <- array(rbinom(length(distances), 1, invlogit(offset-scale*distances)), dim(distances))
diag(adj.probs) <- 0
adj.probs[lower.tri(adj.probs)] <- t(adj.probs)[lower.tri(adj.probs)]
fixed.x.dist <- distances
diag(fixed.x.dist) <- Inf
nominees <- t(rbind(sapply(1:num.nodes, FUN=function(ii) {
possibles <- which(adj.probs[ii,] == 1)
return(ifelse(rep(nominate.by=="distance",friend.noms),
possibles[which(order(fixed.x.dist[ii,possibles])<=friend.noms)],
sample(possibles, size=friend.noms, prob=invlogit(-abs((xx[possibles] - 0.5))))
))
})))
#num.nodes-by-friend.nom should be the size of the object.
#true adjacency matrix.
aa.mat <- array(0, dim(distances))
for (ii in 1:num.nodes) aa.mat[ii, nominees[ii,]] <- 1
#backwards matrix.
rev.aa.mat <- t(aa.mat)
#reciprocated matrix.
#recip.aa.mat <- aa.mat*rev.aa.mat
y1 = (xx-0.5)^3+rnorm(num.nodes,0,y.noise)
y2 = y1+rnorm(num.nodes,time.trend*xx,y.noise)
infl.y1 <- aa.mat%*%y1
back.y1 <- rev.aa.mat%*%y1
#recip.y1 <- recip.aa.mat%*%y1
XX <- cbind(1, y1, infl.y1, back.y1)
trial <- lm(y2 ~ y1 + infl.y1 + back.y1)
#summary(trial)
coef.table <- summary(trial)$coefficients[,1]
v.plus.c <- c(diag(summary(trial)$cov.unscaled), summary(trial)$cov.unscaled[3,4])*summary(trial)$sigma^2
#print(summary(trial)$cov.unscaled)
z.stat <- (coef.table[3]-coef.table[4])/sqrt(v.plus.c[3]+v.plus.c[4]+2*v.plus.c[5])
out <- cbind(c(coef.table, v.plus.c, z.stat))
rownames(out) <- c("int.b", "auto.b", "infl.b", "back.b",
"int.s", "auto.s", "infl.s", "back.s",
"cov.infl.back",
"z.stat")
return(out)
}
result <- replicate(5000, asymmetry.sim(friend=1, time.trend=0.3))
#result <- replicate(50, asymmetry.sim(friend=1, time.trend=0.3))
dim(result)
pdf("timeseriesmodel-act-5000.pdf", width=12, height=6)
par(mfrow=c(2,2))
hist(result[3,1,],
main="Effect of Phantom `Influencer' on `Influenced' in Time Series",
xlab=paste("Regression Coefficient, Mean =",signif(mean(result[3,1,]),digits=4)))
hist(result[2,1,],
main="Mutual Effect of Phantom Influence",
xlab=paste("Regression Coefficient Sum, Mean =",signif(mean(result[2,1,]),digits=4)))
hist(result[4,1,],
main="Effect of Phantom `Influenced' on `Influencer' in Time Series",
xlab=paste("Regression Coefficient, Mean =",signif(mean(result[4,1,]),digits=4)))
hist(result[10,1,],
main="Directional Difference of Phantom Influence",
xlab=paste("Coefficient Difference, Mean =",signif(mean(result[10,1,]),digits=4),", Prop>0:", signif(mean(result[10,1,]>0),digits=4)))
dev.off()
par(mfrow=c(2,2))
hist(result[3,1,],
main="Effect of Phantom `Influencer' on `Influenced' in Time Series",
xlab=paste("Regression Coefficient, Mean =",signif(mean(result[3,1,]),digits=4)))
hist(result[1,1,],
main="Mutual Effect of Phantom Influence",
xlab=paste("Regression Coefficient Sum, Mean =",signif(mean(result[1,1,]),digits=4)))
hist(result[4,1,],
main="Effect of Phantom `Influenced' on `Influencer' in Time Series",
xlab=paste("Regression Coefficient, Mean =",signif(mean(result[4,1,]),digits=4)))
hist(result[10,1,],
main="Directional Difference of Phantom Influence",
xlab=paste("Coefficient Difference, Mean =",signif(mean(result[10,1,]),digits=4),", Prop>0:", signif(mean(result[10,1,]>0),digits=4)))
# Adapted from http://www.stat.cmu.edu/~cshalizi/homophily-confounding/neutral-diffusion-act.R
# for R version 3.4.4 (2018-03-15)
#ACT's attempt at the voter model diffusion problem.
# Adapted for use with igraph instead of electrograph
# also updated to fix some bugs and to fix some pieces missing from the original paper
# load igraph for graph based stuff
library(igraph) # version 1.2.0
# a layout which should increase the distance between nodes for prettier graphs
layout.by.attr <- function(graph, wc, cluster.strength=1,layout=layout.auto) {
g <- graph.edgelist(get.edgelist(graph)) # create a lightweight copy of graph w/o the attributes.
E(g)$weight <- 1
attr <- cbind(id=1:vcount(g), val=wc)
g <- g + vertices(unique(attr[,2])) + igraph::edges(unlist(t(attr)), weight=cluster.strength)
l <- layout(g, weights=E(g)$weight)[1:vcount(graph),]
return(l)
}
# function create.network -- Create the toy network model... From page 13 of paper - https://arxiv.org/pdf/1004.4704.pdf
# We work with what is, frankly, a toy model of contagion (though, see footnote 13).
# There are n individuals connected in an undirected social network.
# Each individual i has an observed trait Xi, which is an unchanging variable; in our examples this will be binary.
# The network is homoophilous on this trait, so that individual with the same value of X are more likely to be connected.
# Individuals also have a time-varying choice, variable Yi(t), which again we will take to be binary.
# The initial choices, Yi(0), are set by flipping a fair coin (i.e. an unbiased Bernoulli process), and are therefore independent of Xi.
create.network <- function(nodes=100, # n individuals
homoph.factor=1, # factor of homophily, not used, assumed to be 1
baseline.density=0.01, # the baseline probability of a connection
same.boost=0.05 # the boost to probability for sharing the homophilous trait
){
# unbiased Bernoulli process to create vector of 0 or 1, the binary, unchanging, observed trait Xi
prop <- rbinom(nodes, 1, 0.5)
# generates a matrix of probabilities based between the homophilous trait Xi - 0.06 for sharing vs 0.01 for not
prob.matrix <- 1*(array(prop,c(nodes,nodes)) == t(array(prop,c(nodes,nodes))))*same.boost + baseline.density
# keep trying to make graphs using this method until one is created that is connected
connected = FALSE
while(!connected) {
# create the network adjacency matrix
# first pull an nodes^2 array of 0 or 1 values from a binomial distribution using the probability matrix
# and map to nxn array (matrix equivalent)
sociomat <- array(rbinom(length(prob.matrix), 1, prob.matrix), dim(prob.matrix))
# transpose this array and map to boolean while adding to itself and remap to integer
# this ensures that the upper-right triangle is mapped to the lower-left for an undirected network
# but it does not seem to work...
# probably because it also transposes the bottom-left to the top-right... this double mirrors?
sociomat <- 1*(sociomat+t(sociomat)>0)
# set the diagonal equal to 0 to eliminate self-relationships in network
diag(sociomat) <- 0
fix = graph_from_adjacency_matrix(sociomat)
connected = is.connected(fix)
}
sociomat <- as_adjacency_matrix(as.undirected(fix,mode="collapse"), sparse=FALSE)
# return both the adjacency matrix and the vector of Xi traits for individuals
return(list(socio=sociomat, traits=prop))
}
# 13: Notice that the expected value of Yi_t(t + 1) is just the mean of Yj(t) for the j neighboring i_t.
# The expected value of Yi(t + 1) for all i is thus a weighted average of Yi(t) and the mean of their neighbors.
# At the level of expectations, then, this process belongs to the family of linear social influence models
# used in, e.g., Friedkin (1998).
# Choices evolve as follows:
# At each time t, we pick an individual It, uniformly at random from i ∈ { 1,...,n }, independently of all prior events.
# This individual then picks a neighbor, again uniformly at random, Jt ∈ { j : Ai_tj = 1 }, and either,
# with very high probability, copies their choice, so that Yi_t(t) = Yj_t(t-1), or,
# with very low probability, assumes the opposite choice, for Yi_t(t) = 1 - Yj_t(t-1);
# all other individuals retain their previous choices.
# This process repeats for each time step.
evolve.network <- function(sociomatrix, # our adjacency matrix
choice=rbinom(dim(sociomatrix)[1], 1, 0.5), # our initial choice matrix, Yi(0), from a Bernoulli process
save.points=1000, # number of points where we'll save our evolving choice matrix
iterations.per.save=10, # how many iterations between saving the choice matrix
prob.of.match = 0.85, # the probability that a pick with match their friend
prob.of.opp = 0.1 # the probability that a pick with match their friend
){
# number of nodes (default=100)
nn <- dim(sociomatrix)[1]
# creat an uninitialized array of 100,1000 (defaults)
choice.out <- array(NA, c(nn, save.points))
for (kk in 1:(save.points*iterations.per.save)) {
# pick individual It, uniformily at random from i ∈ { 1,...,n }, independently of all prior events.
pick <- sample(nn, 1)
# find the friends of our pick
friends = which(sociomatrix[pick,]==1)
# randomly select a friend and get their choice
friend.choice = sample(choice[friends],1)
# with high probability adopt their choice, with low probability adopt the opposite?
# original code does not have the opposite choice...
#choice[pick] <- sample(choice[friends],1)
if(prob.of.match>runif(1,0.0,1.0)) {
choice[pick] <- friend.choice
} else {
if(prob.of.opp>runif(1,0.0,1.0)) {
choice[pick] <- 1 - friend.choice
}
}
if (kk %% iterations.per.save == 0) {
# every iterations.per.save (10) iterations, save the evolving choice matrix
choice.out[,kk/iterations.per.save] <- choice
}
}
# return the array of choice matrices
return(choice.out)
}
# create the test network with a baseline density of 0.002
nettest <- create.network(baseline.density=0.002)
# create the network graph
plot.obj <- graph_from_adjacency_matrix(nettest$socio, mode="undirected", diag=FALSE)
# save the initial positions
output.traits <- plot(plot.obj, layout=layout.by.attr(plot.obj,wc=1),
vertex.color=3+nettest$traits,vertex.label=NA, vertex.size=5,
main="Network showing homophilous traits")
# plot the data with Xi trait colors
#plot(plot.obj, layout=layout.by.attr(plot.obj,wc=1),
# vertex.color=1+nettest$traits,vertex.label=NA, vertex.size=5,
# main="Network showing homophilous traits")
#output.traits
# save our current data, for parania I suppose
save(nettest, plot.obj, output.traits, file="dont-lose-this.RData")
# create our initial choice vector from another fair binomial distribution
initial.choice <- rbinom(dim(nettest$socio)[1], 1, 0.5)
output.initial <- plot(plot.obj, layout=layout.by.attr(plot.obj,wc=1),
vertex.color=1+initial.choice, vertex.label=NA, vertex.size=5,
main="Network showing initial choices")
#output.initial
# plot to PDF with colors indicating initial choice
pdf("diffusion-initial-act.pdf", width=10, height=6)
plot(plot.obj, layout=layout.by.attr(plot.obj,wc=1),
vertex.color=1+initial.choice, vertex.label=NA, vertex.size=5,
main="Network showing initial choices")
dev.off() # turn off the PDF device
# evolve our choices
choice.prog <- evolve.network(nettest$socio, choice=initial.choice)
output.midway <- plot(plot.obj, layout=layout.by.attr(plot.obj,wc=1),
vertex.color=1+choice.prog[,297], vertex.label=NA, vertex.size=5,
main="Network showing choices at midway")
# plot midway choices to PDF
pdf("diffusion-midway-act.pdf", width=10, height=6)
plot(plot.obj, layout=layout.by.attr(plot.obj,wc=1),
vertex.color=1+choice.prog[,297], vertex.label=NA, vertex.size=5,
main="Network showing choices midway through simulation")
dev.off() # turn off the PDF device
output.final <- plot(plot.obj, layout=layout.by.attr(plot.obj,wc=1),
vertex.color=1+choice.prog[,1000], vertex.label=NA, vertex.size=5,
main="Network showing choices at end")
# plot the last choices to PDF
pdf("diffusion-final-act.pdf", width=10, height=6)
plot(plot.obj, layout=layout.by.attr(plot.obj,wc=1),
vertex.color=1+choice.prog[,1000], vertex.label=NA, vertex.size=5,
main="Network showing choices at end of simulation")
dev.off() # turn off the PDF device
par(mfrow=c(2,2))
output.traits
output.initial
output.midway
output.final
# Create a test network of the same density with no homophily
#dens <- mean(nettest$socio)
dens <- edge_density(plot.obj)
connected = FALSE
while(!connected) {
test.zero <- array(rbinom(length(nettest$socio), 1, dens/2), dim(nettest$socio))
test.zero <- test.zero + t(test.zero)
# we need to also fix our test.zero plot, maybe
test.g = graph_from_adjacency_matrix(test.zero, mode="undirected", diag=FALSE)
connected = is.connected(test.g)
}
if(is.directed(test.g)) {
test.g = as.undirected(test.g,mode="collapse")
test.zero = as_adjacency_matrix(test.g, sparse=FALSE)
}
# evolve the neutral test network with the same initial choices
choice.zero <- evolve.network(test.zero, choice=initial.choice)
# find the effect of our homophilous network and the test network
prog.effect <- zero.effect <- array(NA, c(2,dim(choice.prog)[2]))
for (kk in 1:dim(choice.prog)[2]) {
res.e <- glm(choice.prog[,kk] ~ nettest$trait, family=binomial(link=logit))
prog.effect[,kk] <- summary(res.e)$coef[2,1:2]
res.z <- glm(choice.zero[,kk] ~ nettest$trait, family=binomial(link=logit))
zero.effect[,kk] <- summary(res.z)$coef[2,1:2]
}
prog.e <- prog.effect[,1:100]
zero.e <- zero.effect[,1:100]
lims <- range(c(prog.e[1,]-2*prog.e[2,], prog.e[1,]+2*prog.e[2,],
zero.e[1,]-2*zero.e[2,], zero.e[1,]+2*zero.e[2,]))
# plot the correlation of the homophilous network to PDF
pdf("correlation-homoph.pdf", width=10, height=5)
plot(c(0,1000), lims, ty="n", main="Homophilous Network", xlab="Step", ylab="Effect Size")
x.seq <- seq(10, 1000, by=10)
points(x.seq, prog.e[1,], col=2, pch=19)
segments(x.seq, prog.e[1,]-2*prog.e[2,], x.seq, prog.e[1,]+2*prog.e[2,], col=2)
abline(h=0, col=4)
dev.off()
# plot the correlation of the neutral network to PDF
pdf("correlation-neutral.pdf", width=10, height=5)
plot(c(0,1000), lims, ty="n", main="Neutral-Model Network", xlab="Step", ylab="Effect Size")
points(x.seq, zero.e[1,], col=1, pch=19)
segments(x.seq, zero.e[1,]-2*zero.e[2,], x.seq, zero.e[1,]+2*zero.e[2,], col=1)
abline(h=0, col=4)
dev.off()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment