Skip to content

Instantly share code, notes, and snippets.

@Selbosh
Last active May 31, 2017 16:38
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Selbosh/71ed6418c93bab43e8abac310f5a3964 to your computer and use it in GitHub Desktop.
Save Selbosh/71ed6418c93bab43e8abac310f5a3964 to your computer and use it in GitHub Desktop.
Example of fitting a log-linear model to journals in communities
set.seed(2017)
C <- scrooge::citations
n <- nrow(C)
#G <- sort(sample.int(n, floor(n/2))) # split into two communities
#G <- list(c1 = G, c2 = setdiff(1:n, G))
G <- split(1:n, cutree(hclust(as.dist(cor(C))), h = .85)) # hierarchical clustering into 4 groups
ngroups <- length(G)
Y <- as.matrix(cbind(win1 = t(C)[lower.tri(C)],
win2 = C[lower.tri(C)]))
npairs <- nrow(Y)
get_comm <- function(xs, comm) {
# Given a vector of journals `xs` and a membership list `comm`,
# return the community ID of each element of `xs`
in_list <- function(e, l) which(sapply(l, function(li) e %in% li))
idx <- sapply(xs, function(x) in_list(x, comm))
as.vector(idx)
}
X <- matrix(0, npairs, n + ngroups)
X[cbind(1:npairs, col(C)[lower.tri(C)])] <- 1 # winning journal
X[cbind(1:npairs, row(C)[lower.tri(C)])] <- -1 # losing journal
X[cbind(1:npairs, n + get_comm(col(C)[lower.tri(C)], G))] <- 1 # winning community
X[cbind(1:npairs, n + get_comm(row(C)[lower.tri(C)], G))] <- -1 # losing community
colnames(X) <- varNames <- c(colnames(C), paste0('Community', 1:ngroups))
X <- X[, -1] # First variable is redundant
model <- glm(Y ~ 0 + X, family = binomial)
summary(model)
# Get vector of ALL coefficients s.t. mean = 0
mu <- setNames(c(0, coef(model)), varNames)
mu <- mu - mean(mu)
# Convert into alpha(i, c) parametrisation
alpha <- outer(mu[1:n], mu[(n+1):(n+ngroups)], '-')
(alpha <- alpha - mean(alpha))
#-----------------------------------------------------------
# The above design matrix assumes additive effects,
# which isn't really what we discussed/are interested in.
# What if there are actually n * ngroups coefs?
# ----------------------------------------------------------
X2 <- matrix(0, npairs, n * ngroups)
X2[cbind(1:npairs, col(C)[lower.tri(C)] + n * (get_comm(row(C)[lower.tri(C)], G) - 1))] <- 1 # winning journal : losing community
X2[cbind(1:npairs, row(C)[lower.tri(C)] + n * (get_comm(col(C)[lower.tri(C)], G) - 1))] <- -1 # losing journal : winning community
colnames(X2) <- varNames2 <- c(outer(colnames(C), paste0('C', 1:ngroups), paste, sep = '__'))
X2 <- X2[, -1] # get rid of (at least) one redundant column (?)
model2 <- glm(Y ~ 0 + X2, family = binomial) ## Not sure if this really works yet. Try `?alias` to find redundancies
summary(model2)
mu2 <- setNames(c(0, coef(model2)), varNames2)
(mu2 <- mu2 - mean(mu2, na.rm = TRUE)) ## Possibly meaningless due to many NAs
# NB: this method doesn't like communities of size 1, because they will be entirely redundant
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment