-
-
Save Selbosh/71ed6418c93bab43e8abac310f5a3964 to your computer and use it in GitHub Desktop.
Example of fitting a log-linear model to journals in communities
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
set.seed(2017) | |
C <- scrooge::citations | |
n <- nrow(C) | |
#G <- sort(sample.int(n, floor(n/2))) # split into two communities | |
#G <- list(c1 = G, c2 = setdiff(1:n, G)) | |
G <- split(1:n, cutree(hclust(as.dist(cor(C))), h = .85)) # hierarchical clustering into 4 groups | |
ngroups <- length(G) | |
Y <- as.matrix(cbind(win1 = t(C)[lower.tri(C)], | |
win2 = C[lower.tri(C)])) | |
npairs <- nrow(Y) | |
get_comm <- function(xs, comm) { | |
# Given a vector of journals `xs` and a membership list `comm`, | |
# return the community ID of each element of `xs` | |
in_list <- function(e, l) which(sapply(l, function(li) e %in% li)) | |
idx <- sapply(xs, function(x) in_list(x, comm)) | |
as.vector(idx) | |
} | |
X <- matrix(0, npairs, n + ngroups) | |
X[cbind(1:npairs, col(C)[lower.tri(C)])] <- 1 # winning journal | |
X[cbind(1:npairs, row(C)[lower.tri(C)])] <- -1 # losing journal | |
X[cbind(1:npairs, n + get_comm(col(C)[lower.tri(C)], G))] <- 1 # winning community | |
X[cbind(1:npairs, n + get_comm(row(C)[lower.tri(C)], G))] <- -1 # losing community | |
colnames(X) <- varNames <- c(colnames(C), paste0('Community', 1:ngroups)) | |
X <- X[, -1] # First variable is redundant | |
model <- glm(Y ~ 0 + X, family = binomial) | |
summary(model) | |
# Get vector of ALL coefficients s.t. mean = 0 | |
mu <- setNames(c(0, coef(model)), varNames) | |
mu <- mu - mean(mu) | |
# Convert into alpha(i, c) parametrisation | |
alpha <- outer(mu[1:n], mu[(n+1):(n+ngroups)], '-') | |
(alpha <- alpha - mean(alpha)) | |
#----------------------------------------------------------- | |
# The above design matrix assumes additive effects, | |
# which isn't really what we discussed/are interested in. | |
# What if there are actually n * ngroups coefs? | |
# ---------------------------------------------------------- | |
X2 <- matrix(0, npairs, n * ngroups) | |
X2[cbind(1:npairs, col(C)[lower.tri(C)] + n * (get_comm(row(C)[lower.tri(C)], G) - 1))] <- 1 # winning journal : losing community | |
X2[cbind(1:npairs, row(C)[lower.tri(C)] + n * (get_comm(col(C)[lower.tri(C)], G) - 1))] <- -1 # losing journal : winning community | |
colnames(X2) <- varNames2 <- c(outer(colnames(C), paste0('C', 1:ngroups), paste, sep = '__')) | |
X2 <- X2[, -1] # get rid of (at least) one redundant column (?) | |
model2 <- glm(Y ~ 0 + X2, family = binomial) ## Not sure if this really works yet. Try `?alias` to find redundancies | |
summary(model2) | |
mu2 <- setNames(c(0, coef(model2)), varNames2) | |
(mu2 <- mu2 - mean(mu2, na.rm = TRUE)) ## Possibly meaningless due to many NAs | |
# NB: this method doesn't like communities of size 1, because they will be entirely redundant |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment