Skip to content

Instantly share code, notes, and snippets.

@briatte
Last active September 18, 2015 16:32
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save briatte/e31dc5ec6af5651d686e to your computer and use it in GitHub Desktop.
Save briatte/e31dc5ec6af5651d686e to your computer and use it in GitHub Desktop.
library(dplyr)
# Toy example: weighted directed co-authorship ties, from first author `i`
# to co-author `j`.
#
# Each tibble below is one text: `i` is its first author, `j` lists every
# author (the first author included, as a self-loop), and `w` is the number
# of co-authors on that text.
edges = bind_rows(
  tibble(i = "A", j = c("A", "B", "C"), w = 2),
  tibble(i = "B", j = c("B", "A"), w = 1),
  tibble(i = "A", j = c("A", "C"), w = 1),
  tibble(i = "B", j = c("B", "C", "D", "E"), w = 3)
)

# first author self-loops (one row per text), with counts of co-authors
self = filter(edges, i == j)

# count number of texts per first author
n_au = table(self$i)

# remove self-loops from directed edge list
edges = filter(edges, i != j)

# count number of texts coauthored per co-author
n_co = table(edges$j)

# raw edge counts, keyed by "i->j"; built with vectorised paste() rather
# than apply(), which would first coerce the data frame to a matrix
raw = table(paste(edges$i, edges$j, sep = "->"))

# collapse directed ties: one row per (i, j) pair, with
#   raw = number of texts behind the tie
#   nfw = Newman-Fowler weight, the sum of 1 / w over those texts
# grouping on the two columns directly (instead of pasting them into a key
# and splitting it again with a regex) keeps author names that contain
# "->" intact
edges = summarise(
  group_by(edges, i, j),
  raw = n(),
  nfw = sum(1 / w),
  .groups = "drop"
)
edges = arrange(edges, i, j)

# Gross-Kirkland-Shalizi weights: the Newman-Fowler weight of each tie,
# divided by the sum of 1 / w over all texts of its first author
self_w = summarise(group_by(self, i), w = sum(1 / w))
# explicit join key: no "Joining by" message, no accidental extra keys
edges = left_join(edges, self_w, by = "i")
edges$gsw = edges$nfw / edges$w

# sanity check: every tie must have a first author with at least one
# self-loop (otherwise the weight is NA), and weights must lie in [0, 1]
stopifnot(!is.na(edges$gsw), edges$gsw >= 0, edges$gsw <= 1)

# final edge list: first author, co-author, edge weights
edges = select(edges, -w)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment