Skip to content

Instantly share code, notes, and snippets.

@adamhaber
Last active September 6, 2020 20:46
Show Gist options
  • Save adamhaber/4cabc2448bbcd7c14b8c3d50229db41f to your computer and use it in GitHub Desktop.
Save adamhaber/4cabc2448bbcd7c14b8c3d50229db41f to your computer and use it in GitHub Desktop.
wrangled vs non-wrangled ERGM
using LinearAlgebra,Distributed, Statistics, GraphRecipes, BenchmarkTools, LinearAlgebra, LightGraphs, Plots, RCall, ProfileView, FunctionWrappers, LoopVectorization, ThreadPools, FunctionWranglers
using FunctionWrappers: FunctionWrapper
import LightGraphs:
AbstractGraph,
SimpleGraph,
SimpleDiGraph,
AbstractEdge,
SimpleEdge,
has_edge,
is_directed,
edgetype,
src,
dst,
ne,
nv,
vertices,
has_vertex,
outneighbors,
inneighbors,
edges,
add_edge!,
rem_edge!,
erdos_renyi,
adjacency_matrix,
indegree,
outdegree,
density
abstract type AbstractERGM <: AbstractGraph{Int} end
struct erg{G <: AbstractArray{Bool}, F<:Function} <: AbstractERGM
m::G
fs::Vector{F}
trans::G
realnodecov::Union{Dict{String,Array{Float64,1}},Nothing}
catnodecov::Union{Dict{String,Array{Any,1}},Nothing}
edgecov::Union{Array{Array{Float64,2}},Nothing}
indegree::Vector{Int64}
outdegree::Vector{Int64}
n_funcs::Int
kstars_precomputed::Array{Float64,2}
end
struct erg_wrapped{G <: AbstractArray{Bool},T <: FunctionWrangler} <: AbstractERGM
m::G
fs::T
trans::G
realnodecov::Union{Dict{String,Array{Float64,1}},Nothing}
catnodecov::Union{Dict{String,Array{Any,1}},Nothing}
edgecov::Union{Array{Array{Float64,2}},Nothing}
indegree::Vector{Int64}
outdegree::Vector{Int64}
n_funcs::Int
kstars_precomputed::Array{Float64,2}
end
function erg(
g::G,
fs::Vector{F};
max_star=3,
realnodecov=nothing,
catnodecov=nothing,
edgecov=nothing,
) where {G <: AbstractArray{Bool}, F <: Function}
p = length(fs)
n = size(g)[1]
erg(
g,
fs,
collect(transpose(g)),
realnodecov,
catnodecov,
edgecov,
vec(sum(g, dims=1)),
vec(sum(g, dims=2)),
p,
kstarpre(n, max_star),
)
end
function erg_wrapped(
g::G,
fs::Vector{F};
max_star=3,
realnodecov=nothing,
catnodecov=nothing,
edgecov=nothing,
) where {G <: AbstractArray{Bool},F <: Function}
p = length(fs)
n = size(g)[1]
erg_wrapped(
g,
FunctionWrangler(fs),
collect(transpose(g)),
realnodecov,
catnodecov,
edgecov,
vec(sum(g, dims=1)),
vec(sum(g, dims=2)),
p,
kstarpre(n, max_star),
)
end
# Define some needed helper functions
is_directed(g::E) where {E <: AbstractERGM} = true
edgetype(g::E) where {E <: AbstractERGM} = SimpleEdge{Int}
ne(g::AbstractERGM) = sum(g.m)
nv(g::AbstractERGM) = @inbounds size(g.m)[1]
vertices(g::E) where {E <: AbstractERGM} = 1:nv(g)
has_vertex(g::E, v) where {E <: AbstractERGM} = v <= nv(g) && v > 0
indegree(g::E, v::T) where {E <: AbstractERGM,T <: Int} = @inbounds g.indegree[v]
outdegree(g::E, v::T) where {E <: AbstractERGM,T <: Int} = @inbounds g.outdegree[v]
density(x) = ne(x) / (nv(x) * (nv(x) - 1))
function has_edge(g::E, s, r) where {E <: AbstractERGM}
g.m[s, r]
end
function outneighbors(g::E, node) where {E <: AbstractERGM}
@inbounds return (v for v in 1:nv(g) if g.trans[v, node] && node != v)
end
function inneighbors(g::E, node) where {E <: AbstractERGM}
@inbounds return (v for v in 1:nv(g) if g.m[v, node] && v != node)
end
# Functions to turn edge on and off - does no checking
function add_edge!(g::E, s, r) where {E <: AbstractERGM}
g.m[s, r] = true
g.trans[r, s] = true
g.indegree[r] += 1
g.outdegree[s] += 1
return nothing
end
function rem_edge!(g::E, s, r) where {E <: AbstractERGM}
g.m[s, r] = false
g.trans[r, s] = false
g.indegree[r] -= 1
g.outdegree[s] -= 1
return nothing
end
# Toggle a single edge in-place
function edge_toggle!(g::E, s, r) where {E <: AbstractERGM}
if has_edge(g, s, r)
rem_edge!(g, s, r)
return nothing
else
add_edge!(g, s, r)
return nothing
end
end
# Iterator over edges/arcs
function edges(g::E) where {E <: AbstractERGM}
n = nv(g)
@inbounds return (SimpleEdge(i, j) for j = 1:n for i in 1:n if g.m[i, j] && i != j)
end
# Define iterator over dyads, excluding self-loops
function dyads(g::E) where {E <: AbstractGraph}
n = nv(g)
@inbounds return (SimpleEdge(i, j) for j = 1:n for i in 1:n if i != j)
end
# Toggle and edge and get the changes in total number of edges; really basic, but useful.
function delta_edge(g::E, s, r) where {E <: AbstractGraph}
if !has_edge(g, s, r)
return 1.0
else
return -1.0
end
end
function count_edges(g::E) where {E <: AbstractGraph}
sum(g.m)
end
# Toggle edge from s -> r, and get changes in count of reciprocal dyads
function delta_mutual(g::E, s, r) where {E <: AbstractGraph}
if !g.trans[s, r]
return 0.0
elseif !g.m[s, r]
return 1.0
else
return -1.0
end
end
function count_mutual(g::E) where {E <: AbstractGraph}
sum(g.m .& g.trans)
end
# delta k-stars using a precalculated table of binomials - MUCH faster
# to maintain compatibility with both erg and simplegraphs, explicitly pass the required degree integer
function delta_istar(g::E, s, r, k) where {E <: AbstractGraph}
if !has_edge(g, s, r)
return g.kstars_precomputed[indegree(g, r) + 1, k - 1]
else
return -g.kstars_precomputed[indegree(g, r) - 1 + 1, k - 1]
end
end
function delta_ostar(g::E, s, r, k) where {E <: AbstractGraph}
if !has_edge(g, s, r)
return g.kstars_precomputed[outdegree(g, s) + 1, k - 1]
else
return -g.kstars_precomputed[outdegree(g, s) - 1 + 1, k - 1]
end
end
# Change in mixed 2-stars
function delta_m2star(g::G, s, r) where {G <: AbstractGraph}
if !has_edge(g, s, r) && !has_edge(g, r, s)
return convert(Float64, indegree(g, s) + outdegree(g, r))
elseif has_edge(g, s, r) && !has_edge(g, r, s)
return -convert(Float64, indegree(g, s) + outdegree(g, r))
elseif !has_edge(g, s, r) && has_edge(g, r, s)
return convert(Float64, indegree(g, s) + outdegree(g, r) - 2)
else
return convert(Float64, -indegree(g, s) - outdegree(g, r) + 2)
end
end
# Change in transitive triads
# Works very well for arrays, horribly for edgelist
# Note: must use + and * operations to get SIMD - using && is much slower
function delta_ttriple(g::E, s, r) where {E <: AbstractGraph}
x = 0
@inbounds for i in vertices(g)
x +=
(g.trans[i, r] * g.trans[i, s]) +
(g.m[i, r] * g.trans[i, s]) +
(g.m[i, r] * g.m[i, s])
end
if !has_edge(g, s, r)
return convert(Float64, x)
else
return -convert(Float64, x)
end
end
# This works pretty good on arrays and MUCH better on edgelist
# prefer this function
function delta_ttriple2(g::G, s, r) where {G <: AbstractGraph}
c = 0
for i in outneighbors(g, r)
# c += g.trans[i, s]
c += has_edge(g, s, i)
end
for i in inneighbors(g, r)
# c += g.trans[i, s] + g.m[s, i]
c += has_edge(g, s, i) + has_edge(g, i, s)
end
if !has_edge(g, s, r)
return convert(Float64, c)
else
return -convert(Float64, c)
end
end
function delta_ctriple(g::E, s, r) where {E <: AbstractGraph}
x = 0
# for i in 1:n
@inbounds for i in vertices(g)
x += g.trans[i, r] * g.m[i, s]
# x += has_edge(g, r, i)*has_edge(g, i, s)
end
if !has_edge(g, s, r)
return convert(Float64, x)
else
return -convert(Float64, x)
end
end
# 500 node network benchmarks
# 20 us for simplegraph
# 1.8 us for array
# A combined transitive and cyclic triple function is really no faster
# than separate.
function delta_ttriplectriple(g::E, s, r) where {E <: AbstractGraph}
a = 0
b = 0
@simd for i in vertices(g)
a +=
(g.trans[i, r] * g.trans[i, s]) +
(g.m[i, r] * g.trans[i, s]) +
(g.m[i, r] * g.m[i, s])
b += (g.trans[i, r] * g.m[i, s])
end
if !has_edge(g, s, r)
return convert(Float64, a), -convert(Float64, b)
else
return -convert(Float64, a), convert(Float64, b)
end
end
# Effect of covariate on indegrees
function delta_nodeicov(g::G, s, r, cov_name)::Float64 where {G <: AbstractGraph}
if !has_edge(g, s, r)
return g.realnodecov[cov_name][r]
else
return -g.realnodecov[cov_name][r]
end
end
# Effect of covariate on outdegrees
function delta_nodeocov(g::G, s, r, cov_name)::Float64 where {G <: AbstractGraph}
if !has_edge(g, s, r)
return g.realnodecov[cov_name][s]
else
return -g.realnodecov[cov_name][s]
end
end
# Effect of dyadic distance on covariate p
function delta_nodediff(g::G, s, r, k, cov_name)::Float64 where {G <: AbstractGraph}
if !has_edge(g, s, r)
return abs(g.realnodecov[cov_name][s] - g.realnodecov[cov_name][r])^k
else
return -abs(g.realnodecov[cov_name][s] - g.realnodecov[cov_name][r])^k
end
end
function kstarpre(n::Int, k::Int)
m = zeros(n, k)
for j = 1:k
for i = 1:n
m[i, j] = convert(Float64, binomial(i - 1, j))
end
end
return m
end
function change_scores(g::erg, i, j)
x = zeros(g.n_funcs)
func_list = g.fs
for k = 1:g.n_funcs
x[k] = func_list[k](g, i, j)
end
return x
end
function change_scores(g::erg_wrapped, i, j)
res = zeros(Float64, length(g.fs))
smap!(res, g.fs, g, i, j)
return res
end
# Does 500 node graph in 0.18 ms
function subgraphcount(g::E) where {E <: AbstractGraph}
x = zeros(g.n_funcs)
g2 = deepcopy(g)
for j in vertices(g2)
for i in vertices(g2)
if i == j
continue
elseif !has_edge(g2, i, j)
continue
else
x -= change_scores(g2, i, j)
edge_toggle!(g2, i, j)
end
end
end
return x
end
# Generate random graph given a vector of ERGM parameters
# returns total change in graph statistics, number of toggles (and, optionally, the actual graph)
function rgraph(
theta::Vector{Float64},
g::E,
graphstats,
K::Int64;
return_graph=true,
) where {E <: AbstractGraph}
n = nv(g)
x = copy(graphstats)
g2 = deepcopy(g)
toggled = 0
@inbounds for k = 1:K
for j = 1:n
for i = 1:n
if i == j
continue
else
deltastats = change_scores(g2, i, j)
if log(rand()) < dot(theta, deltastats)
edge_toggle!(g2, i, j)
x += deltastats
toggled += 1
end
end
end
end
end
if return_graph == true
return x, g2, toggled
else
return x, toggled
end
end
function rgraphs(
theta::Vector{Float64},
g::E,
K::Int64,
N::Int64
) where {E <: AbstractGraph}
n = nv(g)
g2 = deepcopy(g)
graphs = Any[]
for n_samp = 1:N
@inbounds for k = 1:K
for j = 1:n
for i = 1:n
if i == j
continue
else
deltastats = change_scores(g2, i, j)
if log(rand()) < dot(theta, deltastats)
edge_toggle!(g2, i, j)
end
end
end
end
end
push!(graphs, deepcopy(g2))
end
return graphs
end
n = 100
g_orig = erdos_renyi(n, 0.05; is_directed=true)
g_adj = convert(Array{Bool,2}, collect(adjacency_matrix(g_orig)))
rand_covariate = randn(n) * 0.1
Signature = FunctionWrapper{Float64,Tuple{erg,Int,Int}}
funcs = [delta_edge,
delta_mutual,
(g, i, j) -> delta_istar(g, i, j, 2),
(g, i, j) -> delta_ostar(g, i, j, 2),
delta_m2star,
delta_ttriple,
(g, i, j) -> delta_nodeicov(g, i, j, "cov1"),
(g, i, j) -> delta_nodeocov(g, i, j, "cov1"),
]
m = erg(g_adj, funcs; realnodecov=Dict("cov1" => rand_covariate))
m_wrapped = erg_wrapped(g_adj, funcs; realnodecov=Dict("cov1" => rand_covariate))
change_scores(m, 10, 17)
@btime change_scores(m, 10, 17)
change_scores(m_wrapped, 10, 17)
@btime change_scores(m_wrapped, 10, 17)
subgraphcount(m)
@btime subgraphcount(m)
subgraphcount(m_wrapped)
@btime subgraphcount(m_wrapped)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment