Last active
June 10, 2019 19:48
-
-
Save rmurphy2718/29e07f872005ab1cd88019d27c1a3016 to your computer and use it in GitHub Desktop.
Simple Implementation of DEG in Heath and Parikh
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
######################################################################################### | |
# Ryan L Murphy | |
# Toy illustrative implementation of DEG in | |
# Heath & Parikh 2011, Generating random graphs with tunable clustering coefficients | |
# ----------------------------------------------------------------------------------- | |
# The algorithm may get stuck for a while, this toy implementation just exits if | |
# there is no progress for some number of iterations | |
######################################################################################### | |
import networkx as nx | |
import numpy as np | |
import numpy.random as npr | |
def deg(n, d, C, max_stuck=100): | |
""" | |
Simple implementation of DEG on page 4583 | |
:param n: Number of vertices | |
:param d: Degree sequence graph to be created (the degree of every vertices) | |
:param C: Target clustering coefficient of output graph | |
:param max_stuck: Maximum number of iterations without progress before the function just exits | |
:return: Edge list of graph as a set object | |
""" | |
V = np.arange(0, n) | |
E = set() | |
M = sum(d)/2.0 | |
rd = d.copy() | |
T = C * 1.0/3.0 * sum(0.5*d*(d-1)) | |
# ~~~~~~~~~~~~~~~~ | |
# Create Triangles | |
# ~~~~~~~~~~~~~~~~ | |
while T > 0: | |
# | |
# Choose 3 nodes, without replacement, from V. | |
# with probabilities given in the paper | |
# (add small number since python doesn't like 0 values in probs) | |
# | |
probs = (rd+0.01)/sum(rd + 0.01) | |
v, u, w = npr.choice(V, size=3, replace=False, p=probs) | |
# | |
# Identify the three edges that would make a triangle | |
# | |
e1 = tuple(sorted([v, u])) | |
e2 = tuple(sorted([v, w])) | |
e3 = tuple(sorted([u, w])) | |
# | |
# Identify which edges have not yet been added | |
# | |
need_to_add = [] | |
v_appearances = 0 | |
u_appearances = 0 | |
w_appearances = 0 | |
# Check e1, e2, e3, count appearances of u, v, w | |
if e1 not in E: | |
need_to_add.append(e1) | |
v_appearances += 1 | |
u_appearances += 1 | |
# | |
if e2 not in E: | |
need_to_add.append(e2) | |
v_appearances += 1 | |
w_appearances += 1 | |
# | |
if e3 not in E: | |
need_to_add.append(e3) | |
u_appearances += 1 | |
w_appearances += 1 | |
# | |
# If all edges are already in E, decrease T by one, per paper | |
# | |
if len(need_to_add) == 0: | |
T -= 1 | |
continue | |
# | |
# Determine if residual degree is sufficient | |
# | |
if (rd[v] < v_appearances) or (rd[u] < u_appearances) or (rd[w] < w_appearances): | |
continue | |
# | |
# Add edges | |
# | |
E.update(need_to_add) | |
# | |
# Update values | |
# | |
T -= 1 | |
M -= len(need_to_add) | |
rd[v] -= v_appearances | |
rd[u] -= u_appearances | |
rd[w] -= w_appearances | |
# ~~~~~~~~~~~~~~~~ | |
# Add remaining edges | |
# ~~~~~~~~~~~~~~~~ | |
times_stuck = 0 | |
while M > 0 and times_stuck < max_stuck: | |
# Sample 2 nodes without replacement | |
probs = (rd+0.001)/sum(rd+0.001) | |
v, u = npr.choice(V, size=2, replace=False, p=probs) | |
# | |
# need to add this since python doesn't like zero-values | |
# of the probabilities in npr.choice | |
# | |
if rd[u] == 0 or rd[v] == 0: | |
times_stuck += 1 | |
else: | |
e1 = tuple(sorted([v, u])) | |
# | |
if e1 not in E: | |
E.add(e1) | |
rd[v] -= 1 | |
rd[u] -= 1 | |
M -= 1 | |
times_stuck = 0 | |
else: | |
times_stuck += 1 | |
# | |
if times_stuck == max_stuck: | |
raise Warning("Algorithm was stuck for {} iterations, returning.".format(times_stuck)) | |
return E | |
if __name__ == "__main__": | |
print("The target clustering coefs are 0.06 and 0.33") | |
# Reproduce two of the empirical results from the paper (those shown in Table 1) | |
deg_seq_1 = np.repeat(10, 1000) | |
deg_seq_2 = np.repeat(4, 1000) | |
edge_list_1 = deg(n=len(deg_seq_1), | |
d=deg_seq_1, | |
C=0.06) | |
edge_list_2 = deg(n=len(deg_seq_2), | |
d=deg_seq_2, | |
C=0.33) | |
ii = 1 | |
for el in [edge_list_1, edge_list_2]: | |
G = nx.from_edgelist(el) | |
print("The empirical degree sequence of graph {} is {}".format(ii, G.degree())) | |
print("The empirical clustering coef of graph {} is {}\n".format(ii, nx.average_clustering(G))) | |
ii += 1 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment