rmurphy2718/DEG.py

## DEG.py
#########################################################################################
# Ryan L Murphy
# Toy illustrative implementation of DEG in
# Heath & Parikh 2011, Generating random graphs with tunable clustering coefficients
# -----------------------------------------------------------------------------------
# The algorithm may get stuck for a while, this toy implementation just exits if
# there is no progress for some number of iterations
#########################################################################################

import networkx as nx
import numpy as np
import numpy.random as npr


def deg(n, d, C, max_stuck=100):
    """
    Simple implementation of DEG on page 4583
    :param n: Number of vertices
    :param d: Degree sequence graph to be created (the degree of every vertices)
    :param C: Target clustering coefficient of output graph
    :param max_stuck: Maximum number of iterations without progress before the function just exits
    :return: Edge list of graph as a set object
    """
    V = np.arange(0, n)
    E = set()
    M = sum(d)/2.0
    rd = d.copy()
    T = C * 1.0/3.0 * sum(0.5*d*(d-1))
    # ~~~~~~~~~~~~~~~~
    # Create Triangles
    # ~~~~~~~~~~~~~~~~
    while T > 0:
        #
        # Choose 3 nodes, without replacement, from V.
        # with probabilities given in the paper
        # (add small number since python doesn't like 0 values in probs)
        #
        probs = (rd+0.01)/sum(rd + 0.01)
        v, u, w = npr.choice(V, size=3, replace=False, p=probs)
        #
        # Identify the three edges that would make a triangle
        #
        e1 = tuple(sorted([v, u]))
        e2 = tuple(sorted([v, w]))
        e3 = tuple(sorted([u, w]))
        #
        # Identify which edges have not yet been added
        #
        need_to_add = []
        v_appearances = 0
        u_appearances = 0
        w_appearances = 0

        # Check e1, e2, e3, count appearances of u, v, w
        if e1 not in E:
            need_to_add.append(e1)
            v_appearances += 1
            u_appearances += 1
        #
        if e2 not in E:
            need_to_add.append(e2)
            v_appearances += 1
            w_appearances += 1
        #
        if e3 not in E:
            need_to_add.append(e3)
            u_appearances += 1
            w_appearances += 1
        #
        # If all edges are already in E, decrease T by one, per paper
        #
        if len(need_to_add) == 0:
            T -= 1
            continue
        #
        # Determine if residual degree is sufficient
        #
        if (rd[v] < v_appearances) or (rd[u] < u_appearances) or (rd[w] < w_appearances):
            continue
        #
        # Add edges
        #
        E.update(need_to_add)
        #
        # Update values
        #
        T -= 1
        M -= len(need_to_add)
        rd[v] -= v_appearances
        rd[u] -= u_appearances
        rd[w] -= w_appearances
    # ~~~~~~~~~~~~~~~~
    # Add remaining edges
    # ~~~~~~~~~~~~~~~~
    times_stuck = 0
    while M > 0 and times_stuck < max_stuck:
        # Sample 2 nodes without replacement
        probs = (rd+0.001)/sum(rd+0.001)
        v, u = npr.choice(V, size=2, replace=False, p=probs)
        #
        # need to add this since python doesn't like zero-values
        #  of the probabilities in npr.choice
        #
        if rd[u] == 0 or rd[v] == 0:
            times_stuck += 1
        else:
            e1 = tuple(sorted([v, u]))
            #
            if e1 not in E:
                E.add(e1)
                rd[v] -= 1
                rd[u] -= 1
                M -= 1
                times_stuck = 0
            else:
                times_stuck += 1
    #
    if times_stuck == max_stuck:
        raise Warning("Algorithm was stuck for {} iterations, returning.".format(times_stuck))
    return E


if __name__ == "__main__":

    print("The target clustering coefs are 0.06 and 0.33")

    # Reproduce two of the empirical results from the paper (those shown in Table 1)
    deg_seq_1 = np.repeat(10, 1000)
    deg_seq_2 = np.repeat(4, 1000)
    edge_list_1 = deg(n=len(deg_seq_1),
                      d=deg_seq_1,
                      C=0.06)

    edge_list_2 = deg(n=len(deg_seq_2),
                      d=deg_seq_2,
                      C=0.33)

    ii = 1
    for el in [edge_list_1, edge_list_2]:
        G = nx.from_edgelist(el)
        print("The empirical degree sequence of graph {} is {}".format(ii, G.degree()))
        print("The empirical clustering coef of graph {} is {}\n".format(ii, nx.average_clustering(G)))
        ii += 1
	#########################################################################################
	# Ryan L Murphy
	# Toy illustrative implementation of DEG in
	# Heath & Parikh 2011, Generating random graphs with tunable clustering coefficients
	# -----------------------------------------------------------------------------------
	# The algorithm may get stuck for a while, this toy implementation just exits if
	# there is no progress for some number of iterations
	#########################################################################################

	import networkx as nx
	import numpy as np
	import numpy.random as npr


	def deg(n, d, C, max_stuck=100):
	"""
	Simple implementation of DEG on page 4583
	:param n: Number of vertices
	:param d: Degree sequence graph to be created (the degree of every vertices)
	:param C: Target clustering coefficient of output graph
	:param max_stuck: Maximum number of iterations without progress before the function just exits
	:return: Edge list of graph as a set object
	"""
	V = np.arange(0, n)
	E = set()
	M = sum(d)/2.0
	rd = d.copy()
	T = C * 1.0/3.0 * sum(0.5d(d-1))
	# ~~~~~~~~~~~~~~~~
	# Create Triangles
	# ~~~~~~~~~~~~~~~~
	while T > 0:
	#
	# Choose 3 nodes, without replacement, from V.
	# with probabilities given in the paper
	# (add small number since python doesn't like 0 values in probs)
	#
	probs = (rd+0.01)/sum(rd + 0.01)
	v, u, w = npr.choice(V, size=3, replace=False, p=probs)
	#
	# Identify the three edges that would make a triangle
	#
	e1 = tuple(sorted([v, u]))
	e2 = tuple(sorted([v, w]))
	e3 = tuple(sorted([u, w]))
	#
	# Identify which edges have not yet been added
	#
	need_to_add = []
	v_appearances = 0
	u_appearances = 0
	w_appearances = 0

	# Check e1, e2, e3, count appearances of u, v, w
	if e1 not in E:
	need_to_add.append(e1)
	v_appearances += 1
	u_appearances += 1
	#
	if e2 not in E:
	need_to_add.append(e2)
	v_appearances += 1
	w_appearances += 1
	#
	if e3 not in E:
	need_to_add.append(e3)
	u_appearances += 1
	w_appearances += 1
	#
	# If all edges are already in E, decrease T by one, per paper
	#
	if len(need_to_add) == 0:
	T -= 1
	continue
	#
	# Determine if residual degree is sufficient
	#
	if (rd[v] < v_appearances) or (rd[u] < u_appearances) or (rd[w] < w_appearances):
	continue
	#
	# Add edges
	#
	E.update(need_to_add)
	#
	# Update values
	#
	T -= 1
	M -= len(need_to_add)
	rd[v] -= v_appearances
	rd[u] -= u_appearances
	rd[w] -= w_appearances
	# ~~~~~~~~~~~~~~~~
	# Add remaining edges
	# ~~~~~~~~~~~~~~~~
	times_stuck = 0
	while M > 0 and times_stuck < max_stuck:
	# Sample 2 nodes without replacement
	probs = (rd+0.001)/sum(rd+0.001)
	v, u = npr.choice(V, size=2, replace=False, p=probs)
	#
	# need to add this since python doesn't like zero-values
	# of the probabilities in npr.choice
	#
	if rd[u] == 0 or rd[v] == 0:
	times_stuck += 1
	else:
	e1 = tuple(sorted([v, u]))
	#
	if e1 not in E:
	E.add(e1)
	rd[v] -= 1
	rd[u] -= 1
	M -= 1
	times_stuck = 0
	else:
	times_stuck += 1
	#
	if times_stuck == max_stuck:
	raise Warning("Algorithm was stuck for {} iterations, returning.".format(times_stuck))
	return E


	if __name__ == "__main__":

	print("The target clustering coefs are 0.06 and 0.33")

	# Reproduce two of the empirical results from the paper (those shown in Table 1)
	deg_seq_1 = np.repeat(10, 1000)
	deg_seq_2 = np.repeat(4, 1000)
	edge_list_1 = deg(n=len(deg_seq_1),
	d=deg_seq_1,
	C=0.06)

	edge_list_2 = deg(n=len(deg_seq_2),
	d=deg_seq_2,
	C=0.33)

	ii = 1
	for el in [edge_list_1, edge_list_2]:
	G = nx.from_edgelist(el)
	print("The empirical degree sequence of graph {} is {}".format(ii, G.degree()))
	print("The empirical clustering coef of graph {} is {}\n".format(ii, nx.average_clustering(G)))
	ii += 1