Last active
February 3, 2021 05:12
-
-
Save chathurawidanage/0b4c0a9bd49a05f5efb50c25ecb5ae94 to your computer and use it in GitHub Desktop.
Graph Analytics HW1 Sample Codes
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import networkx as nx | |
# Degree distribution | |
# Input: A graph | |
# Output: find degrees and plot their distribution | |
def Degree_Distribution(G): | |
degree = G.degree() | |
degree = [ deg for (v,deg) in degree ] | |
plot_distribution(degree, xlabel='Degree ($k$)', | |
ylabel='Number of nodes with degree $k$ ($N_k$)', title='Degree distributions') | |
# Connected components analysis | |
# Input: A graph | |
# Find the sizes of all connected components and plot the distribution | |
def CC_Distribution(G): | |
cc_sorted = sorted(nx.connected_components(G), key=len, reverse=True) | |
# print statistics of the top 5 components (if exist) | |
topcc = min(len(cc_sorted), 5) | |
for i in range(topcc): | |
cc = cc_sorted[i] | |
cc_graph = G.subgraph(cc) | |
n = cc_graph.number_of_nodes() | |
m = cc_graph.number_of_edges() | |
n_percent = (n/G.number_of_nodes()) * 100 | |
print("Largest component #", i+1) | |
print("Number of vertices:", n, " (", n_percent, ")", "\nNumber of edges: ", m, "\n") | |
cc_sizes = [len(c) for c in cc_sorted] | |
plot_distribution(cc_sizes, xlabel='Weakly connected component size', | |
ylabel='Count', title='Connected component size distributions') | |
# Clustering coefficient analysis | |
# Input: A graph | |
# Find the local clustering coefficient of all vertices and plot distribution | |
def Clustering_Analysis(G): | |
clust = nx.clustering(G) | |
local_clust_coefficient = [ v for v in clust.values() ] | |
avg_clust_coefficient = sum(local_clust_coefficient)/G.number_of_nodes() | |
print("Average clustering coefficient: ", avg_clust_coefficient) | |
#plot the distribution of clustering coefficient | |
plot_distribution(local_clust_coefficient, xlabel='Clustering coefficient', | |
ylabel='Number of vertices', title='Clustering coefficient distributions', | |
xlog=False, ylog=True, showLine=False) | |
# Shortest path analysis | |
# Input: A graph | |
# Find shortest paths in the largest 5 componets and plot distribution | |
def ShortestPaths_Analysis(G): | |
cc_sorted = sorted(nx.connected_components(G), key=len, reverse=True) | |
# find shortest paths in top 5 components | |
topcc = min(len(cc_sorted), 5) | |
for i in range(topcc) : | |
cc = cc_sorted[i] | |
cc_graph = G.subgraph(cc) | |
if(len(cc)>30000): | |
print("This component is too large. Using ten single-source shortest paths.") | |
cc = list(cc) | |
cc_graph = G.subgraph(cc) | |
shortest_path_lens = [] | |
for i in range(10): | |
length = nx.single_source_shortest_path_length(cc_graph, cc[i]) | |
shortest_path_lens += [ v for v in length.values() ] | |
else: | |
all_shortest_path_dict = dict(nx.all_pairs_shortest_path_length(cc_graph)) | |
shortest_path_lens = [] | |
for val1 in all_shortest_path_dict.values(): | |
for val in val1.values(): | |
shortest_path_lens.append(val) | |
plot_distribution(shortest_path_lens, xlabel='Shortest path lengths (hops)', | |
ylabel='Number of paths', title='Shortest path lengths distributions', | |
xlog=False, ylog=False, showLine=True, intAxis=True) | |
# Helper functions for plotting | |
import matplotlib.pyplot as plt | |
from matplotlib.ticker import MaxNLocator | |
plt.style.use('ggplot') | |
#plt.style.use('seaborn-ticks') | |
#plt.style.use('seaborn-notebook') | |
plt.rcParams['lines.linewidth']=3 | |
plt.rcParams['xtick.labelsize']=12 | |
plt.rcParams['ytick.labelsize']=12 | |
plt.rcParams['axes.labelsize']=14 | |
def plot_distribution (data, xlabel='', ylabel='', title='', xlog = True, ylog= True, showLine=False, intAxis=False) : | |
counts = {} | |
for item in data : | |
if item not in counts : | |
counts [ item ] = 0 | |
counts [ item ] += 1 | |
counts = sorted ( counts.items () ) | |
fig = plt.figure () | |
ax = fig.add_subplot (111) | |
ax.scatter ([ k for (k , v ) in counts ] , [ v for (k , v ) in counts ]) | |
if(len(counts)<20): # for tiny graph | |
showLine=True | |
if showLine==True: | |
ax.plot ([ k for (k , v ) in counts ] , [ v for (k , v ) in counts ]) | |
if xlog == True: | |
ax.set_xscale ( 'log') | |
if ylog == True: | |
ax.set_yscale ( 'log') | |
if intAxis == True: | |
gca = fig.gca() | |
gca.xaxis.set_major_locator(MaxNLocator(integer=True)) | |
ax.set_xlabel ( xlabel) | |
ax.set_ylabel ( ylabel ) | |
plt.title ( title ) | |
#fig.savefig ( "degree_distribution.png" ) | |
def plot_degree_bar (G) : | |
degs = {} | |
for n in G.nodes () : | |
deg = G.degree ( n ) | |
if deg not in degs : | |
degs [ deg ] = 0 | |
degs [ deg ] += 1 | |
items = sorted ( degs.items () ) | |
fig = plt.figure () | |
ax = fig.add_subplot (111) | |
print(items) | |
ax.bar([ k for (k , v ) in items ] , [ v for (k , v ) in items ]) | |
ax.set_xlabel ( 'Degree ($k$)') | |
ax.set_ylabel ( 'Number of nodes with degree $k$ ($N_k$)') | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment