Skip to content

Instantly share code, notes, and snippets.

@weaselj
Created October 13, 2012 21:00
Show Gist options
  • Save weaselj/3886135 to your computer and use it in GitHub Desktop.
Save weaselj/3886135 to your computer and use it in GitHub Desktop.
OPA2centralityTemplate for Coursera SNA class
# Coursera SNA optional Programming Assignment 2 template
# load the igraph library
# you may have to install this module if you haven't already
import igraph as ig
import numpy as np
# read in the graph in GML format
# it is a sampled collection of pages from a strange set of seed categories:
# Math, Sociology, and Chemistry
# Change this to be your local file location
# if you are using Windows, replace the \ in the path with a double \, e.g.
# g = read.graph("C:\\Users\\rool\\Documents\\My Dropbox\\Education\\Social Network Analysis\\Week 3\\wikipedia.gml",format="gml")
g = ig.read('wikipedia.gml',format='gml')
# obtain summary information about the graph
print g.summary()
# obtain the undirected degree distribution (the GML file itself is directed)
degrees = g.degree()
# fit the power-law distribution. If $D < 0.05, the Kolmogorov Smirnov test tells
# us that the distribution is power-law.
# Also, look for the estimated power-law exponent $alpha and $xmin (the point at
# which you should start fitting the distribution)
# make sure you have executed all the code in plfit.R before running this function
# an explanation is here: http://tuvalu.santafe.edu/~aaronc/powerlaws/
# you can download the file directly here: http://tuvalu.santafe.edu/~aaronc/powerlaws/plfit.py
from plfit import plfit
a, xmin, L = plfit(degrees)
print 'Power-Law fit - alpha:', a, ' x-min: ', xmin, ' log-likelihood: ', L
# plot the cumulative empirical distribution
y = map(degrees.count, range(max(degrees)+1))
x = range(len(y))
cumy = map(lambda i:float(sum(y[i:]))/sum(y), x)
import matplotlib.pyplot as plt
plt.loglog(x,cumy, lw=2)
plt.xlabel("degree k")
plt.xlabel("P(x) >= k")
# overlay the fitted distribution
startval = cumy[xmin]
xfit = range(xmin, max(x))
fittedvals = map(lambda x:x**(-a+1)*startval/xmin**(-a+1), xfit)
#fittedvals = map(lambda x:x**(-a+1)*startval/xmin**(-a+1), xfit)
plt.loglog(xfit,fittedvals, 'r--', lw=4)
# calculate the in and out degrees separately
# use the degree() function and options for calculating directed degree
# see the documentation here: http://igraph.sourceforge.net/documentation.html
# see which nodes have the max out and indegree
# for example, if you were to store the outdegree in the vector od, you could look up the page name like so:
#maxodnode = g.vs[od.index(max(od))]
# find undirected betweenness scores and then nodes with the max betweenness
# warning, can be slow with large graphs, you may consider cutoff= instead
bb = g.betweenness(directed=False, cutoff=5)
maxbbnode = g.vs[bb.index(max(bb))]
print 'Max betweenness: ', maxbbnode['label']
# this high betweennes node may seem a bit surprising
# you can check out its neighbors like this.
print 'Neighbors of max betweenness: ', g.vs[g.neighborhood(maxbbnode)]['label']
# calculate Page Rank and find the node having the highest pagerank
# you'll want the $vector portion of the answer returned
# the assignment doesn't ask about this, but it's good to know how to do this...
pr = g.pagerank()
maxprnode = g.vs[pr.index(max(pr))]
print 'Max Page Rank: ', maxprnode
# calculate the Bonacich alpha-centrality of a lattice
# I'm not quite convinced that igraph calculates these correctly, but the behavior makes sense
# to me for these values of alpha
glfour = ig.Graph.Lattice([4,4])
#unfortunately igraph python doesn't implement alpha centrality either
#ac = g.alpha_centrality(alpha=-0.5)
ac_node_size = map(lambda x:20.0*x/(max(ac)-min(ac)), ac)
ig.plot(glfour, layout=glfour.layout('grid'), vertex_size=ac_node_size)
#ac = g.alpha_centrality(alpha=+0.25)
ac_node_size = map(lambda x:20.0*x/(max(ac)-min(ac)), ac)
ig.plot(glfour, layout=glfour.layout('grid'), vertex_size=ac_node_size)
# Coursera SNA optional Programming Assignment 2 template
# load the igraph library
# you may have to install this module if you haven't already
import networkx as nx
import numpy as np
# read in the graph in GML format
# it is a sampled collection of pages from a strange set of seed categories:
# Math, Sociology, and Chemistry
# Change this to be your local file location
# if you are using Windows, replace the \ in the path with a double \, e.g.
# g = read.graph("C:\\Users\\rool\\Documents\\My Dropbox\\Education\\Social Network Analysis\\Week 3\\wikipedia.gml",format="gml")
g = nx.read_gml('wikipedia.gml',relabel=True)
# obtain summary information about the graph
print nx.info(g)
# obtain the undirected degree distribution (the GML file itself is directed)
degrees = g.degree().values()
# fit the power-law distribution. If $D < 0.05, the Kolmogorov Smirnov test tells
# us that the distribution is power-law.
# Also, look for the estimated power-law exponent $alpha and $xmin (the point at
# which you should start fitting the distribution)
# make sure you have executed all the code in plfit.R before running this function
# an explanation is here: http://tuvalu.santafe.edu/~aaronc/powerlaws/
# you can download the file directly here: http://tuvalu.santafe.edu/~aaronc/powerlaws/plfit.py
from plfit import plfit
a, xmin, L = plfit(degrees)
print 'Power-Law fit - alpha:', a, ' x-min: ', xmin, ' log-likelihood: ', L
# plot the cumulative empirical distribution
y = nx.degree_histogram(g)
x = range(len(y))
cumy = map(lambda i:float(sum(y[i:]))/sum(y), x)
import matplotlib.pyplot as plt
plt.loglog(x,cumy, lw=2)
plt.xlabel("degree k")
plt.xlabel("P(x) >= k")
# overlay the fitted distribution
startval = cumy[xmin]
xfit = range(xmin, max(x))
fittedvals = map(lambda x:x**(-a+1)*startval/xmin**(-a+1), xfit)
#fittedvals = map(lambda x:x**(-a+1)*startval/xmin**(-a+1), xfit)
plt.loglog(xfit,fittedvals, 'r--', lw=4)
# calculate the in and out degrees separately
# use the degree() function and options for calculating directed degree
# see the documentation here: http://igraph.sourceforge.net/documentation.html
# see which nodes have the max out and indegree
# for example, if you were to store the outdegree in the vector od, you could look up the page name like so:
#g.nodes()[od.index(max(od))]
# find undirected betweenness scores and then nodes with the max betweenness
# warning, can be slow with large graphs, you may consider k= instead
ug = g.to_undirected()
bb = nx.betweenness_centrality(ug)
maxbbnode = g.nodes()[bb.index(max(bb))]
print 'Max betweenness: ', maxbbnode
# this high betweennes node may seem a bit surprising
# you can check out its neighbors like this.
print 'Neighbors of max betweenness: ', g.neighbors(maxbbnode)
# calculate Page Rank and find the node having the highest pagerank
# you'll want the $vector portion of the answer returned
# the assignment doesn't ask about this, but it's good to know how to do this...
import operator as op
pr = nx.pagerank(g)
maxprnode = max(pr.iteritems(), key=op.itemgetter(1))[0]
print 'Max Page Rank: ', maxprnode
# calculate the Bonacich alpha-centrality of a lattice
# I'm not quite convinced that igraph calculates these correctly, but the behavior makes sense
# to me for these values of alpha
glfour = nx.grid_2d_graph(4,4)
#unfortunately netwrokx doesn't implement alpha centrality
#ac = nx.alpha_centrality(glfour, alpha=-0.5)
acw = map(lambda x: ac[x], glfour.nodes())
ac_node_size = map(lambda x:20.0*x/(max(acw)-min(acw)), acw)
nx.draw_networkx(glfour, node_size=ac_node_size)
#ac = nx.alpha_centrality(glfour, alpha=+0.25)
acw = map(lambda x: ac[x], glfour.nodes())
ac_node_size = map(lambda x:20.0*x/(max(acw)-min(acw)), acw)
nx.draw_networkx(glfour, node_size=ac_node_size)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment