weaselj/OPA2centralityTemplateIGraph.py

## OPA2centralityTemplateIGraph.py
# Coursera SNA optional Programming Assignment 2 template

# load the igraph library
# you may have to install this module if you haven't already
import igraph as ig
import numpy as np

# read in the graph in GML format
# it is a sampled collection of pages from a strange set of seed categories:
# Math, Sociology, and Chemistry
# Change this to be your local file location
#  if you are using Windows, replace the \ in the path with a double \, e.g.
# g = read.graph("C:\\Users\\rool\\Documents\\My Dropbox\\Education\\Social Network Analysis\\Week 3\\wikipedia.gml",format="gml")

g = ig.read('wikipedia.gml',format='gml')

# obtain summary information about the graph
print g.summary()

# obtain the undirected degree distribution (the GML file itself is directed)
degrees = g.degree()

# fit the power-law distribution. If $D < 0.05, the Kolmogorov Smirnov test tells
# us that the distribution is power-law.
# Also, look for the estimated power-law exponent $alpha and $xmin (the point at
# which you should start fitting the distribution)
# make sure you have executed all the code in plfit.R before running this function
# an explanation is here: http://tuvalu.santafe.edu/~aaronc/powerlaws/
# you can download the file directly here: http://tuvalu.santafe.edu/~aaronc/powerlaws/plfit.py
from plfit import plfit
a, xmin, L = plfit(degrees)
print 'Power-Law fit - alpha:',  a, ' x-min: ', xmin, ' log-likelihood: ', L

# plot the cumulative empirical distribution
y = map(degrees.count, range(max(degrees)+1))
x = range(len(y))

cumy = map(lambda i:float(sum(y[i:]))/sum(y), x)

import matplotlib.pyplot as plt

plt.loglog(x,cumy, lw=2)
plt.xlabel("degree k")
plt.xlabel("P(x) >= k")

# overlay the fitted distribution
startval = cumy[xmin]
xfit = range(xmin, max(x))
fittedvals = map(lambda x:x**(-a+1)*startval/xmin**(-a+1), xfit)
#fittedvals = map(lambda x:x**(-a+1)*startval/xmin**(-a+1), xfit)
plt.loglog(xfit,fittedvals, 'r--', lw=4)

# calculate the in and out degrees separately
# use the degree() function and options for calculating directed degree
# see the documentation here: http://igraph.sourceforge.net/documentation.html

# see which nodes have the max out and indegree
# for example, if you were to store the outdegree in the vector od, you could look up the page name like so:
#maxodnode = g.vs[od.index(max(od))]

# find undirected betweenness scores and then nodes with the max betweenness
# warning, can be slow with large graphs, you may consider cutoff= instead
bb = g.betweenness(directed=False, cutoff=5)
maxbbnode = g.vs[bb.index(max(bb))]
print 'Max betweenness: ', maxbbnode['label']

# this high betweennes node may seem a bit surprising
# you can check out its neighbors like this.
print 'Neighbors of max betweenness: ', g.vs[g.neighborhood(maxbbnode)]['label']

# calculate Page Rank and find the node having the highest pagerank
# you'll want the $vector portion of the answer returned
# the assignment doesn't ask about this, but it's good to know how to do this...
pr = g.pagerank()
maxprnode = g.vs[pr.index(max(pr))]
print 'Max Page Rank: ', maxprnode

# calculate the Bonacich alpha-centrality of a lattice
# I'm not quite convinced that igraph calculates these correctly, but the behavior makes sense
# to me for these values of alpha
glfour = ig.Graph.Lattice([4,4])
#unfortunately igraph python doesn't implement alpha centrality either
#ac = g.alpha_centrality(alpha=-0.5)
ac_node_size = map(lambda x:20.0*x/(max(ac)-min(ac)), ac)
ig.plot(glfour, layout=glfour.layout('grid'), vertex_size=ac_node_size)

#ac = g.alpha_centrality(alpha=+0.25)
ac_node_size = map(lambda x:20.0*x/(max(ac)-min(ac)), ac)
ig.plot(glfour, layout=glfour.layout('grid'), vertex_size=ac_node_size)

## OPA2centralityTemplateNetworkX.py
# Coursera SNA optional Programming Assignment 2 template

# load the igraph library
# you may have to install this module if you haven't already
import networkx as nx
import numpy as np

# read in the graph in GML format
# it is a sampled collection of pages from a strange set of seed categories:
# Math, Sociology, and Chemistry
# Change this to be your local file location
#  if you are using Windows, replace the \ in the path with a double \, e.g.
# g = read.graph("C:\\Users\\rool\\Documents\\My Dropbox\\Education\\Social Network Analysis\\Week 3\\wikipedia.gml",format="gml")

g = nx.read_gml('wikipedia.gml',relabel=True)

# obtain summary information about the graph
print nx.info(g)

# obtain the undirected degree distribution (the GML file itself is directed)
degrees = g.degree().values()

# fit the power-law distribution. If $D < 0.05, the Kolmogorov Smirnov test tells
# us that the distribution is power-law.
# Also, look for the estimated power-law exponent $alpha and $xmin (the point at
# which you should start fitting the distribution)
# make sure you have executed all the code in plfit.R before running this function
# an explanation is here: http://tuvalu.santafe.edu/~aaronc/powerlaws/
# you can download the file directly here: http://tuvalu.santafe.edu/~aaronc/powerlaws/plfit.py
from plfit import plfit
a, xmin, L = plfit(degrees)
print 'Power-Law fit - alpha:',  a, ' x-min: ', xmin, ' log-likelihood: ', L

# plot the cumulative empirical distribution
y = nx.degree_histogram(g)
x = range(len(y))

cumy = map(lambda i:float(sum(y[i:]))/sum(y), x)

import matplotlib.pyplot as plt

plt.loglog(x,cumy, lw=2)
plt.xlabel("degree k")
plt.xlabel("P(x) >= k")

# overlay the fitted distribution
startval = cumy[xmin]
xfit = range(xmin, max(x))
fittedvals = map(lambda x:x**(-a+1)*startval/xmin**(-a+1), xfit)
#fittedvals = map(lambda x:x**(-a+1)*startval/xmin**(-a+1), xfit)
plt.loglog(xfit,fittedvals, 'r--', lw=4)

# calculate the in and out degrees separately
# use the degree() function and options for calculating directed degree
# see the documentation here: http://igraph.sourceforge.net/documentation.html

# see which nodes have the max out and indegree
# for example, if you were to store the outdegree in the vector od, you could look up the page name like so:
#g.nodes()[od.index(max(od))]

# find undirected betweenness scores and then nodes with the max betweenness
# warning, can be slow with large graphs, you may consider k= instead
ug = g.to_undirected()
bb = nx.betweenness_centrality(ug)
maxbbnode = g.nodes()[bb.index(max(bb))]
print 'Max betweenness: ', maxbbnode

# this high betweennes node may seem a bit surprising
# you can check out its neighbors like this.
print 'Neighbors of max betweenness: ', g.neighbors(maxbbnode)

# calculate Page Rank and find the node having the highest pagerank
# you'll want the $vector portion of the answer returned
# the assignment doesn't ask about this, but it's good to know how to do this...
import operator as op
pr = nx.pagerank(g)
maxprnode = max(pr.iteritems(), key=op.itemgetter(1))[0]
print 'Max Page Rank: ', maxprnode

# calculate the Bonacich alpha-centrality of a lattice
# I'm not quite convinced that igraph calculates these correctly, but the behavior makes sense
# to me for these values of alpha
glfour = nx.grid_2d_graph(4,4)
#unfortunately netwrokx doesn't implement alpha centrality
#ac = nx.alpha_centrality(glfour, alpha=-0.5)
acw = map(lambda x: ac[x], glfour.nodes())
ac_node_size = map(lambda x:20.0*x/(max(acw)-min(acw)), acw)
nx.draw_networkx(glfour, node_size=ac_node_size)

#ac = nx.alpha_centrality(glfour, alpha=+0.25)
acw = map(lambda x: ac[x], glfour.nodes())
ac_node_size = map(lambda x:20.0*x/(max(acw)-min(acw)), acw)
nx.draw_networkx(glfour, node_size=ac_node_size)
	# Coursera SNA optional Programming Assignment 2 template

	# load the igraph library
	# you may have to install this module if you haven't already
	import igraph as ig
	import numpy as np

	# read in the graph in GML format
	# it is a sampled collection of pages from a strange set of seed categories:
	# Math, Sociology, and Chemistry
	# Change this to be your local file location
	# if you are using Windows, replace the \ in the path with a double \, e.g.
	# g = read.graph("C:\\Users\\rool\\Documents\\My Dropbox\\Education\\Social Network Analysis\\Week 3\\wikipedia.gml",format="gml")

	g = ig.read('wikipedia.gml',format='gml')

	# obtain summary information about the graph
	print g.summary()

	# obtain the undirected degree distribution (the GML file itself is directed)
	degrees = g.degree()

	# fit the power-law distribution. If $D < 0.05, the Kolmogorov Smirnov test tells
	# us that the distribution is power-law.
	# Also, look for the estimated power-law exponent $alpha and $xmin (the point at
	# which you should start fitting the distribution)
	# make sure you have executed all the code in plfit.R before running this function
	# an explanation is here: http://tuvalu.santafe.edu/~aaronc/powerlaws/
	# you can download the file directly here: http://tuvalu.santafe.edu/~aaronc/powerlaws/plfit.py
	from plfit import plfit
	a, xmin, L = plfit(degrees)
	print 'Power-Law fit - alpha:', a, ' x-min: ', xmin, ' log-likelihood: ', L

	# plot the cumulative empirical distribution
	y = map(degrees.count, range(max(degrees)+1))
	x = range(len(y))

	cumy = map(lambda i:float(sum(y[i:]))/sum(y), x)

	import matplotlib.pyplot as plt

	plt.loglog(x,cumy, lw=2)
	plt.xlabel("degree k")
	plt.xlabel("P(x) >= k")

	# overlay the fitted distribution
	startval = cumy[xmin]
	xfit = range(xmin, max(x))
	fittedvals = map(lambda x:x*(-a+1)startval/xmin**(-a+1), xfit)
	#fittedvals = map(lambda x:x*(-a+1)startval/xmin**(-a+1), xfit)
	plt.loglog(xfit,fittedvals, 'r--', lw=4)

	# calculate the in and out degrees separately
	# use the degree() function and options for calculating directed degree
	# see the documentation here: http://igraph.sourceforge.net/documentation.html

	# see which nodes have the max out and indegree
	# for example, if you were to store the outdegree in the vector od, you could look up the page name like so:
	#maxodnode = g.vs[od.index(max(od))]

	# find undirected betweenness scores and then nodes with the max betweenness
	# warning, can be slow with large graphs, you may consider cutoff= instead
	bb = g.betweenness(directed=False, cutoff=5)
	maxbbnode = g.vs[bb.index(max(bb))]
	print 'Max betweenness: ', maxbbnode['label']

	# this high betweennes node may seem a bit surprising
	# you can check out its neighbors like this.
	print 'Neighbors of max betweenness: ', g.vs[g.neighborhood(maxbbnode)]['label']

	# calculate Page Rank and find the node having the highest pagerank
	# you'll want the $vector portion of the answer returned
	# the assignment doesn't ask about this, but it's good to know how to do this...
	pr = g.pagerank()
	maxprnode = g.vs[pr.index(max(pr))]
	print 'Max Page Rank: ', maxprnode

	# calculate the Bonacich alpha-centrality of a lattice
	# I'm not quite convinced that igraph calculates these correctly, but the behavior makes sense
	# to me for these values of alpha
	glfour = ig.Graph.Lattice([4,4])
	#unfortunately igraph python doesn't implement alpha centrality either
	#ac = g.alpha_centrality(alpha=-0.5)
	ac_node_size = map(lambda x:20.0*x/(max(ac)-min(ac)), ac)
	ig.plot(glfour, layout=glfour.layout('grid'), vertex_size=ac_node_size)

	#ac = g.alpha_centrality(alpha=+0.25)
	ac_node_size = map(lambda x:20.0*x/(max(ac)-min(ac)), ac)
	ig.plot(glfour, layout=glfour.layout('grid'), vertex_size=ac_node_size)