Skip to content

Instantly share code, notes, and snippets.

@diogojc
Created November 25, 2011 19:55
Show Gist options
  • Save diogojc/1394300 to your computer and use it in GitHub Desktop.
Save diogojc/1394300 to your computer and use it in GitHub Desktop.
parameterizing and plotting Power Laws in python (Zipf example)
import numpy as np
def powerLaw(y, x):
"""
'When the frequency of an event varies as power of some attribute of that
event the frequency is said to follow a power law.' (wikipedia)
This is represented by the following equation, where c and alpha are
constants:
y = c . x ^ alpha
Args
--------
y: array with frequency of events >0
x: numpy array with attribute of events >0
Output
--------
(c, alpha)
c: the maximum frequency of any event
alpha: defined by (Newman, 2005 for details):
alpha = 1 + n * sum(ln( xi / xmin )) ^ -1
"""
c = 0
alpha = .0
if len(y) and len(y)==len(x):
c = max(y)
xmin = float(min(x))
alpha = 1 + len(x) * pow(sum(np.log(x/xmin)),-1)
return (c, alpha)
import matplotlib.pyplot as plt
def plotPowerLaws(y, x, c=[], alpha=[]):
"""
Plots the relationship between x and y and a fitted power law on LogLog
scale.
Args
--------
y: array with frequency of events >0
x: array with attribute of events >0
c: array of cs for various power laws
alpha: array of alphas for various power laws
"""
plt.figure()
plt.loglog()
plt.plot(x,
y,
'r+')
for _c, _alpha in zip(c,alpha):
plt.plot( (1, max(x)),
(_c, _c * pow(max(x), _alpha)),
label='~x^%.2f' % _alpha)
plt.legend()
plt.show()
if __name__ == '__main__':
"""
Checking Zipfs law, where the frequency and rank of a word follow a
specific power law, using the nltk genesis text in english.
"""
from collections import defaultdict
import numpy as np
from nltk.corpus import genesis
words = genesis.raw('english-web.txt').split(' ')
y = defaultdict(int)
for i in words:
y[i]+=1
y = sorted(y.values(),reverse=True)
x = np.array(xrange(1,len(y)+1))
c, alpha = powerLaw(y, x)
print 'According to Zipfs law %.2f should be close to 1.' % alpha
plotPowerLaws(y, x, [c,c], [-1,-alpha])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment