Skip to content

Instantly share code, notes, and snippets.

@vedantk
Created February 14, 2011 06:54
Show Gist options
  • Save vedantk/825572 to your computer and use it in GitHub Desktop.
Save vedantk/825572 to your computer and use it in GitHub Desktop.
Traces the bloodlines of academic royalty by sifting through Wikipedia.
#!/usr/bin/python
# Traces the bloodlines of academic royalty by sifting through Wikipedia.
import re
import urllib2
from BeautifulSoup import BeautifulSoup
# Shared URL opener used for every page fetch. It sends a browser-like
# User-agent header -- presumably because the default urllib2 agent is
# refused by the site; TODO confirm.
opener = urllib2.build_opener()
opener.addheaders = [('User-agent', 'Mozilla/5.0')]
# Case-insensitive pattern used to locate the "advisor" entry in a page's infobox.
advisor = re.compile("advisor", re.IGNORECASE)
# Site root prepended to the href values collected from infobox links.
prefix = "http://en.wikipedia.org"
def procLink(link, level=0):
try:
page = opener.open(link).read()
soup = BeautifulSoup(page)
except:
return []
name = soup.find(id="firstHeading").string
vcard = soup.find("table", "infobox vcard")
if not vcard:
return []
row = vcard.find("tr", text=advisor)
if not row:
return name
mentors = row.findNext("td").findChildren("a")
refs = [prefix + elt['href'] for elt in mentors]
print level * ">> ", name
children = [procLink(ref, level + 1) for ref in refs]
return [name] + [children]
if __name__ == '__main__':
print procLink("http://en.wikipedia.org/wiki/Feynman")
print procLink("http://en.wikipedia.org/wiki/John_McCarthy_(computer_scientist)")
# Appears to be output captured from a past run of the two procLink calls
# above: the ">> "-indented tree lines followed by the printed nested-list
# return value for each starting page. Nothing in the visible code reads it.
sample = """
Richard Feynman
>> John Archibald Wheeler
>> >> Karl Herzfeld
>> >> >> Friedrich Hasenöhrl
>> >> >> >> Franz S. Exner
>> >> >> >> >> August Kundt
>> >> >> >> >> >> Heinrich Gustav Magnus
>> >> >> >> >> >> >> Eilhard Mitscherlich
>> >> >> >> >> >> >> >> Friedrich Stromeyer
>> >> >> >> >> >> >> >> >> Johann Friedrich Gmelin
>> >> >> >> >> >> >> >> >> >> Philipp Friedrich Gmelin
>> >> >> >> >> >> >> >> >> >> >> Burchard Mauchart
>> >> >> >> >> >> >> >> >> >> >> >> Elias Rudolph Camerarius Jr.
>> >> >> >> >> >> >> >> >> >> >> >> >> Elias Rudolph Camerarius, Sr.
>> >> >> >> >> >> >> >> >> >> >> >> >> >> Georg Balthasar Metzger
>> >> >> >> >> >> >> >> >> >> >> >> >> >> >> Johann Georg Macasius
>> >> >> >> >> >> >> >> >> >> >> >> >> >> >> >> Johannes Musaeus
>> >> >> >> >> >> >> >> >> >> >> >> >> >> >> Emmanuel Stupanus
>> >> >> >> >> >> >> >> >> Louis Nicolas Vauquelin
>> >> >> >> >> >> >> >> >> >> Antoine François, comte de Fourcroy
[u'Richard Feynman', [[u'John Archibald Wheeler', [[u'Karl Herzfeld', [[u'Friedrich Hasen\xf6hrl', [[u'Franz S. Exner', [[u'August Kundt', [[u'Heinrich Gustav Magnus', [[u'Eilhard Mitscherlich', [[u'Friedrich Stromeyer', [[u'Johann Friedrich Gmelin', [[u'Philipp Friedrich Gmelin', [[u'Burchard Mauchart', [[u'Elias Rudolph Camerarius Jr.', [[u'Elias Rudolph Camerarius, Sr.', [[u'Georg Balthasar Metzger', [[u'Johann Georg Macasius', [[u'Johannes Musaeus', [[]]], []]], [u'Emmanuel Stupanus', [[]]]]]]]]]]]]], []]], [u'Louis Nicolas Vauquelin', [[u'Antoine Fran\xe7ois, comte de Fourcroy', [[]]]]]]]]]]]]]]]]]]]]]]]
John McCarthy (computer scientist)
>> Solomon Lefschetz
>> >> William Edward Story
>> >> >> Felix Klein
>> >> >> >> Julius Plücker
>> >> >> >> >> Christian Ludwig Gerling
>> >> >> >> >> >> Carl Friedrich Gauss
>> >> >> >> >> >> >> Johann Friedrich Pfaff
>> >> >> >> >> >> >> >> Abraham Gotthelf Kästner
>> >> >> >> >> >> >> >> >> Christian August Hausen
>> >> >> >> >> >> >> >> >> >> Johann Christoph Wichmannshausen
>> >> >> >> >> >> >> >> >> >> >> Otto Mencke
>> >> >> >> >> >> >> >> >> >> >> >> Jakob Thomasius
>> >> >> >> Rudolf Lipschitz
>> >> >> >> >> Johann Peter Gustav Lejeune Dirichlet
>> >> >> >> >> >> Siméon Denis Poisson
>> >> >> >> >> >> >> Joseph Louis Lagrange
>> >> >> >> >> >> >> >> Leonhard Euler
>> >> >> >> >> >> >> >> >> Johann Bernoulli
>> >> >> >> >> >> >> Pierre-Simon Laplace
>> >> >> >> >> >> Joseph Fourier
>> >> >> >> >> >> >> Joseph Louis Lagrange
>> >> >> >> >> >> >> >> Leonhard Euler
>> >> >> >> >> >> >> >> >> Johann Bernoulli
>> >> >> Carl Neumann
[u'John McCarthy (computer scientist)', [[u'Solomon Lefschetz', [[u'William Edward Story', [[u'Felix Klein', [[u'Julius Pl\xfccker', [[u'Christian Ludwig Gerling', [[u'Carl Friedrich Gauss', [[u'Johann Friedrich Pfaff', [[u'Abraham Gotthelf K\xe4stner', [[u'Christian August Hausen', [[u'Johann Christoph Wichmannshausen', [[u'Otto Mencke', [[u'Jakob Thomasius', [[]]]]]]]]]]]]]]]]]]], [u'Rudolf Lipschitz', [[u'Johann Peter Gustav Lejeune Dirichlet', [[u'Sim\xe9on Denis Poisson', [[u'Joseph Louis Lagrange', [[u'Leonhard Euler', [[u'Johann Bernoulli', [u'Jacob Bernoulli']]]]]], [u'Pierre-Simon Laplace', [u"Jean le Rond d'Alembert", []]]]], [u'Joseph Fourier', [[u'Joseph Louis Lagrange', [[u'Leonhard Euler', [[u'Johann Bernoulli', [u'Jacob Bernoulli']]]]]]]]]], []]]]], [u'Carl Neumann', [[], []]]]]]]]]
"""
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment