Skip to content

Instantly share code, notes, and snippets.

@dgouldin
Created March 23, 2015 02:33
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save dgouldin/4bdd1fdd957c85aad85c to your computer and use it in GitHub Desktop.
Save dgouldin/4bdd1fdd957c85aad85c to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
from xml.etree import ElementTree
import requests
import unicodecsv as csv
import sys
# Wikipedia page whose "wikitable" tables are scanned for character names.
url = "http://en.wikipedia.org/wiki/List_of_Game_of_Thrones_characters"
# requests applies no timeout by default, so bound the wait explicitly to
# keep a stalled connection from hanging the script; then fail loudly on an
# HTTP error status instead of handing an error page to the XML parser.
response = requests.get(url, timeout=30)
response.raise_for_status()
def get_characters(table):
    """Yield the character name found in the first cell of each data row.

    ``table`` is an ElementTree element whose direct ``tr`` children are
    the table rows.  The first two rows are skipped, as are rows that have
    no ``td`` cell and rows whose first ``td`` carries a ``colspan``
    attribute.

    For every remaining row the name is taken from the first ``a`` element
    that is a direct child of the cell, or from the cell's own leading text
    when there is no such link.  Names are stripped of surrounding
    whitespace, and rows that produce no text at all are skipped so that
    callers never receive ``None`` or an empty string.
    """
    for row in table.findall('tr')[2:]:
        td = row.find('td')
        if td is None or td.attrib.get('colspan'):
            continue
        a = td.find('a')
        if a is not None:
            # a.text is None when the link wraps nested markup such as
            # <a><i>Name</i></a>; itertext() gathers the text at any depth.
            name = ''.join(a.itertext())
        else:
            name = td.text
        # td.text may be None for an empty cell; normalise before stripping.
        name = (name or '').strip()
        if name:
            yield name
# Parse the downloaded page as XML (fromstring raises if it is not
# well-formed).
tree = ElementTree.fromstring(response.content)
# Keep only the "wikitable" tables whose first "./tr/th" header cell reads
# exactly "Name".
tables = [
    candidate
    for candidate in tree.findall(".//table[@class='wikitable']")
    if candidate.findtext("./tr/th") == "Name"
]
# Write one single-column CSV row per character to standard output.
writer = csv.writer(sys.stdout)
for table in tables:
    for character in get_characters(table):
        writer.writerow((character,))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment