Skip to content

Instantly share code, notes, and snippets.

@ahpex
Last active November 24, 2020 15:58
Show Gist options
  • Save ahpex/6b55871a18db03024a07c8d24838395e to your computer and use it in GitHub Desktop.
Save ahpex/6b55871a18db03024a07c8d24838395e to your computer and use it in GitHub Desktop.
Fetch 'Wheel of Time WoT' character names
# Fetch a list of character names for the Wheel of Time (WoT)
# and print them as Dokuwiki links.
from urllib.request import urlopen
from bs4 import BeautifulSoup
def fetch_all_names():
    """Fetch the character links of every index page and print them.

    Calls ``fetch_character_names`` once per entry in the index list,
    flattens the per-page results into a single list and prints one
    Dokuwiki link per line. All pages are fetched before anything is
    printed.
    """
    # The WoT page groups some first letters together
    first_letters = [
        'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j',
        'k', 'l', 'm', 'n', 'op', 'r', 's', 't', 'uz',
    ]
    all_names = [fetch_character_names(x) for x in first_letters]
    # Each page yields its own list; merge them, keeping page order.
    flat_names = [item for sublist in all_names for item in sublist]
    for name in flat_names:
        print(name)
def fetch_character_names(firstletter):
    """Return Dokuwiki links for all characters listed on one index page.

    Args:
        firstletter: Path segment selecting the index page, e.g. ``'a'``
            or a grouped segment such as ``'op'``.

    Returns:
        A list of strings, one per ``<li>`` found inside the first
        ``<table>`` of the page, formatted by ``create_dokuwiki_link``.
    """
    url = f'http://www.encyclopaedia-wot.org/characters/{firstletter}/index.html'
    # Use the response as a context manager so the connection is closed
    # as soon as the body has been read, even if reading raises.
    with urlopen(url) as html:
        bs = BeautifulSoup(html.read(), 'lxml')
    name_links = bs.table.find_all('li')
    names = [create_dokuwiki_link(link) for link in name_links]
    return names
def create_dokuwiki_link(link):
    """Format one list item as a Dokuwiki external-link list entry.

    Args:
        link: An element whose ``a`` attribute supports ``['href']``
            lookup and ``get_text()`` (here: a parsed ``<li>`` tag
            containing an anchor).

    Returns:
        A string of the form `` - [[<site>/<href>|<text>]]``, where
        ``href`` is appended verbatim to the site root.
    """
    href = link.a['href']
    text = link.a.get_text()
    return f' - [[http://www.encyclopaedia-wot.org/{href}|{text}]]'
# Run the scraper only when executed as a script, not on import.
if __name__ == "__main__":
    fetch_all_names()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment