Last active
November 24, 2020 15:58
-
-
Save ahpex/6b55871a18db03024a07c8d24838395e to your computer and use it in GitHub Desktop.
Fetch 'Wheel of Time WoT' character names
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Fetch a list of character names for the Wheel of Time (WoT)
# and print them as Dokuwiki links.
from urllib.request import urlopen | |
from bs4 import BeautifulSoup | |
def fetch_all_names():
    """Download every character index page and print one Dokuwiki link per line.

    All pages are fetched before anything is printed, so a failure while
    fetching produces no partial output.
    """
    # The WoT page groups some first letters together
    # NOTE(review): no entry here contains 'q' -- confirm against the site
    # whether that letter has an index page of its own.
    index_pages = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'op', 'r', 's', 't', 'uz']

    collected = []
    for page in index_pages:
        collected.extend(fetch_character_names(page))

    for entry in collected:
        print(entry)
def fetch_character_names(firstletter):
    """Return Dokuwiki link lines for the characters listed on one index page.

    Args:
        firstletter: Path segment identifying the index page to download,
            e.g. ``'a'`` or a grouped segment such as ``'op'``.

    Returns:
        A list with one string per ``<li>`` element found inside the first
        ``<table>`` of the page, each built by ``create_dokuwiki_link``.
    """
    url = f'http://www.encyclopaedia-wot.org/characters/{firstletter}/index.html'
    # Use the response as a context manager so the underlying connection is
    # closed as soon as the body has been read, instead of leaking until GC.
    with urlopen(url) as response:
        markup = response.read()

    bs = BeautifulSoup(markup, 'lxml')
    name_links = bs.table.find_all('li')
    return [create_dokuwiki_link(link) for link in name_links]
def create_dokuwiki_link(link):
    """Format one list item as a Dokuwiki external-link bullet.

    Args:
        link: An element whose ``.a`` child exposes an ``href`` item and a
            ``get_text()`` method.

    Returns:
        A string of the form `` - [[<site>/<href>|<text>]]``.
    """
    anchor = link.a
    target, label = anchor['href'], anchor.get_text()
    return f' - [[http://www.encyclopaedia-wot.org/{target}|{label}]]'
# Run the scrape only when this file is executed as a script, not on import.
if __name__ == '__main__':
    fetch_all_names()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment