Created
April 28, 2017 04:03
-
-
Save samclane/83e85046bf3f169c44815a9773673ad2 to your computer and use it in GitHub Desktop.
Scrape weapon names from roll20
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from collections import namedtuple
import json

from lxml import html
import requests
def unique(list):
    """Return the distinct items of ``list`` in first-seen order.

    Args:
        list: Iterable of hashable items. The name shadows the builtin
            ``list``; it is kept so existing callers are unaffected.

    Returns:
        A new list holding the first occurrence of each item, in the
        order the items were first encountered.

    Raises:
        TypeError: If an item is unhashable (it cannot be put in a set).
    """
    seen = set()
    result = []
    for item in list:
        if item not in seen:
            seen.add(item)
            result.append(item)
    return result
# One scraped table row: the weapon's name, its converted cost, and the first
# token of its damage column.
Weapon = namedtuple('Weapon', 'name cost hit_dice')
weaponlist = []

# Fetch the compendium page. The timeout stops the script hanging forever on
# a stalled connection, and raise_for_status() stops it from parsing an HTTP
# error page as if it were the weapons table.
page = requests.get('https://roll20.net/compendium/dnd5e/Weapons#content',
                    timeout=30)
page.raise_for_status()
tree = html.fromstring(page.content)

# NOTE(review): this absolute XPath depends on the page's exact layout and
# will silently match nothing if the markup changes -- confirm it still
# selects the weapons table.
weapons = tree.xpath('/html/body/div[2]/div[2]/div[2]/div/div[2]/table/tbody')

for tbody in weapons:
    # Iterating an lxml element yields its children directly (the rows here).
    for tr in tbody:
        # Collect the row's text fragments, dropping bare newline separators.
        w = [t for t in tr.itertext() if t != '\n']
        # The first token of the second fragment is the numeric cost.
        # NOTE(review): the meaning of the 200 multiplier is not shown here
        # (presumably a currency conversion) -- confirm.
        cost = int(w[1].split()[0]) * 200
        # Keep only the first token of the third fragment (e.g. the dice).
        hit_dice = w[2].split()[0]
        weaponlist.append(Weapon(w[0], cost, hit_dice))

# Drop any repeated rows while keeping the page order.
weaponlist = unique(weaponlist)

# Emit the result as a JSON array of objects; stdout carries only this
# document so it can be piped straight into a JSON consumer.
js = json.dumps([item._asdict() for item in weaponlist])
print(js)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
I could have used BeautifulSoup to get around the nasty xpath stuff, but I'm stupid like that.