Skip to content

Instantly share code, notes, and snippets.

@jitsejan
Created December 24, 2020 20:51
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jitsejan/6df3a7db678aa2588e0f5d6d0660e8be to your computer and use it in GitHub Desktop.
Save jitsejan/6df3a7db678aa2588e0f5d6d0660e8be to your computer and use it in GitHub Desktop.
Simple example of how to use XPath expressions to get data from a website.
from dataclasses import dataclass
from typing import Iterator, Optional

import lxml.html
import requests
# Root of the wiki; every scraped path is appended to this prefix.
BASE_URL = "https://zelda.gamepedia.com"

# Request headers applied to the HTTP session used for all page fetches.
HEADERS = {"User-Agent": "Mozilla/5.0"}
@dataclass(frozen=True)
class Item:
    """Immutable record holding an item's name and its price.

    The custom ``__repr__`` prints the field values without quoting, e.g.
    ``Item(name=Bow, price=980)``.
    """

    name: str
    price: int

    def __repr__(self):
        # Resolve the class name at runtime so subclasses print their own name.
        label = self.__class__.__name__
        return f'{label}(name={self.name}, price={self.price})'
def _get_tree_from_url(url: str, timeout: float = 30.0) -> lxml.html.HtmlElement:
    """Download *url* with the module-level ``session`` and parse it as HTML.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The element produced by ``lxml.html.fromstring`` for the response
        text.

    Raises:
        requests.exceptions.RequestException: If the request fails or the
            timeout expires.
    """
    # requests has no default timeout; without one an unresponsive server
    # would block the script indefinitely.
    resp = session.get(url, timeout=timeout)
    return lxml.html.fromstring(resp.text)
def get_item_links() -> Iterator[str]:
    """Yield the absolute URL of every item linked from the items overview page.

    Links are taken from the anchors inside the gallery captions; any
    ``#fragment`` suffix is dropped so each URL points at the page itself.
    """
    overview = _get_tree_from_url(f"{BASE_URL}/Items_in_The_Legend_of_Zelda")
    anchors = overview.cssselect("li.gallerybox .gallerytext p a")
    for anchor in anchors:
        # Keep only the part of the href before the first '#'.
        path, _, _ = anchor.attrib['href'].partition('#')
        yield f"{BASE_URL}{path}"
def get_item_details(link: str) -> Optional[Item]:
    """Scrape the name and price of a single item from its page.

    Args:
        link: URL of the item page to fetch.

    Returns:
        An ``Item`` built from the page's ``og:title`` meta tag and the text
        of the first ``div`` in the "Cost(s)" table row, or ``None`` when
        either piece is missing or the cost text is not an integer.
    """
    tree = _get_tree_from_url(link)
    try:
        name = tree.cssselect("meta[property='og:title']")[0].attrib['content']
        price = int(tree.xpath("//tr[th//text()[contains(., 'Cost(s)')]]/td/div")[0].text)
    except (IndexError, KeyError, TypeError, ValueError):
        # IndexError: no matching element; KeyError: meta tag lacks 'content';
        # TypeError: the div has no text (None); ValueError: text is not an
        # integer. Anything else (e.g. KeyboardInterrupt) must propagate.
        return None  # No price for this item
    return Item(name, price)
# Shared HTTP session reused for every page fetch (read by _get_tree_from_url).
session = requests.Session()
# Merge into the session's default headers instead of replacing them, so the
# library defaults (e.g. Accept-Encoding) and case-insensitive lookup are kept.
session.headers.update(HEADERS)

# Fetch each linked item page lazily; pages without a usable price come back
# as None and are left out.
items = [
    item
    for item in map(get_item_details, get_item_links())
    if item is not None
]
# Most expensive items first.
items.sort(key=lambda x: x.price, reverse=True)
print(items)
# [Item(name=Bow, price=980), Item(name=Boomerang, price=300), Item(name=Blue Ring, price=250), Item(name=Arrow, price=80), Item(name=Red Water of Life, price=68), Item(name=Blue Candle, price=60), Item(name=Food, price=60), Item(name=Blue Water of Life, price=40), Item(name=Bomb, price=20), Item(name=Heart Container, price=4)]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment