Skip to content

Instantly share code, notes, and snippets.

@gbit-is
Created May 18, 2024 16:46
Show Gist options
  • Save gbit-is/19cb0e1e860c77b59749b61c26f2feb5 to your computer and use it in GitHub Desktop.
Save gbit-is/19cb0e1e860c77b59749b61c26f2feb5 to your computer and use it in GitHub Desktop.
import requests
from bs4 import BeautifulSoup
import json
# First page of the product listing scraped by this script (the "skjakort"
# category on tl.is).
url = "https://tl.is/ihlutir/skjakort.html"
# Alternative source, currently disabled. Presumably a locally served copy of
# the listing page used for offline testing -- confirm before re-enabling.
#url = "http://localhost:8000/tolvulistinn.html"
# Second page of the same listing; the page number appears to be carried in
# the trailing "facet=page-N" path segment.
url_page_2 = "https://tl.is/ihlutir/skjakort.html/facet=page-2"
def pprint(msg):
    """Print *msg* as indented JSON, or as-is when it is not serialisable.

    Args:
        msg: Any object. JSON-compatible values are rendered with
            ``json.dumps(msg, indent=2)``; anything else is handed to
            ``print`` unchanged.
    """
    try:
        rendered = json.dumps(msg, indent=2)
    except (TypeError, ValueError):
        # TypeError: unsupported type; ValueError: e.g. circular reference.
        # Only serialisation failures trigger the fallback, so
        # KeyboardInterrupt / SystemExit are no longer swallowed.
        rendered = msg
    print(rendered)
def get_html_data(url, *, timeout=10):
    """Download *url* and return its body parsed as a BeautifulSoup tree.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout ``requests.get`` may block indefinitely.

    Returns:
        A ``BeautifulSoup`` object built with the ``html.parser`` backend.

    Raises:
        requests.exceptions.RequestException: On connection errors,
            timeouts, or a 4xx/5xx response status.
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly on an error status instead of parsing the error page as
    # if it were the product listing.
    response.raise_for_status()
    return BeautifulSoup(response.content, 'html.parser')
def _parse_price(price_div):
    """Return the digits found in *price_div*'s text as an int, else None."""
    if price_div is None:
        return None
    # get_text() also works when the container has several children, where
    # .string would be None. isdecimal() keeps only characters int() accepts.
    digits = ''.join(ch for ch in price_div.get_text() if ch.isdecimal())
    return int(digits) if digits else None


def find_products(soup):
    """Extract name, SKU and price from every product card in *soup*.

    A product card is a ``div`` with class ``rz-product-card``. Inside it,
    the first two text nodes of the ``container-heading`` div are taken as
    the name and the SKU, and the digits in the ``container-price`` div
    form the price.

    Args:
        soup: Parsed document (or any tag) exposing ``find_all``.

    Returns:
        A list of dicts with keys ``"name"``, ``"SKU"`` and ``"price"``.
        ``"SKU"`` is ``None`` when the heading has a single text node;
        ``"price"`` is an ``int`` built from every digit in the price text
        (all other characters are dropped), or ``None`` when the price
        container is missing or contains no digits. Cards without any
        heading text are skipped.
    """
    products = []
    for card in soup.find_all("div", {"class": "rz-product-card"}):
        heading = card.find("div", {"class": "container-heading"})
        title_strings = list(heading.strings) if heading is not None else []
        if not title_strings:
            # Nothing to identify the product by; not a usable card.
            continue
        price_div = card.find("div", {"class": "container-price"})
        products.append({
            "name": title_strings[0],
            "SKU": title_strings[1] if len(title_strings) > 1 else None,
            "price": _parse_price(price_div),
        })
    return products
# Scrape both listing pages in order, collect every product found, and
# print the combined result.
products = []
for page_url in (url, url_page_2):
    soup = get_html_data(page_url)
    products += find_products(soup)
pprint(products)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment