Skip to content

Instantly share code, notes, and snippets.

@gbit-is
Created May 18, 2024 17:30
Show Gist options
  • Save gbit-is/b5a38e5218261cb283ddf42fc5dd6c99 to your computer and use it in GitHub Desktop.
Save gbit-is/b5a38e5218261cb283ddf42fc5dd6c99 to your computer and use it in GitHub Desktop.
import requests
from bs4 import BeautifulSoup
import json
# Listing page for the "skjakort" product category on computer.is.
url = "https://www.computer.is/is/products/skjakort"
# NOTE(review): the page number lives in the URL fragment ("#page=N").
# Fragments are not transmitted in an HTTP request, so these presumably
# return the same HTML as the first page -- confirm against the site.
url_page_2 = url + "#page=2"
url_page_3 = url + "#page=3"
urls = [url, url_page_2, url_page_3]
# For offline testing, `url` can instead point at a locally served copy of
# the page, e.g. "http://localhost:8000/computeris.html".

# Lower-cased leading words that find_products strips from product titles.
product_names = ["skjákort"]
def pprint(msg):
    """Print *msg* as indented JSON, falling back to a plain ``print``.

    Args:
        msg: Any object. JSON-serializable values are printed with a
            two-space indent; anything else is printed unchanged.
    """
    try:
        text = json.dumps(msg, indent=2)
    except (TypeError, ValueError):
        # json.dumps raises TypeError for unsupported types and ValueError
        # for circular references. Catching only these keeps the fallback
        # while letting KeyboardInterrupt/SystemExit propagate.
        print(msg)
    else:
        print(text)
def get_html_data(url, timeout=30):
    """Download *url* and return its body parsed as HTML.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A BeautifulSoup tree built from the response body using the
        built-in "html.parser".

    Raises:
        requests.RequestException: If the connection fails, the request
            times out, or the server answers with a 4xx/5xx status.
    """
    # Without an explicit timeout, requests may wait forever on a stalled
    # server.
    r = requests.get(url, timeout=timeout)
    # Fail loudly on an error status instead of parsing an error page.
    r.raise_for_status()
    return BeautifulSoup(r.content, "html.parser")
def find_products(soup):
    """Extract product names and prices from a parsed listing page.

    Every ``<div>`` with class ``col-sm-4`` is inspected; those that also
    carry the ``text-right`` class are skipped. The name is the text of the
    div's ``<h3 class="product-title">``, with its first word removed when
    that word (lower-cased) appears in the module-level ``product_names``.
    The price is the integer formed by the digits found in the div's
    ``<span class="product-price">``.

    Args:
        soup: Parsed page, as returned by get_html_data.

    Returns:
        A list of dicts, one per product, each with the keys "name" (str)
        and "price" (int). Divs lacking a title, a price element, or any
        digit in the price text are left out.
    """
    products = []
    for product_div in soup.find_all("div", {"class": "col-sm-4"}):
        if "text-right" in product_div.attrs["class"]:
            continue

        title_tag = product_div.find("h3", {"class": "product-title"})
        # attrs must be a dict. The set literal used previously was treated
        # as a list of class names and only matched by accident.
        price_tag = product_div.find("span", {"class": "product-price"})
        if title_tag is None or price_tag is None:
            # A matching div without a title or price is not a product
            # entry; skip it instead of failing on None.
            continue

        title = "".join(title_tag.strings).strip()
        words = title.split(" ")
        if words[0].lower() in product_names:
            # Drop the leading category word from the title.
            title = " ".join(words[1:])

        digits = "".join(ch for ch in price_tag.text if ch.isdigit())
        if not digits:
            # No numeric price present; int("") would raise ValueError.
            continue

        products.append({"name": title, "price": int(digits)})
    return products
# Scrape each listing URL in turn and print the combined product list.
products = []
for url in urls:
    products.extend(find_products(get_html_data(url)))
pprint(products)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment