Skip to content

Instantly share code, notes, and snippets.

@gbit-is
Created May 18, 2024 16:00
Show Gist options
  • Save gbit-is/19db762ecab4cc3a7279700cc244d698 to your computer and use it in GitHub Desktop.
Save gbit-is/19db762ecab4cc3a7279700cc244d698 to your computer and use it in GitHub Desktop.
import requests
from bs4 import BeautifulSoup
import json
# Collection page on tolvutaekni.is that the scraper targets.
url = "https://tolvutaekni.is/collections/skjakort"
# NOTE(review): this reassignment overrides the URL above, so the script
# actually fetches from a local server — presumably a saved copy of the page
# used for offline testing; confirm before running against the live site.
url = "http://localhost:8000/tt.html"
def pprint(msg):
    """Print *msg* as indented JSON, falling back to a plain ``print``.

    Args:
        msg: Any object. JSON-serialisable values are rendered with a
            two-space indent; anything else is printed as-is.
    """
    try:
        rendered = json.dumps(msg, indent=2)
    except (TypeError, ValueError):
        # json.dumps raises TypeError for unserialisable objects and
        # ValueError for circular references; only those warrant the
        # fallback (a bare except would also hide KeyboardInterrupt).
        print(msg)
    else:
        print(rendered)
def get_html_data(url, timeout=10):
    """Download *url* and return the response body parsed as HTML.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout ``requests.get`` can block indefinitely.

    Returns:
        A ``BeautifulSoup`` tree built with the ``html.parser`` backend.

    Raises:
        requests.RequestException: On connection failure, timeout, or an
            HTTP error status (4xx/5xx).
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly on error pages instead of parsing them as if they were
    # the expected document.
    response.raise_for_status()
    return BeautifulSoup(response.content, "html.parser")
def find_script(soup):
    """Locate the ``<script name="sliderule-tracking">`` element in *soup*.

    Args:
        soup: Parsed document exposing ``find_all``.

    Returns:
        A ``(found, script)`` pair. ``found`` is ``True`` and ``script`` is
        the first matching tag when one exists; otherwise the pair is
        ``(False, None)``. A pair is returned in both cases so callers can
        always unpack the result.
    """
    for script in soup.find_all("script"):
        if script.attrs.get("name") == "sliderule-tracking":
            return True, script
    return False, None
def extract_json(script):
    """Print and return the product records embedded in *script*.

    The script text is expected to contain the marker
    ``slideruleData.collection.rawProducts`` followed by one JSON object
    per line. Every line after the marker that contains ``title`` is
    treated as a candidate product.

    Args:
        script: Object whose ``string`` attribute holds the script text
            (``None`` is tolerated).

    Returns:
        A list of the decoded product dicts, in document order. The list is
        empty when the script has no text or the marker is absent.
    """
    script_data = script.string
    if not script_data:
        return []

    segments = script_data.split("slideruleData.collection.rawProducts")
    if len(segments) < 2:
        # Marker not present: nothing to extract.
        return []

    parsed_products = []
    for line in segments[1].split("\n"):
        if "title" not in line:
            continue
        # Strip surrounding whitespace first so a trailing comma followed
        # by spaces or "\r" is still recognised and removed.
        candidate = line.strip()
        if candidate.endswith(","):
            candidate = candidate[:-1]
        try:
            product_json = json.loads(candidate)
        except json.JSONDecodeError:
            # Lines that merely mention "title" (e.g. other JavaScript)
            # are not product records; skip them rather than abort.
            continue
        pprint(product_json)
        parsed_products.append(product_json)
    return parsed_products
# Script entry: fetch the page, locate the tracking script, dump its products.
soup = get_html_data(url)
result = find_script(soup)
# find_script may signal "not found" with a bare False instead of a pair;
# normalise the result so the unpacking below cannot raise TypeError.
script_found, script = result if isinstance(result, tuple) else (bool(result), None)
if script_found:
    extract_json(script)
else:
    print("sliderule-tracking script not found")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment