-
-
Save adlerweb/399a195e6d08d25b44c9ccf50bf0d7a1 to your computer and use it in GitHub Desktop.
Example code to scrape a site for stocked products.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import sys
import urllib.error
import urllib.request
from time import sleep

from bs4 import BeautifulSoup
# Page that lists the products (a local test server in this example).
url = "http://127.0.0.1:8000/"
# SKUs we want to be told about once they are sufficiently stocked.
interesting = ['pLarge']
# Seconds to wait between two polls (15 minutes).
pollFreq = 15*60


def fetch_site(address, timeout=30):
    """Download *address* and return the response body as text.

    Args:
        address: URL of the page to load.
        timeout: Seconds to wait on the connection before giving up, so a
            stalled server cannot block the poller forever.

    Returns:
        The body decoded as UTF-8; undecodable bytes are replaced rather
        than aborting the poll.

    Raises:
        urllib.error.URLError: The request failed (includes HTTP errors).
        OSError: Lower-level socket failure or timeout while reading.
    """
    with urllib.request.urlopen(address, timeout=timeout) as response:
        return response.read().decode("utf-8", errors="replace")


def parse_products(html):
    """Extract the product rows from the ``category`` table in *html*.

    Only ``<tr>`` elements carrying a ``data-id`` attribute are considered.
    Rows that lack the expected cells or contain non-numeric numbers are
    reported on stderr and skipped instead of crashing the poller.

    Returns:
        A list of ``{"sku": str, "stock": int, "volume": int}`` dicts; empty
        when the page has no ``category`` table.
    """
    soup = BeautifulSoup(html, "html.parser")
    table = soup.find("table", class_="category")
    if table is None:
        return []

    records = []
    for row in table.find_all("tr", attrs={"data-id": True}):
        sku_cell = row.find("td", class_="sku")
        stock_cell = row.find("td", class_="stocked")
        cells = row.find_all("td")
        if sku_cell is None or stock_cell is None or len(cells) < 4:
            print(f"skipping incomplete row {row.get('data-id')!r}",
                  file=sys.stderr)
            continue
        try:
            stock = int(stock_cell.get_text())
            # The fourth cell of the row is read as the "volume" threshold.
            # NOTE(review): the column's meaning is not visible here - confirm
            # against the page layout.
            volume = int(cells[3].get_text())
        except ValueError:
            print(f"skipping non-numeric row {row.get('data-id')!r}",
                  file=sys.stderr)
            continue
        records.append({
            # Strip surrounding whitespace so the sku can match `interesting`.
            "sku": sku_cell.get_text(strip=True),
            "stock": stock,
            "volume": volume,
        })
    return records


def find_available(productinfo, wanted):
    """Return the records whose sku is in *wanted* and whose stock >= volume.

    The input order is preserved.
    """
    return [
        product for product in productinfo
        if product["sku"] in wanted and product["stock"] >= product["volume"]
    ]


def main():
    """Poll `url` every `pollFreq` seconds until an interesting product is stocked.

    Each matching record is printed, then the process exits with status 1
    (the exit status is kept from the original script).
    """
    while True:
        # 1 -> load the web page
        try:
            site = fetch_site(url)
        except (urllib.error.URLError, TimeoutError, ConnectionError) as err:
            # A transient network problem must not kill a long-running
            # poller; report it and try again on the next cycle.
            print(f"could not load {url}: {err}", file=sys.stderr)
        else:
            # 2 + 3 -> detect the product list and check the stock levels
            hits = find_available(parse_products(site), interesting)
            # 4 -> print the result
            for product in hits:
                print(product)
            if hits:
                sys.exit(1)
        sleep(pollFreq)


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment