Skip to content

Instantly share code, notes, and snippets.

@kazi331
Created March 5, 2023 04:59
Show Gist options
  • Save kazi331/e660659ad84cb10a7dc5d2d37fea4e70 to your computer and use it in GitHub Desktop.
Save kazi331/e660659ad84cb10a7dc5d2d37fea4e70 to your computer and use it in GitHub Desktop.
Scrape products from a website with Python
import requests
import json
from bs4 import BeautifulSoup
# Scrape one product-listing page and print the products found on it as a
# JSON array of {"name", "price", "image_url", "descs"} objects.

# The URL of the website to scrape
url = "https://www.startech.com.bd/laptop-notebook"

# Send a GET request to the website and get the HTML content.
# The timeout keeps the script from hanging forever on a stalled connection,
# and raise_for_status() stops us from parsing an HTTP error page as if it
# were the product listing.
response = requests.get(url, timeout=30)
response.raise_for_status()
html_content = response.content

# Parse the HTML content using BeautifulSoup
soup = BeautifulSoup(html_content, "html.parser")

# Collected product dictionaries, in page order
product_data = []

# Find all the product elements
product_elements = soup.find_all("div", {"class": "p-item-inner"})
print(f"Number of products found: {len(product_elements)}")  # Debugging line

# Loop through the product elements and extract relevant data.
# find() returns None when an element is absent, so every lookup is checked
# before it is dereferenced; one malformed card must not abort the whole run.
for product in product_elements:
    # Extract the product name; a card without a name is not usable, skip it.
    # (The "found" vs "scraped" counts printed by this script expose skips.)
    name_element = product.find("h4", {"class": "p-item-name"})
    if name_element is None:
        continue
    product_name = name_element.text.strip()

    # Flatten every <li> of every <ul> in the short description into one list
    descs = []
    description_box = product.find("div", {"class": "short-description"})
    if description_box is not None:
        for description in description_box.find_all("ul"):
            descs.extend(
                item.text.strip() for item in description.find_all("li")
            )

    # Extract the product price from the first <span> of the price box.
    # The text is expected to be digits with thousands separators followed by
    # the U+09F3 currency sign; anything that does not parse as an integer
    # is reported as None (JSON null) instead of crashing the script.
    product_price = None
    price_box = product.find("div", {"class": "p-item-price"})
    price_span = price_box.find("span") if price_box is not None else None
    if price_span is not None:
        price_text = price_span.text.strip()
        product_price_number = price_text.split("\u09f3")[0].replace(",", "")
        try:
            product_price = int(product_price_number)
        except ValueError:
            product_price = None

    # Extract the product image URL (None when there is no <img> or no src)
    image_element = product.find("img")
    product_image = (
        image_element.get("src") if image_element is not None else None
    )

    # Append the product dictionary to the product data list
    product_data.append(
        {
            "name": product_name,
            "price": product_price,
            "image_url": product_image,
            "descs": descs,
        }
    )

# Print how many products were actually scraped
print(f"Number of products scraped: {len(product_data)}")  # Debugging line

# Convert the product data to a JSON string
json_data = json.dumps(product_data)

# Print the JSON data
print(json_data)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment