@eliac7
Created May 23, 2023 19:44
Python script for scraping product data from a website using Selenium WebDriver. It automates searching for a list of product codes on https://www.plaisio.gr/ and saves the results to a CSV file. The script uses Chrome WebDriver and includes error handling for cases where no search results are found. The code is organized into functions.
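The script depends on the selenium and webdriver-manager packages (installable with pip install selenium webdriver-manager) plus a local Chrome installation; webdriver-manager downloads a matching ChromeDriver automatically. A short example of reading the generated CSV back appears after the script.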
import csv

from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException, TimeoutException
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from webdriver_manager.chrome import ChromeDriverManager

CODES = [
    # List of product codes to search for
    3991016, 3990583, 4176618, 4176596, 3853780, 4079914, 4080688, 4080718, 4172930, 4006402,
    3860795, 4006399, 3989178, 4108108, 4111656, 3578224, 4141210, 3903907, 4234413, 3846229,
    3820912, 3177998, 4033213, 3530795, 4048350, 3530728, 3522385, 3843378, 3731448, 4018567,
    3808769, 4148525, 3889262, 4111648, 3935450, 3919161, 3787591, 3023966, 2488167, 2049546,
    2417405, 1352407, 3899330, 3899373, 3899438, 4186060, 4180747, 4180739, 4180720, 4101162,
    4101170, 4100700, 3794644, 3862135, 4238745, 4238737, 3950778, 3950743, 3901793, 3885690,
]

def scrape_product_data(driver, codes):
    # Open a CSV file to store the scraped data
    with open("product_results.csv", "w", newline="", encoding="utf-8") as csv_file:
        writer = csv.writer(csv_file)
        writer.writerow(["code", "product_title", "product_link", "product_price"])  # Header

        # Open the website in Chrome
        driver.get("https://www.plaisio.gr/")

        # Find the search input field
        search_input = driver.find_element(By.CLASS_NAME, "search-input")

        print(f"Searching for {len(codes)} products...")

        for code in codes:
            # Clear the search input and enter the product code
            search_input.clear()
            search_input.send_keys(str(code))
            search_input.send_keys(Keys.RETURN)

            try:
                # Wait until the search results container is visible
                wait = WebDriverWait(driver, 10)
                search_results = wait.until(
                    EC.visibility_of_element_located(
                        (By.CSS_SELECTOR, ".search-results__articles.border-right")
                    )
                )

                # Find the first search result
                first_li = search_results.find_element(By.TAG_NAME, "li")

                # Extract the product title and link
                product_title = first_li.find_element(By.CLASS_NAME, "description").find_element(By.CLASS_NAME, "ais-Highlight").text
                product_link = first_li.find_element(By.TAG_NAME, "a").get_attribute("href")

                # Extract the product price
                product_price_element = first_li.find_element(By.CLASS_NAME, "prices")
                product_price = product_price_element.find_element(By.CLASS_NAME, "current-price").text
            except (TimeoutException, NoSuchElementException):
                # If no search results were found, leave the fields empty
                product_title = ""
                product_link = ""
                product_price = ""

            # Write the results to the CSV file
            writer.writerow([code, product_title, product_link, product_price])

def main():
    # Set up the Chrome WebDriver (Selenium 4 takes the driver path via a Service object)
    driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))
    driver.maximize_window()

    # Scrape product data
    scrape_product_data(driver, CODES)

    # Close the browser
    driver.quit()


if __name__ == "__main__":
    main()
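For a quick check of the output, the rows can be read back with the standard csv module. A minimal sketch, assuming the script has already produced product_results.csv in the working directory (the file name and column names come from the script above):

import csv

with open("product_results.csv", newline="", encoding="utf-8") as f:
    for row in csv.DictReader(f):
        # Rows with an empty product_title are codes that returned no search results
        if row["product_title"]:
            print(f'{row["code"]}: {row["product_title"]} ({row["product_price"]})')
        else:
            print(f'{row["code"]}: no result found')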