Created January 7, 2023 02:06
-
-
Save jaydeepkarale/2289836a23d5b5840dd641502fa7fcf3 to your computer and use it in GitHub Desktop.
Amazon Product Scraper
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import dataclasses
import datetime
import json
import logging
import re
import sys
import time

import keyboard
import pyautogui
import pygsheets
import pywhatkit
from playwright.sync_api import sync_playwright
# Module-wide logger. basicConfig sends records to stdout, formatted as
# "<timestamp>--<level>--<message>", at INFO level and above.
logger = logging.getLogger("amazonpricetracker")
logging.basicConfig(
    stream=sys.stdout,
    format="%(asctime)s--%(levelname)s--%(message)s",
    level=logging.INFO,
)
@dataclasses.dataclass
class ProductStructure:
    """A single product observation.

    Attributes:
        name: Product name.
        price: Product price as an integer.
        url: URL of the product page.
        model_number: Model identifier; "NA" when none is supplied.
        date: Timestamp taken when the instance is created.
    """

    name: str
    price: int
    url: str
    model_number: str = "NA"
    # default_factory gives every instance its own timestamp. A plain
    # `= datetime.datetime.now()` default is evaluated only once, when the
    # class body runs, so all instances would share the import-time value.
    date: datetime.datetime = dataclasses.field(
        default_factory=datetime.datetime.now
    )
def send_alert(
    product_name: str = "Sample",
    product_price: int = 10,
    phone_number: str = "<ADD PHONE NUMBER HERE WITH COUNTRY CODE including + sign>",
):
    """Send a price-drop WhatsApp message through pywhatkit.

    Any failure is logged and swallowed, so this function never raises to
    its caller.

    Args:
        product_name: Product name inserted into the message text.
        product_price: Price inserted into the message text.
        phone_number: Recipient number, with country code and leading "+".
            The default is a placeholder that must be replaced with a real
            number.
    """
    try:
        logger.info("Trying to send whatsapp message")
        pywhatkit.sendwhatmsg_instantly(
            phone_number,
            f"!!! PRICE DROP !!!\n\nOn {product_name}.\n\nPrice is now *_{product_price}_*",
            wait_time=60,
            tab_close=False,
        )
        # NOTE(review): the click + Enter below presumably focus the opened
        # WhatsApp Web window and submit the typed message; the click lands
        # wherever the mouse pointer currently is - confirm on target machine.
        time.sleep(2)
        pyautogui.click()
        time.sleep(1)
        keyboard.press_and_release("enter")
    except Exception as ex:
        # Best-effort alert: keep the original message, add the traceback.
        logger.exception("Error in sending message %s", ex)
def write_data_to_google_sheet(data: ProductStructure):
    """Append one product row to the Google Sheet and alert on a price drop.

    A row of (timestamp, name, model number, url, price) is appended to the
    first worksheet of the "Amazon_Price_Tracker" spreadsheet. The sheet is
    then read back as a DataFrame and, when the latest value in its "Price"
    column is lower than the one before it, ``send_alert`` is called.

    Any failure is logged and swallowed, so this function never raises to
    its caller.

    Args:
        data: The product observation to record.
    """
    try:
        logger.info("Writing to google sheets")
        client = pygsheets.authorize(
            service_file="amazondemopricetrackeryoutube-9fc4428f2223.json"
        )
        worksheet = client.open("Amazon_Price_Tracker").sheet1

        # json.dumps with default=str turns the datetime into a JSON string,
        # i.e. the cell value includes surrounding double quotes. Kept as-is
        # so new rows match the format of rows already in the sheet.
        timestamp = json.dumps(
            datetime.datetime.now(), indent=4, sort_keys=True, default=str
        )
        worksheet.append_table(
            [timestamp, data.name, data.model_number, data.url, data.price],
            start="A2",
            end=None,
            dimension="ROWS",
            overwrite=True,
        )

        prices = worksheet.get_as_df().Price
        # A comparison needs at least two rows. On the first run there is no
        # previous price, and indexing iloc[-2] would raise IndexError and be
        # misreported as a write failure.
        if len(prices) >= 2:
            previous_price = prices.iloc[-2]
            current_price = prices.iloc[-1]
            if previous_price > current_price:
                send_alert(product_name=data.name, product_price=data.price)
        else:
            logger.info("No previous price recorded, skipping price drop check")
        logger.info("Completed writing to google sheets")
    except Exception as ex:
        logger.error(f"Error writing data to google sheet {str(ex)}")
def scrape_data():
    """Scrape name, model number and price of one fixed Amazon.in product.

    Opens amazon.in in headless Chromium, searches for "legion 5i pro",
    clicks the first search suggestion, opens the hard-coded product link
    (which spawns a popup), then reads the title and whole-number price from
    the product page.

    Returns:
        A ``ProductStructure`` on success, or ``None`` when any step of the
        scrape fails (the error is logged).
    """
    with sync_playwright() as playwright:
        browser = playwright.chromium.launch(headless=True)
        page = browser.new_page()
        try:
            logger.info("Scraping data from Amazon")
            page.goto("https://www.amazon.in/")
            page.get_by_role("textbox", name="Search").fill("legion 5i pro")
            page.locator(".s-suggestion").first.click()
            with page.expect_popup() as popup_info:
                page.get_by_role("link", name="Lenovo Legion 5 Pro Intel Core i7-12700H 16\" (40.64cm) QHD IPS 165Hz 500Nits Gaming Laptop (16GB/1TB SSD/Win 11/Office 2021/NVIDIA RTX 3060 6GB/Alexa/3 Month Game Pass/Storm Grey/2.49Kg), 82RF00DYIN").click()
            page1 = popup_info.value
            # Load the popup's URL in the original page and scrape from there.
            page.goto(page1.url)

            title = page.locator("#titleSection").all_inner_texts()[0]
            # The model number is the text after the LAST comma of the title.
            # rpartition copes with titles holding several commas (or none),
            # where a two-target unpack of split(",") would raise ValueError.
            product_name, separator, model = title.rpartition(",")
            if not separator:
                product_name, model = title, "NA"

            product_price = page.locator(
                "#corePriceDisplay_desktop_feature_div"
            ).locator(".a-price-whole")
            # Drop every non-digit character before converting to int.
            integer_price = int(
                re.sub(r"[^\d]", "", product_price.all_inner_texts()[0])
            )
            data = ProductStructure(
                name=product_name,
                price=integer_price,
                model_number=model.strip(),
                url=page1.url,
            )
            logger.info("Scraping data from Amazon completed, returning data")
            return data
        except Exception as ex:
            logger.error(f"Failed to scrape data with error {str(ex)}")
            return None
        finally:
            # Release the browser (and its popup page) on every exit path.
            browser.close()
if __name__ == "__main__":
    # Script entry point: scrape once, then record the result.
    logger.info("Starting script")
    data = scrape_data()
    if data is None:
        # Nothing usable came back from the scrape, so there is no row to
        # write and no point in contacting Google Sheets.
        logger.error("No product data scraped, skipping google sheet update")
    else:
        write_data_to_google_sheet(data)
    logger.info("Completed script")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment