This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import asyncio | |
import os | |
import random | |
import time | |
from dataclasses import dataclass | |
from typing import List, Optional, Set | |
import pandas as pd | |
from bs4 import BeautifulSoup | |
from curl_cffi import requests |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import asyncio | |
import os | |
from curl_cffi import requests | |
from bs4 import BeautifulSoup | |
from tenacity import retry, stop_after_attempt, wait_random | |
class AsinScraper: | |
def __init__(self): | |
self.session = requests.Session() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
[ | |
{ | |
"asin": "B09MRFNDZ6", | |
"url": "https://www.amazon.com/X-Box-Gaming-Console-Bundle-Controllers/dp/B09MRFNDZ6/ref=sr_1_1", | |
"name": "X-Box Series X Gaming Console Bundle - 1TB SSD Black X-Box Console with Two Wireless Controllers -Black and White -and ahaghug Authorized HDMI Cable X-Box Series X Gaming Console Bundle - 1TB SSD Black X-Box Console with Two Wireless Controllers -Black and White -and ahaghug Authorized HDMI Cable", | |
"sponsored": "false", | |
"initial_price": 604.49, | |
"final_price": 558, | |
"currency": "USD", | |
"sold": 300, |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import streamlit as st | |
import pandas as pd | |
import plotly.express as px | |
import json | |
import re | |
from pathlib import Path | |
# Setup dark theme dashboard | |
st.set_page_config( | |
page_title="Hotel Analysis Dashboard", |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import asyncio | |
from datetime import timedelta | |
from typing import Dict, Optional, Set | |
from crawlee.playwright_crawler import PlaywrightCrawler | |
from crawlee.proxy_configuration import ProxyConfiguration | |
from playwright.async_api import Page, ElementHandle | |
class GoogleMapsScraper: | |
""" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import asyncio | |
from datetime import timedelta | |
from typing import Dict, Optional, Set | |
from crawlee.playwright_crawler import PlaywrightCrawler | |
from playwright.async_api import Page, ElementHandle | |
class GoogleMapsScraper: | |
""" | |
Scraper for extracting business listing data from Google Maps. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from playwright.async_api import async_playwright | |
from tenacity import retry, stop_after_attempt, wait_fixed | |
from dataclasses import dataclass | |
from typing import List, Optional | |
from datetime import datetime | |
import asyncio | |
import json | |
import os | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
[ | |
{ | |
"title": "14.1in Windows 11 Pro Laptop, 8GB DDR4, 512GB SSD Computer, Intel Celeron, 1920x1080, 1TB Expansion, Silver", | |
"product_url": "https://www.walmart.com/ip/Temlicolo-14-1-Laptop-8GB-RAM-PC-512GB-SSD-Intel-Celeron-N4020C-up-to-2-8GHz-Windows-11-Pro-Webcam-1TB-SSD-Expansion-Silver/1519228026?classType=VARIANT&selectedSellerId=101196098&from=/search", | |
"current_price": "Now$22789", | |
"previous_price": "$499.99", | |
"rating": "4.5", | |
"num_reviews": "220", | |
"shipping_info": "Free shipping, arrives in 2 days" | |
}, |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import asyncio | |
import random | |
import json | |
from playwright.async_api import async_playwright | |
async def scroll_and_extract(page): | |
previous_height = await page.evaluate("document.body.scrollHeight") | |
while True: | |
network_conditions = await page.evaluate( |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import asyncio | |
import json | |
import os | |
import shutil | |
from aiohttp import ClientSession, ClientTimeout | |
from urllib.parse import urlparse, urlencode | |
from playwright.async_api import async_playwright | |
# Function to extract the domain from a URL | |
def extract_domain(url): |
NewerOlder