Skip to content

Instantly share code, notes, and snippets.

@nvllsvm
Created May 3, 2023 14:05
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save nvllsvm/37a7393729aa45ab11ebeb46a1d1e2a1 to your computer and use it in GitHub Desktop.
Save nvllsvm/37a7393729aa45ab11ebeb46a1d1e2a1 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python3
import asyncio
import hashlib
import json
import logging
import pathlib
from playwright.async_api import async_playwright
import bs4 # beautifulsoup4
# Module-level logger named 'scrape'; main() installs the handler/format
# through logging.basicConfig before the first message is emitted.
LOGGER = logging.getLogger('scrape')
def sha256sum(data):
    """Return the hexadecimal SHA-256 digest of the bytes-like ``data``."""
    return hashlib.sha256(data).hexdigest()
def get_status(content):
    """Extract the first JSON payload passed to ``dataLayer.push(...)``.

    Scans every ``<script type="text/javascript">`` element in ``content``
    line by line, looking for a line of the exact form
    ``dataLayer.push(<payload>)`` (after surrounding whitespace is removed).

    Args:
        content: HTML document text.

    Returns:
        The decoded JSON value of the first matching line whose payload is
        valid JSON, or ``None`` when no such line exists.
    """
    prefix = 'dataLayer.push('
    suffix = ')'
    soup = bs4.BeautifulSoup(content, features='html.parser')
    for script in soup.find_all('script'):
        if script.get('type') != 'text/javascript':
            continue
        for raw_line in script.get_text().splitlines():
            line = raw_line.strip()
            if not (line.startswith(prefix) and line.endswith(suffix)):
                continue
            # Slice off the exact wrapper. str.strip() takes a *set of
            # characters*, so strip('dataLayer.push(') would also eat payload
            # characters such as 't', 'r', 'u', 'e' at either end.
            payload = line[len(prefix):-len(suffix)]
            try:
                return json.loads(payload)
            except json.JSONDecodeError:
                # Not JSON (e.g. a JS object literal); a later push may be.
                continue
    return None
def _log_impressions(status):
    """Log each impression's ``eventCode`` and ``category`` from ``status``.

    ``status`` is whatever get_status() returned; anything that does not
    carry an ``ecommerce -> impressions`` list is ignored instead of raising.
    """
    if not isinstance(status, dict):
        return
    ecommerce = status.get('ecommerce')
    if not isinstance(ecommerce, dict):
        return
    for impression in ecommerce.get('impressions') or []:
        if isinstance(impression, dict):
            LOGGER.info(
                '%s (%s)',
                impression.get('eventCode'),
                impression.get('category'),
            )


async def _capture_tracking_page(tracking_num):
    """Load the tracking page once in Firefox.

    Writes the screenshot to ``screenshot.png`` and returns a
    ``(html, screenshot_bytes)`` tuple.
    """
    url = f'https://tools.usps.com/go/TrackConfirmAction?qtc_tLabels1={tracking_num}'
    async with async_playwright() as p:
        browser = await p.firefox.launch()
        try:
            page = await browser.new_page()
            await page.set_viewport_size({"width": 1600, "height": 1200})
            await page.goto(url)
            # Fixed pause after navigation before the page is captured.
            await asyncio.sleep(5)
            # Fetch the HTML once so the logged status and the saved file
            # describe the same snapshot.
            content = await page.content()
            # screenshot() both writes the file and returns the image bytes.
            screenshot = await page.screenshot(path='screenshot.png')
        finally:
            # Close the browser even when navigation or capture fails.
            await browser.close()
    return content, screenshot


async def main(tracking_num='9405508205497576264031', poll_interval=300):
    """Poll the USPS tracking page until its screenshot changes.

    On every pass the page is rendered, any dataLayer impressions are logged,
    the HTML is saved to ``content.html`` and the screenshot to
    ``screenshot.png``. The SHA-256 of the first screenshot is remembered;
    as soon as a later screenshot hashes differently, ``Delivered?!`` is
    logged and the coroutine returns.

    Args:
        tracking_num: Tracking number inserted into the query string.
        poll_interval: Seconds to sleep between checks.
    """
    logging.basicConfig(format='%(asctime)s %(message)s', level=logging.INFO)
    previous_hash = None
    while True:
        LOGGER.info('Checking')
        content, screenshot = await _capture_tracking_page(tracking_num)
        _log_impressions(get_status(content))
        pathlib.Path('content.html').write_text(content, encoding='utf-8')
        current_hash = sha256sum(screenshot)
        if previous_hash is None:
            previous_hash = current_hash
        elif previous_hash != current_hash:
            LOGGER.info('Delivered?!')
            # Return instead of calling exit(): that helper is injected by
            # the site module and is not guaranteed to exist.
            return
        LOGGER.info('Sleeping')
        await asyncio.sleep(poll_interval)
if __name__ == '__main__':
    # Executed as a script: drive the polling coroutine to completion.
    asyncio.run(main())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment