Skip to content

Instantly share code, notes, and snippets.

@creativesalam
Forked from fnneves/amazon_bot_1.py
Created October 30, 2020 11:11
Show Gist options
Save creativesalam/c716a803f5af2e58e1b9700c2931be9c to your computer and use it in GitHub Desktop.
import requests
from glob import glob
from bs4 import BeautifulSoup
import pandas as pd
from datetime import datetime
from time import sleep
# Request headers sent with the fetch below: a desktop Chrome User-Agent
# string and an English Accept-Language preference.
# User-Agent list: http://www.networkinghowtos.com/howto/common-user-agent-list/
HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36',
    'Accept-Language': 'en-US, en;q=0.5',
}

# Seconds to wait for the server before giving up. Requests applies no
# timeout by default, so without this a stalled connection hangs the script.
REQUEST_TIMEOUT_SECONDS = 30

# Load the semicolon-separated CSV listing the product URLs to scrape;
# the `url` column holds the page addresses.
prod_tracker = pd.read_csv('trackers/TRACKER_PRODUCTS.csv', sep=';')
prod_tracker_URLS = prod_tracker.url

# Fetch the first tracked URL.
# NOTE(review): the HTTP status is not checked, so an error page would be
# parsed like any other response -- confirm whether that is intended.
page = requests.get(
    prod_tracker_URLS[0],
    headers=HEADERS,
    timeout=REQUEST_TIMEOUT_SECONDS,
)

# Parse the raw response body with the lxml parser into a navigable tree.
soup = BeautifulSoup(page.content, features="lxml")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment