Skip to content

Instantly share code, notes, and snippets.

@impshum
Last active July 3, 2019 16:32
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save impshum/2abc5c0d8050e30c913faaa83a1ebf11 to your computer and use it in GitHub Desktop.
Save impshum/2abc5c0d8050e30c913faaa83a1ebf11 to your computer and use it in GitHub Desktop.
from bs4 import BeautifulSoup
from requests import get
from fake_useragent import UserAgent
import csv
# --- Scrape configuration -------------------------------------------------
# Price bounds interpolated into the listing URL's _udlo / _udhi parameters.
min_price = 10
max_price = 100
# Pages 1..max_pages are requested; products_per_page is interpolated into
# the URL's _ipg parameter.
max_pages = 2
products_per_page = 100

# --- Shared state ---------------------------------------------------------
# Row dicts appended by main() and written out by write_csv().
products = []
# Source of the User-Agent header value (ua.chrome) used for each request.
ua = UserAgent()
def lovely_soup(u, timeout=30):
    """Fetch a URL and return its body parsed into a BeautifulSoup tree.

    Args:
        u: URL to request.
        timeout: Seconds to wait on the server before giving up. Without an
            explicit timeout, requests waits indefinitely on a stalled
            connection, which would hang the whole scrape.

    Returns:
        A ``BeautifulSoup`` object built from the response text using the
        ``lxml`` parser.

    Raises:
        requests.exceptions.Timeout: If the server does not respond within
            ``timeout`` seconds.
    """
    # The User-Agent header value comes from the module-level ``ua`` object.
    r = get(u, headers={'User-Agent': ua.chrome}, timeout=timeout)
    return BeautifulSoup(r.text, 'lxml')
def check(item, switch):
    """Extract a printable value from a parsed element, or ``'N/A'``.

    Args:
        item: The element returned by a ``find`` call, or a falsy value
            (such as ``None``) when nothing matched.
        switch: Falsy to return ``item.text``; truthy to return
            ``item['href']``.

    Returns:
        The element's text or its ``href`` value. ``'N/A'`` is returned when
        ``item`` is falsy, or when ``switch`` is truthy and the element has
        no ``href`` key.
    """
    if not item:
        return 'N/A'
    if not switch:
        return item.text
    try:
        return item['href']
    except KeyError:
        # A matched element without an href should yield the placeholder
        # like any other missing field, not raise out of the scrape.
        return 'N/A'
def write_csv():
    """Write every row in the module-level ``products`` list to ``data.csv``.

    The file is opened in ``'w'`` mode, so any existing ``data.csv`` in the
    working directory is overwritten. A header row is written first, followed
    by one row per product dict, with the columns title, price, summary,
    brand and link.
    """
    columns = ['title', 'price', 'summary', 'brand', 'link']
    # newline='' hands line-ending control to the csv module (otherwise
    # Windows output gets a blank line after every row); the explicit UTF-8
    # encoding keeps non-ASCII text from depending on the locale's codec.
    with open('data.csv', 'w', newline='', encoding='utf-8') as f:
        writer = csv.DictWriter(f, fieldnames=columns)
        writer.writeheader()
        writer.writerows(products)
def main():
    """Scrape the configured listing pages and dump the results to CSV.

    Requests pages 1 through ``max_pages`` of the eBay listing URL, with
    ``products_per_page``, ``min_price`` and ``max_price`` interpolated into
    the query string. For every ``li.s-item`` element found, one dict is
    appended to the module-level ``products`` list. ``write_csv()`` is then
    called to write the accumulated rows.
    """
    # CSV column -> (tag name, CSS class, check() switch).
    # A switch of 1 asks check() for the href instead of the text.
    fields = {
        'title': ('h3', 's-item__title', 0),
        'price': ('span', 's-item__price', 0),
        'summary': ('div', 's-item__summary', 0),
        'brand': ('span', 's-item__dynamic', 0),
        'link': ('a', 's-item__link', 1),
    }
    for page in range(1, max_pages + 1):
        soup = lovely_soup(
            f'https://www.ebay.com/b/Cell-Phone-Smartphone-Parts/43304/bn_151926?rt=nc&_pgn={page}&_ipg={products_per_page}&_sop=15&_udlo={min_price}&_udhi={max_price}')
        # find_all is the current name for the legacy findAll alias.
        for listing in soup.find_all('li', class_='s-item'):
            products.append({
                column: check(listing.find(tag, class_=css_class), want_href)
                for column, (tag, css_class, want_href) in fields.items()
            })
    write_csv()
# Run the scrape only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment