Skip to content

Instantly share code, notes, and snippets.

@protorob
Created November 21, 2019 17:53
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save protorob/d826ec1387e0b992f51066f810950fbe to your computer and use it in GitHub Desktop.
Save protorob/d826ec1387e0b992f51066f810950fbe to your computer and use it in GitHub Desktop.
Scraper for a website
# Import libraries
import csv

import requests
from bs4 import BeautifulSoup
# Listing URL prefix; the page number is appended to it for each request.
website = 'https://www.sito.it/bombon/bomboniere-matrimonio.html?p='
OUTPUT_PATH = 'bomboniere.csv'
FIRST_PAGE = 1
LAST_PAGE = 66  # inclusive
REQUEST_TIMEOUT = 30  # seconds; without a timeout a stalled server blocks forever

# Maps tab, carriage return and newline to a single space each.
_WHITESPACE_TO_SPACE = str.maketrans('\t\r\n', '   ')


def flatten_whitespace(text):
    """Return *text* with each tab, carriage return and newline replaced by a space."""
    return text.translate(_WHITESPACE_TO_SPACE)


def make_row_writer(output_file):
    """Return a csv writer emitting ';'-delimited rows terminated by a newline.

    Fields that contain the delimiter, a quote character or a line break are
    quoted by the csv module, so such values can no longer split a row.
    """
    return csv.writer(output_file, delimiter=';', lineterminator='\n')


def fetch_soup(url):
    """Download *url* and parse the response text with the 'html.parser' backend."""
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    return BeautifulSoup(response.text, 'html.parser')


def fetch_description(product_link):
    """Return the product page's short description on one line, or ' - ' if absent."""
    description = fetch_soup(product_link).find('div', class_='short-description')
    if description is None:
        return ' - '
    return flatten_whitespace(description.text)


def parse_listing_item(product_container):
    """Extract (code, title, image URL, product link) from one listing item.

    Raises AttributeError, KeyError or IndexError when the item lacks one of
    the expected elements, exactly as the chained lookups would.
    """
    image_area = product_container.find(class_='product-image-area')
    img_url = image_area.find('img')['src']
    product_link = image_area.find('a', class_='product-image').get('href')
    product_title = product_container.find(class_='product-name').find('a').contents[0]
    product_cod = (
        product_container.find(class_='details-area')
        .find('p', attrs={'style': 'color:#777;'})
        .find('b')
        .contents[0]
    )
    return product_cod, product_title, img_url, product_link


def main():
    """Scrape every listing page and write one row per product to OUTPUT_PATH.

    Each row holds: running counter, product code, title, short description,
    image URL and product link. The row is also echoed to stdout as progress.
    """
    counter = 1
    # The context manager guarantees the file is flushed and closed even if a
    # request or a lookup fails part-way through the run.
    # newline='' lets the csv writer control line endings itself.
    with open(OUTPUT_PATH, 'w', encoding='utf-8', newline='') as output_file:
        writer = make_row_writer(output_file)
        for current_page in range(FIRST_PAGE, LAST_PAGE + 1):
            soup = fetch_soup(website + str(current_page))
            for product_container in soup.find_all('li', class_='item'):
                product_cod, product_title, img_url, product_link = (
                    parse_listing_item(product_container)
                )
                product_description = fetch_description(product_link)
                row = [
                    counter,
                    product_cod,
                    product_title,
                    product_description,
                    img_url,
                    product_link,
                ]
                print(';'.join(str(field) for field in row))
                writer.writerow(row)
                counter += 1


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment