hannesdatta/scraper.py

## scraper.py
# FINAL CODE
import requests
from bs4 import BeautifulSoup

# Scrape one Coolblue second-chance ("tweedekans") product page and print the
# product name, prices, review summary, current state and the status bullet list.

# Define the URL and user-agent header
url = 'https://www.coolblue.nl/tweedekans-product/2191236'
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
                  '(KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36'
}


def _require(element, description):
    """Return *element* unchanged, or raise if the lookup found nothing.

    A lookup that matches nothing yields ``None``; calling ``.get_text()`` or
    ``.find_all()`` on it fails with an uninformative "'NoneType' object has
    no attribute ..." error. The exception type is kept as ``AttributeError``
    so the failure mode is the same as before, but the message now names the
    element that is missing.
    """
    if element is None:
        raise AttributeError(
            'Could not find {} on {}; the page layout may have changed.'.format(
                description, url
            )
        )
    return element


# Send an HTTP GET request to the URL with the specified headers.
# requests applies no timeout unless one is given, so set one to avoid
# hanging forever on a stalled connection.
response = requests.get(url, headers=headers, timeout=30)
# Stop on HTTP errors (4xx/5xx) instead of parsing an error page
response.raise_for_status()
response.encoding = response.apparent_encoding

# Extract the HTML source code from the response
source_code = response.text

# Parse the HTML using BeautifulSoup
soup = BeautifulSoup(source_code, 'html.parser')

# Extract product information (each field is looked up by its CSS class)
product_name = _require(
    soup.find(class_='js-product-name'), 'the product name'
).get_text().strip()
former_price = _require(
    soup.find(class_='sales-price__former-price'), 'the former price'
).get_text().strip()
current_price = _require(
    soup.find(class_='sales-price__current js-sales-price-current'), 'the current price'
).get_text().strip()
reviews = _require(
    soup.find(class_='review-rating__reviews text--truncate'), 'the review summary'
).get_text().strip()
current_state = _require(
    soup.find('li', class_='inline-list__item js-inline-list-item'), 'the current state'
).get_text().strip()

# Extract additional information from the status list
status = _require(soup.find('ul', attrs={'class': 'list list--bullet'}), 'the status list')
status_items = status.find_all('li')

# Collect the stripped text of every status bullet
status_info = [state.get_text().strip() for state in status_items]

# Print the extracted information
print("Status Information:")
for item in status_info:
    print(item)

print("Product Name:", product_name)
print("Former Price:", former_price)
print("Current Price:", current_price)
# The review text is split at fixed character offsets (0-7 and 7-20) and
# printed as two values; presumably rating and review count -- this depends
# on the page's text layout.
print("Reviews:", reviews[0:7].strip(), reviews[7:20].strip())
print("Current State:", current_state)

## wrong_code.py
# ORIGINAL CODE RECEIVED BY STUDENT FOR DEBUGGING
import requests
from bs4 import BeautifulSoup

# Deliberately unrepaired version of the scraper (the "before" half of a
# debugging exercise). The code is left exactly as handed out; the
# NOTE(review) comments below mark where it differs from the FINAL CODE script.

# Target product page and a browser-like User-agent header sent with the request
url = 'https://www.coolblue.nl/tweedekans-product/2191236'
header= {'User-agent' : 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36'}
phone_request = requests.get(url, headers = header)
# Use the encoding detected from the body when decoding the response to text
phone_request.encoding = phone_request.apparent_encoding
phone_source_code = phone_request.text

# NOTE(review): no parser is named here; the FINAL CODE script passes
# 'html.parser' explicitly as the second argument.
soup = BeautifulSoup(phone_source_code)

# Look up each field by its CSS class and keep the stripped text
product_name = soup.find(class_='js-product-name').get_text().strip()
former_price = soup.find(class_='sales-price__former-price').get_text().strip()
current_price = soup.find(class_='sales-price__current js-sales-price-current').get_text().strip()
reviews = soup.find(class_='review-rating__reviews text--truncate').get_text().strip()
current_state = soup.find('li', class_='inline-list__item js-inline-list-item').get_text().strip()


# Find the bulleted status list and all of its <li> entries
status = soup.find('ul',attrs={'class':'list list--bullet'})
status_phone = status.find_all('li')

out = []

# NOTE(review): the loop variable `state` is never used -- the body calls
# get_text() on `status_phone` (the whole find_all result) instead of on the
# current item. The FINAL CODE script uses state.get_text().strip() here.
# Presumably this line fails at runtime; confirm against bs4's ResultSet.
for state in status_phone: out.append(status_phone.get_text())
# NOTE(review): a bare expression prints nothing when run as a script; it only
# echoes in an interactive session or notebook.
out

print(product_name)
print(former_price)
print(current_price)
# The review text is split at fixed character offsets (0-7 and 7-20) and
# printed on two lines; presumably rating and review count -- this depends on
# the page's text layout.
print(reviews[0:7].strip())
print(reviews[7:20].strip())
print(current_state)
	# FINAL CODE
	import requests
	from bs4 import BeautifulSoup

	# Scrape one Coolblue second-chance ("tweedekans") product page and print the
	# product name, prices, review summary, current state and the status bullet list.

	# Define the URL and user-agent header
	url = 'https://www.coolblue.nl/tweedekans-product/2191236'
	headers = {
		'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
		'(KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36'
	}


	def _require(element, description):
		"""Return *element* unchanged, or raise if the lookup found nothing.

		A lookup that matches nothing yields ``None``; calling ``.get_text()`` or
		``.find_all()`` on it fails with an uninformative "'NoneType' object has
		no attribute ..." error. The exception type is kept as ``AttributeError``
		so the failure mode is the same as before, but the message now names the
		element that is missing.
		"""
		if element is None:
			raise AttributeError(
				'Could not find {} on {}; the page layout may have changed.'.format(
					description, url
				)
			)
		return element


	# Send an HTTP GET request to the URL with the specified headers.
	# requests applies no timeout unless one is given, so set one to avoid
	# hanging forever on a stalled connection.
	response = requests.get(url, headers=headers, timeout=30)
	# Stop on HTTP errors (4xx/5xx) instead of parsing an error page
	response.raise_for_status()
	response.encoding = response.apparent_encoding

	# Extract the HTML source code from the response
	source_code = response.text

	# Parse the HTML using BeautifulSoup
	soup = BeautifulSoup(source_code, 'html.parser')

	# Extract product information (each field is looked up by its CSS class)
	product_name = _require(
		soup.find(class_='js-product-name'), 'the product name'
	).get_text().strip()
	former_price = _require(
		soup.find(class_='sales-price__former-price'), 'the former price'
	).get_text().strip()
	current_price = _require(
		soup.find(class_='sales-price__current js-sales-price-current'), 'the current price'
	).get_text().strip()
	reviews = _require(
		soup.find(class_='review-rating__reviews text--truncate'), 'the review summary'
	).get_text().strip()
	current_state = _require(
		soup.find('li', class_='inline-list__item js-inline-list-item'), 'the current state'
	).get_text().strip()

	# Extract additional information from the status list
	status = _require(soup.find('ul', attrs={'class': 'list list--bullet'}), 'the status list')
	status_items = status.find_all('li')

	# Collect the stripped text of every status bullet
	status_info = [state.get_text().strip() for state in status_items]

	# Print the extracted information
	print("Status Information:")
	for item in status_info:
		print(item)

	print("Product Name:", product_name)
	print("Former Price:", former_price)
	print("Current Price:", current_price)
	# The review text is split at fixed character offsets (0-7 and 7-20) and
	# printed as two values; presumably rating and review count -- this depends
	# on the page's text layout.
	print("Reviews:", reviews[0:7].strip(), reviews[7:20].strip())
	print("Current State:", current_state)
	# ORIGINAL CODE RECEIVED BY STUDENT FOR DEBUGGING
	import requests
	from bs4 import BeautifulSoup

	# Deliberately unrepaired version of the scraper (the "before" half of a
	# debugging exercise). The code is left exactly as handed out; the
	# NOTE(review) comments below mark where it differs from the FINAL CODE script.

	# Target product page and a browser-like User-agent header sent with the request
	url = 'https://www.coolblue.nl/tweedekans-product/2191236'
	header= {'User-agent' : 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36'}
	phone_request = requests.get(url, headers = header)
	# Use the encoding detected from the body when decoding the response to text
	phone_request.encoding = phone_request.apparent_encoding
	phone_source_code = phone_request.text

	# NOTE(review): no parser is named here; the FINAL CODE script passes
	# 'html.parser' explicitly as the second argument.
	soup = BeautifulSoup(phone_source_code)

	# Look up each field by its CSS class and keep the stripped text
	product_name = soup.find(class_='js-product-name').get_text().strip()
	former_price = soup.find(class_='sales-price__former-price').get_text().strip()
	current_price = soup.find(class_='sales-price__current js-sales-price-current').get_text().strip()
	reviews = soup.find(class_='review-rating__reviews text--truncate').get_text().strip()
	current_state = soup.find('li', class_='inline-list__item js-inline-list-item').get_text().strip()



	# Find the bulleted status list and all of its <li> entries
	status = soup.find('ul',attrs={'class':'list list--bullet'})
	status_phone = status.find_all('li')

	out = []

	# NOTE(review): the loop variable `state` is never used -- the body calls
	# get_text() on `status_phone` (the whole find_all result) instead of on the
	# current item. The FINAL CODE script uses state.get_text().strip() here.
	# Presumably this line fails at runtime; confirm against bs4's ResultSet.
	for state in status_phone: out.append(status_phone.get_text())
	# NOTE(review): a bare expression prints nothing when run as a script; it only
	# echoes in an interactive session or notebook.
	out

	print(product_name)
	print(former_price)
	print(current_price)
	# The review text is split at fixed character offsets (0-7 and 7-20) and
	# printed on two lines; presumably rating and review count -- this depends on
	# the page's text layout.
	print(reviews[0:7].strip())
	print(reviews[7:20].strip())
	print(current_state)