# aflansburg/amzreader.py

## amzreader.py
# This involves some manual work depending on the type and category of an item
# See below !! CATEGORY / ITEM SPECIFIC INFO !!

import requests
from bs4 import BeautifulSoup as BS


# !! CATEGORY / ITEM SPECIFIC INFO
# This function is the predicate for the list filter; it matches text that unfortunately couldn't be excluded with Beautiful Soup
# (or I just couldn't figure it out)
def f(x):
    """Return *x* if it equals one of the hard-coded labels, else ``None``.

    The labels are item/category specific and must be edited by hand for
    other products. Because every label is a non-empty string, the return
    value is truthy exactly when *x* matched, so the function can be used
    directly as a ``filter`` predicate.
    """
    unwanted_labels = (
        'Automotive',
        'Replacement Parts',
        'Shocks, Struts & Suspension',
        'Chassis',
        'Body Lift Kits',
        'Enter your model numberto make sure this fits.',
    )
    # Compare in the listed order and stop at the first match.
    if any(x == label for label in unwanted_labels):
        return x
    return None


# Product page to scrape.
url = "https://www.amazon.com/dp/B00B2B3ZU8"

# Bound the request so a stalled connection cannot hang the script forever,
# and stop on an HTTP error status instead of silently parsing an error page.
page = requests.get(url, timeout=30)
page.raise_for_status()
html_contents = page.text

soup = BS(html_contents, "html.parser")

# Raw text of every <span class="a-list-item"> element on the page.
data = [span.text for span in soup.find_all('span', {'class': 'a-list-item'})]

# cut out some of the characters and a unicode character
# this may require some tweaking
# Spaces, newlines, tabs and U+203A are stripped from both ends in one pass:
# stripping them one character at a time only peels a fixed number of layers
# and leaves residue whenever they are interleaved in a different order.
data = [i.strip(u' \n\t\u203a') for i in data]
# Any newline still present is an interior one; remove it.
data = [i.replace('\n', '') for i in data]
# Drop entries that consisted solely of the stripped characters.
data = list(filter(None, data))

# Entries recognised by f() are item-specific text that should not be printed.
removals = set(filter(f, data))

data = [i for i in data if i not in removals]

for i in data:
    print(i)
	# This involves some manual work depending on the type and category of an item
	# See below !! CATEGORY / ITEM SPECIFIC INFO !!

	import requests
	from bs4 import BeautifulSoup as BS


	# !! CATEGORY / ITEM SPECIFIC INFO
	# This function is the predicate for the list filter; it matches text that unfortunately couldn't be excluded with Beautiful Soup
	# (or I just couldn't figure it out)
	def f(x):
		"""Return *x* if it equals one of the hard-coded labels, else ``None``.

		The labels are item/category specific and must be edited by hand for
		other products. Every label is a non-empty string, so the result is
		truthy exactly when *x* matched and the function can serve as a
		``filter`` predicate.
		"""
		unwanted_labels = (
			'Automotive',
			'Replacement Parts',
			'Shocks, Struts & Suspension',
			'Chassis',
			'Body Lift Kits',
			'Enter your model numberto make sure this fits.',
		)
		# Compare in the listed order and stop at the first match.
		if any(x == label for label in unwanted_labels):
			return x
		return None


	# Product page to scrape.
	url = "https://www.amazon.com/dp/B00B2B3ZU8"

	# Bound the request so a stalled connection cannot hang the script forever,
	# and stop on an HTTP error status instead of silently parsing an error page.
	page = requests.get(url, timeout=30)
	page.raise_for_status()
	html_contents = page.text

	soup = BS(html_contents, "html.parser")

	# Raw text of every <span class="a-list-item"> element on the page.
	data = [span.text for span in soup.find_all('span', {'class': 'a-list-item'})]

	# cut out some of the characters and a unicode character
	# this may require some tweaking
	# Spaces, newlines, tabs and U+203A are stripped from both ends in one pass:
	# stripping them one character at a time only peels a fixed number of layers
	# and leaves residue whenever they are interleaved in a different order.
	data = [i.strip(u' \n\t\u203a') for i in data]
	# Any newline still present is an interior one; remove it.
	data = [i.replace('\n', '') for i in data]
	# Drop entries that consisted solely of the stripped characters.
	data = list(filter(None, data))

	# Entries recognised by f() are item-specific text that should not be printed.
	removals = set(filter(f, data))

	data = [i for i in data if i not in removals]

	for i in data:
		print(i)