Skip to content

Instantly share code, notes, and snippets.

@tbbooher
Created March 25, 2023 21:11
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save tbbooher/edb12abcfb84a1bf125d1ebafceac4f3 to your computer and use it in GitHub Desktop.
Save tbbooher/edb12abcfb84a1bf125d1ebafceac4f3 to your computer and use it in GitHub Desktop.
from bs4 import BeautifulSoup
import csv
import re
# Scrape vehicle listings out of a saved Edmunds results page
# ('edmunds.html') and write them to 'output_edmunds.csv'.

# Placeholder used whenever a listing lacks one of the expected elements.
_MISSING = "N/A"
# Compiled once: collapses runs of whitespace/newlines inside multi-line text.
_WHITESPACE = re.compile(r'\s+')


def _node_text(node, collapse=False):
    """Return the stripped text of a parsed HTML node.

    Args:
        node: An element returned by a BeautifulSoup ``find`` call, or
            ``None`` when the lookup matched nothing.
        collapse: When true, also squeeze internal whitespace runs down to a
            single space.

    Returns:
        The cleaned text, or ``"N/A"`` when ``node`` is ``None``.
    """
    if node is None:
        return _MISSING
    text = node.text.strip()
    return _WHITESPACE.sub(' ', text) if collapse else text


# Explicit encoding so the result does not depend on the platform locale.
with open('edmunds.html', 'r', encoding='utf-8') as f:
    html = f.read()

soup = BeautifulSoup(html, 'html.parser')

results = []
for vehicle in soup.find_all("div", class_="vehicle-info"):
    # find() yields None for a missing element; _node_text turns that into
    # "N/A" instead of letting one incomplete listing abort the whole run.
    title = _node_text(vehicle.find("div", class_="text-blue-50"),
                       collapse=True)
    desc = _node_text(vehicle.find("div", class_="text-gray-dark"),
                      collapse=True)
    price = _node_text(vehicle.find("span", class_="heading-3"))

    # The mileage value is read from the <span> that follows the span
    # titled "Car Mileage"; either of the two may be absent.
    span = vehicle.find("span", title="Car Mileage")
    if span is not None:
        mileage = _node_text(span.find_next_sibling("span"))
    else:
        mileage = _MISSING

    print("Title:", title)
    print("Desc:", desc)
    print("Price:", price)
    print("Miles:", mileage)

    result = [title, desc, price, mileage]
    results.append(result)

for result in results:
    print(result)

# create a csv of the results
with open('output_edmunds.csv', 'w', newline='', encoding='utf-8') as f:
    writer = csv.writer(f)
    # The fourth column holds the mileage, so label it accordingly
    # (it was previously mislabeled 'Condition').
    writer.writerow(['Title', 'Desc', 'Price', 'Mileage'])
    writer.writerows(results)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment