Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
import time
import pandas as pd
def getListingLinks(link):
    """Collect the listing URLs shown on a results page.

    Opens ``link`` in a new Chrome session, reads the ``href`` attribute of
    every element matching the ``.listing-item-link`` CSS selector, and then
    shuts the browser down.

    Args:
        link: URL of the page that lists the items.

    Returns:
        A list of href strings, in the order the driver returned the
        elements. Elements that have no ``href`` are skipped.
    """
    # Open the driver
    driver = webdriver.Chrome(executable_path="/Users/erikgregorywebb/Downloads/chromedriver 2")
    try:
        driver.get(link)
        anchors = driver.find_elements_by_css_selector('.listing-item-link')
        # Read the attributes while the session is still alive; the element
        # handles cannot be queried once the driver has quit.
        hrefs = [anchor.get_attribute('href') for anchor in anchors]
    finally:
        # quit() ends the whole session (browser and driver process), and the
        # finally clause guarantees that happens even if the page load fails.
        driver.quit()
    # Save the links, dropping anchors without an href instead of storing
    # the string 'None' for them.
    return [str(href) for href in hrefs if href]
def getListingContent(listing_link):
    """Scrape the details of a single listing page.

    Opens ``listing_link`` in a new Chrome session and reads the text of the
    title, location, price, views, favorites, description and seller-name
    elements.

    Args:
        listing_link: URL of the listing to open.

    Returns:
        ``[title, location, price, views, favorites, description, name,
        listing_link]`` on success, or ``None`` (after printing a message)
        when any of the elements cannot be read.

    Raises:
        Whatever the driver raises while loading the page; only failures
        while reading the listing fields are turned into a ``None`` result.
    """
    # CSS selectors, in the same order as the fields of the returned list.
    selectors = (
        '.listingDetails-title',
        '.listingDetails-location',
        '.listingDetails-price',
        '.viewsDesktop-viewsNumber',
        '.viewsDesktop-favoritedNumber',
        '.listingDescription-text',
        '.listingContactSeller-firstName-value',
    )
    # Open the driver
    driver = webdriver.Chrome(executable_path="/Users/erikgregorywebb/Downloads/chromedriver 2")
    try:
        driver.get(listing_link)
        # Collect listing information
        try:
            listing = [
                driver.find_element_by_css_selector(selector).text
                for selector in selectors
            ]
        except Exception:
            # Exception (not a bare except) so Ctrl-C and SystemExit still
            # propagate instead of being reported as a scrape failure.
            print("An error occured.")
            return None
        listing.append(listing_link)
        return listing
    finally:
        # Single cleanup point: runs on success, on a handled failure, and
        # when the page load itself raises.
        driver.quit()
def getListings(url, max_listings=10, delay=3):
    """Scrape up to ``max_listings`` listings linked from a results page.

    Args:
        url: URL of the results page handed to ``getListingLinks``.
        max_listings: Maximum number of listing links to visit. ``None``
            visits every link found. Defaults to 10.
        delay: Seconds to sleep before each listing request. Defaults to 3.

    Returns:
        A ``pandas.DataFrame`` with one row per successfully scraped listing
        and the columns ``title``, ``location``, ``price``, ``views``,
        ``favorites``, ``description``, ``name`` and ``link``.
    """
    links = getListingLinks(url)
    listings = []
    # Loop over each listing link. Slicing (instead of indexing a fixed
    # range) copes with result pages that have fewer links than the limit.
    for link in links[:max_listings]:
        time.sleep(delay)
        try:
            listing = getListingContent(link)
        except Exception:
            print("An error occured:", link)
            continue
        # getListingContent returns None when a page could not be parsed;
        # a None row would make the DataFrame constructor fail.
        if listing is not None:
            listings.append(listing)
    # Create DataFrame, clean variables
    columns = ['title', 'location', 'price', 'views', 'favorites', 'description', 'name', 'link']
    return pd.DataFrame(listings, columns=columns)
def cleanLlistings(df):
    """Normalise the raw scraped columns of a listings DataFrame in place.

    * ``location`` is split on the first ``'|'``; the left part stays in
      ``location`` and the right part goes into a new ``days_online``
      column (NaN for rows that contain no ``'|'``).
    * ``'$'`` and ``','`` are removed from ``price`` (it stays a string).
    * ``views`` and ``favorites`` are converted with ``pd.to_numeric``.

    Args:
        df: DataFrame with string columns ``location``, ``price``, ``views``
            and ``favorites``.

    Returns:
        The same DataFrame object, modified in place.

    Raises:
        ValueError: If ``views`` or ``favorites`` hold non-numeric text
            (raised by ``pd.to_numeric``).
    """
    # Split the location variable into location and days_online.
    # n is passed by keyword and the pieces are fetched with .str.get()
    # rather than by unpacking the .str accessor: newer pandas releases
    # reject both the positional n and iteration over the accessor, and
    # .str.get() yields NaN when a row has no second piece.
    pieces = df['location'].str.split('|', n=1)
    df['location'] = pieces.str.get(0)
    df['days_online'] = pieces.str.get(1)
    # Remove the dollar sign and thousands separators in price.
    # regex=False makes '$' a literal character on every pandas version
    # instead of relying on the default (as a regex, '$' is an anchor).
    price = df['price'].str.replace('$', '', regex=False)
    df['price'] = price.str.replace(',', '', regex=False)
    # Convert from string to numeric
    df['views'] = pd.to_numeric(df['views'])
    df['favorites'] = pd.to_numeric(df['favorites'])
    return df
def main(url, output_path="/Users/erikgregorywebb/Documents/Python/ksl-scrapper/listings.csv"):
    """Scrape, clean and export the listings found at ``url``.

    Runs ``getListings`` followed by ``cleanLlistings``, writes the result
    to ``output_path`` as CSV, and prints the elapsed wall-clock time.

    Args:
        url: URL of the results page to scrape.
        output_path: Destination of the CSV export. Defaults to the path the
            script originally hard-coded, so existing callers are unaffected.

    Returns:
        The cleaned ``pandas.DataFrame`` that was written to disk.
    """
    start_time = time.time()
    # Process
    raw_df = getListings(url)
    df = cleanLlistings(raw_df)
    # Export
    df.to_csv(output_path, sep=',')
    elapsed = round(time.time() - start_time, 2)
    print("--- %s seconds ---" % elapsed)
    return df
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.