florean/dmv_scraper.py

## dmv_scraper.py
from lxml import html
import requests
import re


# URL for DMV office page.
DMV_URL = "https://fortress.wa.gov/dol/dolprod/dsdoffices/OfficeInfo.aspx?cid=45&oid=23"

# Get the page content. Without a timeout requests waits indefinitely on an
# unresponsive server; 30 seconds is an arbitrary but generous upper bound.
page = requests.get(DMV_URL, timeout=30)

# Fail loudly on an HTTP error instead of trying to parse an error page.
page.raise_for_status()

# Parse the HTML and create a tree.
tree = html.fromstring(page.content)

# Get the text nodes that sit directly under the wait-time element.
wait_times = tree.xpath('//*[@id="ctl00_Main_waittime"]/text()')

# Get just the time rows: discard whitespace-only nodes, then skip the first
# remaining node (presumably a label/heading row -- confirm against the page).
wait_times = [x for x in wait_times if x.strip()][1:]
if not wait_times:
    # Same exception type that indexing the empty list would raise, but with
    # a message that says what actually went wrong.
    raise IndexError("no wait time rows found at %s" % DMV_URL)

# Regex out the hours and minutes: the first two integers in the row, with
# any separator text between them. A raw string keeps "\d" from being treated
# as an (invalid) string escape sequence.
time_regex = re.compile(r"(\d+).*?(\d+).*")

# Find the wait time. match() is anchored at the start of the text, so the
# row must begin with the hours figure.
time_match = time_regex.match(wait_times[0])
if time_match is None:
    raise ValueError("unrecognized wait time format: %r" % (wait_times[0],))
hours, minutes = time_match.groups()

# Convert to integers and normalize to minutes.
minutes = int(minutes) + int(hours) * 60
	from lxml import html
	import requests
	import re


	# URL for DMV office page.
	DMV_URL = "https://fortress.wa.gov/dol/dolprod/dsdoffices/OfficeInfo.aspx?cid=45&oid=23"

	# Get the page content. Without a timeout requests waits indefinitely on an
	# unresponsive server; 30 seconds is an arbitrary but generous upper bound.
	page = requests.get(DMV_URL, timeout=30)

	# Fail loudly on an HTTP error instead of trying to parse an error page.
	page.raise_for_status()

	# Parse the HTML and create a tree.
	tree = html.fromstring(page.content)

	# Get the text nodes that sit directly under the wait-time element.
	wait_times = tree.xpath('//*[@id="ctl00_Main_waittime"]/text()')

	# Get just the time rows: discard whitespace-only nodes, then skip the first
	# remaining node (presumably a label/heading row -- confirm against the page).
	wait_times = [x for x in wait_times if x.strip()][1:]
	if not wait_times:
		# Same exception type that indexing the empty list would raise, but
		# with a message that says what actually went wrong.
		raise IndexError("no wait time rows found at %s" % DMV_URL)

	# Regex out the hours and minutes: the first two integers in the row. The
	# lazy ".*?" skips whatever separator text lies between the two numbers;
	# with a bare ".?" at most one character could separate them, so a row
	# like "1 hour 5 minutes" would never match. A raw string keeps "\d" from
	# being treated as an (invalid) string escape sequence.
	time_regex = re.compile(r"(\d+).*?(\d+).*")

	# Find the wait time. match() is anchored at the start of the text, so the
	# row must begin with the hours figure.
	time_match = time_regex.match(wait_times[0])
	if time_match is None:
		raise ValueError("unrecognized wait time format: %r" % (wait_times[0],))
	hours, minutes = time_match.groups()

	# Convert to integers and normalize to minutes.
	minutes = int(minutes) + int(hours) * 60