Skip to content

Instantly share code, notes, and snippets.

@rzhade3
Created January 24, 2022 08:52
Show Gist options
  • Save rzhade3/0bdde47bd08c8305fa528a878fcc24ef to your computer and use it in GitHub Desktop.
Save rzhade3/0bdde47bd08c8305fa528a878fcc24ef to your computer and use it in GitHub Desktop.
Locations of all stores that sell ORCA passes, scraped from https://www.soundtransit.org/sites/default/files/documents/orca_retail_locations.pdf and converted to a more viewable format (GeoJSON).
Display the source blob
Display the rendered blob
Raw
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
from geojson import Point, FeatureCollection
import geojson
def create_points():
    """Build GeoJSON points from the hand-geocoded stores file.

    Reads ``undecoded_coords.txt`` from the current directory. Every
    non-blank line must have the form ``name|address|hours|lat,lng``.

    Returns:
        A list of ``Point`` objects, each carrying a ``properties`` mapping
        with the store's ``name``, ``address`` and ``hours``.

    Raises:
        ValueError: If a non-blank line does not split into exactly four
            ``|``-separated fields, the coordinate field is not a single
            ``lat,lng`` pair, or a coordinate is not a valid float.
    """
    points = []
    # The context manager closes the file even when a malformed line raises.
    with open('undecoded_coords.txt', 'r') as f:
        for line in f:
            line = line.strip()
            # Tolerate blank lines (e.g. a trailing newline left by an editor).
            if not line:
                continue
            store_name, address, hours, coords = line.split('|')
            lat, lng = coords.split(',')
            # The file stores "lat,lng"; the point is built as (lng, lat).
            feature = Point((float(lng), float(lat)))
            feature['properties'] = {
                'name': store_name,
                'address': address,
                'hours': hours,
            }
            points.append(feature)
    return points
def add_to_geojson(points):
    """Merge newly created points with those already saved in ``stores.json``.

    Args:
        points: Sequence of GeoJSON objects to append to the existing ones.

    Returns:
        A ``FeatureCollection`` holding the existing members followed by
        ``points``.

    Raises:
        ValueError: If ``stores.json`` has neither a ``geometries`` nor a
            ``features`` member.
    """
    # Close the handle deterministically instead of leaking it.
    with open('stores.json', 'r') as f:
        existing = geojson.load(f)

    # A GeometryCollection keeps its members under "geometries", while a
    # FeatureCollection keeps them under "features". Accept either so the
    # merge does not fail with AttributeError on a FeatureCollection file.
    existing_points = existing.get('geometries')
    if existing_points is None:
        existing_points = existing.get('features')
    if existing_points is None:
        raise ValueError(
            "stores.json has neither a 'geometries' nor a 'features' member"
        )

    return FeatureCollection(list(existing_points) + list(points))
def main():
    """Merge the hand-geocoded stores into the saved set and write the result.

    The combined collection is written to ``new_stores.json``.
    """
    merged = add_to_geojson(create_points())
    with open('new_stores.json', 'w') as out_file:
        geojson.dump(merged, out_file)
# Run the merge only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
import re
from geopy.geocoders import Nominatim
from geojson import Point, FeatureCollection
import geojson
import time
def parse_raw(f_name):
    """Parse the raw retailer listing into ``(name, address, hours)`` tuples.

    Lines without a bullet (``•``) are treated as headings that set the
    current city; any heading containing ``Seattle`` (e.g. "SE Seattle") maps
    to plain ``Seattle``. Lines with a bullet are locations of the form
    ``<store name> <street address> • <hours>``. Blank lines are ignored.

    Args:
        f_name: Path of the text file to parse.

    Returns:
        A list of ``(store_name, address, hours)`` tuples in file order, where
        ``address`` is ``"<street address>, <city>, WA"``.

    Raises:
        ValueError: If a location line appears before any city heading, or if
            the text before the bullet cannot be split into a store name
            (leading letters/whitespace) followed by an address.
    """
    # Store name = leading run of letters/whitespace; address = the remainder.
    name_and_address = re.compile(r"([a-z\s]+)(.+)", re.I)

    stores = []
    city = None
    # The context manager guarantees the file is closed, even on a parse error.
    with open(f_name, 'r') as f:
        for line_no, raw_line in enumerate(f, start=1):
            line = raw_line.strip()
            # A blank line is neither a heading nor a location; skipping it
            # keeps it from resetting the current city to an empty string.
            if not line:
                continue

            # Not a location
            if '•' not in line:
                # SE Seattle, NW Seattle etc. are not cities
                city = 'Seattle' if 'Seattle' in line else line
                continue

            if city is None:
                raise ValueError(
                    f'{f_name}:{line_no}: location listed before any city '
                    f'heading: {line!r}'
                )

            # Split on the first bullet only; anything after it is hours text.
            loc_string, _, hours = line.partition('•')
            matches = name_and_address.match(loc_string)
            if matches is None:
                raise ValueError(
                    f'{f_name}:{line_no}: cannot separate store name from '
                    f'address in {loc_string!r}'
                )

            store_name, loc = matches[1], matches[2]
            address = f'{loc.strip()}, {city}, WA'
            # Drop the whitespace left around each field by the split/regex.
            stores.append((store_name.strip(), address, hours.strip()))
    return stores
def get_coords(address):
    """Look up ``address`` with Nominatim and return the geocoder's result.

    A fresh ``Nominatim`` client identifying itself as ``orca_scraper`` is
    created for each call; whatever ``geocode`` returns is passed through
    unchanged.
    """
    return Nominatim(user_agent="orca_scraper").geocode(address)
def convert_to_geojson(stores):
    """Geocode every store and collect the successes as GeoJSON.

    Args:
        stores: Iterable of ``(store_name, address, hours)`` tuples.

    Returns:
        A pair ``(undecoded_stores, collection)``: the input tuples whose
        address lookup returned ``None``, and a ``FeatureCollection`` of
        ``Point`` objects (each with ``name``/``address``/``hours``
        properties) for the rest.
    """
    undecoded_stores = []
    decoded_stores = []
    for store in stores:
        store_name, address, hours = store
        location = get_coords(address)
        # Don't overload the API with calls. Pause after *every* request,
        # including failed lookups; sleeping only after successes would send
        # a run of unresolvable addresses back-to-back with no delay.
        time.sleep(1)
        if location is None:
            undecoded_stores.append(store)
            continue
        # The point is built as (longitude, latitude).
        feature = Point((location.longitude, location.latitude))
        feature['properties'] = {
            'name': store_name,
            'address': address,
            'hours': hours,
        }
        decoded_stores.append(feature)
    return undecoded_stores, FeatureCollection(decoded_stores)
def main():
    """Parse the raw listing, geocode it, and write both result files.

    Geocoded stores go to ``stores.json``; stores that could not be geocoded
    are written to ``undecoded_stores.txt`` as ``name|address|hours`` lines.
    """
    FILENAME = 'orca_locations.txt'
    failed, collection = convert_to_geojson(parse_raw(FILENAME))

    with open('stores.json', 'w') as json_out:
        json_out.write(geojson.dumps(collection))

    with open('undecoded_stores.txt', 'w') as text_out:
        text_out.writelines(
            f'{store}|{address}|{hours}\n' for store, address, hours in failed
        )
# Run the scrape-and-geocode pipeline only when executed as a script.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment