Created
January 24, 2022 08:52
-
-
Save rzhade3/0bdde47bd08c8305fa528a878fcc24ef to your computer and use it in GitHub Desktop.
Locations of all stores that sell ORCA passes, scraped from https://www.soundtransit.org/sites/default/files/documents/orca_retail_locations.pdf and converted to a more viewable format (GeoJSON).
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from geojson import Point, FeatureCollection | |
import geojson | |
def create_points():
    """Build GeoJSON points from the hand-geocoded ``undecoded_coords.txt``.

    Each non-blank line of the file is expected to hold four ``|``-separated
    fields: store name, address, hours and a ``lat,lng`` coordinate pair.

    Returns:
        A list of ``Point`` objects, each carrying a ``properties`` mapping
        with the store's ``name``, ``address`` and ``hours``.

    Raises:
        ValueError: If a non-blank line does not have exactly four fields, or
            its coordinates are not two comma-separated numbers.
    """
    points = []
    # The context manager closes the file even when a malformed line raises.
    with open('undecoded_coords.txt', 'r') as f:
        for line in f:
            record = line.strip()
            if not record:
                # Tolerate blank lines (e.g. a trailing newline at EOF).
                continue
            store_name, address, hours, coords = record.split('|')
            lat, lng = coords.split(',')
            # GeoJSON positions are ordered (longitude, latitude).
            feature = Point((float(lng), float(lat)))
            feature['properties'] = {
                'name': store_name,
                'address': address,
                'hours': hours
            }
            points.append(feature)
    return points
def add_to_geojson(points):
    """Merge ``points`` with the entries already stored in ``stores.json``.

    Args:
        points: List of GeoJSON objects to append after the existing ones.

    Returns:
        A ``FeatureCollection`` holding the existing entries followed by
        ``points``.

    Raises:
        AttributeError: If ``stores.json`` holds neither a ``features`` nor a
            ``geometries`` member.
    """
    # Close the file as soon as it has been parsed.
    with open('stores.json', 'r') as f:
        existing = geojson.load(f)
    # A FeatureCollection keeps its members under "features"; a
    # GeometryCollection keeps them under "geometries". Accept either so a
    # stores.json written as a FeatureCollection can be merged too.
    if 'features' in existing:
        existing_points = existing['features']
    else:
        existing_points = existing.geometries
    return FeatureCollection(existing_points + points)
def main():
    """Merge the hand-geocoded stores into the existing set and save it.

    The combined collection is written to ``new_stores.json``.
    """
    merged = add_to_geojson(create_points())
    with open('new_stores.json', 'w') as out_file:
        geojson.dump(merged, out_file)


if __name__ == '__main__':
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re | |
from geopy.geocoders import Nominatim | |
from geojson import Point, FeatureCollection | |
import geojson | |
import time | |
def parse_raw(f_name):
    """Parse the raw retailer listing into ``(name, address, hours)`` tuples.

    Lines without a bullet (``•``) are treated as city headings that apply to
    the store lines after them; any heading containing "Seattle" is
    normalised to plain "Seattle". Lines with a bullet are store entries of
    the form ``<store name><street address> • <hours>``.

    Args:
        f_name: Path of the text file to read.

    Returns:
        A list of ``(store_name, address, hours)`` tuples, where ``address``
        is ``"<street address>, <city>, WA"``. The city is omitted when a
        store line appears before any city heading.
    """
    stores = []
    city = None  # No city heading seen yet.
    # The context manager closes the file even if parsing raises.
    with open(f_name, 'r') as f:
        for line in f:
            entry = line.strip()
            if not entry:
                # Blank lines are layout noise, not city headings; letting
                # them through would reset the current city to ''.
                continue
            # Not a location
            if '•' not in entry:
                # SE Seattle, NW Seattle etc. are not cities
                city = 'Seattle' if 'Seattle' in entry else entry
                continue
            # Split on the first bullet only; the hours text may contain more.
            loc_string, _, hours = entry.partition('•')
            matches = re.match(r"([a-z\s]+)(.+)", loc_string, re.I)
            if matches:
                store_name, loc = matches[1], matches[2]
            else:
                # The text does not start with letters (e.g. "7-Eleven ..."),
                # so the name cannot be separated; keep everything as the
                # address instead of crashing on a None match.
                store_name, loc = '', loc_string
            address_parts = [loc.strip()]
            if city:
                address_parts.append(city)
            address_parts.append('WA')
            stores.append(
                (store_name.strip(), ', '.join(address_parts), hours.strip())
            )
    return stores
def get_coords(address):
    """Geocode ``address`` through Nominatim.

    Args:
        address: Free-form address string to look up.

    Returns:
        Whatever ``Nominatim.geocode`` returns for the address.
    """
    return Nominatim(user_agent="orca_scraper").geocode(address)
def convert_to_geojson(stores):
    """Geocode every store and split the results by lookup success.

    Args:
        stores: Iterable of ``(store_name, address, hours)`` tuples.

    Returns:
        A ``(undecoded_stores, collection)`` tuple: the list of input tuples
        whose address could not be geocoded, and a ``FeatureCollection`` of
        points (with ``name``, ``address`` and ``hours`` properties) for the
        stores that could.
    """
    undecoded_stores = []
    decoded_stores = []
    for store in stores:
        store_name, address, hours = store
        location = get_coords(address)
        # Pause after every request, failed lookups included, so a run of
        # unresolved addresses cannot overload the API with calls.
        time.sleep(1)
        if location is None:
            undecoded_stores.append(store)
            continue
        # GeoJSON positions are ordered (longitude, latitude).
        feature = Point((location.longitude, location.latitude))
        feature['properties'] = {
            'name': store_name,
            'address': address,
            'hours': hours
        }
        decoded_stores.append(feature)
    return undecoded_stores, FeatureCollection(decoded_stores)
def main():
    """Parse the raw listing, geocode it and write both result files.

    Geocoded stores go to ``stores.json``; stores whose address could not be
    resolved are written to ``undecoded_stores.txt`` as ``|``-separated lines.
    """
    source_path = 'orca_locations.txt'
    failed, geocoded = convert_to_geojson(parse_raw(source_path))

    with open('stores.json', 'w') as geojson_file:
        geojson_file.write(geojson.dumps(geocoded))

    with open('undecoded_stores.txt', 'w') as failed_file:
        failed_file.writelines(
            f'{store}|{address}|{hours}\n' for store, address, hours in failed
        )


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment