Last active
August 29, 2015 14:13
-
-
Save jackmaney/4a98cfdfef61e1d5a097 to your computer and use it in GitHub Desktop.
A queue for geocoding addresses with geopy that respects a per-second rate limit
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from time import sleep, time
class RateLimitedGeocodingQueue(object):
    """Geocode a batch of addresses without exceeding a per-second rate limit.

    The queue counts how many addresses were geocoded in the current
    window (``WINDOW_SECONDS`` long). Once ``per_sec_rate_limit`` requests
    have been made in that window, ``process`` sleeps until the window has
    elapsed instead of spinning on the clock.

    Args:
        geocoder: Any object exposing a ``geocode(address)`` method
            (intended for geopy geocoders).
        addresses: Iterable of addresses. It is copied, so the caller's
            collection is never modified.
        per_sec_rate_limit: Maximum number of ``geocode`` calls allowed
            per window.
    """

    # Length, in seconds, of the window over which the rate limit is counted.
    WINDOW_SECONDS = 1.0

    # Lower bound on a single sleep so the wait loop never degenerates into
    # a tight spin when the remaining time rounds down to zero.
    _MIN_SLEEP_SECONDS = 0.001

    def __init__(self, geocoder, addresses, per_sec_rate_limit=5):
        self.geocoder = geocoder
        # Private copy: process() pops from this list as it goes.
        self.addresses = list(addresses)
        self.per_sec_rate_limit = per_sec_rate_limit
        self.total_processed_this_sec = 0
        self.stopwatch_start = None
        self.stopwatch_end = None

    def time_this_interval(self):
        """Return the seconds between the stopwatch's start and end marks.

        If the stopwatch has never been started, it is started now and 0
        is returned. If only the end mark is missing, it is set to now.
        """
        if self.stopwatch_start is None:
            self.stopwatch_start = time()
            return 0
        if self.stopwatch_end is None:
            self.stopwatch_end = time()
        return self.stopwatch_end - self.stopwatch_start

    def restart_interval_timer(self):
        """Begin a new window: reset both stopwatch marks and the counter."""
        self.stopwatch_start = time()
        self.stopwatch_end = self.stopwatch_start
        self.total_processed_this_sec = 0

    def can_process(self):
        """Return True if another request may be sent right now.

        When the current window has fully elapsed, a new window is started
        (which resets the counter) and True is returned. Otherwise the
        answer is whether the counter is still below the rate limit.
        """
        if self.time_this_interval() > self.WINDOW_SECONDS:
            self.restart_interval_timer()
            return True
        return self.total_processed_this_sec < self.per_sec_rate_limit

    def process(self):
        """Geocode every queued address, respecting the rate limit.

        Addresses are consumed from ``self.addresses`` in order, so the
        queue is empty when this returns.

        Returns:
            A list with the result of ``geocoder.geocode(address)`` for
            each address, in input order (for geopy geocoders these are
            expected to be ``geopy.location.Location`` objects or None).
        """
        result = []
        self.stopwatch_start = time()
        while self.addresses:
            self.stopwatch_end = time()
            if not self.can_process():
                # Limit reached for this window: sleep out the remainder
                # rather than busy-waiting on the clock.
                remaining = self.WINDOW_SECONDS - self.time_this_interval()
                sleep(max(remaining, self._MIN_SLEEP_SECONDS))
                continue
            address = self.addresses.pop(0)
            # TODO: Add a bit of error validation (try/except, etc)
            result.append(self.geocoder.geocode(address))
            self.total_processed_this_sec += 1
        return result
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment