Skip to content

Instantly share code, notes, and snippets.

@perrygeo
Last active September 13, 2016 16:59
Show Gist options
  • Save perrygeo/60633de1b2cd90b07b3d to your computer and use it in GitHub Desktop.
Save perrygeo/60633de1b2cd90b07b3d to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
import asyncio
import json
import random
import sys
import time
import requests
import click
import mapbox
# Module-level geocoding client shared by every call to _geocode().
# NOTE(review): presumably picks up its access token from the environment --
# confirm against the mapbox SDK configuration.
geocoder = mapbox.Geocoder()
def _geocode(input):
    """Forward geocode ``input`` and echo the first (most likely) Feature.

    This is synchronous but can be run asynchronously via
    ``loop.run_in_executor``.

    Args:
        input: Free-form place text. Surrounding whitespace is stripped
            before the request is made.

    Returns:
        None. On an HTTP 200 response with at least one feature, the first
        feature is written to stdout as a single JSON line. Otherwise a
        diagnostic is written to stderr and nothing is written to stdout.
    """
    query = input.strip()
    res = geocoder.forward(query)

    if res.status_code != 200:
        # Report the failure instead of silently dropping the record.
        click.echo(
            "geocoding {!r} failed: HTTP {}".format(query, res.status_code),
            err=True)
        return

    features = res.geojson()['features']
    if not features:
        # An empty result list would otherwise raise IndexError below.
        click.echo("no results for {!r}".format(query), err=True)
        return

    # todo, handle data below threshold
    click.echo(json.dumps(features[0]))
async def process_places(fh, loop, geocoder, delay=0.25):
    """Worker coroutine: read lines from ``fh`` and geocode each one.

    Several of these workers can share the same file handle; each repeatedly
    pulls the next line, geocodes it in the loop's default executor, then
    sleeps for ``delay`` seconds before asking for another line.

    Args:
        fh: Text file-like object providing ``readline()``.
        loop: Event loop whose default executor runs the blocking calls.
        geocoder: Currently unused; ``_geocode`` uses the module-level
            client. Kept so existing callers keep working.
        delay: Seconds to sleep after each geocoded line.

    Returns:
        None, once ``fh`` is exhausted.
    """
    while True:
        data = await read_line(fh, loop)
        if data is None:
            # End of input.
            break
        if not data.strip():
            # Blank lines carry no query to geocode; move on to the next one.
            continue
        click.echo("got data {}".format(data), err=True)
        await loop.run_in_executor(None, _geocode, data)
        click.echo("processed data {}".format(data), err=True)
        await asyncio.sleep(delay)
async def read_line(fh, loop):
    """Read one line from ``fh`` without blocking the event loop.

    The blocking ``fh.readline`` call is handed to the loop's default
    executor.

    Args:
        fh: Text file-like object providing ``readline()``.
        loop: Event loop whose default executor performs the read.

    Returns:
        The next line with trailing newline characters removed (an empty
        string for a blank line), or None when ``readline`` returns an empty
        string, i.e. at end of input.
    """
    line = await loop.run_in_executor(None, fh.readline)
    if not line:
        return None
    return line.rstrip("\n")
@click.command()
@click.argument("places", default="-", type=click.File())
@click.option("--pool", default=8, type=int)
@click.option("--delay", default=0.1, type=float)
def main(places, pool, delay):
    """Geocode each line of PLACES, echoing one GeoJSON Feature per result.

    PLACES is a text file with one place per line ("-" reads stdin). POOL
    worker coroutines share the file and each waits DELAY seconds between
    requests.
    """
    if pool < 1:
        # asyncio.wait() rejects an empty task set; fail with a clear message.
        raise click.BadParameter("must be at least 1", param_hint="--pool")

    loop = asyncio.new_event_loop()
    try:
        # `asyncio.async` is a syntax error on Python 3.7+; create_task is the
        # supported way to schedule a coroutine on a specific loop.
        # `delay` is passed after `geocoder` so it lands in the delay slot of
        # process_places rather than in its geocoder slot.
        tasks = [
            loop.create_task(process_places(places, loop, geocoder, delay))
            for _ in range(pool)
        ]
        loop.run_until_complete(asyncio.wait(tasks))
    finally:
        loop.close()
# Run the click command only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
Portland, OR
Bend, OR
Salem, OR
Medford, OR
Arcata, CA
Redding, CA
Reno, NV
Santa Barbara, CA
Denver, CO
San Diego, CA
Las Vegas, NV
Bismark, ND
Lawrence, KS
Boston, MA
New York, NY
Baltimore, MD
@perrygeo
Copy link
Author

perrygeo commented Jan 2, 2016

$ batch_geocode.py < places.txt | fio collect | geojson-summary
got data Portland, OR
got data Bend, OR
got data Salem, OR
got data Medford, OR
got data Arcata, CA
got data Redding, CA
got data Reno, NV
got data Santa Barbara, CA
processed data Medford, OR
processed data Arcata, CA
processed data Bend, OR
processed data Portland, OR
processed data Salem, OR
processed data Redding, CA
processed data Reno, NV
processed data Santa Barbara, CA
got data Denver, CO
got data San Diego, CA
got data Las Vegas, NV
got data Bismark, ND
got data Lawrence, KS
processed data Las Vegas, NV
got data Boston, MA
processed data San Diego, CA
got data New York, NY
got data Baltimore, MD
processed data Denver, CO
processed data Bismark, ND
processed data Lawrence, KS
processed data Boston, MA
processed data New York, NY
processed data Baltimore, MD

16 points

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment