Skip to content

Instantly share code, notes, and snippets.

@ptpt
Created September 5, 2019 16:23
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ptpt/b106fee8253b4c78c148e7a57192f159 to your computer and use it in GitHub Desktop.
Save ptpt/b106fee8253b4c78c148e7a57192f159 to your computer and use it in GitHub Desktop.
A command line tool used to download Mapillary data via Mapillary API v3
#!/usr/bin/env python3
"""A command line tool used to download Mapillary data via Mapillary API v3 https://www.mapillary.com/developer/api-documentation/.
This tool fetches data and prints it as newline-delimited JSON (http://ndjson.org/), one item per line; pagination is handled automatically.
Usage:
mapillary_download.py URL [HEADER...]
Examples:
Download map images in the United States:
mapillary_download.py "https://a.mapillary.com/v3/images?iso_countries=US&per_page=10000&client_id=YOUR_CLIENT"
Download image detections in a bounding box:
mapillary_download.py "https://a.mapillary.com/v3/map_features?layers=segmentations&bbox=13,13,14,14&client_id=YOUR_CLIENT" "Authorization: Bearer AUTH_TOKEN"
"""
from __future__ import print_function
import sys
import json
import logging
import requests
# Logging goes to stderr so that stdout carries nothing but the downloaded
# items (one JSON document per line).
LOG = logging.getLogger()  # no name given: this is the root logger
LOG.setLevel(logging.INFO)

# Single stderr handler with a timestamped "time - LEVEL - message" layout.
handler = logging.StreamHandler(sys.stderr)
handler.setFormatter(logging.Formatter('%(asctime)s - %(levelname)-6s - %(message)s'))
handler.setLevel(logging.INFO)
LOG.addHandler(handler)
def iteratorize(body):
    """Yield the individual records held in a decoded JSON response body.

    * a list yields each of its elements;
    * a dict whose ``type`` is ``'FeatureCollection'`` and whose
      ``features`` value is a list yields each feature;
    * anything else (other dicts, scalars, ``None``) is yielded once, as is.
    """
    if isinstance(body, list):
        records = body
    else:
        members = body.get('features') if isinstance(body, dict) else None
        if isinstance(members, list) and body.get('type') == 'FeatureCollection':
            records = members
        else:
            # Not a recognised container: treat the body as a single record.
            records = (body,)

    for record in records:
        yield record
def stringify_response(resp):
    """Render a response as multi-line text for log messages.

    The result has three parts separated by newlines: the status line
    (``"<status_code> <reason>"``), a ``Content-Type:`` line taken from the
    response headers (``None`` is printed when the header is absent), and
    the response body text.
    """
    status_line = '{0} {1}'.format(resp.status_code, resp.reason)
    content_type_line = 'Content-Type: {0}'.format(resp.headers.get('content-type'))
    return '\n'.join((status_line, content_type_line, resp.text))
def download(url, headers=None, retry_delay=1.0):
    """Fetch ``url`` and lazily yield every item from every page of results.

    This is a generator: no request is sent until iteration starts, and
    errors surface while iterating. After each successful page the ``next``
    entry of ``resp.links`` is followed until there is none.

    Args:
        url: The initial request URL.
        headers: Optional mapping of request headers. It is copied, so the
            caller's mapping is never modified. An ``accept:
            application/json`` header is added unless ``accept`` is present.
        retry_delay: Seconds to wait before re-sending a request that got a
            retryable server error.

    Yields:
        The items produced by ``iteratorize`` for each page body.

    Raises:
        ValueError: A 2xx response body could not be decoded as JSON.
        requests.HTTPError: The response status is neither successful nor
            retryable.
    """
    import time  # local import: only needed on the retry path

    # Copy so the ``setdefault`` below cannot leak into the caller's dict.
    headers = dict(headers) if headers else {}
    headers.setdefault('accept', 'application/json')

    while True:
        LOG.info('GET %s', url)
        resp = requests.get(url, headers=headers, allow_redirects=True)

        if 200 <= resp.status_code < 300:
            try:
                body = resp.json()
            except ValueError:
                # Every JSON decode error requests can raise (stdlib json,
                # simplejson, requests' own wrapper) is a ValueError.
                LOG.error('Error JSON parsing response: %s', stringify_response(resp))
                raise
            for item in iteratorize(body):
                yield item
            next_link = resp.links.get('next')
            if next_link is None:
                break
            url = next_link['url']
        elif 500 < resp.status_code:
            # NOTE(review): a plain 500 is not retried (strict ``<``) and
            # falls through to the error branch -- confirm this is intended.
            LOG.warning('Retrying on response: %s', stringify_response(resp))
            # Pause so a failing server is not hit in a tight loop.
            time.sleep(retry_delay)
        else:
            LOG.error('Error response: %s', stringify_response(resp))
            resp.raise_for_status()
            # raise_for_status() only raises for 4xx/5xx. Any other status
            # reaching here would otherwise repeat the same request forever.
            raise requests.HTTPError(
                'Unexpected response status {0}'.format(resp.status_code),
                response=resp)
def parse_headers(pairs):
    """Turn ``"Name: value"`` strings into a dict of request headers.

    Each string is split on its first ``:``. Names are stripped and
    lower-cased, values are stripped. A later duplicate name overwrites an
    earlier one.

    Args:
        pairs: Iterable of header strings, e.g. the command-line arguments
            that follow the URL.

    Returns:
        Dict mapping lower-cased header names to values.

    Raises:
        ValueError: A string contains no ``:`` separator.
    """
    parsed = {}
    for pair in pairs:
        name, separator, value = pair.partition(':')
        if not separator:
            raise ValueError('Invalid header {0}'.format(pair))
        parsed[name.strip().lower()] = value.strip()
    return parsed


if __name__ == '__main__':
    if len(sys.argv) < 2:
        print('usage: mapillary_download URL [HEADER...]', file=sys.stderr)
        sys.exit(1)

    url = sys.argv[1]

    # Headers start at argv[2]. argv[1] is the URL and must not be parsed as
    # a header (its "https:" prefix would otherwise become a bogus header).
    try:
        headers = parse_headers(sys.argv[2:])
    except ValueError as err:
        print(err, file=sys.stderr)
        sys.exit(1)

    # Items go to stdout, one JSON document per line; the total is logged.
    count = 0
    for item in download(url, headers=headers):
        print(json.dumps(item))
        count += 1
    LOG.info('Got %s items', count)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment