#!/usr/bin/python3 -u
#
# Copyright Dan Smith <dsmith+ustopo@danplanet.com>
#
# This downloads USGS topo GeoPDF maps in bulk, by default the current
# high-resolution 7.5-minute maps. For reference, all of Oregon comes to
# about 96 GB and all of Washington to about 78 GB. For each state, the
# state index PDF is downloaded into the root of the per-state directory,
# which is how you look up the section map you need by name.
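#
# With the default --dest, the output tree looks roughly like this (file
# names are illustrative; the actual map PDFs are named by the USGS and
# begin with the state abbreviation):
#
#   pdf/
#     OR/
#       index-OR.pdf
#       OR_<quad name and product details>.pdf
#       ...
#     WA/
#       index-WA.pdf
#       ...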
#
# To use this, you need python3 and python3-requests. On Debian or Ubuntu,
#
# sudo apt-get install python3-requests
#
# should be enough. On anything else, probably:
#
# pip install requests
#
# Run this with -h to get some help on how to use this, where to get a
# CSV map dump, etc.
#
# Super quick way to get a region (use your own lat/lon):
#
# $ curl -O http://thor-f5.er.usgs.gov/ngtoc/metadata/misc/topomaps_all.zip
# $ unzip topomaps_all.zip
# $ unzip ustopo_current.zip
# $ python3 ustopo_fetch.py --center 45.5,-122.0 --radius 100 ustopo_current.csv
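#
# Other example invocations (the options are defined in main() below; the
# state, radius, and destination used here are only illustrations):
#
# $ python3 ustopo_fetch.py --state OR ustopo_current.csv
# $ python3 ustopo_fetch.py --center 45.5,-122.0 --radius 30 --dest maps ustopo_current.csv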
#
#
import argparse
import csv
import functools
import math
import os
import sys
import textwrap
import requests
MiB = 1024 * 1024
INDEX_BASE = 'https://store.usgs.gov/assets/MOD/StoreFiles/zoom/pdf/'
TYPES = ('7.5', '15', '30')
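# Some small states share a single combined index PDF on the USGS store;
# map the state abbreviation to the index file name actually published.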
INDEX_OVERRIDES = {
    'MA': 'MA-RI-CT',
    'RI': 'MA-RI-CT',
    'CT': 'MA-RI-CT',
    'MD': 'MD-DE-DC',
    'DE': 'MD-DE-DC',
    'DC': 'MD-DE-DC',
    'NH': 'NH-VT',
    'VT': 'NH-VT',
}


def tempfile(fn):
    return '%s.part' % fn
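

# Decorator: if the wrapped download function raises, remove any partial
# .part file it left behind before re-raising.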
def purge_temp_on_error(func):
    @functools.wraps(func)
    def inner(url, fn):
        try:
            return func(url, fn)
        except:
            tempfn = tempfile(fn)
            if os.path.exists(tempfn):
                print('Removing partial file %s' % tempfn)
                os.remove(tempfn)
            raise
    return inner


@purge_temp_on_error
def download_file(url, fn):
    tmpfn = tempfile(fn)
    with requests.get(url, stream=True) as r:
        if r.status_code == 200:
            size = 0
            with open(tmpfn, 'wb') as outf:
                for chunk in r.iter_content(chunk_size=65536):
                    if os.isatty(1) and (size == 0 or size > MiB):
                        sys.stdout.write(
                            'Writing %s...%i MiB\r' % (fn, size / MiB))
                        sys.stdout.flush()
                    outf.write(chunk)
                    size += len(chunk)
            os.rename(tmpfn, fn)
            if os.isatty(1):
                sys.stdout.write('%s\r' % (' ' * 80))
                sys.stdout.flush()
            print('%s %i MiB done' % (fn, size / (1024 * 1024)))
            return True
        else:
            print('Failed to fetch %s' % url)
            return False


def great_circle_distance(latlong_a, latlong_b):
    """Return the great-circle distance in miles between two (lat, lon) pairs.

    >>> coord_pairs = [
    ...     # between eighth and 31st and eighth and 30th
    ...     [(40.750307,-73.994819), (40.749641,-73.99527)],
    ...     # sanfran to NYC ~2568 miles
    ...     [(37.784750,-122.421180), (40.714585,-74.007202)],
    ...     # about 10 feet apart
    ...     [(40.714732,-74.008091), (40.714753,-74.008074)],
    ...     # inches apart
    ...     [(40.754850,-73.975560), (40.754851,-73.975561)],
    ... ]
    >>> for pair in coord_pairs:
    ...     great_circle_distance(pair[0], pair[1])  # doctest: +ELLIPSIS
    0.05177...
    2568.339...
    0.001704...
    8.677...e-05
    """
    # Haversine formula on a sphere with the WGS84 equatorial radius
    EARTH_RADIUS = 6378137  # earth radius in meters
    lat1, lon1 = latlong_a
    lat2, lon2 = latlong_b
    dLat = math.radians(lat2 - lat1)
    dLon = math.radians(lon2 - lon1)
    a = (math.sin(dLat / 2) * math.sin(dLat / 2) +
         math.cos(math.radians(lat1)) * math.cos(math.radians(lat2)) *
         math.sin(dLon / 2) * math.sin(dLon / 2))
    c = 2 * math.atan2(math.sqrt(a), math.sqrt(1 - a))
    d = EARTH_RADIUS * c
    # Convert meters to miles
    return d * 0.000621371
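

# A tile is selected when any of its four corners falls within `radius`
# miles of the requested center, i.e. the area of interest is roughly a
# circle around the center point.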
def should_get_tile(center, radius, n_lat, s_lat, e_lon, w_lon):
    ne_corner = (n_lat, e_lon)
    sw_corner = (s_lat, w_lon)
    nw_corner = (n_lat, w_lon)
    se_corner = (s_lat, e_lon)
    ne_dist = great_circle_distance(center, ne_corner)
    sw_dist = great_circle_distance(center, sw_corner)
    nw_dist = great_circle_distance(center, nw_corner)
    se_dist = great_circle_distance(center, se_corner)
    closest_corner = min(ne_dist, sw_dist, nw_dist, se_dist)
    return closest_corner <= radius
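

# Walk the rows of the CSV, skipping maps that do not match the requested
# type, extent, state, or center/radius, and download the rest (plus the
# per-state index PDF) into per-state directories under args.dest.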
def process_csv(args):
    f = csv.reader(open(args.csv_file))
    header = next(f)
    # The first format is the mapviewer subset; the second is the
    # full database dump
    header_options = ('downloadURL', 'Download Product S3', 'product_url')
    extent_options = ('extent', 'Grid Size', 'grid_size')
    type_options = ('datasets', 'Version', 'edition')
    url_index = None
    for i, header_option in enumerate(header_options):
        try:
            url_index = header.index(header_option)
            extent_index = header.index(extent_options[i])
            type_index = header.index(type_options[i])
            break
        except (ValueError, IndexError):
            pass
    if url_index is None:
        print('Unknown CSV format. The USGS has changed their format multiple')
        print('times in the past, so this may not be your fault. Comment here:')
        print('https://gist.github.com/kk7ds/e47b50bd80c405dcdfb9cf44c1448137')
        print('to let me know.')
        return False
    if not os.path.isdir(args.dest):
        os.mkdir(args.dest)
    if args.center and 'N Lat' not in header:
        print('--center is compatible only with a full dump CSV '
              '(i.e. topomaps_all.csv)')
        return False
    if args.center:
        try:
            lat, lon = args.center.split(',')
            center = (float(lat), float(lon))
        except ValueError:
            print('Invalid --center=%r ; '
                  'use something like 45.125,-122.876' % (
                      args.center))
            return False
    else:
        center = None
    warnings = []
    for row in f:
        if args.explain_headers:
            for k, v in dict(zip(header, row)).items():
                print('%s = %s' % (k, v))
            return
        try:
            url = row[url_index]
            extent = row[extent_index]
            type_ = row[type_index]
        except IndexError:
            # Empty line, likely at the end of the file
            continue
        if args.type not in type_:
            if type_ not in warnings:
                print('Skipping map types %s' % type_)
                warnings.append(type_)
            continue
        if '%s minute' % args.extent not in extent.lower():
            if extent not in warnings:
                print('Skipping map extents %s' % extent)
                warnings.append(extent)
            continue
        basefn = os.path.basename(url)
        state = basefn.split('_')[0]
        if args.state and args.state.upper() != state:
            if state not in warnings:
                print('Skipping maps for state %s' % state)
                warnings.append(state)
            continue
        if center:
            try:
                n_lat = float(row[header.index('N Lat')])
                s_lat = float(row[header.index('S Lat')])
                e_lon = float(row[header.index('E Long')])
                w_lon = float(row[header.index('W Long')])
            except ValueError as e:
                print('CSV file has unparseable Lat/Lon bounding: %s' % e)
                return False
            if not should_get_tile(center, args.radius,
                                   n_lat, s_lat, e_lon, w_lon):
                if 'bounding' not in warnings:
                    print('Skipping maps outside %i mi bounding box' % (
                        args.radius))
                    warnings.append('bounding')
                continue
        if not os.path.isdir(os.path.join(args.dest, state)):
            os.mkdir(os.path.join(args.dest, state))
        state_for_index = INDEX_OVERRIDES.get(state, state)
        indexfn = os.path.join(args.dest, state, 'index-%s.pdf' % state)
        if not os.path.exists(indexfn) and not args.no_index:
            download_file('%s/%s.pdf' % (INDEX_BASE, state_for_index), indexfn)
        fn = os.path.join(args.dest, state, basefn)
        if os.path.exists(fn):
            print('Skipping existing %s' % fn)
            continue
        download_file(url, fn)


def main():
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawTextHelpFormatter,
        description=textwrap.dedent("""
            This is a bulk downloader for USGS 7.5 minute Topographical maps in
            GeoPDF format. It is intentionally verbose, and is designed to be
            run interactively, providing information about what it is doing along
            the way. At a minimum, you need to provide it with a CSV file of map
            information (see below) for it to process. By default, the results
            will be stored in pdf/$state, along with the map index.
            """),
        epilog=textwrap.dedent("""
            You can get a CSV file to feed this in one of two ways.

            First (and recommended), you can download the entire database dump
            by getting the zip file here:

            http://thor-f5.er.usgs.gov/ngtoc/metadata/misc/topomaps_all.zip

            Unzip this file and you will find a number of ZIP files inside. You
            likely want the 'ustopo_current.zip'. Unzip that as well and use the
            'ustopo_current.csv' file inside.

            Second, you can query a subset of the database visually by going here:

            https://viewer.nationalmap.gov/basic/?basemap=b1&category=histtopo%2Custopo

            Zoom and pan the map to a region, click "Find Products" and then
            "Save as CSV" after results are displayed.

            By default, every map in the provided CSV file will be fetched. To
            limit to just one state, use "--state XX" with a two-letter
            abbreviation. To limit to an area, provide a lat/lon coordinate
            via --center and adjust --radius to control how far around that
            center point we go for maps (a tile is fetched when any of its
            corners falls within that radius of the center).
            """))
    parser.add_argument('csv_file',
                        help='Path to a CSV file of map data')
    parser.add_argument('--dest', default='pdf',
                        help='Where to save the PDF files (default: "pdf/")')
    parser.add_argument('--state', default=None,
                        help='Only download maps for this state '
                             '(two-letter abbreviation; ex: "WA")')
    parser.add_argument('--type', default='Current',
                        choices=('Current', 'Historical'),
                        help='Type of map to get (default: Current)')
    parser.add_argument('--extent', default=TYPES[0],
                        choices=TYPES,
                        help='Map extent in minutes to get '
                             '(default: %s)' % TYPES[0])
    parser.add_argument('--center', default=None,
                        help=('Only download maps around this center lat/lon '
                              '(example: 45.125,-121.876)'))
    parser.add_argument('--radius', default=100, type=int,
                        help=('Radius (in miles) around center to fetch '
                              '(default: 100)'))
    parser.add_argument('--no-index', action='store_true',
                        help='Do not download state indexes')
    parser.add_argument('--explain_headers', action='store_true',
                        help='For debugging only: explain headers')
    args = parser.parse_args()
    try:
        return 1 if process_csv(args) is False else 0
    except KeyboardInterrupt:
        return 2


if __name__ == '__main__':
    sys.exit(main())