Skip to content

Instantly share code, notes, and snippets.

@dbreunig
Last active December 11, 2023 13:06
Show Gist options
  • Save dbreunig/ce692c3bb7a6f03188d9f2a1637a8729 to your computer and use it in GitHub Desktop.
Save dbreunig/ce692c3bb7a6f03188d9f2a1637a8729 to your computer and use it in GitHub Desktop.
A (very) simple python CLI to download Overture Places data to a sqlite3 db, given a bounding box.
import click
import duckdb
import sqlite_utils
# Set up click
@click.command()
@click.option("--minx", default=-122.347183)
@click.option("--maxx", default=-122.218437)
@click.option("--miny", default=37.748729)
@click.option("--maxy", default=37.800290)
def load(minx, maxx, miny, maxy):
# Remove the tmp file if it exists
if os.path.exists("/tmp/overture.db"):
os.remove("/tmp/overture.db")
# Set up a temp duckdb database
db = duckdb.connect("/tmp/overture.db")
db.sql("""
INSTALL spatial;
INSTALL httpfs;
LOAD spatial;
LOAD httpfs;
SET s3_region='us-west-2';
""")
# Create a table and download the Overture data into it,
# bounded by the given parameters
db.sql(f"""
CREATE TABLE places AS
SELECT *
FROM read_parquet('s3://overturemaps-us-west-2/release/2023-07-26-alpha.0/theme=places/type=*/*')
WHERE
bbox.minx > {minx}
and bbox.maxx < {maxx}
and bbox.miny > {miny}
and bbox.maxy < {maxy}
""")
# Move the data into a SQLite database
cursor = db.execute("SELECT * FROM places")
rows = cursor.fetchall()
columns = [desc[0] for desc in cursor.description]
dicts = [dict(zip(columns, row)) for row in rows]
overture = sqlite_utils.Database("overture.db")
overture["places"].insert_all(dicts, pk="id", replace=True)
# Get the count
places_loaded = overture.execute("SELECT COUNT(*) FROM places").fetchone()[0]
print("Loaded {} places".format(places_loaded))
if __name__ == "__main__":
load()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment