Skip to content

Instantly share code, notes, and snippets.

@r-leyshon
Created December 15, 2023 10:11
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save r-leyshon/222dd4039143398ad9574fba83c32752 to your computer and use it in GitHub Desktop.
Save r-leyshon/222dd4039143398ad9574fba83c32752 to your computer and use it in GitHub Desktop.
Ingest all England and Wales LSOA21 boundaries from the ONS Geoportal
import requests
import geopandas as gpd
import pandas as pd
# Query endpoint of the ArcGIS feature service layer holding the 2021 LSOA
# boundaries. The parentheses are required: adjacent string literals are only
# joined into one URL when they sit inside a single expression.
ENDPOINT = (
    "https://services1.arcgis.com/ESMARspQHYMw9BZ9/arcgis/rest/services/"
    "Lower_layer_Super_Output_Areas_2021_EW_BFC_V8/FeatureServer/0/query"
)

# Default query-string parameters sent with every request to ENDPOINT.
params = {
    "where": "1=1",  # SQL clauses can go here
    "outSR": 4326,  # CRS that you want
    "f": "geoJSON",  # response format
    "resultOffset": 0,  # parameter used for pagination later
}
def request_to_gdf(
    url: str, query_params: dict, timeout: float = 60.0
) -> "tuple[requests.Response, gpd.GeoDataFrame]":
    """Send a get request to ArcGIS API & Convert to GeoDataFrame.

    Only works when asking for features and GeoJSON format.

    Parameters
    ----------
    url : str
        The url endpoint.
    query_params : dict
        A dictionary of query parameter : value pairs. The dictionary is not
        modified; a copy with "f" forced to "geoJSON" is sent instead.
    timeout : float, optional
        Seconds to wait for the server, passed straight to ``requests.get``.
        Defaults to 60.

    Returns
    -------
    requests.Response
        The response from ArcGIS API server. Useful for paginated requests
        later.
    gpd.GeoDataFrame
        A GeoDataFrame of the requested geometries in the crs specified by the
        response metadata.

    Raises
    ------
    requests.exceptions.RequestException
        The response was not ok, its body was not valid JSON, or the body
        carried an "error" member instead of features.

    """
    # this approach will only work with geoJSON; build a new dict so the
    # caller's parameters are left untouched
    query = {**query_params, "f": "geoJSON"}
    # without a timeout a stalled server would block this call forever
    response = requests.get(url, params=query, timeout=timeout)
    if not response.ok:
        # cases where a traditional bad response may be returned
        raise requests.RequestException(
            f"HTTP Code: {response.status_code}, Status: {response.reason}"
        )
    try:
        content = response.json()
    except ValueError as err:
        # JSON decoding errors are ValueError subclasses; surface them as the
        # documented exception type instead
        raise requests.RequestException(
            "Response body is not valid JSON"
        ) from err
    if isinstance(content, dict) and "error" in content:
        # NOTE(review): ArcGIS services can report a failed query inside an
        # HTTP 200 body; without this check it would surface as a KeyError on
        # "features" below
        raise requests.RequestException(f"ArcGIS error: {content['error']}")
    gdf = gpd.GeoDataFrame.from_features(
        content["features"],
        # safest to get crs from response
        crs=content["crs"]["properties"]["name"],
    )
    # we'll need the response again later for pagination
    return response, gdf
# Fetch the first page of records. The remaining records are collected page by
# page below, using resultOffset to skip what has already been ingested.
response, gdf = request_to_gdf(ENDPOINT, params)
content = response.json()
pages = [gdf]  # joined once at the end rather than re-copied every iteration
# rather than exceededTransferLimit = False, the key disappears on the last
# page, hence the defaults
more_pages = content.get("properties", {}).get("exceededTransferLimit", False)
# An empty page while the server still reports more data would never advance
# the offset, so it also ends the loop.
while more_pages and not gdf.empty:
    # advance by the size of the page just received, not a fixed page length
    params["resultOffset"] += len(gdf)
    response, gdf = request_to_gdf(ENDPOINT, params)
    content = response.json()
    if not gdf.empty:
        pages.append(gdf)
    more_pages = content.get("properties", {}).get(
        "exceededTransferLimit", False
    )
all_lsoas = pd.concat(pages).reset_index(drop=True)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment