Created
May 1, 2023 14:33
-
-
Save TomAugspurger/4197f2849ee39b50aef61b4ee8575e3f to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"id": "a5951d7e-39c0-4b93-86af-e0e7e6c09d19", | |
"metadata": {}, | |
"source": [ | |
"# Delta Table format for MS Buildings\n", | |
"\n", | |
"The MS buildings dataset contains building footprint polygons that are partitioned by country (`RegionName`) and `quadkey`." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "b05831c5-1d63-4822-ae51-b1275d94870f", | |
"metadata": { | |
"tags": [] | |
}, | |
"outputs": [], | |
"source": [ | |
"!pip install -U deltalake pyquadkey2" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "8bee92e5-b95d-44bd-9c7d-fd2ab41a7fb7", | |
"metadata": { | |
"tags": [] | |
}, | |
"outputs": [], | |
"source": [ | |
"import planetary_computer\n", | |
"import geopandas\n", | |
"import pandas as pd\n", | |
"import fiona\n", | |
"import deltalake" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"id": "759cb87a-031a-4fdf-9115-fa6ee662a587", | |
"metadata": { | |
"tags": [] | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"CPU times: user 93.3 ms, sys: 74.6 ms, total: 168 ms\n", | |
"Wall time: 455 ms\n" | |
] | |
} | |
], | |
"source": [ | |
"sas_token = planetary_computer.sas.get_token(\"bingmlbuildings\", \"footprints\").token\n", | |
"storage_options = {\"account_name\": \"bingmlbuildings\", \"sas_token\": sas_token}\n", | |
"%time dt = deltalake.DeltaTable(\"abfs://footprints/delta/2023-04-25/ml-buildings.parquet/\", storage_options=storage_options)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"id": "061efb7c-62b3-4fb6-99b2-d03d9ea619b8", | |
"metadata": { | |
"tags": [] | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"CPU times: user 5.17 ms, sys: 531 µs, total: 5.7 ms\n", | |
"Wall time: 5.65 ms\n" | |
] | |
} | |
], | |
"source": [ | |
"quadkeys = [122110222, 120333320]\n", | |
"\n", | |
"%time uris = dt.file_uris(partition_filters=[(\"RegionName\", \"=\", \"Turkey\"), (\"quadkey\", \"in\", quadkeys)])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"id": "8c4d820f-9a35-47ab-ab00-d884925ac4a3", | |
"metadata": { | |
"tags": [] | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"CPU times: user 322 ms, sys: 66.8 ms, total: 389 ms\n", | |
"Wall time: 754 ms\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>geometry</th>\n", | |
" <th>meanHeight</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>POLYGON ((34.34346 36.66417, 34.34359 36.66420...</td>\n", | |
" <td>-1.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>POLYGON ((34.39339 37.06736, 34.39354 37.06730...</td>\n", | |
" <td>-1.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>POLYGON ((34.11536 36.88733, 34.11535 36.88741...</td>\n", | |
" <td>-1.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>POLYGON ((34.37202 36.64425, 34.37212 36.64428...</td>\n", | |
" <td>-1.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>POLYGON ((34.41499 36.83042, 34.41494 36.83053...</td>\n", | |
" <td>-1.0</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" geometry meanHeight\n", | |
"0 POLYGON ((34.34346 36.66417, 34.34359 36.66420... -1.0\n", | |
"1 POLYGON ((34.39339 37.06736, 34.39354 37.06730... -1.0\n", | |
"2 POLYGON ((34.11536 36.88733, 34.11535 36.88741... -1.0\n", | |
"3 POLYGON ((34.37202 36.64425, 34.37212 36.64428... -1.0\n", | |
"4 POLYGON ((34.41499 36.83042, 34.41494 36.83053... -1.0" | |
] | |
}, | |
"execution_count": 5, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"%%time\n", | |
"import pandas as pd\n", | |
"import geopandas\n", | |
"\n", | |
"dfs = [pd.read_parquet(uri, storage_options=storage_options) for uri in uris]\n", | |
"df = pd.concat(dfs)\n", | |
"gdf = geopandas.GeoDataFrame(df, geometry=geopandas.GeoSeries.from_wkb(df.geometry), crs=\"WGS84\")\n", | |
"gdf.head()" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3 (ipykernel)", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.10.9" | |
}, | |
"widgets": { | |
"application/vnd.jupyter.widget-state+json": { | |
"state": {}, | |
"version_major": 2, | |
"version_minor": 0 | |
} | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 5 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment