Skip to content

Instantly share code, notes, and snippets.

@martinfleis
Created March 28, 2022 16:26
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save martinfleis/7f68536a0fe6f3f30f1b834484d9cbab to your computer and use it in GitHub Desktop.
Save martinfleis/7f68536a0fe6f3f30f1b834484d9cbab to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [],
"source": [
"import geopandas\n",
"import pandas\n",
"import numpy\n",
"\n",
"from tobler.area_weighted import area_interpolate, _area_tables_binning\n",
"from libpysal.examples import load_example\n",
"\n",
"from geopandas.testing import assert_geodataframe_equal"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Example not available: Charleston1\n",
"Example not downloaded: Chicago parcels\n",
"Example not downloaded: Chile Migration\n",
"Example not downloaded: Spirals\n"
]
}
],
"source": [
"# Fetch the two Charleston example datasets shipped via libpysal.\n",
"c1, c2 = (\n",
"    load_example(example_name)\n",
"    for example_name in ('Charleston1', 'Charleston2')\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"# Both layers are reprojected to the same CRS so they can be overlaid.\n",
"crs = 6569\n",
"\n",
"tracts = geopandas.read_file(\n",
"    c1.get_path('sc_final_census2.shp')\n",
").to_crs(crs)\n",
"\n",
"zip_codes = geopandas.read_file(\n",
"    c2.get_path('CharlestonMSA2.shp')\n",
").to_crs(crs)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"# Stack 20 copies of each layer so the benchmark runs on larger inputs.\n",
"tracts, zip_codes = (\n",
"    pandas.concat([layer] * 20) for layer in (tracts, zip_codes)\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"# Ratio of POV_POP to POV_TOT; used below as the intensive variable.\n",
"tracts['pct_poverty'] = tracts['POV_POP'] / tracts['POV_TOT']\n",
"\n",
"# Synthetic categorical variable with values 0-9. A fixed seed keeps the\n",
"# categories identical across kernel restarts, so runs are comparable.\n",
"tracts['cat'] = numpy.random.default_rng(0).integers(0, 10, size=len(tracts))"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"# Build the table relating tracts to zip_codes; the next cell shows that it\n",
"# comes back as a SciPy sparse matrix in DOK format.\n",
"table = _area_tables_binning(\n",
"    tracts,\n",
"    zip_codes,\n",
"    \"auto\",\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<2340x840 sparse matrix of type '<class 'numpy.float32'>'\n",
"\twith 136800 stored elements in Dictionary Of Keys format>"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"table"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
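"## DOK matrix\n",
"\n",
"Baseline: interpolate with the table exactly as `_area_tables_binning` returns it (Dictionary Of Keys format)."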
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"679 ms ± 55.9 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n"
]
}
],
"source": [
"%%timeit\n",
"# Time area_interpolate with the DOK-format table built above.\n",
"results = area_interpolate(\n",
"    source_df=tracts,\n",
"    target_df=zip_codes,\n",
"    table=table,\n",
"    extensive_variables=['EMP_MALE'],\n",
"    intensive_variables=['pct_poverty'],\n",
"    categorical_variables=['cat'],\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## CSR matrix\n",
"\n",
"The same interpolation with the table converted to Compressed Sparse Row format."
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
"csr = table.tocsr()"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"14.1 ms ± 182 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n"
]
}
],
"source": [
"%%timeit\n",
"# Time area_interpolate with the table converted to CSR format.\n",
"results_csr = area_interpolate(\n",
"    source_df=tracts,\n",
"    target_df=zip_codes,\n",
"    table=csr,\n",
"    extensive_variables=['EMP_MALE'],\n",
"    intensive_variables=['pct_poverty'],\n",
"    categorical_variables=['cat'],\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [],
"source": [
"# Names assigned inside a %%timeit cell are not kept in the notebook\n",
"# namespace, so compute both results here before comparing them.\n",
"interpolation_kwargs = dict(\n",
"    source_df=tracts,\n",
"    target_df=zip_codes,\n",
"    intensive_variables=['pct_poverty'],\n",
"    extensive_variables=['EMP_MALE'],\n",
"    categorical_variables=['cat'],\n",
")\n",
"results = area_interpolate(table=table, **interpolation_kwargs)\n",
"results_csr = area_interpolate(table=csr, **interpolation_kwargs)\n",
"\n",
"# The CSR table must give the same output as the original DOK table.\n",
"assert_geodataframe_equal(results, results_csr)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## CSC matrix\n",
"\n",
"The same interpolation with the table converted to Compressed Sparse Column format."
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [],
"source": [
"csc = table.tocsc()"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"15.9 ms ± 74.7 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n"
]
}
],
"source": [
"%%timeit\n",
"# Time area_interpolate with the table converted to CSC format.\n",
"results_csc = area_interpolate(\n",
"    source_df=tracts,\n",
"    target_df=zip_codes,\n",
"    table=csc,\n",
"    extensive_variables=['EMP_MALE'],\n",
"    intensive_variables=['pct_poverty'],\n",
"    categorical_variables=['cat'],\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## LIL matrix\n",
"\n",
"The same interpolation with the table converted to List of Lists format."
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {},
"outputs": [],
"source": [
"lil = table.tolil()"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"35.3 ms ± 556 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)\n"
]
}
],
"source": [
"%%timeit\n",
"# Time area_interpolate with the table converted to LIL format.\n",
"results_lil = area_interpolate(\n",
"    source_df=tracts,\n",
"    target_df=zip_codes,\n",
"    table=lil,\n",
"    extensive_variables=['EMP_MALE'],\n",
"    intensive_variables=['pct_poverty'],\n",
"    categorical_variables=['cat'],\n",
")"
]
}
],
"metadata": {
"interpreter": {
"hash": "8d1b2c984ad473d756980598d6fae8279815dc9c89a9d51a262cfb04eba7ee8f"
},
"kernelspec": {
"display_name": "Python 3.9.7 ('geo_dev')",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.7"
},
"orig_nbformat": 4
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment