Created
March 28, 2022 16:26
-
-
Save martinfleis/7f68536a0fe6f3f30f1b834484d9cbab to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 15, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import geopandas\n", | |
"import pandas\n", | |
"import numpy\n", | |
"\n", | |
"from tobler.area_weighted import area_interpolate, _area_tables_binning\n", | |
"from libpysal.examples import load_example\n", | |
"\n", | |
"from geopandas.testing import assert_geodataframe_equal" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Example not available: Charleston1\n", | |
"Example not downloaded: Chicago parcels\n", | |
"Example not downloaded: Chile Migration\n", | |
"Example not downloaded: Spirals\n" | |
] | |
} | |
], | |
"source": [ | |
"c1 = load_example('Charleston1')\n", | |
"c2 = load_example('Charleston2')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"crs = 6569\n", | |
"tracts = geopandas.read_file(c1.get_path('sc_final_census2.shp')).to_crs(crs)\n", | |
"zip_codes = geopandas.read_file(c2.get_path('CharlestonMSA2.shp')).to_crs(crs)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# create larger dfs for a better benchmark\n", | |
"tracts = pandas.concat([tracts] * 20)\n", | |
"zip_codes = pandas.concat([zip_codes] * 20)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# intensive variable used in the interpolation benchmark\n", | |
"tracts['pct_poverty'] = tracts.POV_POP/tracts.POV_TOT\n", | |
"# categorical variable with values 0-9; the generator is seeded so the\n", | |
"# benchmark input is identical on every run\n", | |
"tracts['cat'] = numpy.random.default_rng(0).integers(0, 10, len(tracts))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# create DOK matrix\n", | |
"table = _area_tables_binning(tracts, zip_codes, \"auto\")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"<2340x840 sparse matrix of type '<class 'numpy.float32'>'\n", | |
"\twith 136800 stored elements in Dictionary Of Keys format>" | |
] | |
}, | |
"execution_count": 8, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"table" | |
] | |
}, | |
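{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## DOK matrix\n", | |
"\n", | |
"Time `area_interpolate` with the table exactly as `_area_tables_binning` returned it (Dictionary Of Keys format)." | |
] | |
}, | |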
{ | |
"cell_type": "code", | |
"execution_count": 20, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"679 ms ± 55.9 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n" | |
] | |
} | |
], | |
"source": [ | |
"%%timeit\n", | |
"results = area_interpolate(\n", | |
" source_df=tracts, \n", | |
" target_df=zip_codes, \n", | |
" intensive_variables=['pct_poverty'], \n", | |
" extensive_variables=['EMP_MALE'],\n", | |
" categorical_variables=['cat'],\n", | |
" table=table\n", | |
")" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## CSR matrix\n", | |
"\n", | |
"Time `area_interpolate` with the table converted via `table.tocsr()`." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 11, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"csr = table.tocsr()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 21, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"14.1 ms ± 182 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n" | |
] | |
} | |
], | |
"source": [ | |
"%%timeit\n", | |
"results_csr = area_interpolate(\n", | |
" source_df=tracts, \n", | |
" target_df=zip_codes, \n", | |
" intensive_variables=['pct_poverty'], \n", | |
" extensive_variables=['EMP_MALE'],\n", | |
" categorical_variables=['cat'],\n", | |
" table=csr\n", | |
")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 16, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Variables assigned inside a `%%timeit` cell are not kept in the notebook\n", | |
"# namespace, so compute both results here before comparing them; otherwise\n", | |
"# this cell fails with a NameError on a fresh kernel.\n", | |
"shared_kwargs = dict(\n", | |
"    source_df=tracts,\n", | |
"    target_df=zip_codes,\n", | |
"    intensive_variables=['pct_poverty'],\n", | |
"    extensive_variables=['EMP_MALE'],\n", | |
"    categorical_variables=['cat'],\n", | |
")\n", | |
"results = area_interpolate(table=table, **shared_kwargs)\n", | |
"results_csr = area_interpolate(table=csr, **shared_kwargs)\n", | |
"\n", | |
"# the sparse format of the table must not change the interpolated values\n", | |
"assert_geodataframe_equal(results, results_csr)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## CSC matrix\n", | |
"\n", | |
"Time `area_interpolate` with the table converted via `table.tocsc()`." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 18, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"csc = table.tocsc()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 19, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"15.9 ms ± 74.7 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n" | |
] | |
} | |
], | |
"source": [ | |
"%%timeit\n", | |
"results_csc = area_interpolate(\n", | |
" source_df=tracts, \n", | |
" target_df=zip_codes, \n", | |
" intensive_variables=['pct_poverty'], \n", | |
" extensive_variables=['EMP_MALE'],\n", | |
" categorical_variables=['cat'],\n", | |
" table=csc\n", | |
")" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## LIL matrix\n", | |
"\n", | |
"Time `area_interpolate` with the table converted via `table.tolil()`." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 28, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"lil = table.tolil()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 29, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"35.3 ms ± 556 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)\n" | |
] | |
} | |
], | |
"source": [ | |
"%%timeit\n", | |
"results_lil = area_interpolate(\n", | |
" source_df=tracts, \n", | |
" target_df=zip_codes, \n", | |
" intensive_variables=['pct_poverty'], \n", | |
" extensive_variables=['EMP_MALE'],\n", | |
" categorical_variables=['cat'],\n", | |
" table=lil\n", | |
")" | |
] | |
} | |
], | |
"metadata": { | |
"interpreter": { | |
"hash": "8d1b2c984ad473d756980598d6fae8279815dc9c89a9d51a262cfb04eba7ee8f" | |
}, | |
"kernelspec": { | |
"display_name": "Python 3.9.7 ('geo_dev')", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.9.7" | |
}, | |
"orig_nbformat": 4 | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment