|
__doc__ = """ |
|
Quick Check for specific Period of AirNow |
|
========================================= |
|
|
|
This script shows how to get an estimate of monitor coverage using census |
|
population data and EPA's Remote Sensing Information Gateway and AirNow. |
|
|
|
To summarize: |
|
|
|
1. Query a time period for Ozone and PM measurements. |
|
2. Take unique locations that reported for any hour. |
|
3. Spatially intersect with county boundaries. |
|
4. Sum population with ozone, pm, both or neither measurements. |
|
|
|
Expected output |
|
--------------- |
|
|
|
If run as-is, you should get the following output. |
|
|
|
| | Pop [M] | County% | Pop% | |
|
|:---------------|----------:|----------:|-------:| |
|
| Ozone and PM25 | 210.8 | 15.3 | 62.8 | |
|
| Ozone Only | 26.7 | 7.3 | 8 | |
|
| PM25 Only | 24.9 | 6.1 | 7.4 | |
|
| Neither | 73.1 | 71.3 | 21.8 | |
|
|
|
|
|
You can modify the dates (bdate, edate) for period of interest to get similar output. |
|
|
|
Requirements |
|
------------ |
|
|
|
Requires Python3, numpy, pandas, requests, geopandas and pyrsig |
|
on most systems, these can be satisfied with the command |
|
|
|
python -m pip install numpy pandas requests geopandas pyrsig |
|
|
|
Requires US Census Tiger Line County Shapefile as a zip file. This can be |
|
downloaded at https://www2.census.gov/geo/tiger/TIGER2024/COUNTY/tl_2024_us_county.zip |
|
|
|
""" |
|
__version__ = '1.0.0' |
|
|
|
import numpy as np |
|
import pandas as pd |
|
import pyrsig |
|
import requests |
|
import geopandas as gpd |
|
|
|
bdate = '2024-07-01T00Z' |
|
edate = '2024-07-07T00Z' |
|
|
|
api = pyrsig.RsigApi() |
|
|
|
# Download Ozone and PM data for time period |
|
pmdf = api.to_dataframe('airnow.pm25', bdate=bdate, edate=edate) |
|
o3df = api.to_dataframe('airnow.ozone', bdate=bdate, edate=edate) |
|
|
|
|
|
cntydf = gpd.read_file('tl_2024_us_county.zip') |
|
# Get county total population estimate B01001_001E for all counties as a dataframe |
|
r = requests.get('https://api.census.gov/data/2023/acs/acs5?get=B01001_001E,GEO_ID&for=county') |
|
j = r.json() |
|
df = pd.DataFrame.from_records(j[1:], columns=j[0]) |
|
df['POP'] = df['B01001_001E'].astype('l') |
|
# Add population to county geodataframe |
|
cntydf = pd.merge(cntydf, df[['GEO_ID', 'POP']], left_on='GEOIDFQ', right_on='GEO_ID', how='left') |
|
|
|
# Create unique site locations for O3 and PM25 |
|
o3gdf = o3df.groupby(['LATITUDE(deg)', 'LONGITUDE(deg)', 'SITE_NAME'], as_index=False).first() |
|
o3gdf = gpd.GeoDataFrame(o3gdf, geometry=gpd.points_from_xy(o3gdf['LONGITUDE(deg)'], o3gdf['LATITUDE(deg)']), crs=4326).to_crs(cntydf.crs) |
|
pmgdf = pmdf.groupby(['LATITUDE(deg)', 'LONGITUDE(deg)', 'SITE_NAME'], as_index=False).first() |
|
pmgdf = gpd.GeoDataFrame(pmgdf, geometry=gpd.points_from_xy(pmgdf['LONGITUDE(deg)'], pmgdf['LATITUDE(deg)']), crs=4326).to_crs(cntydf.crs) |
|
|
|
# Identify which counties have ozone, pm, or both |
|
cntydf['haso3'] = cntydf.geometry.intersects(o3gdf.geometry.union_all()) |
|
cntydf['haspm'] = cntydf.geometry.intersects(pmgdf.geometry.union_all()) |
|
cntydf['hasboth'] = cntydf['haso3'] & cntydf['haspm'] |
|
|
|
# Store as a categorical variable |
|
cntydf['MON'] = 'Neither' |
|
cntydf.loc[cntydf.haso3, 'MON'] = 'Ozone Only' |
|
cntydf.loc[cntydf.haspm, 'MON'] = 'PM25 Only' |
|
cntydf.loc[cntydf.hasboth, 'MON'] = 'Ozone and PM25' |
|
|
|
# Create a national summary with millions of people, percent of people, and percent of county. |
|
popdf = cntydf.groupby('MON')[['POP']].agg(POP=('POP', 'sum'), CNT=('POP', 'count')) |
|
popdf['County%'] = popdf['CNT'] / popdf['CNT'].sum() * 100 |
|
popdf['Pop%'] = popdf['POP'] / popdf['POP'].sum() * 100 |
|
popdf['Pop [M]'] = popdf['POP'] / 1e6 |
|
popdf = popdf.loc[['Ozone and PM25', 'Ozone Only', 'PM25 Only', 'Neither']] |
|
popdf.index.name = None |
|
|
|
# Print Output as a Markdown |
|
print(popdf.round(1)[['Pop [M]', 'County%', 'Pop%']].to_markdown()) |
|
# save results to disk |
|
popdf.to_csv(f'censuscounties_{bdate}_{edate}.csv') |