Skip to content

Instantly share code, notes, and snippets.

@afnanenayet
Last active February 14, 2022 04:27
Show Gist options
  • Save afnanenayet/1da89b4dd2d1517a9cbbfaace6f97732 to your computer and use it in GitHub Desktop.
Save afnanenayet/1da89b4dd2d1517a9cbbfaace6f97732 to your computer and use it in GitHub Desktop.
Add FIPS data to a CSV
"""A script to augment a CSV with FIPS data.
"""
import requests
import requests_futures
import pandas as pd
from typing import cast, Dict, Any
import pdb
import numpy as np
from loguru import logger
import json
from requests_futures.sessions import FuturesSession
from concurrent.futures import as_completed
# Module-level list; none of the functions visible in this file read or
# write it.
results = list()

# Keyword options describing a readable file path (a file, not a directory).
# NOTE(review): these look like `click.Path` arguments, but nothing visible
# in this file consumes them -- confirm before relying on that.
READABLE_FILE_PARAMS = dict(
    file_okay=True,
    dir_okay=False,
    path_type="Path",
)
def get_fips_request(row) -> Dict[str, Any]:
    """Build the query parameters for the FIPS lookup of a single row.

    No request is sent here; this only assembles the parameters.

    Args:
        row: A record exposing ``lat_tract`` and ``long_tract`` through item
            access (for example a DataFrame row).

    Returns:
        A dict with the keys ``latitude``, ``longitude`` and ``format``
        (always ``"json"``).
    """
    latitude, longitude = row["lat_tract"], row["long_tract"]
    return dict(latitude=latitude, longitude=longitude, format="json")
def get_fips_data(row, timeout: float = 10.0) -> str:
    """Get FIPS data for a row in the DataFrame.

    We constrain this operation to a function so we can map over each row in
    the DataFrame. The lookup is best-effort: once the coordinates have been
    read from ``row``, any failure is reported and signalled with the
    sentinel string ``"error"`` instead of being raised.

    Args:
        row: The row to retrieve FIPS information for. It must expose
            ``lat_tract`` and ``long_tract`` through item access.
        timeout: Seconds to wait for the API before giving up, so a stalled
            connection cannot block the caller indefinitely.

    Returns:
        The ``County`` -> ``FIPS`` value from the API response, or
        ``"error"`` if the request fails, the response status is not OK, or
        the body cannot be decoded.
    """
    url = "https://geo.fcc.gov/api/census/block/find"
    payload = {
        "latitude": row["lat_tract"],
        "longitude": row["long_tract"],
        "format": "json",
    }
    try:
        response = requests.get(url, params=payload, timeout=timeout)
        # Check the status before touching the body so an error page is
        # never parsed as if it were a valid result.
        if not response.ok:
            return "error"
        return response.json()["County"]["FIPS"]
    except Exception:
        # Deliberately broad: this function must never raise while being
        # mapped over a DataFrame. The coordinates are taken from the payload
        # (item access) so the handler works for any kind of row, not only
        # those that also support attribute access.
        print(
            "Got an error trying to get FIPS code for "
            f"lat: {payload['latitude']} lon: {payload['longitude']}"
        )
        return "error"
def _await_response(future):
    """Return the completed response of ``future``, or ``None`` if the request failed."""
    try:
        return future.result()
    except requests.RequestException as exc:
        logger.error(f"Request failed: {exc}")
        return None


def _fips_from_response(resp) -> str:
    """Extract the county FIPS code from ``resp``, or ``"error"`` if it is unavailable."""
    if resp is None or not resp.ok:
        logger.error("Response not OK")
        return "error"
    try:
        return resp.json()["County"]["FIPS"]
    except (ValueError, KeyError, TypeError) as e:
        # ValueError: body is not valid JSON; KeyError/TypeError: the JSON
        # does not have the expected County -> FIPS structure.
        logger.error(f"Error decoding response: {e}")
        return "error"


def augment_fips(
    input_fname: str = "urbanization-census-tract.csv",
    output_fname: str = "urbanization-census-tract-updated.csv",
    timeout: float = 30.0,
) -> None:
    """Augment a CSV with FIPS information.

    Reads ``input_fname``, looks up a FIPS code for every row from its
    ``lat_tract``/``long_tract`` columns, stores the codes in a new ``FIPS``
    column and writes the result to ``output_fname``. Rows whose lookup fails
    get the sentinel value ``"error"``; a single failed request does not
    abort the run.

    Args:
        input_fname: Path of the CSV to read.
        output_fname: Path of the augmented CSV to write.
        timeout: Seconds to wait for each API request before treating it as
            failed.
    """
    url = "https://geo.fcc.gov/api/census/block/find"

    df = cast(pd.DataFrame, pd.read_csv(input_fname))
    df["lat_tract"] = df["lat_tract"].astype(float)
    df["long_tract"] = df["long_tract"].astype(float)
    logger.info("Loaded DataFrame")

    # Map over each row to build the request parameters.
    payloads = df.apply(get_fips_request, axis=1)

    # The context manager closes the session once every response has been
    # collected.
    with FuturesSession() as session:
        futures = [
            session.get(url, params=payload, timeout=timeout)
            for payload in payloads
        ]
        logger.info("Generated request queue")
        # Collect in submission order so each response lines up with its row.
        all_resps = [_await_response(future) for future in futures]

    fips_codes = [_fips_from_response(resp) for resp in all_resps]
    # dtype=object keeps the codes as plain strings.
    df["FIPS"] = np.array(fips_codes, dtype=object)
    # NOTE(review): with pandas' defaults the DataFrame index is written as
    # an extra leading column; pass index=False if that is not wanted.
    df.to_csv(output_fname)
# Run the augmentation only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    augment_fips()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment