Last active
January 15, 2018 15:10
-
-
Save leifulstrup/3c661f427bf987866c76ec7035bd1141 to your computer and use it in GitHub Desktop.
getNAICSdescription(naicsCode) #using Code For America Server
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# http://naics.codeforamerica.org -- includes details on queries
# !!! known issue: NAICS codes missing from the server surface as key errors
# see https://www.census.gov/eos/www/naics/
# get the 2017 updated NAICS code listing (xlsx) here - https://www.census.gov/eos/www/naics/2017NAICS/2017_NAICS_Structure.xlsx
import ast
import json
import re

import requests
from bs4 import BeautifulSoup
# Memoization cache (NAICS code -> title), filled lazily so that each code
# costs at most one expensive web query per process.
naicsDescriptionLookup = {}


def getNAICSdescription(naicsCode):
    """Return the title of ``naicsCode`` using the Code for America NAICS server.

    The first request for a code queries the web service and stores the
    returned title in ``naicsDescriptionLookup``; later requests for the same
    code are answered from that cache without any network traffic.

    Args:
        naicsCode: NAICS code to look up; it is interpolated into the query
            URL and used as the cache key as-is.

    Returns:
        The value of the ``'title'`` field of the server's response.

    Raises:
        KeyError: the response carries no ``'title'`` (e.g. the code is not
            known to the server).
        ValueError: the response body is not valid JSON.
        requests.exceptions.RequestException: the HTTP request failed or
            timed out.
    """
    try:
        return naicsDescriptionLookup[naicsCode]
    except KeyError:
        pass  # cache miss: fall through to the web query below

    year = 2012  # last year Code For America has
    queryTemplate = 'http://naics.codeforamerica.org/v0/q?year={year!s}&code={naicsCode}'
    query = queryTemplate.format(year=year, naicsCode=naicsCode)

    # Always bound the wait; without a timeout a stalled server blocks forever.
    page = requests.get(query, timeout=30)

    # The body is JSON, not a Python literal: ast.literal_eval would choke on
    # true/false/null, so parse it with the json module instead.
    try:
        content = json.loads(page.content.decode("utf-8"))
    except ValueError as err:  # json.JSONDecodeError is a ValueError subclass
        raise ValueError(
            'NAICS server returned a non-JSON response for {0}'.format(query)
        ) from err

    try:
        title = content['title']
    except (KeyError, TypeError) as err:
        # Keep KeyError as the "unknown code" signal, but say which code.
        raise KeyError(
            'no NAICS title found for code {0!r} (year {1!s})'.format(naicsCode, year)
        ) from err

    # Failures are deliberately not cached, so a later call can retry.
    naicsDescriptionLookup[naicsCode] = title
    return title
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment