Skip to content

Instantly share code, notes, and snippets.

@leifulstrup
Last active January 15, 2018 15:10
Show Gist options
  • Save leifulstrup/3c661f427bf987866c76ec7035bd1141 to your computer and use it in GitHub Desktop.
Save leifulstrup/3c661f427bf987866c76ec7035bd1141 to your computer and use it in GitHub Desktop.
# getNAICSdescription(naicsCode): look up a NAICS code's title using the Code For America server
# http://naics.codeforamerica.org #includes details on queries
#!!! missing NAICS codes - key errors
# see https://www.census.gov/eos/www/naics/
# get the updated 2017 NAICS code listing (an .xlsx spreadsheet) here - https://www.census.gov/eos/www/naics/2017NAICS/2017_NAICS_Structure.xlsx
import requests
from bs4 import BeautifulSoup
import re
import ast
# Memoize titles in a module-level dict so that each NAICS code costs at most
# one (expensive) web query per process.
naicsDescriptionLookup = {}


def getNAICSdescription(naicsCode):
    """Return the title of a NAICS code, using the Code For America server.

    Titles are cached in ``naicsDescriptionLookup``; the web service is only
    queried the first time a given ``naicsCode`` is requested.

    Args:
        naicsCode: The NAICS code to look up. It is used both as the cache
            key and as the ``code`` query parameter.

    Returns:
        The ``'title'`` field of the server's JSON response for the code.

    Raises:
        KeyError: If the response is not a JSON object with a ``'title'``
            entry (presumably what happens for codes the server does not
            know -- confirm against the service).
        ValueError: If the response body is not valid JSON.
        requests.RequestException: If the HTTP request itself fails or
            times out.
    """
    try:
        return naicsDescriptionLookup[naicsCode]
    except KeyError:
        pass  # cache miss: fall through to the web query

    year = 2012  # last year Code For America has
    # Let requests build the query string so the values are URL-escaped, and
    # bound the wait so a stalled server cannot hang the caller forever.
    page = requests.get(
        'http://naics.codeforamerica.org/v0/q',
        params={'year': year, 'code': naicsCode},
        timeout=10,
    )

    # The body is JSON, not a Python literal: ast.literal_eval would reject
    # null/true/false and mishandle JSON escapes, so use the JSON parser.
    try:
        content = page.json()
    except ValueError as err:
        raise ValueError(
            'NAICS lookup for code {!r} returned a non-JSON response '
            '(HTTP status {})'.format(naicsCode, page.status_code)
        ) from err

    if not isinstance(content, dict) or 'title' not in content:
        # Keep KeyError as the "missing code" signal, but say which code.
        raise KeyError(
            'no title found for NAICS code {!r} (year {})'.format(
                naicsCode, year)
        )

    title = content['title']
    naicsDescriptionLookup[naicsCode] = title
    return title
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment