Skip to content

Instantly share code, notes, and snippets.

@ubershmekel
Created June 6, 2020 07:53
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ubershmekel/82d84c0834e90aeb8b1b46e42833fd03 to your computer and use it in GitHub Desktop.
Save ubershmekel/82d84c0834e90aeb8b1b46e42833fd03 to your computer and use it in GitHub Desktop.
import glob
import os
# Placeholder abbreviation used for reports that are not tied to a known place.
unknown_location_acronym = 'tbd'

# Full name -> two-letter abbreviation, including DC (under two spellings) and
# the territories. Adapted from https://gist.github.com/rogerallen/1583593
us_state_to_abbrev = {
    'Alabama': 'AL',
    'Alaska': 'AK',
    'American Samoa': 'AS',
    'Arizona': 'AZ',
    'Arkansas': 'AR',
    'California': 'CA',
    'Colorado': 'CO',
    'Connecticut': 'CT',
    'Delaware': 'DE',
    'District of Columbia': 'DC',
    'Florida': 'FL',
    'Georgia': 'GA',
    'Guam': 'GU',
    'Hawaii': 'HI',
    'Idaho': 'ID',
    'Illinois': 'IL',
    'Indiana': 'IN',
    'Iowa': 'IA',
    'Kansas': 'KS',
    'Kentucky': 'KY',
    'Louisiana': 'LA',
    'Maine': 'ME',
    'Maryland': 'MD',
    'Massachusetts': 'MA',
    'Michigan': 'MI',
    'Minnesota': 'MN',
    'Mississippi': 'MS',
    'Missouri': 'MO',
    'Montana': 'MT',
    'Nebraska': 'NE',
    'Nevada': 'NV',
    'New Hampshire': 'NH',
    'New Jersey': 'NJ',
    'New Mexico': 'NM',
    'New York': 'NY',
    'North Carolina': 'NC',
    'North Dakota': 'ND',
    'Northern Mariana Islands': 'MP',
    'Ohio': 'OH',
    'Oklahoma': 'OK',
    'Oregon': 'OR',
    'Pennsylvania': 'PA',
    'Puerto Rico': 'PR',
    'Rhode Island': 'RI',
    'South Carolina': 'SC',
    'South Dakota': 'SD',
    'Tennessee': 'TN',
    'Texas': 'TX',
    'Utah': 'UT',
    'Vermont': 'VT',
    'Virgin Islands': 'VI',
    'Virginia': 'VA',
    'Washington': 'WA',
    'Washington DC': 'DC',
    'West Virginia': 'WV',
    'Wisconsin': 'WI',
    'Wyoming': 'WY',
    # Pseudo-entry so the "Unknown Location" report resolves like any state.
    'Unknown Location': unknown_location_acronym,
}
def gen_id_text(text, state_abbrev):
    """Insert a ``pb-id`` line ahead of every ``**Links**`` marker in a report.

    Every line of *text* is stripped of surrounding whitespace. A line that
    starts with ``## `` is treated as a city heading: its text, with spaces
    and periods removed and lower-cased, becomes the city part of the ids
    that follow. For each line containing ``**Links**`` the line
    ``pb-id: <state_abbrev>-<city>-<n>`` plus a blank line is emitted just
    before it. ``n`` starts at 1 and grows by one per marker; it is NOT reset
    when a new city heading is seen, so it numbers markers across the whole
    text.

    A marker whose closest preceding non-blank line already starts with
    ``pb-id:`` is left untouched (but still counted), so running the function
    on its own output does not pile up duplicate id lines.

    Lines that mention "link" (case-insensitive) without containing the exact
    marker are reported on stdout and otherwise passed through.

    Args:
        text: Markdown source of one report.
        state_abbrev: Lower-case abbreviation used as the first id component.
            The values ``unknown_location_acronym`` and ``'dc'`` also seed the
            city component, which stays in effect until a city heading
            replaces it.

    Returns:
        The rewritten text with lines joined by ``'\\n'``. A trailing line
        break in *text* is kept as a single ``'\\n'``.
    """
    city_prefix = '## '
    links_marker = '**Links**'
    id_prefix = 'pb-id:'

    # Reports with no real city headings get a fixed city component up front.
    city_abbrev = ''
    if state_abbrev == unknown_location_acronym:
        city_abbrev = unknown_location_acronym
    if state_abbrev == 'dc':
        city_abbrev = 'dc'

    new_lines = []
    city_index = 1
    last_content = ''  # most recent non-blank line written to new_lines
    for raw_line in text.splitlines():
        line = raw_line.strip()
        if line.startswith(city_prefix):
            city = line[len(city_prefix):].strip()
            # transform the city name to a more id-friendly string
            city_abbrev = city.replace(' ', '').replace('.', '').lower()
        if links_marker in line:
            # Links are starting, use this section to add an id -- unless an
            # earlier run already placed one directly above the marker.
            if not last_content.startswith(id_prefix):
                new_lines.append(
                    f'pb-id: {state_abbrev}-{city_abbrev}-{city_index}'
                )
                new_lines.append('')
            city_index += 1
        elif 'link' in line.lower():
            print(f"Found a links in: {line} but did not process")
        new_lines.append(line)
        if line:
            last_content = line

    result = '\n'.join(new_lines)
    # splitlines() discards the final line break; put it back so rewriting a
    # file does not strip its terminating newline.
    if text.endswith(('\n', '\r')):
        result += '\n'
    return result
if __name__ == "__main__":
    # Rewrite every report under ../reports (relative to this script) in
    # place, adding pb-id lines via gen_id_text().
    #
    # abspath() first: when the script is started from its own directory on
    # older interpreters, __file__ has no directory part and
    # os.path.relpath('') raises ValueError.
    src_dir = os.path.dirname(os.path.abspath(__file__))
    md_dir = os.path.join(src_dir, '..', 'reports')
    for md_file in sorted(glob.glob(os.path.join(md_dir, '*.md'))):
        fname = os.path.basename(md_file)
        print(f"Reading '{fname}'")
        # The file name minus its extension is the state's full name.
        state_name, _ext = os.path.splitext(fname)
        try:
            state_abbrev = us_state_to_abbrev[state_name].lower()
        except KeyError:
            # A stray markdown file must not abort the run after some
            # reports have already been rewritten.
            print(f"Skipping '{fname}': no abbreviation known for '{state_name}'")
            continue
        # Bytes in / bytes out with explicit UTF-8, so the platform's default
        # encoding and newline translation never come into play.
        with open(md_file, 'rb') as fin:
            text = fin.read().decode('utf-8')
        new_text = gen_id_text(text=text, state_abbrev=state_abbrev)
        with open(md_file, 'wb') as fout:
            fout.write(new_text.encode('utf-8'))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment