Created
June 6, 2020 07:53
-
-
Save ubershmekel/82d84c0834e90aeb8b1b46e42833fd03 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import glob | |
import os | |
# Placeholder abbreviation for reports whose location is not known.
unknown_location_acronym = 'tbd'

# Maps a full US state / territory name to its two-letter abbreviation.
# Table taken from https://gist.github.com/rogerallen/1583593, extended with
# a 'Washington DC' alias and an 'Unknown Location' entry that maps to the
# placeholder above.
us_state_to_abbrev = {
    # A
    'Alabama': 'AL',
    'Alaska': 'AK',
    'American Samoa': 'AS',
    'Arizona': 'AZ',
    'Arkansas': 'AR',
    # C - D
    'California': 'CA',
    'Colorado': 'CO',
    'Connecticut': 'CT',
    'Delaware': 'DE',
    'District of Columbia': 'DC',
    # F - I
    'Florida': 'FL',
    'Georgia': 'GA',
    'Guam': 'GU',
    'Hawaii': 'HI',
    'Idaho': 'ID',
    'Illinois': 'IL',
    'Indiana': 'IN',
    'Iowa': 'IA',
    # K - M
    'Kansas': 'KS',
    'Kentucky': 'KY',
    'Louisiana': 'LA',
    'Maine': 'ME',
    'Maryland': 'MD',
    'Massachusetts': 'MA',
    'Michigan': 'MI',
    'Minnesota': 'MN',
    'Mississippi': 'MS',
    'Missouri': 'MO',
    'Montana': 'MT',
    # N
    'Nebraska': 'NE',
    'Nevada': 'NV',
    'New Hampshire': 'NH',
    'New Jersey': 'NJ',
    'New Mexico': 'NM',
    'New York': 'NY',
    'North Carolina': 'NC',
    'North Dakota': 'ND',
    'Northern Mariana Islands': 'MP',
    # O - U
    'Ohio': 'OH',
    'Oklahoma': 'OK',
    'Oregon': 'OR',
    'Pennsylvania': 'PA',
    'Puerto Rico': 'PR',
    'Rhode Island': 'RI',
    'South Carolina': 'SC',
    'South Dakota': 'SD',
    'Tennessee': 'TN',
    'Texas': 'TX',
    'Utah': 'UT',
    # V - W
    'Vermont': 'VT',
    'Virgin Islands': 'VI',
    'Virginia': 'VA',
    'Washington': 'WA',
    'Washington DC': 'DC',  # alias of 'District of Columbia'
    'West Virginia': 'WV',
    'Wisconsin': 'WI',
    'Wyoming': 'WY',
    # Not a real state: shares the placeholder defined above.
    'Unknown Location': unknown_location_acronym,
}
def gen_id_text(text, state_abbrev):
    """Return *text* with a ``pb-id:`` line inserted above each ``**Links**`` line.

    The text is processed line by line; every line is stripped of leading
    and trailing whitespace and the result is re-joined with ``'\\n'``.

    * A line starting with ``'## '`` is treated as a city heading. The city
      name with spaces and dots removed, lower-cased, becomes the city part
      of subsequent ids.
    * A line containing ``'**Links**'`` gets ``pb-id: <state>-<city>-<n>``
      plus one blank line inserted in front of it, unless the closest
      non-blank line above it is already a ``pb-id:`` line. This makes the
      function safe to run repeatedly on the same file; an existing id is
      kept verbatim.
    * Any other line containing ``link`` (case-insensitive) is reported on
      stdout and left unchanged.

    Args:
        text: Markdown source of one report file.
        state_abbrev: Lower-case state abbreviation used as the id prefix.
            For ``'dc'`` and the unknown-location placeholder the same
            value is also used as the initial city part.

    Returns:
        The rewritten text. A trailing newline in *text* is preserved.
    """
    city_prefix = '## '
    id_marker = 'pb-id:'

    # These two pseudo-states start out with the city part equal to the
    # state part; a later '## ' heading still overrides it.
    if state_abbrev in (unknown_location_acronym, 'dc'):
        city_abbrev = state_abbrev
    else:
        city_abbrev = ''

    new_lines = []
    # NOTE(review): the counter runs across the whole text and is not reset
    # at each city heading - confirm that state-wide numbering is intended.
    city_index = 1
    for raw_line in text.splitlines():
        line = raw_line.strip()

        if line.startswith(city_prefix):
            city = line[len(city_prefix):].strip()
            # Transform the city name to a more id-friendly string.
            city_abbrev = city.replace(' ', '').replace('.', '').lower()

        if '**Links**' in line:
            # Links are starting, use this section to add an id - but only
            # when a previous run has not already put one right above it.
            previous = next((seen for seen in reversed(new_lines) if seen), '')
            if not previous.startswith(id_marker):
                new_lines.append(f'pb-id: {state_abbrev}-{city_abbrev}-{city_index}')
                new_lines.append('')
            # Count the section either way so numbering stays in step.
            city_index += 1
        elif 'link' in line.lower():
            print(f"Found a links in: {line} but did not process")

        new_lines.append(line)

    result = '\n'.join(new_lines)
    # splitlines() discards the final line terminator; put it back so the
    # rewritten file still ends with a newline.
    if text.endswith(('\n', '\r')):
        result += '\n'
    return result
if __name__ == "__main__":
    # Rewrite every report in ../reports (relative to this script) in place,
    # adding the pb-id lines produced by gen_id_text.
    #
    # abspath() first: when the script is started from its own directory,
    # dirname(__file__) can be '' and os.path.relpath('') raises ValueError.
    src_dir = os.path.dirname(os.path.abspath(__file__))
    md_dir = os.path.join(src_dir, '..', 'reports')
    for md_file in glob.glob(os.path.join(md_dir, '*.md')):
        fname = os.path.basename(md_file)
        print(f"Reading '{fname}'")

        # The file name without its extension is the state name; an unknown
        # name raises KeyError before the file is touched.
        state_name = os.path.splitext(fname)[0]
        state_abbrev = us_state_to_abbrev[state_name].lower()

        # Binary mode plus explicit UTF-8 avoids platform newline/encoding
        # translation. The read handle is closed before the file is
        # reopened for writing.
        with open(md_file, 'rb') as fin:
            text = fin.read().decode('utf-8')

        new_text = gen_id_text(text=text, state_abbrev=state_abbrev)

        # Leave files untouched when there is nothing to change.
        if new_text != text:
            with open(md_file, 'wb') as fout:
                fout.write(new_text.encode('utf-8'))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment