Skip to content

Instantly share code, notes, and snippets.

@BinarySpoon
Last active October 23, 2020 10:03
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save BinarySpoon/ecd87e3460e2088ae415b1b9c8efc937 to your computer and use it in GitHub Desktop.
Save BinarySpoon/ecd87e3460e2088ae415b1b9c8efc937 to your computer and use it in GitHub Desktop.
def get_list_of_university_towns(path='university_towns.txt', encoding='utf-8'):
    """Parse a university-towns text file into a State/RegionName table.

    Lines containing ``[edit]`` are treated as state headers; the marker is
    removed and the remainder becomes the current state. Every other
    non-blank line is treated as a town belonging to the most recently seen
    state (or to ``""`` if no header has been seen yet). For town lines,
    everything from the first `` (`` onward is discarded.

    Args:
        path: Text file to read. Defaults to ``university_towns.txt`` in the
            current working directory.
        encoding: Text encoding used to decode the file. Defaults to UTF-8
            rather than the platform default so results do not vary by OS.

    Returns:
        A ``pandas.DataFrame`` with columns ``State`` and ``RegionName``,
        one row per town line in file order, indexed from 0.

    Raises:
        OSError: If the file cannot be opened.

    Note:
        Relies on the name ``pd`` (pandas) already being imported at module
        level; this function does not import it itself.
    """
    rows = []
    state = ""
    with open(path, "r", encoding=encoding) as handle:
        for raw_line in handle:
            line = raw_line.strip()
            # Skip every whitespace-only line, not just single-character
            # ones, so blank lines never turn into empty RegionName rows.
            if not line:
                continue
            if '[edit]' in line:
                # Strip again in case whitespace separated name and marker.
                state = line.replace('[edit]', "").strip()
            else:
                # Keep only the text before the first " (" annotation.
                rows.append((state, line.partition(' (')[0]))
    # Build the frame once instead of growing it row by row.
    return pd.DataFrame(rows, columns=['State', 'RegionName'])
# Run the parser at module level; the returned DataFrame is not assigned here.
get_list_of_university_towns()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment