Skip to content

Instantly share code, notes, and snippets.

@chapter09
Created June 6, 2017 14:14
Show Gist options
  • Save chapter09/4c25e2efbb856b804e1c5fdf95616181 to your computer and use it in GitHub Desktop.
Save chapter09/4c25e2efbb856b804e1c5fdf95616181 to your computer and use it in GitHub Desktop.
Parsing tpc address & affiliation
#!/usr/local/bin/python3
import codecs
import re
# Matches a line that ends with a parenthesised group, e.g.
# " Jane Doe (Some University)".  `$` also matches just before a single
# trailing "\n", so lines read from a file still match.
_PAREN_LINE = re.compile(r'.*\(.*\)$')


def extract_name(line):
    """Return the name portion of a line: the text before the first "(".

    The first character of that text is dropped before stripping whitespace
    (presumably a leading marker/indent character in the data files --
    confirm against the actual input).  The same rule is applied to both
    input files so that the names can be used as lookup keys.
    """
    return line.split("(")[0][1:].strip()


def parse_tpc_line(line):
    """Parse one line of the address file into ``(name, addr)``.

    ``addr`` is the second comma-separated field of the line, stripped of
    surrounding whitespace (so it keeps a trailing ")" if the field has one).

    Returns ``None`` for lines that carry no usable entry: lines without a
    "(" and lines without a comma (previously the latter raised IndexError).
    """
    if "(" not in line:
        return None
    fields = line.split(",")
    if len(fields) < 2:
        return None
    return extract_name(line), fields[1].strip()


def load_addresses(path):
    """Read the UTF-8 file at *path* and return a ``{name: addr}`` dict.

    If a name occurs more than once, the last occurrence wins.
    """
    name_addr = {}
    with codecs.open(path, encoding='utf-8') as infd:
        for line in infd:
            entry = parse_tpc_line(line)
            if entry is not None:
                name, addr = entry
                name_addr[name] = addr
    return name_addr


def append_address(line, name_addr):
    """Return *line* with ", <addr>" substituted for its closing ")".

    Only lines that end in a parenthesised group and contain no comma are
    rewritten; every other line is returned unchanged.  The replacement text
    comes from ``name_addr``, keyed by ``extract_name(line)``.

    The line's trailing "\\n" (if any) is preserved.  Slicing is done relative
    to the actual line ending rather than with a fixed ``[:-2]``, so a final
    line without a newline no longer loses an extra character.

    A name that is missing from ``name_addr`` leaves the line unchanged and
    emits a warning on stderr instead of aborting with KeyError halfway
    through the output file.
    """
    if not _PAREN_LINE.search(line) or ',' in line:
        return line

    name = extract_name(line)
    addr = name_addr.get(name)
    if addr is None:
        import sys  # local import: keeps this block self-contained
        sys.stderr.write("warning: no address found for {!r}\n".format(name))
        return line

    newline = "\n" if line.endswith("\n") else ""
    # Drop the line ending and the closing ")" that the regex guarantees.
    stem = line[:len(line) - len(newline) - 1]
    return stem + ", " + addr + newline


def main():
    """Copy ./tpc.html to ./tpc_new.html, adding addresses taken from ./tpc."""
    name_addr = load_addresses('./tpc')
    # Both files are managed by `with`, so the output is flushed and closed
    # even if processing fails part-way.
    with codecs.open('./tpc.html', encoding='utf-8') as old_fd, \
            open('./tpc_new.html', 'w', encoding='utf-8') as new_fd:
        for line in old_fd:
            new_fd.write(append_address(line, name_addr))


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment