Skip to content

Instantly share code, notes, and snippets.

@honno
Created March 9, 2020 10:53
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save honno/319cfb7859b987336cf00f08e21e475f to your computer and use it in GitHub Desktop.
Save honno/319cfb7859b987336cf00f08e21e475f to your computer and use it in GitHub Desktop.
#!/usr/bin/env python3
import argparse
import re
import sys
from collections import Counter
# Matches a run of ASCII uppercase letters; it is applied with ``match`` so
# an identifier is only recognised at the very start of a line.
id_pattern = re.compile(r"[A-Z]+")


def count_ids(lines):
    """Count the leading uppercase identifiers found in *lines*.

    Args:
        lines: Iterable of strings (for example an open text file).

    Returns:
        A ``Counter`` mapping each identifier to the number of lines that
        start with it, in first-seen order. Lines that do not begin with
        an uppercase letter are ignored.
    """
    id_freq = Counter()
    for line in lines:
        match = id_pattern.match(line)
        # Test for "no match" explicitly rather than catching the
        # AttributeError raised by calling ``.group`` on ``None``.
        if match is not None:
            id_freq[match.group(0)] += 1
    return id_freq


def main(argv=None):
    """Read ``infile``, tally its leading identifiers and write ``outfile``.

    Each output line has the form ``<identifier>,<count>``.

    Args:
        argv: Command-line arguments without the program name; ``None``
            makes argparse fall back to ``sys.argv[1:]``.

    Returns:
        ``0`` on success, for use as the process exit status.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('infile')
    parser.add_argument('outfile')
    args = parser.parse_args(argv)

    # Count while streaming the file instead of collecting every id first.
    with open(args.infile) as arff:
        id_freq = count_ids(arff)

    # Plain 'w' suffices: the output file is only written, never read back.
    with open(args.outfile, 'w') as out:
        out.writelines(f'{id_},{count}\n' for id_, count in id_freq.items())
    return 0


if __name__ == "__main__":
    sys.exit(main())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment