Skip to content

Instantly share code, notes, and snippets.

@behitek
Last active July 7, 2022 03:12
Show Gist options
  • Save behitek/73efdb471a6beacbccee2a5bec9ac97e to your computer and use it in GitHub Desktop.
Save behitek/73efdb471a6beacbccee2a5bec9ac97e to your computer and use it in GitHub Desktop.
import sys
import re
# Command-line help text printed when the input file argument is missing.
USAGE = """
python data_reader.py <input_file>
"""
# The bare string below is documentation only (it is evaluated and discarded):
# it shows the layout of the input file that main() parses.
"""
File format:
<pattern>
<freq>3</freq>
<score> 36.242 </score><what>sleek</what>
<where>245 380 576</where>
</pattern>
<pattern>
<freq>3</freq>
<score> 36.242 </score><what>sleek</what>
<where>245 380 576</where>
</pattern>
<pattern>
<freq>3</freq>
<score> 36.242 </score><what>sleek</what>
<where>245 380 576</where>
</pattern>
<pattern>
<freq>3</freq>
<score> 36.242 </score><what>sleek</what>
<where>245 380 576</where>
</pattern>
...
"""
# Only enforce the argument count when executed as a script; importing this
# module must not terminate the importing program. sys.exit is used instead
# of the bare exit() helper, which is injected by the site module and is not
# guaranteed to exist (e.g. under `python -S`).
if __name__ == '__main__' and len(sys.argv) < 2:
    print(USAGE)
    sys.exit(0)
# Matches any single <...> tag; compiled once instead of on every call.
_TAG_RE = re.compile(r'<[^>]+>')


def _strip_tags(text):
    """Return *text* with every ``<...>`` tag removed."""
    return _TAG_RE.sub('', text)


def _read_tagged_line(lines, tag):
    """Return the next stripped line of *lines*, which must start with *tag*.

    Raises ValueError when the line starts with something else or the input
    is exhausted (an exhausted iterator is treated as an empty line).
    """
    line = next(lines, '').strip()
    if not line.startswith(tag):
        raise ValueError(
            "expected a line starting with %r, got %r" % (tag, line))
    return line


def main(input_file):
    """Parse a pattern file into a list of records.

    The file is read sequentially. Each line that starts with ``<pattern>``
    opens a record whose next three lines must be, in order, a ``<freq>``
    line, a combined ``<score>...</score><what>...</what>`` line and a
    ``<where>`` line. The line following those (normally ``</pattern>``) is
    skipped without being inspected. Lines outside a pattern are ignored.

    Args:
        input_file: Path of the file to read.

    Returns:
        A list of ``(freq, score, what, where)`` tuples in file order, where
        ``freq`` is an int, ``score`` a float, ``what`` a stripped string and
        ``where`` a list of ints.

    Raises:
        ValueError: If a line inside a pattern does not start with the
            expected tag, the score line has no ``</score>``, or a numeric
            field cannot be converted. (Explicit raises replace ``assert``
            so validation still happens under ``python -O``.)
    """
    records = []
    with open(input_file, 'r') as f:
        # The file object is its own iterator, so the helper can pull the
        # lines that belong to the current pattern from the same stream.
        for line in f:
            if not line.strip().startswith("<pattern>"):
                continue

            freq = int(_strip_tags(_read_tagged_line(f, "<freq>")))

            score_line = _read_tagged_line(f, "<score>")
            score_part, closing, what_part = score_line.partition("</score>")
            if not closing:
                raise ValueError(
                    "missing '</score>' in line %r" % (score_line,))
            score = float(_strip_tags(score_part))
            what = _strip_tags(what_part).strip()

            where_line = _read_tagged_line(f, "<where>")
            where = [int(x) for x in _strip_tags(where_line).split()]

            # Skip the line after <where> (expected to be </pattern>).
            next(f, None)
            records.append((freq, score, what, where))
    return records
if __name__ == '__main__':
    # Script entry point: parse the file named by the first command-line
    # argument and print the resulting list of records.
    input_path = sys.argv[1]
    print(main(input_path))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment