Skip to content

Instantly share code, notes, and snippets.

@kurgm
Created July 24, 2022 07:55
Show Gist options
  • Save kurgm/7f0cafe83760547b41a0238e2c3bd638 to your computer and use it in GitHub Desktop.
Save kurgm/7f0cafe83760547b41a0238e2c3bd638 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python3
# usage : ./count_by_pattern.py path/to/dump_newest_only.txt < template.txt
import argparse
import os.path
import re
import sys
import time
from typing import NamedTuple, Optional, Pattern, Sequence, TextIO, Union
class Stat:
    """Mutable tally attached to a single compiled pattern.

    Instances carry exactly three attributes (fixed via ``__slots__``):
    the compiled ``pattern``, a running ``count`` that starts at zero, and
    a ``sample`` string that starts as the placeholder ``"notfound"``.
    """

    __slots__ = ("pattern", "count", "sample")

    def __init__(self, pattern: Pattern[str]):
        # Placeholder values first; they stay in place until something
        # outside this class updates them.
        self.sample = "notfound"
        self.count = 0
        self.pattern = pattern
class StatLine(NamedTuple):
    """A template line that was recognised as a statistics row."""

    # Leading columns of the row, kept as text so they can be echoed back.
    desc_type: str
    # The pattern column exactly as written in the template (delimiters
    # included), echoed back unchanged on output.
    desc_pattern: str
    # Counter object holding the compiled pattern and its running tally.
    stat: Stat
# One line of the template page: either a parsed statistics row or the raw
# text of any other line.
PageLine = Union[StatLine, str]
def parse_stat_line(line: str) -> Optional[StatLine]:
    """Parse one template line into a ``StatLine`` if it is a statistics row.

    A statistics row starts with ``","``, splits into exactly six
    comma-separated columns, and has a sixth column of the form
    ``>regex<``.  Anything else is not a statistics row.

    Because ``","`` is the column separator, the regex column cannot contain
    an ASCII comma.  A fullwidth comma (U+FF0C) in the regex is therefore
    translated to ``","`` before the pattern is compiled.

    Args:
        line: A template line.  The ``<`` check is made against the very end
            of the string, so trailing whitespace or a newline must already
            have been removed.

    Returns:
        A ``StatLine`` holding the first three columns re-joined with
        ``","``, the sixth column verbatim, and a fresh ``Stat`` for the
        compiled regex; ``None`` when the line is not a statistics row.

    Raises:
        re.error: If the text between ``>`` and ``<`` is not a valid regex.
    """
    if not line.startswith(","):
        return None
    columns = line.split(",")
    if len(columns) != 6:
        return None
    desc_pattern = columns[5]
    if not (desc_pattern.startswith(">") and desc_pattern.endswith("<")):
        return None
    # NOTE(review): the previous code called .replace(",", ","), which is a
    # no-op.  The fullwidth comma is presumed to be the intended stand-in
    # for "," inside a pattern -- confirm against a real template.
    regex = desc_pattern[1:-1].replace("\uff0c", ",")
    return StatLine(
        desc_type=",".join(columns[:3]),
        desc_pattern=desc_pattern,
        stat=Stat(re.compile(regex)))
def print_output(lines: Sequence[PageLine], dumppath: str) -> None:
    """Write the filled-in template to standard output.

    Statistics rows are re-emitted with their count and sample inserted.
    Lines beginning with ``"filename : "``, ``"filesize : "`` or
    ``"timestamp : "`` are replaced by the base name, byte size and
    modification time of ``dumppath``.  Every other line is written back
    unchanged.

    Args:
        lines: Template content, one entry per line.
        dumppath: Path of the dump file the metadata lines describe.
    """
    for entry in lines:
        if isinstance(entry, StatLine):
            tally = entry.stat
            print(
                f"{entry.desc_type},{tally.count},[[{tally.sample}]],"
                f"{entry.desc_pattern}")
        elif entry.startswith("filename : "):
            print(f"filename : {os.path.basename(dumppath)}")
        elif entry.startswith("filesize : "):
            print(f"filesize : {os.path.getsize(dumppath)}")
        elif entry.startswith("timestamp : "):
            print(f"timestamp : {time.ctime(os.path.getmtime(dumppath))}")
        else:
            # Raw template text still carries its own line ending.
            print(entry, end="")
def count_by_pattern(dumppath: str, template: TextIO) -> None:
    """Count dump lines matching each template pattern and print the result.

    The template is read fully and each line is either parsed into a
    ``StatLine`` or kept as raw text.  The dump is then scanned once: a
    line is considered only if, after stripping, it splits on ``"|"`` into
    exactly three columns.  Each pattern is matched (anchored at the start)
    against the whole stripped line; on a match the pattern's count is
    incremented and its sample becomes the stripped first column, so the
    sample finally reported is the last matching entry.  The filled-in
    template is written to standard output.

    Args:
        dumppath: Path of the dump file to scan.
        template: Open text stream yielding the template lines.
    """
    lines: list[PageLine] = [
        parse_stat_line(line.rstrip()) or line
        for line in template
    ]
    stats = [
        line.stat
        for line in lines
        if isinstance(line, StatLine)
    ]
    # Use a context manager so the dump file handle is closed even if a
    # line fails to decode or matching raises.
    with open(dumppath) as dump:
        for line in dump:
            stripped_line = line.strip()
            columns = stripped_line.split("|")
            if len(columns) != 3:
                continue
            name = columns[0].strip()
            for stat in stats:
                if stat.pattern.match(stripped_line):
                    stat.count += 1
                    stat.sample = name
    print_output(lines, dumppath)
def main() -> None:
    """Command-line entry point: parse arguments and run the count.

    Takes the dump path as a positional argument and an optional
    ``--template FILE``; the template is read from standard input when the
    option is absent or given without a value.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("dump", help="path of the dump file to scan")
    # nargs="?" lets --template appear without a value; const makes that
    # case fall back to stdin instead of producing None, which would crash
    # when the template is iterated.
    parser.add_argument(
        "--template", nargs="?", type=argparse.FileType("r"),
        const=sys.stdin, default=sys.stdin,
        help="template file (default: standard input)")
    args = parser.parse_args()
    count_by_pattern(args.dump, args.template)
# Run the command-line interface only when executed as a script, not when
# the module is imported.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment