Last active
May 1, 2021 16:59
-
-
Save myl7/6118aae43eeccddd47dcc98fc60115ae to your computer and use it in GitHub Desktop.
Analyze rsyncd logs with multiprocessing and tolerate UTF-8 decoding errors in the log files. Based on https://gist.github.com/taoky/91c12185c2cd38f264fe2863a6b13c27.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import sys | |
from collections import defaultdict | |
from multiprocessing import Pool | |
import json | |
import glob | |
# Pick the serializer for the final report: YAML when the optional ``yaml``
# module can be imported, otherwise JSON from the standard library.
try:
    import yaml
    dumper = yaml.dump
except ImportError:
    dumper = json.dumps

# Number of worker processes handed to multiprocessing.Pool in main().
JOBS = 14
# Glob pattern (resolved against the current working directory) that
# selects the log files main() analyzes.
LOG_GLOB = 'rsyncd.log-*'
def analyze(line):
    """Extract the counting key from a single log line.

    The line is located by its bracket/parenthesis delimiters instead of
    being split into fields.  Presumably the input follows an rsyncd
    transfer-log layout such as
    ``... [pid] send host [addr] module (user) path ...`` -- confirm against
    the daemon's configured log format.

    Args:
        line: One line of log text.

    Returns:
        ``None`` when the line is not counted: it contains no ``]``, the
        four characters starting two positions after the first ``]`` are
        not ``send``, or a delimiter required later is missing.

        Otherwise, the text starting two positions after the second ``]``
        and ending one character before the next ``(``.  If that text
        starts with ``repo``, the result is instead the text starting two
        positions after the following ``)`` up to (not including) the first
        ``/`` after it.

    Raises:
        ValueError: On the ``repo`` branch, when no ``/`` follows the ``)``.
    """
    first = line.find(']')
    if first < 0 or line[first + 2:first + 6] != 'send':
        return None

    second = line.find(']', first + 1)
    if second < 0:
        return None

    # NOTE: only four characters are compared, so any name that merely
    # starts with 'repo' also takes the path-based branch below.
    if line[second + 2:second + 6] != 'repo':
        paren = line.find('(', second)
        if paren < 0:
            # Without this guard the -1 from find() would be used as a slice
            # bound and yield an empty or meaningless key; skip the line.
            return None
        # Stop one character early to drop the separator before '('.
        return line[second + 2:paren - 1]

    closing = line.find(')', second + 1)
    if closing < 0:
        return None
    slash = line.find('/', closing)
    if slash < 0:
        raise ValueError
    return line[closing + 2:slash]
def safe_iter(f):
    """Yield every item of *f*, resuming after text-decoding failures.

    When iterating *f* raises ``UnicodeDecodeError`` the error is printed
    and iteration of the same object is started again.  This only makes
    progress if *f* is its own iterator and keeps its position across the
    failure (presumably a text file object, as passed by ``main``).

    Only ``UnicodeDecodeError`` is retried.  Any other exception propagates
    to the caller, because retrying a persistent failure (for example a
    closed file) inside this loop would never terminate.

    Args:
        f: The iterable to drain.

    Yields:
        The items produced by *f*, in order.
    """
    while True:
        try:
            yield from f
        except UnicodeDecodeError as e:
            print(e)
        else:
            # The underlying iterator finished normally.
            return
def main():
    """Count ``analyze`` results across all matching log files and report them.

    Every file matching ``LOG_GLOB`` is processed in sorted name order.  Its
    lines are read through ``safe_iter`` and mapped over ``analyze`` in a
    pool of ``JOBS`` worker processes; each non-``None`` result increments
    that key's counter.  A progress line is printed after each file.

    The totals are serialized with ``dumper`` and written to the path given
    as the first command-line argument, or printed to standard output when
    no argument is supplied.
    """
    files = sorted(glob.glob(LOG_GLOB))
    counters = defaultdict(int)

    with Pool(JOBS) as pool:
        for file in files:
            with open(file) as f:
                for res in pool.map(analyze, safe_iter(f)):
                    if res is not None:
                        counters[res] += 1
            print(f'Analyze {file} OK')

    # Dump a plain dict: yaml.dump would otherwise emit the defaultdict with
    # a Python-specific object tag instead of an ordinary mapping.  The JSON
    # fallback produces the same output either way.
    report = dumper(dict(counters))

    if len(sys.argv) > 1:
        with open(sys.argv[1], 'w') as out:
            out.write(report)
    else:
        print(report)
# Run the analysis only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment