Skip to content

Instantly share code, notes, and snippets.

@mmanylov-zz
Created August 5, 2020 07:30
Show Gist options
  • Save mmanylov-zz/51f210646756d27253e1d5a6d3f02cee to your computer and use it in GitHub Desktop.
Save mmanylov-zz/51f210646756d27253e1d5a6d3f02cee to your computer and use it in GitHub Desktop.
Using multiprocessing to process large log files
# see https://www.blopig.com/blog/2016/08/processing-large-files-using-python/
import multiprocessing as mp
import os
import re
from datetime import datetime, timedelta
# Path of the CSV file produced by write_result_csv().
RESULT_FILENAME = "result.csv"

# Log file name pattern; get_logfile_name() fills in {date} as YYYYMMDD.
FILENAME_TEMPLATE = "log-{date}"

# Default chunk size (1 MiB) used by chunkify() when splitting the log file.
CHUNK_SIZE = 1 << 20

# Captures up to three fields from a log line: the quoted "first param",
# the word following "second param", and an optional trailing "third param".
# NOTE(review): the literal texts look like placeholders to be replaced with
# the real log format -- confirm before use.
log_pattern = re.compile(
    r'^SEVERITY.*(?<=first param in quotes\s")(.*)(?=").*(?<=second param\s)(\w+).*?(third param)?$'
)

# Manager-backed list proxy; it is handed to write_result_csv() at the end of
# the script. NOTE: the manager is created as a side effect of importing this
# module.
manager = mp.Manager()
result = manager.list()
def write_result_csv(result, filename=None):
    """Write the collected rows to a semicolon-separated CSV file.

    A fixed header line is written first. Every value of every row is
    emitted as ``="value"`` and the values of a row are joined with ``;``.

    Args:
        result: Iterable of rows; each row is an iterable of values that are
            formatted with ``str()`` semantics (via an f-string).
        filename: Destination path. Defaults to the module-level
            ``RESULT_FILENAME`` when omitted.
    """
    if filename is None:
        filename = RESULT_FILENAME
    # The context manager guarantees the file is closed (and flushed) even if
    # formatting or writing one of the rows raises.
    with open(filename, "w") as out:
        out.write('="first param";="second param";="third param"\n')
        for row in result:
            out.write(';'.join(f'="{value}"' for value in row) + "\n")
def get_logfile_name():
    """Return the log file name for yesterday's date.

    The date is taken from the local clock (``datetime.now()``), shifted back
    one day, rendered as ``YYYYMMDD`` and substituted into
    ``FILENAME_TEMPLATE``.
    """
    yesterday = datetime.now() - timedelta(days=1)
    return FILENAME_TEMPLATE.format(date=yesterday.strftime('%Y%m%d'))
def process(line):
    """Handle a single log line; currently a no-op placeholder.

    NOTE(review): presumably this is where a line would be matched against
    ``log_pattern`` and the captured fields appended to ``result`` -- confirm
    with the author; nothing of the sort is implemented here.

    Args:
        line: One line of text from the log file (unused).

    Returns:
        None.
    """
    return None
def worker(filename, chunk_start, chunk_size, encoding=None, handler=None):
    """Process every line contained in one byte range of a file.

    ``chunk_start`` and ``chunk_size`` are *byte* values (the script feeds
    them from ``chunkify``, which measures the file in binary mode). The file
    is therefore read in binary mode as well and decoded afterwards: reading
    in text mode would count characters instead of bytes, so any multi-byte
    character would make the read run past the end of the chunk and lines
    would be handled twice.

    Args:
        filename: Path of the file to read.
        chunk_start: Byte offset at which the chunk begins.
        chunk_size: Number of bytes belonging to the chunk.
        encoding: Text encoding of the file. Defaults to the locale's
            preferred encoding, which is what a text-mode ``open()`` uses.
        handler: Callable invoked once per line. Defaults to the module-level
            ``process`` function.

    Raises:
        UnicodeDecodeError: If the chunk cannot be decoded with ``encoding``.
    """
    if encoding is None:
        import locale  # local import: only needed for the default encoding
        encoding = locale.getpreferredencoding(False)
    if handler is None:
        handler = process
    with open(filename, 'rb') as f:
        f.seek(chunk_start)
        raw = f.read(chunk_size)
    # Decode the whole chunk at once; the chunk boundaries are expected to
    # fall on line ends, so no character is split across two chunks.
    for line in raw.decode(encoding).splitlines():
        handler(line)
def chunkify(fname, size=None):
    """Yield ``(chunk_start, chunk_length)`` byte ranges that cover a file.

    Each chunk is at least ``size`` bytes long and is extended to the end of
    the line it lands in, so no line is split between two chunks. The last
    chunk is clamped to the file size measured when iteration starts; once
    the end of the file is reached no further (empty) chunk is produced, and
    an empty file yields nothing.

    Args:
        fname: Path of the file to split.
        size: Minimum chunk size in bytes. Defaults to the module-level
            ``CHUNK_SIZE`` when omitted.

    Yields:
        Tuples ``(chunk_start, chunk_length)``, both in bytes.

    Raises:
        ValueError: If ``size`` is negative (raised when iteration starts).
    """
    if size is None:
        size = CHUNK_SIZE
    if size < 0:
        raise ValueError('size must be non-negative')
    file_end = os.path.getsize(fname)
    with open(fname, 'rb') as f:
        chunk_end = 0
        while chunk_end < file_end:
            chunk_start = chunk_end
            # Jump ahead by the requested size, then consume the rest of the
            # current line so that the chunk ends on a line boundary.
            f.seek(chunk_start + size)
            f.readline()
            # Seeking past EOF is legal, so tell() may exceed the file size;
            # clamp to keep the reported length accurate.
            chunk_end = min(f.tell(), file_end)
            yield chunk_start, chunk_end - chunk_start
# Entry point: split yesterday's log file into chunks, process the chunks in
# a worker pool, then write the collected rows to the result CSV.
#
# The guard keeps the pool from being created when this module is merely
# imported; the multiprocessing documentation requires this for start methods
# that re-import the main module in child processes.
# NOTE(review): the module-level ``mp.Manager()`` call further up still runs
# at import time and would need the same treatment for full spawn safety.
if __name__ == '__main__':
    filename = get_logfile_name()
    pool = mp.Pool(mp.cpu_count())
    try:
        jobs = [
            pool.apply_async(worker, (filename, chunk_start, chunk_size))
            for chunk_start, chunk_size in chunkify(filename)
        ]
        # get() blocks until the job is done and re-raises any exception the
        # worker hit, so failures are not silently dropped.
        for job in jobs:
            job.get()
    finally:
        # Always release the worker processes, even if a job failed.
        pool.close()
        pool.join()
    write_result_csv(result)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment