Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
import gzip
import json
import re
import os
import datetime
import pprint
import argparse
from collections import OrderedDict
class _RegEx:
# Matches the first datetimestring
# regdate = re.compile(r'[\d\dT\-:+.]+')
_reg_date = re.compile(r'\d+\-\d+\-\d+T\d+:\d+:\d+[\.0-9]+?\+\d+:\d+')
_reg_date_withmil = re.compile(r'\d+\-\d+\-\d+T\d+:\d+:\d+\.[0-9]+\+\d+:\d+')
_reg_date_nomil = re.compile(r'\d+\-\d+\-\d+T\d+:\d+:\d+\+\d+:\d+')
#Match the server name
_reg_server = re.compile(r'[a-z]+\.[a-z]+_[a-z]+_[a-z]+')
_reg_server_message = re.compile('\w+\.\w+\.\w+.\w+\.\w+')
#Match the message string
_reg_message = re.compile(r'\{\"message\":(.*)\}')
# Extra Regexes
"""
Cloudmark Score
"""
# Cloudmark Score
_reg_cmscore = re.compile(r'\040[0-9]{0,3}?\040')
# Email to
# to=<mayur.piyalkar:test.in@servername>,
# to=<pawan.shukla@test.in>
_reg_email_to = re.compile(r'\bto=<(.*?)>')
# SPF-Policy
# policyd-spf[904]: Pass;
# policyd-spf[22677]: None;
_reg_spf_policy = re.compile(r'policyd-spf\[\d+\]:\s(Pass|None);')
# Mail Identity
# identity=mailfrom;
# identity=helo;
_reg_mail_identity = re.compile(r'\bidentity=(\w+);')
# Client IP
# client-ip=45.115.119.116;
_reg_client_ip = re.compile(
r'\bclient-ip=([0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3});'
)
# Message Helo
# helo=1shared116.mum1.trans2.mail.com;
_reg_message_helo = re.compile(r"helo=(\w.*?);")
# Message Envelope from
#"""
#envelope-from=bounce--yogesh.gupta=test.in@1.com;
#"""
_reg_message_envelope = re.compile(r"envelope-from=(\w.*?);")
# Message Reciever
# receiver=yogesh.gupta@test.in
_reg_message_receiver = re.compile(
r"\breceiver=([a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+)"
)
# Delay
# delay=0.7
# _reg_message_delay = re.compile(r"delay[s]?=(\d+.*)?,")
_reg_message_delay = re.compile(r"delay=(\d+.*?),")
# Delays
# delays=0.65/0/0/0.05,
_reg_message_delays= re.compile(r"delays=(\d+.*?),")
# DSN
# dsn=2.0.0,
_reg_message_dsn = re.compile(r"dsn=(\d+.*?),")
# status
# status=sent
_reg_message_status = re.compile(r"status=(\w+)?\b")
# Relay
# relay=set27-active.aus-tx.mailhostbox.com[172.16.214.161]:20026
_reg_message_relay_server = re.compile(r"relay=(\w+\d+.*?),")
__slots__=[
'date',
'server_name',
'server_name_m',
'message',
'cm_score',
'email_sender',
'spf_policy',
'identity',
'client_ip',
'message_helo',
'envelope_from',
'receiver',
'mdelay',
'mdelays',
'mdsn',
'mstatus',
'mrelay',
]
def __init__(self,line):
"""
Match the regex depending on how we search
This enabled flexibility if we add more regexes
"""
try:
self.date = self._reg_date.match(line)
self.server_name = self._reg_server.search(line)
self.server_name_m = self._reg_server_message.search(line)
self.message = self._reg_message.findall(line)
self.cm_score = self._reg_cmscore.search(line)
self.email_sender = self._reg_email_to.search(line)
self.spf_policy = self._reg_spf_policy.search(line)
self.identity = self._reg_mail_identity.search(line)
self.client_ip = self._reg_client_ip.search(line)
self.message_helo = self._reg_message_helo.search(line)
self.envelope_from = self._reg_message_envelope.search(line)
self.receiver = self._reg_message_receiver.search(line)
self.mdelay = self._reg_message_delay.search(line)
self.mdelays = self._reg_message_delays.search(line)
self.mdsn = self._reg_message_dsn.search(line)
self.mstatus = self._reg_message_status.search(line)
self.mrelay = self._reg_message_relay_server.search(line)
except AttributeError:
print("Invalid")
raise Exception("Invalid String sent")
class MessageParser:
    """Extract the ``to=<...>`` address from a piece of message text.

    After construction, ``email_sender`` is the match object for the first
    ``to=<...>`` occurrence in the text (capture group 1 is the address),
    or ``None`` when the text contains no such field.
    """

    _reg_email_sender = re.compile(r'\bto=<(.*?)>')

    __slots__ = [
        'email_sender'
    ]

    def __init__(self, data):
        """Search *data* for the ``to=<...>`` field.

        Args:
            data: The message text to scan.
        """
        # The text must be passed to ``search``; calling it with no
        # argument raises TypeError on every construction.
        self.email_sender = self._reg_email_sender.search(data)
# (attribute on the parsed message, key in the output dict) for every field
# whose value is capture group 1 of its pattern.
_MESSAGE_GROUP1_FIELDS = (
    ('email_sender', 'Email To'),
    ('spf_policy', 'SPF-Policy'),
    ('identity', 'Identity'),
    ('client_ip', 'Client IP'),
    ('message_helo', 'Helo'),
    ('envelope_from', 'Envelope From'),
    ('receiver', 'Message Reciever'),
    ('mdelay', 'Message Delay'),
    ('mdelays', 'Message Delays'),
    ('mdsn', 'DSN'),
    ('mstatus', 'Message Status'),
    ('mrelay', 'Message Relay Server'),
)


def _parse_timestamp(text):
    """Parse a log timestamp, trying the form without fractional seconds first."""
    try:
        return datetime.datetime.strptime(text, "%Y-%m-%dT%H:%M:%S%z")
    except ValueError:
        return datetime.datetime.strptime(text, "%Y-%m-%dT%H:%M:%S.%f%z")


def _clean_message(raw):
    """Remove wrapping quotes/braces and literal ``\\n`` sequences from both ends.

    ``str.strip`` takes a *set of characters*, so stripping ``'"}\\n'``
    would also eat genuine leading/trailing ``n`` letters (turning
    ``test.in`` into ``test.i``).  Only whole backslash-n pairs are removed.
    """
    wrappers = '"}'
    escaped_newline = '\\n'
    text = raw.strip(wrappers)
    while True:
        if text.startswith(escaped_newline):
            text = text[len(escaped_newline):]
        elif text.endswith(escaped_newline):
            text = text[:-len(escaped_newline)]
        else:
            return text
        text = text.strip(wrappers)


def _parse_message(parts):
    """Build the nested 'Message' dict from the captured message strings."""
    parsed = _RegEx(_clean_message(" ".join(parts)))
    fields = {}
    # These two use the whole match rather than a capture group.
    if parsed.date:
        fields['Message Date'] = parsed.date.group(0)
    if parsed.server_name_m:
        fields['Message Server'] = parsed.server_name_m.group(0)
    for attribute, key in _MESSAGE_GROUP1_FIELDS:
        match = getattr(parsed, attribute)
        if match:
            fields[key] = match.group(1)
    return fields


def _parse_line(line):
    """Convert one raw log line into its output dict."""
    matches = _RegEx(line)
    record = {}
    if matches.date:
        record['Date'] = str(_parse_timestamp(matches.date.group(0)))
    if matches.server_name:
        record['Server Name'] = matches.server_name.group(0)
    if matches.message:
        record['Message'] = _parse_message(matches.message)
    return record


def main(filename=None, output_path="jsondata.json"):
    """Parse a gzip-compressed mail log and write the result as JSON.

    Every line of the log becomes one entry, keyed by its zero-based line
    index, holding whichever of 'Date', 'Server Name' and 'Message' could be
    extracted.  An empty log produces an empty JSON object.

    Args:
        filename: Path of the gzip log to read.  Defaults to
            ``logfiles/outbound.20191115.log.gz`` under the current
            working directory.
        output_path: Path of the JSON file to write.

    Raises:
        ValueError: If a matched timestamp fits neither supported format.
    """
    if filename is None:
        filename = os.path.join(os.getcwd(), 'logfiles/outbound.20191115.log.gz')

    data = {}
    with gzip.open(filename, mode='rt', encoding='UTF-8') as log_file:
        # Iterating the file handles an empty log without raising
        # StopIteration, unlike an unconditional next() on the first line.
        for index, line in enumerate(log_file):
            data[index] = _parse_line(line)

    jsondata = json.dumps(data, indent=4, sort_keys=True)
    with open(output_path, "w") as out_file:
        out_file.write(jsondata)
# Run the log conversion only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment