Skip to content

Instantly share code, notes, and snippets.

@afro-coder
Created January 26, 2020 04:25
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save afro-coder/10d64459043e74770c36f98b690ceb5c to your computer and use it in GitHub Desktop.
Save afro-coder/10d64459043e74770c36f98b690ceb5c to your computer and use it in GitHub Desktop.
import gzip
import json
import re
import os
import datetime
import pprint
import argparse
from collections import OrderedDict
class _RegEx:
# Matches the first datetimestring
# regdate = re.compile(r'[\d\dT\-:+.]+')
_reg_date = re.compile(r'\d+\-\d+\-\d+T\d+:\d+:\d+[\.0-9]+?\+\d+:\d+')
_reg_date_withmil = re.compile(r'\d+\-\d+\-\d+T\d+:\d+:\d+\.[0-9]+\+\d+:\d+')
_reg_date_nomil = re.compile(r'\d+\-\d+\-\d+T\d+:\d+:\d+\+\d+:\d+')
#Match the server name
_reg_server = re.compile(r'[a-z]+\.[a-z]+_[a-z]+_[a-z]+')
_reg_server_message = re.compile('\w+\.\w+\.\w+.\w+\.\w+')
#Match the message string
_reg_message = re.compile(r'\{\"message\":(.*)\}')
# Extra Regexes
"""
Cloudmark Score
"""
# Cloudmark Score
_reg_cmscore = re.compile(r'\040[0-9]{0,3}?\040')
# Email to
# to=<mayur.piyalkar:test.in@servername>,
# to=<pawan.shukla@test.in>
_reg_email_to = re.compile(r'\bto=<(.*?)>')
# SPF-Policy
# policyd-spf[904]: Pass;
# policyd-spf[22677]: None;
_reg_spf_policy = re.compile(r'policyd-spf\[\d+\]:\s(Pass|None);')
# Mail Identity
# identity=mailfrom;
# identity=helo;
_reg_mail_identity = re.compile(r'\bidentity=(\w+);')
# Client IP
# client-ip=45.115.119.116;
_reg_client_ip = re.compile(
r'\bclient-ip=([0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3});'
)
# Message Helo
# helo=1shared116.mum1.trans2.mail.com;
_reg_message_helo = re.compile(r"helo=(\w.*?);")
# Message Envelope from
#"""
#envelope-from=bounce--yogesh.gupta=test.in@1.com;
#"""
_reg_message_envelope = re.compile(r"envelope-from=(\w.*?);")
# Message Reciever
# receiver=yogesh.gupta@test.in
_reg_message_receiver = re.compile(
r"\breceiver=([a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+)"
)
# Delay
# delay=0.7
# _reg_message_delay = re.compile(r"delay[s]?=(\d+.*)?,")
_reg_message_delay = re.compile(r"delay=(\d+.*?),")
# Delays
# delays=0.65/0/0/0.05,
_reg_message_delays= re.compile(r"delays=(\d+.*?),")
# DSN
# dsn=2.0.0,
_reg_message_dsn = re.compile(r"dsn=(\d+.*?),")
# status
# status=sent
_reg_message_status = re.compile(r"status=(\w+)?\b")
# Relay
# relay=set27-active.aus-tx.mailhostbox.com[172.16.214.161]:20026
_reg_message_relay_server = re.compile(r"relay=(\w+\d+.*?),")
__slots__=[
'date',
'server_name',
'server_name_m',
'message',
'cm_score',
'email_sender',
'spf_policy',
'identity',
'client_ip',
'message_helo',
'envelope_from',
'receiver',
'mdelay',
'mdelays',
'mdsn',
'mstatus',
'mrelay',
]
def __init__(self,line):
"""
Match the regex depending on how we search
This enabled flexibility if we add more regexes
"""
try:
self.date = self._reg_date.match(line)
self.server_name = self._reg_server.search(line)
self.server_name_m = self._reg_server_message.search(line)
self.message = self._reg_message.findall(line)
self.cm_score = self._reg_cmscore.search(line)
self.email_sender = self._reg_email_to.search(line)
self.spf_policy = self._reg_spf_policy.search(line)
self.identity = self._reg_mail_identity.search(line)
self.client_ip = self._reg_client_ip.search(line)
self.message_helo = self._reg_message_helo.search(line)
self.envelope_from = self._reg_message_envelope.search(line)
self.receiver = self._reg_message_receiver.search(line)
self.mdelay = self._reg_message_delay.search(line)
self.mdelays = self._reg_message_delays.search(line)
self.mdsn = self._reg_message_dsn.search(line)
self.mstatus = self._reg_message_status.search(line)
self.mrelay = self._reg_message_relay_server.search(line)
except AttributeError:
print("Invalid")
raise Exception("Invalid String sent")
class MessageParser:
    """Extract the ``to=<...>`` address from a message string.

    ``email_sender`` holds the ``re`` match object (capture group 1 is the
    text between the angle brackets), or ``None`` when ``data`` contains no
    ``to=<...>`` token.
    """

    _reg_email_sender = re.compile(r'\bto=<(.*?)>')

    __slots__ = [
        'email_sender'
    ]

    def __init__(self, data):
        """Search ``data`` for the recipient token.

        Args:
            data: The message text to scan; must be a ``str``.
        """
        # The pattern has to be given the text to scan; calling search()
        # without an argument raises TypeError on every construction.
        self.email_sender = self._reg_email_sender.search(data)
# (attribute on _RegEx, key in the output dict, capture group to keep).
# Order matches the order in which the fields are added to the output.
_MESSAGE_FIELDS = (
    ('date', 'Message Date', 0),
    ('server_name_m', 'Message Server', 0),
    ('email_sender', 'Email To', 1),
    ('spf_policy', 'SPF-Policy', 1),
    ('identity', 'Identity', 1),
    ('client_ip', 'Client IP', 1),
    ('message_helo', 'Helo', 1),
    ('envelope_from', 'Envelope From', 1),
    ('receiver', 'Message Reciever', 1),
    ('mdelay', 'Message Delay', 1),
    ('mdelays', 'Message Delays', 1),
    ('mdsn', 'DSN', 1),
    ('mstatus', 'Message Status', 1),
    ('mrelay', 'Message Relay Server', 1),
)

# Timestamp layouts tried in order: without, then with fractional seconds.
_TIMESTAMP_FORMATS = ("%Y-%m-%dT%H:%M:%S%z", "%Y-%m-%dT%H:%M:%S.%f%z")


def _parse_timestamp(text):
    """Return ``str(datetime)`` for ``text``, or ``text`` itself if unparseable."""
    for fmt in _TIMESTAMP_FORMATS:
        try:
            return str(datetime.datetime.strptime(text, fmt))
        except ValueError:
            continue
    # The date regex is looser than strptime; keep the raw text rather than
    # aborting the whole run because of one odd line.
    return text


def _extract_message_fields(server_message):
    """Return a dict of every field from ``_MESSAGE_FIELDS`` found in the message."""
    parsed = _RegEx(server_message)
    fields = {}
    for attr, key, group in _MESSAGE_FIELDS:
        match = getattr(parsed, attr)
        if match:
            fields[key] = match.group(group)
    return fields


def _parse_line(line):
    """Return the output record (possibly empty) for one raw log line."""
    parsed = _RegEx(line)
    record = {}
    if parsed.date:
        record['Date'] = _parse_timestamp(parsed.date.group(0))
    if parsed.server_name:
        record['Server Name'] = parsed.server_name.group(0)
    if parsed.message:
        # NOTE(review): str.strip() takes a *set* of characters, so this also
        # removes a leading/trailing letter "n" or backslash, not only a
        # literal "\n" sequence - confirm against real log lines.
        server_message = " ".join(parsed.message).strip('"}\\n')
        record['Message'] = _extract_message_fields(server_message)
    return record


def main(filename=None, output_path="jsondata.json"):
    """Convert a gzip-compressed log file into an indented JSON file.

    Every line of the input becomes one entry in the output object, keyed by
    its zero-based line index; lines on which nothing matched yield an empty
    object.

    Args:
        filename: Path of the gzip log to read.  Defaults to
            ``logfiles/outbound.20191115.log.gz`` under the current working
            directory.
        output_path: Path of the JSON file to write.  Defaults to
            ``jsondata.json`` in the current working directory.
    """
    if filename is None:
        filename = os.path.join(os.getcwd(), 'logfiles/outbound.20191115.log.gz')

    data = {}
    with gzip.open(filename, mode='rt', encoding='UTF-8') as log_file:
        # Iterating the file handles an empty input cleanly, where a bare
        # next() would raise StopIteration.
        for index, line in enumerate(log_file):
            data[index] = _parse_line(line)

    jsondata = json.dumps(data, indent=4, sort_keys=True)
    with open(output_path, "w") as out_file:
        out_file.write(jsondata)
# Run the conversion only when executed as a script, not when imported.
if "__main__" == __name__:
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment