Created
January 14, 2014 12:37
-
-
Save marshyon/8417654 to your computer and use it in GitHub Desktop.
Parses Postfix mail logs for a recipient email address and pieces together the 'session' and 'log' entries for each mail session found.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/perl | |
#========================================================================== | |
# | |
# FILE: parse_mail_infolog.pl | |
# | |
# USAGE: ./parse_mail_infolog.pl | |
# | |
# DESCRIPTION: given a 'list' ( below ), look for specific email logs first by
#              string, then by session ids, to extract the entire mail
#              conversation from an email log file
# NOTES: 2 'passes' ( file reads ) of each log file are taken | |
# AUTHOR: marshyon | |
# ORGANIZATION: | |
# VERSION: 0.1 | |
# CREATED: 21/07/12 10:11:47 | |
# REVISION: 14/01/14 11:35:55 | |
#========================================================================== | |
use strict; | |
use warnings; | |
use utf8; | |
use Date::Parse; | |
use POSIX qw(strftime); | |
use Modern::Perl; | |
use Data::Dumper; | |
# Turn on autoflush for the currently selected output handle.
$| = 1;

# CONFIGURATION SECTION

# Separator used by read_and_parse_files() to split each log line into its
# leading date and the remainder of the line (note the trailing space).
my $servername = '<servername> ';

# Only log lines whose parsed timestamp lies within [from, to] are processed.
my $from_date_str = '2012-07-15';
my $from_epoch    = str2time($from_date_str);
my $to_date_str   = '2014-07-21';
my $to_epoch      = str2time($to_date_str);

# str2time() returns undef for a string it cannot parse. Fail early instead
# of letting the range check in read_and_parse_files() compare against undef
# and silently discard every log line.
defined $from_epoch
  or die "ERROR :: can't parse from date '$from_date_str'\n";
defined $to_epoch
  or die "ERROR :: can't parse to date '$to_date_str'\n";

# Recipient to search for: first command line argument, else the default.
my $email_recipient = shift(@ARGV) || '<persons email>';

# Directory prepended to every entry of @files when the file is opened.
my $dir   = '<directory where maillog files are>';
my @files = qw(<space separated list of files to parse>);

# CONFIGURATION SECTION ENDS

my @patterns;
my %session_ids = ();

# MAIN program starts

# first pass of log files - to extract session ids from email log file(s)
read_and_parse_files(
    { 'hash' => \%session_ids, 'files' => \@files, 'search' => 'emails' } );

# create a match string comprising of all session ids we are interested in
my $pattern_match = build_pattern_match_string( { 'hash' => \%session_ids } );

# second pass of log files, extracting and saving each mail session
read_and_parse_files(
    {
        'hash'          => \%session_ids,
        'files'         => \@files,
        'search'        => 'session ids',
        'pattern_match' => $pattern_match
    }
);

# print out the email session information we have found in the log
#say Dumper(\%session_ids);
print_logged_email_sessions( { 'hash' => \%session_ids } );

# MAIN program ends
############# | |
# SUBROUTINES | |
############# | |
# Build a regex alternation that matches any of the session ids stored as
# keys of a hash.
#
# Parameters (single hash ref):
#   hash - hash ref whose keys are the session ids of interest
#
# Returns a pattern string meant to be interpolated into a regex:
#   * every id is escaped with quotemeta, so it is matched literally;
#   * empty ids are dropped, since an empty alternative matches everything;
#   * longer ids come first (ties broken alphabetically), so the result is
#     deterministic and an id is never shadowed by another id that is a
#     prefix of it;
#   * when there are no usable ids, '(?!)' is returned - a pattern that can
#     never match - rather than an empty pattern that matches every line.
#
# The list of ids is local to each call, so calling this more than once
# does not accumulate ids from earlier calls.
sub build_pattern_match_string {
    my ($p) = @_;
    my $hash = $p->{'hash'} // {};

    my @ids =
      sort { length($b) <=> length($a) || $a cmp $b }
      grep { length } keys %{$hash};

    return '(?!)' if !@ids;

    return join( '|', map { quotemeta($_) } @ids );
}
# Read each configured log file line by line and hand every line whose
# timestamp lies inside the configured date range to the handler selected
# by the 'search' parameter.
#
# Parameters (single hash ref):
#   files         - array ref of file names, each opened as "$dir/$file"
#   hash          - hash ref of session data, passed through to the handler
#   search        - string matching 'emails' selects
#                   look_for_email_recipient(); otherwise a string matching
#                   'session ids' selects look_for_session_id()
#   pattern_match - pattern string forwarded to look_for_session_id()
#
# Uses the file-level $dir, $servername, $from_epoch, $to_epoch and
# $email_recipient. Dies if a file cannot be opened.
sub read_and_parse_files {
    my ($p) = @_;
    my $files         = $p->{'files'};
    my $hash          = $p->{'hash'};
    my $look_for      = $p->{'search'};
    my $pattern_match = $p->{'pattern_match'};

    foreach my $file ( @{$files} ) {
        open my $fh, "<", "$dir/$file"
          or die "ERROR :: can't open file $dir/$file for read : $!\n";

      LINE:
        while ( my $line = <$fh> ) {

            # Split on the literal server name (\Q..\E keeps dots and other
            # metacharacters from being treated as regex syntax). The limit
            # of 2 keeps everything after the first occurrence together.
            my ( $date, $res ) = split( /\Q$servername\E/, $line, 2 );
            next LINE if !defined $date;

            # str2time() returns undef when it cannot parse the date part,
            # e.g. for a line that does not contain the server name.
            my $time = str2time($date);
            next LINE if !defined $time;

            next LINE
              if ( ( $time < $from_epoch ) || ( $time > $to_epoch ) );

            if ( $look_for =~ m{emails} ) {
                look_for_email_recipient(
                    {
                        'line'            => $line,
                        'email_recipient' => $email_recipient,
                        'rest_of_line'    => $res,
                        'hash'            => $hash
                    }
                );
            }
            elsif ( $look_for =~ m{session ids} ) {
                look_for_session_id(
                    {
                        'line'          => $line,
                        'pattern_match' => $pattern_match,
                        'time'          => $time,
                        'hash'          => $hash,
                        'file'          => $file
                    }
                );
            }
        }
        close $fh;
    }
    return;
}
# Record the session id of a log line that mentions the recipient address.
#
# Parameters (single hash ref):
#   line            - the complete log line
#   email_recipient - address to look for; matched as a literal,
#                     case-sensitive substring of the line
#   rest_of_line    - part of the line after the server name; its second
#                     whitespace-separated field, minus one trailing ':',
#                     is taken as the session id
#   hash            - hash ref; $hash->{<session id>}{count} is incremented
#
# Nothing is recorded when an argument is missing, the recipient is empty,
# the line does not contain the recipient, or no session id can be found.
sub look_for_email_recipient {
    my ($p) = @_;
    my $line            = $p->{'line'};
    my $email_recipient = $p->{'email_recipient'};
    my $res             = $p->{'rest_of_line'};
    my $hash            = $p->{'hash'};

    return if !defined $line || !defined $res;
    return if !defined $email_recipient || $email_recipient eq '';

    # Literal substring test: '.' and '+' are common in addresses and must
    # not be interpreted as regex metacharacters.
    return if index( $line, $email_recipient ) < 0;

    my @fields = split( /\s/, $res );
    my $sid = $fields[1];
    return if !defined $sid;

    $sid =~ s{:\z}{};
    return if $sid eq '';

    $hash->{$sid}->{'count'}++;
    return;
}
# Append a log line to the session whose id it contains.
#
# Parameters (single hash ref):
#   line          - the complete log line
#   pattern_match - pattern string (alternation of session ids); the text it
#                   matches in the line is used as the session id
#   time          - epoch time of the line, formatted via localtime()
#   hash          - hash ref keyed by session id; for the matched id the
#                   line is appended to 'logs' (followed by "\n\t"), and
#                   'file' and 'date_str' are set from this line
#   file          - name of the log file the line came from
#
# 'file' and 'date_str' are overwritten on every match, so they reflect the
# most recently processed matching line.
#
# Does nothing when the line or pattern is missing or the pattern is empty:
# an empty pattern would otherwise match every line and file it under the
# empty-string key.
sub look_for_session_id {
    my ($p) = @_;
    my $line          = $p->{'line'};
    my $pattern_match = $p->{'pattern_match'};
    my $time          = $p->{'time'};
    my $hash          = $p->{'hash'};
    my $file          = $p->{'file'};

    return if !defined $line;
    return if !defined $pattern_match || $pattern_match eq '';
    return if $line !~ m{($pattern_match)};

    # Copy the capture right away; $1 is only safe until the next match.
    my $sid = $1;

    my $dte_str = strftime "%Y-%m-%d %H%M%S", localtime($time);
    chomp($line);

    $hash->{$sid}->{'logs'} .= $line . "\n\t";
    $hash->{$sid}->{'file'}     = $file;
    $hash->{$sid}->{'date_str'} = $dte_str;
    return;
}
# Print every collected session to the currently selected output handle,
# one record per session:
#
#   <date_str> TAB <session id> TAB <logs> NEWLINE
#
# Parameters (single hash ref):
#   hash - hash ref keyed by session id; each value is a hash ref that may
#          hold 'date_str' and 'logs'
#
# Records are ordered by 'date_str' (string comparison), with the session id
# as tie-breaker so the output order does not depend on hash ordering. A
# missing 'date_str' or 'logs' is treated as an empty string instead of
# triggering uninitialized-value warnings.
sub print_logged_email_sessions {
    my ($p) = @_;
    my $hash = $p->{'hash'};

    my @ids = sort {
        ( $hash->{$a}->{'date_str'} // '' )
          cmp( $hash->{$b}->{'date_str'} // '' )
          || $a cmp $b
    } keys %{$hash};

    foreach my $id (@ids) {
        my $date_str = $hash->{$id}->{'date_str'} // '';
        my $logs     = $hash->{$id}->{'logs'}     // '';
        print $date_str . "\t" . $id . "\t" . $logs . "\n";
    }
    return;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment