Skip to content

Instantly share code, notes, and snippets.

@marshyon
Created January 14, 2014 12:37
Show Gist options
  • Save marshyon/8417654 to your computer and use it in GitHub Desktop.
Save marshyon/8417654 to your computer and use it in GitHub Desktop.
Parses Postfix mail logs for a recipient's email address and pieces together the 'session' and 'log' entries for each
#!/usr/bin/perl
#==========================================================================
#
# FILE: parse_mail_infolog.pl
#
# USAGE: ./parse_mail_infolog.pl
#
# DESCRIPTION: given a 'list' ( below ), look for specific email logs first by
# string, then by session ids, to extract the entire mail
# conversation from an email log file
# NOTES: 2 'passes' ( file reads ) of each log file are taken
# AUTHOR: marshyon
# ORGANIZATION:
# VERSION: 0.1
# CREATED: 21/07/12 10:11:47
# REVISION: 14/01/14 11:35:55
#==========================================================================
use strict;
use warnings;
use utf8;
use Date::Parse;
use POSIX qw(strftime);
use Modern::Perl;
use Data::Dumper;
# Turn on autoflush for the currently selected output handle.
$| = 1;

# CONFIGURATION SECTION
# Host name as it appears in each log line ( note the trailing space );
# read_and_parse_files splits every log line on this value.
my $servername = '<servername> ';

# Only log lines whose timestamp falls inside this window are examined.
my $from_date_str = '2012-07-15';
my $from_epoch    = str2time($from_date_str);
my $to_date_str   = '2014-07-21';
my $to_epoch      = str2time($to_date_str);

# str2time returns undef for a date it cannot parse; stop here rather than
# letting an undefined bound silently distort the date filter later on.
defined $from_epoch
  or die "ERROR :: can't parse from date '$from_date_str'\n";
defined $to_epoch
  or die "ERROR :: can't parse to date '$to_date_str'\n";

# Recipient to search for: first command line argument, else the default.
my $email_recipient = shift || '<persons email>';
my $dir             = '<directory where maillog files are>';
my @files           = qw(<space separated list of files to parse>);
# CONFIGURATION SECTION ENDS

my @patterns;
my %session_ids = ();

# MAIN program starts
# first pass of log files - to extract session ids from email log file(s)
read_and_parse_files(
    { 'hash' => \%session_ids, 'files' => \@files, 'search' => 'emails' } );

# create a match string comprising of all session ids we are interested in
my $pattern_match = build_pattern_match_string( { 'hash' => \%session_ids } );

# second pass of log files, extracting and saving each mail session
read_and_parse_files(
    {
        'hash'          => \%session_ids,
        'files'         => \@files,
        'search'        => 'session ids',
        'pattern_match' => $pattern_match
    }
);

# print out the email session information we have found in the log
#say Dumper(\%session_ids);
print_logged_email_sessions( { 'hash' => \%session_ids } );
# MAIN program ends
#############
# SUBROUTINES
#############
# Build a regex alternation that matches any of the collected session ids.
#
# Takes one hashref of named arguments:
#   hash - hashref whose keys are the session ids of interest
#
# Returns the ids joined with '|', each one regex-escaped. Returns an empty
# string when the hash has no keys.
sub build_pattern_match_string {
    my ($p) = @_;
    my $hash = $p->{'hash'};

    # The id list is local to this call, so calling the sub more than once
    # never accumulates duplicates from earlier calls.
    # Longest ids come first: in an alternation the first alternative that
    # matches wins, so a short id must not shadow a longer id containing it.
    # The secondary string comparison keeps the result deterministic.
    my @ids =
      sort { length($b) <=> length($a) or $a cmp $b } keys %{$hash};

    # quotemeta guards against ids that contain regex metacharacters.
    return join( '|', map { quotemeta($_) } @ids );
}
# Read every configured log file once and hand each in-range line to the
# handler selected by the 'search' argument.
#
# Takes one hashref of named arguments:
#   files         - arrayref of file names, opened relative to $dir
#   hash          - hashref of session data, passed through to the handler
#   search        - 'emails' runs look_for_email_recipient on each line,
#                   'session ids' runs look_for_session_id on each line
#   pattern_match - session id alternation; only used for 'session ids'
#
# Dies when a file cannot be opened. Uses the file-level settings $dir,
# $servername, $email_recipient, $from_epoch and $to_epoch.
sub read_and_parse_files {
    my ($p) = @_;
    my $files         = $p->{'files'};
    my $hash          = $p->{'hash'};
    my $look_for      = $p->{'search'};
    my $pattern_match = $p->{'pattern_match'};

    foreach my $file ( @{$files} ) {
        open my $fh, "<", "$dir/$file"
          or die "ERROR :: can't open file $dir/$file for read : $!\n";

      LINE:
        while ( my $line = <$fh> ) {

            # Text before the server name is the timestamp, text after it
            # is the remainder of the log entry.
            my ( $date, $res ) = split( /$servername/, $line );

            # Lines whose timestamp cannot be parsed are skipped outright
            # instead of being compared as an undefined value.
            my $time = defined $date ? str2time($date) : undef;
            next LINE if !defined $time;
            next LINE if $time < $from_epoch || $time > $to_epoch;

            if ( $look_for =~ m{emails} ) {
                look_for_email_recipient(
                    {
                        'line'            => $line,
                        'email_recipient' => $email_recipient,
                        'rest_of_line'    => $res,
                        'hash'            => $hash
                    }
                );
            }
            elsif ( $look_for =~ m{session ids} ) {
                look_for_session_id(
                    {
                        'line'          => $line,
                        'pattern_match' => $pattern_match,
                        'time'          => $time,
                        'hash'          => $hash,
                        'file'          => $file
                    }
                );
            }
        }
        close $fh;
    }
    return;
}
# Record the session id of a log line that mentions the wanted recipient.
#
# Takes one hashref of named arguments:
#   line            - the complete log line
#   email_recipient - recipient to look for; interpolated into the match as
#                     a regular expression
#   rest_of_line    - the part of the line after the server name; its second
#                     whitespace-separated field is taken as the session id
#   hash            - hashref updated in place: $hash->{$sid}{count} is
#                     incremented for each matching line
#
# Lines that do not mention the recipient, or that yield no usable session
# id, leave the hash untouched.
sub look_for_email_recipient {
    my ($p) = @_;
    my $line            = $p->{'line'};
    my $email_recipient = $p->{'email_recipient'};
    my $res             = $p->{'rest_of_line'};
    my $hash            = $p->{'hash'};

    return unless $line =~ m{(?:$email_recipient)};

    # No remainder means there is nothing to take a session id from.
    return unless defined $res;

    my @fields = split( /\s/, $res );
    my $sid = $fields[1];
    return unless defined $sid;

    # The id field carries a trailing colon in the log; drop it.
    $sid =~ s{:$}{};

    # An empty id would otherwise create a bogus '' session entry.
    return unless length $sid;

    $hash->{$sid}->{'count'}++;
    return;
}
# Append a log line to the session whose id appears in it.
#
# Takes one hashref of named arguments:
#   line          - the complete log line
#   pattern_match - regex alternation of the session ids of interest
#   time          - epoch time of the line, used to build 'date_str'
#   hash          - hashref updated in place, keyed by the matched id:
#                     logs     - matching lines, each followed by "\n\t"
#                     file     - file the latest matching line came from
#                     date_str - local time of the latest matching line
#   file          - name of the log file being read
#
# Does nothing when the pattern is empty or the line contains no wanted id.
sub look_for_session_id {
    my ($p) = @_;
    my $line          = $p->{'line'};
    my $pattern_match = $p->{'pattern_match'};
    my $time          = $p->{'time'};
    my $hash          = $p->{'hash'};
    my $file          = $p->{'file'};

    # With an empty pattern the capture group would match every line and
    # file the entire log under a '' session, so bail out early.
    return unless defined($pattern_match) && length($pattern_match);

    return unless $line =~ m{($pattern_match)};

    # Copy the capture straight away so later operations cannot disturb it.
    my $sid = $1;

    my $dte_str = strftime "%Y-%m-%d %H%M%S", localtime($time);
    chomp($line);

    $hash->{$sid}->{'logs'} .= $line . "\n\t";
    $hash->{$sid}->{'file'}     = $file;
    $hash->{$sid}->{'date_str'} = $dte_str;
    return;
}
# Print one record per session to the currently selected output handle.
#
# Takes one hashref of named arguments:
#   hash - hashref keyed by session id; each value may carry 'date_str'
#          and 'logs'
#
# Each record is "<date_str>\t<id>\t<logs>\n". Records are ordered by
# date_str, then by session id so that equal timestamps print in a stable
# order. A session with no date_str or logs prints empty fields.
sub print_logged_email_sessions {
    my ($p) = @_;
    my $hash = $p->{'hash'};

    # Missing values are treated as '' so that a session without collected
    # log lines neither warns nor breaks the ordering.
    my @ordered_ids = sort {
        ( $hash->{$a}->{'date_str'} // '' )
          cmp( $hash->{$b}->{'date_str'} // '' )
          or $a cmp $b
    } keys %{$hash};

    foreach my $id (@ordered_ids) {
        my $session = $hash->{$id};
        print join( "\t",
            $session->{'date_str'} // '',
            $id, $session->{'logs'} // '' )
          . "\n";
    }
    return;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment