Last active
July 9, 2016 16:41
-
-
Save jasin/de749ac15292dc62c42f8e0a5fedd942 to your computer and use it in GitHub Desktop.
log scraper for my FreeBSD server written in Perl as an exercise
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/perl -w | |
################# FreeBSD Log scraper #######################
#############################################################
# Basic usage:
# perl scrape_log.pl -keywords=[KEYWORD(S)] -log=[LOGNAME] [-dir=[REMOTEDIR]]
#
# -keywords= Comma-separated list of keywords to search for in the logs
# -log= Base name of the log file(s)
# -dir= Location of the remote log file(s)
#
# Author: Jasin Colegrove
#############################################################
use strict; | |
use Net::SCP; | |
use IO::Uncompress::Bunzip2 qw(bunzip2 $Bunzip2Error); | |
# File-level state shared by the subs in this script.
my $logname;      # base name of the log; assigned by parse_args from -log=
my $remoteDir;    # remote directory; assigned by parse_args from -dir=
my @files;        # names of fetched files; appended to by scrape_logs
my @keys;         # search keywords; assigned by parse_args from -keywords=
# functions | |
# Pull the rotated, bzip2-compressed logs from the remote host over SCP so
# they can be parsed locally.
#
# Arguments:
#   $base (optional) - base name of the log to fetch; when omitted the
#                      file-level $logname is used.
#
# Reads the file-level $remoteDir for the remote directory. The name of each
# successfully fetched file is appended to the file-level @files, and
# progress (or the SCP error string) is printed to STDOUT.
# NOTE(review): get() is called without a local name, so the file is
# presumably saved under the same name in the current directory -- confirm
# against the Net::SCP documentation.
sub scrape_logs {
    my ($base) = @_;
    $base = $logname unless defined $base;

    my $hostname = "coleburt.com";
    my $username = "jasin";

    # Setup our secure copy connection
    my $scp = Net::SCP->new( { "host" => $hostname, "user" => $username } );

    # Do work! Pull a week's worth of logs: NAME.0.bz2 .. NAME.6.bz2
    # TODO: dynamic number of log files to fetch
    for my $i (0 .. 6) {
        my $file = "$base.$i.bz2";
        print "Receiving $file...";
        if ($scp->get("$remoteDir/$file")) {
            push @files, $file;
            print "Done\n";
        } else {
            print "$scp->{errstr}\n";
        }
    }
    return;
}
# Decompress every file listed in the file-level @files and print to STDOUT
# each line that matches at least one entry of the file-level @keys.
#
# Keywords are used as regular expressions, exactly as given on the command
# line. A line is printed once even when several keywords match it. A file
# that cannot be opened is reported on STDERR and skipped.
sub parse_logs {
    # Compile each keyword once. Empty keywords are dropped because an empty
    # pattern in a match makes Perl reuse the last successful pattern.
    my @patterns = map { qr/$_/ } grep { defined && length } @keys;

    FILE:
    for my $file (@files) {
        my $z = IO::Uncompress::Bunzip2->new($file);
        if (!$z) {
            warn "Cannot read $file: $Bunzip2Error\n";
            next FILE;
        }

        # getline() returns undef at end of file, which ends the loop
        while (defined(my $line = $z->getline())) {
            for my $pattern (@patterns) {
                if ($line =~ $pattern) {
                    print $line;
                    last;    # one copy per line, however many keywords match
                }
            }
        }
        $z->close();
    }
    return;
}
# Placeholder for notifying someone that the scraped logs are ready.
# Takes no arguments and currently does nothing.
sub email_output {
    # TODO: Let someone know that the logs are prepared and ready to be read
    return;
}
# main entry point to the script
parse_args(@ARGV);        # fills $logname, $remoteDir and @keys, or exits
scrape_logs($logname);    # fetches the logs and records them in @files
parse_logs();             # prints every line matching one of @keys
# Parse the command-line switches passed in @_ (normally @ARGV).
#
# Recognised switches, each written as -name=value:
#   -keywords=K1,K2,...  stored in the file-level @keys (empty entries dropped)
#   -log=NAME            stored in the file-level $logname
#   -dir=PATH            stored in the file-level $remoteDir
#
# Each recognised switch is echoed to STDOUT. Any other argument, or a
# missing/empty -keywords or -log value, ends the program via pgm_exit with
# exit code 1.
sub parse_args {
    my $keywords;

    for my $arg (@_) {
        # A limit of 2 keeps any further '=' characters inside the value.
        my ($flag, $value) = split /=/, $arg, 2;
        $value = '' unless defined $value;    # switch given without "=value"

        if ($arg =~ /^-keywords/) {
            $keywords = $value;
            @keys = grep { length } split /,/, $keywords;
        } elsif ($arg =~ /^-log/) {
            $logname = $value;
        } elsif ($arg =~ /^-dir/) {
            $remoteDir = $value;
        } else {
            pgm_exit(1, "Unknown arg: $arg\n");
        }
        print "$flag=$value\n";
    }

    my $have_keywords = defined $keywords && length $keywords;
    my $have_logname  = defined $logname  && length $logname;
    pgm_exit(1, "Keywords and/or log switches must not be empty\n")
        unless $have_keywords && $have_logname;
    return;
}
# Print a message to STDOUT and terminate the program.
#
# Arguments:
#   $exitcode - process exit status
#   $msg      - text to print before exiting (nothing is printed if undef)
#
# Declared without a prototype: the callers appear earlier in the file than
# this definition, so a prototype would never be applied to them and would
# only produce a "called too early to check prototype" warning under -w.
sub pgm_exit {
    my ($exitcode, $msg) = @_;
    print $msg if defined $msg;
    exit($exitcode);
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
What do you mean exactly? In CMD.exe
-keywords=BANNED,SPAM
needs to be "-keywords=BANNED,SPAM"
confused...
again not sure exactly what you mean here.
point taken. I went with the switch route because I thought this was a general consensus on the way to parse command line arguments across other languages as well. I see this case usage a lot when reading source code.
I asked GitHub support how one can have multiple editors for a gist and am waiting on a reply. For the time being, I see that gists are in fact git repos, so if you fork my gist you get my history plus any edits you make on top, which gives a full history. Fork the gist and make your changes; I can view them on your gist page.