Last active
March 6, 2017 18:31
-
-
Save h0tw1r3/d03f762b5913356318927bb3d5ea7e1c to your computer and use it in GitHub Desktop.
Recurse a directory tree, scrubbing lines that match a list of patterns. Bzip2-compressed files are supported.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env perl
# Recurse tree for files, scrubbing lines based on pattern list
# Copyright: 2017 Jeffrey Clark <h0tw1r3@gmail.com>
# License: GNU GPL v3+
use strict;
use warnings;
no if $] >= 5.017011, warnings => 'experimental::smartmatch';
use feature 'switch';
use Digest::MD5 qw(md5_hex);
use File::Copy;
use File::Find;
use File::Spec;
use File::stat;
use File::Temp qw(tempdir);
use Getopt::Long qw(:config gnu_getopt);
use IO::Compress::Bzip2 qw(bzip2 $Bzip2Error);
use IO::File;
use IO::Uncompress::Bunzip2 qw($Bunzip2Error);
use Pod::Usage;
# ---------------------------------------------------------------------------
# Command-line handling
#
# Defaults for every option; each can be overridden by the flag of the same
# name passed to GetOptions below.
# ---------------------------------------------------------------------------
my $help         = 0;
my $file_pattern = '';
my $search_path  = './';
my $tmp_path     = File::Spec->tmpdir;
my $config_file  = 'superscrub.re';
my $apply        = 0;
my $verbose      = 0;

# Invoked with no arguments at all: show the usage text.
$help = 1 unless @ARGV;

# GetOptions warns and returns false when it meets an unknown or malformed
# option. Stop with the usage text in that case rather than silently running
# with defaults the user did not ask for.
GetOptions(
    "h|help"     => \$help,
    "s|search=s" => \$search_path,
    "t|tmp=s"    => \$tmp_path,
    "r|regex=s"  => \$config_file,
    "a|apply"    => \$apply,
    "v|verbose+" => \$verbose,
) or pod2usage(2);

pod2usage(1) if $help;

# GetOptions has removed the flags it recognised from @ARGV; the first
# remaining argument must be FILE_RE.
if (not @ARGV) {
    print "Invalid command usage\n";
    pod2usage(1);
}

# Compile FILE_RE once; it is matched against every name seen during the
# directory walk.
$file_pattern = qr($ARGV[0]);
# Read the scrub patterns from the config file: one regular expression per
# line. Empty lines and lines beginning with '#' are ignored.
my @patterns;

# Three-argument open on a lexical handle, and fail loudly: a missing or
# unreadable pattern file would otherwise leave @patterns empty and the run
# would quietly scrub nothing.
open(my $config_fh, '<', $config_file)
    or die "Cannot open pattern file $config_file: $!\n";
while (my $regex = <$config_fh>) {
    chomp($regex);
    next if $regex =~ /^$/ or $regex =~ /^#/;
    push @patterns, qr($regex);
}
close($config_fh);

if ($verbose) {
    print "Loaded ", scalar(@patterns), " patterns\n";
}
# Private scratch directory created beneath $tmp_path. CLEANUP => 1 asks
# File::Temp to remove it when the program exits.
# (Declared with 'my': under 'use strict' an undeclared $tmp_outpath is a
# compile-time error.)
my $tmp_outpath = tempdir(DIR => $tmp_path, CLEANUP => 1);

# Walk $search_path and remember every plain file whose name matches FILE_RE.
# With no_chdir => 0, File::Find changes into each directory, so $_ holds the
# bare entry name while $File::Find::name holds the path including the
# directory. Directories are skipped: they cannot be opened as input by the
# processing loop.
my @filenames;
find(
    {
        wanted => sub {
            return unless -f $_;
            push @filenames, $File::Find::name if $_ =~ $file_pattern;
        },
        no_chdir => 0,
    },
    $search_path
);
# Process every collected file:
#   1. stream it line by line through IO::Uncompress::Bunzip2,
#   2. count the lines matching any pattern; when --apply is given, write the
#      non-matching lines to a temporary file,
#   3. if anything matched and --apply is given, replace the original with the
#      temporary file (re-compressed when the name contains ".bz2") and
#      restore the original access/modification times.
foreach my $filename (@filenames) {
    print "Reading $filename\n" if ($verbose);

    # Temporary file name derived from the path, so that equal base names in
    # different directories do not collide.
    my $tmp_filename = $tmp_outpath . "/" . md5_hex($filename);

    # Remember the timestamps before the file is touched.
    my $fstat = stat($filename)
        or die "stat failed for $filename: $!\n";

    my $out = IO::File->new($tmp_filename, '>')
        or die "IO::File->new failed: $!\n";
    my $in = IO::Uncompress::Bunzip2->new($filename)
        or die "IO::Uncompress::Bunzip2 failed: $Bunzip2Error\n";

    my $matched = 0;

    # Test for definedness, not truth: a final line consisting of "0" with no
    # trailing newline is false but must still be processed.
    while (defined(my $line = $in->getline())) {

        # A line is scrubbed as soon as any one pattern matches it.
        my $is_filtered = 0;
        for my $pattern (@patterns) {
            if ($line =~ $pattern) {
                $is_filtered = 1;
                last;
            }
        }

        if ($is_filtered) {
            $matched++;
            print "Filtered => $line" if ($verbose > 1);
        }
        else {
            # Without --apply this is a dry run: nothing is kept.
            print $out $line if ($apply);
        }
    }
    $in->close;

    # Buffered write errors (e.g. a full disk) only surface at close; the
    # original file must not be overwritten with a truncated copy.
    $out->close
        or die "Failed to write $tmp_filename: $!\n";

    if ($matched) {
        print "Filtered $matched lines from $filename\n";
        if ($apply) {
            # Whether to re-compress is decided from the file name alone.
            if ($filename =~ m/\.bz2/) {
                bzip2 $tmp_filename => $filename
                    or die "Error compressing $tmp_filename => $filename: $Bzip2Error\n";
            }
            else {
                copy $tmp_filename => $filename
                    or die "Error copying $tmp_filename to $filename: $!\n";
            }
            # Failing to restore timestamps is not fatal; the content has
            # already been rewritten.
            utime($fstat->atime, $fstat->mtime, $filename)
                or warn "Failed to restore timestamps on $filename: $!\n";
        }
    }

    unlink $tmp_filename or die("Failed to remove $tmp_filename: $!\n");
}
__END__

=head1 NAME

superscrub - Recurse tree for files, scrubbing lines based on pattern list

=head1 SYNOPSIS

superscrub [FLAGS]... FILE_RE

  FLAGS:
    -h, --help
    -s PATH, --search=PATH  [./]
    -t PATH, --tmp=PATH  [system temporary directory]
    -r CONFIG_FILE, --regex=CONFIG_FILE  [superscrub.re]
    -a, --apply
    -v, --verbose  (specify multiple times)

=cut
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Example patterns | |
^\s+ something sensitive .* |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment