Skip to content

Instantly share code, notes, and snippets.

@h0tw1r3
Last active March 6, 2017 18:31
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save h0tw1r3/d03f762b5913356318927bb3d5ea7e1c to your computer and use it in GitHub Desktop.
Save h0tw1r3/d03f762b5913356318927bb3d5ea7e1c to your computer and use it in GitHub Desktop.
Recurse a directory tree, scrubbing lines that match a pattern list. Bzip2-compressed files are supported.
#!/usr/bin/env perl
# Recurse tree for files, scrubbing lines based on pattern list
# Copyright: 2017 Jeffrey Clark <h0tw1r3@gmail.com>
# License: GNU GPL v3+
use strict;
use warnings;
no if $] >= 5.017011, warnings => 'experimental::smartmatch';
use feature 'switch';
use Digest::MD5 qw(md5_hex);
use File::Copy;
use File::Find;
use File::Spec;
use File::stat;
use File::Temp qw(tempdir);
use Getopt::Long qw(:config gnu_getopt);
use IO::Compress::Bzip2 qw(bzip2 $Bzip2Error);
use IO::File;
use IO::Uncompress::Bunzip2 qw($Bunzip2Error);
use List::Util qw(first);
use Pod::Usage;
# Command-line state. Each variable holds its default until GetOptions
# overrides it from the corresponding flag.
my $help = 0;
my $file_pattern = '';
my $search_path = './';
my $tmp_path = File::Spec->tmpdir;
my $config_file = 'superscrub.re';
my $apply = 0;
my $verbose = 0;

# Running with no arguments at all is treated as a request for help.
($help = 1) unless @ARGV;

# GetOptions returns false when it meets an unknown or malformed flag; stop
# with the usage text instead of carrying on with a half-parsed command line.
GetOptions(
    "h|help"     => \$help,
    "s|search=s" => \$search_path,
    "t|tmp=s"    => \$tmp_path,
    "r|regex=s"  => \$config_file,
    "a|apply"    => \$apply,
    "v|verbose+" => \$verbose,
) or pod2usage(2);
pod2usage(1) if $help;

# After the flags are consumed, exactly the FILE_RE positional must remain.
if (not @ARGV) {
    print "Invalid command usage\n";
    pod2usage(1);
}

# Compile the filename pattern once; report a bad pattern in terms of the
# argument the user typed rather than as a bare regex compile error.
$file_pattern = eval { qr($ARGV[0]) }
    or die "Invalid FILE_RE '$ARGV[0]': $@";
# Read the scrub patterns from $config_file: one regex per line, skipping
# blank lines and lines that start with '#'. Each pattern is compiled once
# here so the per-line matching later does not recompile it.
my @patterns;
open(my $config_fh, '<', $config_file)
    or die "Unable to read pattern file $config_file: $!\n";
while (my $regex = <$config_fh>) {
    chomp($regex);
    next if $regex =~ /^$/ or $regex =~ /^#/;
    push @patterns, qr($regex);
}
close($config_fh);

if ($verbose) {
    print "Loaded ", scalar(@patterns), " patterns\n";
}
# Private scratch directory under $tmp_path; CLEANUP => 1 asks File::Temp to
# remove it when the program exits. Declared with 'my' so the script
# compiles under 'use strict'.
my $tmp_outpath = tempdir(DIR => $tmp_path, CLEANUP => 1);
# Collect every regular file under $search_path whose name matches
# $file_pattern. With no_chdir => 0, File::Find chdirs into each directory,
# so $_ is the bare entry name while $File::Find::name is the full path.
# Directories are skipped: they cannot be scrubbed and would abort the run
# when opened for reading.
my @filenames;
find( {
    wanted => sub {
        return unless $_ =~ $file_pattern;
        return unless -f $_;
        push @filenames, $File::Find::name;
    },
    no_chdir => 0,
}, $search_path );
# Scrub each collected file. The file is streamed (bzip2 input is
# decompressed on the fly, anything else is read as-is) and lines matching
# any pattern are counted and dropped. With --apply the surviving lines are
# written to a temp file that then replaces the original, and the original
# access/modification times are restored.
foreach my $filename (@filenames) {
    print "Reading $filename\n" if ($verbose);

    # Hash the path so files from different directories get distinct, flat
    # names inside the scratch directory.
    my $tmp_filename = $tmp_outpath . "/" . md5_hex($filename);
    my $fstat = stat($filename)
        or die "Unable to stat $filename: $!\n";

    my $out = IO::File->new($tmp_filename, '>')
        or die "IO::File->new failed: $!\n";
    # MultiStream => 1 reads every concatenated bzip2 stream, not only the
    # first, so multi-stream archives are not truncated when rewritten.
    my $in = IO::Uncompress::Bunzip2->new($filename, MultiStream => 1)
        or die "IO::Uncompress::Bunzip2 failed: $Bunzip2Error\n";

    my $matched = 0;
    # Test definedness, not truth: a final unterminated line "0" is data.
    while (defined(my $line = $in->getline())) {
        if (defined( first { $line =~ $_ } @patterns )) {
            $matched++;
            print "Filtered => $line" if ($verbose > 1);
        }
        elsif ($apply) {
            print {$out} $line
                or die "Failed to write $tmp_filename: $!\n";
        }
    }
    $in->close;
    # Buffered write errors only surface at close.
    $out->close
        or die "Failed to write $tmp_filename: $!\n";

    if ($matched) {
        print "Filtered $matched lines from $filename\n";
        if ($apply) {
            # Only a name ending in .bz2 is recompressed; a ".bz2" elsewhere
            # in the path (e.g. a directory name) must not trigger it.
            if ($filename =~ m/\.bz2\z/) {
                bzip2($tmp_filename => $filename)
                    or die "Error compressing $tmp_filename => $filename: $Bzip2Error\n";
            }
            else {
                copy($tmp_filename, $filename)
                    or die "Error copying $tmp_filename to $filename: $!\n";
            }
            utime($fstat->atime, $fstat->mtime, $filename)
                or warn "Unable to restore timestamps on $filename: $!\n";
        }
    }

    unlink $tmp_filename or die("Failed to remove $tmp_filename: $!\n");
}
__END__

=head1 NAME

superscrub - Recurse tree for files, scrubbing lines based on pattern list

=head1 SYNOPSIS

superscrub [FLAGS]... FILE_RE

  FLAGS:
    -h, --help
    -s PATH, --search=PATH                [./]
    -t PATH, --tmp=PATH                   [system temporary directory]
    -r CONFIG_FILE, --regex=CONFIG_FILE   [superscrub.re]
    -a, --apply
    -v, --verbose (specify multiple times)

  # Example patterns
  ^\s+ something sensitive .*

=cut
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment