# In bioinformatics, raw ASCII text files can get massive. This script sniffs
# out large uncompressed text files and sends their paths to STDOUT for piping
# to a file or a zipping command.
use warnings;
use strict;
use Cwd;
use File::Find;
# Prints the full path of every file larger than 50 MB that is uncompressed,
# i.e. that file(1) reports as ASCII text.
# One file path per line, so the output can be piped to other tools, e.g.:
# perl <this_script> [dir] | xargs gzip
# perl <this_script> [dir] | grep sra
# Directory to scan: the first command-line argument, or the current working
# directory when no argument is given.
my $dir = $ARGV[0];
unless (defined $dir) {
    $dir = getcwd();
}

# Walk the tree rooted at $dir. This must run whether or not a directory was
# supplied, so it lives outside the fallback block above. File::Find invokes
# the callback once per entry found.
find(\&print_large_uncompressed, $dir);
# File::Find callback: prints the full path of the current entry when it is a
# regular file larger than 50 MiB that file(1) describes as "ASCII text".
#
# Takes no arguments. Reads $_ (the entry name as set by File::Find) and
# $File::Find::name (the path that gets printed). Writes one path per line to
# STDOUT; a failure to launch file(1) is reported on STDERR so it cannot
# pollute the piped path list. Returns nothing useful.
sub print_large_uncompressed {
    # Kept inside the sub: a file-scope lexical assigned below the find()
    # call would still be uninitialised when this callback runs.
    my $min_size_bytes = 50 * 1024 * 1024;    # 52428800

    # Cheap stat-based checks first, so file(1) is only spawned for big files.
    # The -f test also skips directories and dangling symlinks, for which -s
    # would return undef and trigger a numeric-comparison warning.
    return unless -f $_;
    my $size = -s _;
    return unless defined $size && $size > $min_size_bytes;

    # List-form pipe open bypasses the shell, so names containing spaces,
    # quotes or shell metacharacters are passed through verbatim. "-b" drops
    # the "name:" prefix from the output (a file merely *named* "ASCII text"
    # must not match), and "--" protects names that start with a dash.
    my $pipe;
    unless (open $pipe, '-|', 'file', '-b', '--', $_) {
        warn "Cannot run file(1) on $File::Find::name: $!\n";
        return;
    }
    my $description = do { local $/; <$pipe> };
    close $pipe;

    return unless defined $description;
    if (index($description, "ASCII text") != -1) {
        print $File::Find::name . "\n";
    }
    return;
}
