Skip to content

Instantly share code, notes, and snippets.

@mistermarco
Created November 30, 2011 20:37
Show Gist options
  • Save mistermarco/1410710 to your computer and use it in GitHub Desktop.
Save mistermarco/1410710 to your computer and use it in GitHub Desktop.
Count "pages" in a file, given a template size and an estimated number of characters per page.
#!/usr/bin/perl
use File::Find;
use Getopt::Long;
use Pod::Usage;
use Term::ANSIColor;
use strict;
# ---------------------------------------------------------------------------
# Configuration, command-line parsing and the directory walk.
# ---------------------------------------------------------------------------

# Fallbacks used when --overhead / --page_size are not given.
my $default_overhead_size = 2000;    # template size subtracted from each file
my $default_page_size     = 2500;    # approximate characters per printed page

my $help = 0;
my $man  = 0;
my $modified_days;                   # only report files modified within N days
my @directories = ();
my $overhead;                        # undef means "not given on the command line"
my $page_size   = 0;
my $debug       = 0;
my $total       = 0;                 # true when a grand total was requested
my $total_pages = 0;                 # running sum, filled in by process_file
my $all         = 0;

# Stop with a usage message on unknown or malformed options instead of
# silently continuing with a half-parsed command line.
GetOptions(
    'days|d=f'      => \$modified_days,    # numeric: compared against -M (days)
    'dir=s'         => \@directories,
    'help|?'        => \$help,
    'man'           => \$man,
    'overhead|o=i'  => \$overhead,
    'page_size|p=i' => \$page_size,
    'debug'         => \$debug,
    'total'         => \$total,
    'all'           => \$all,
) or pod2usage(2);

pod2usage(1) if $help;

# The key is -exitval; a misspelled key is ignored by Pod::Usage and the
# script would then not exit with status 0 after showing the manual.
pod2usage(-exitval => 0, -verbose => 2) if $man;

# Each --dir value may itself hold several whitespace-separated directories.
# split ' ' ignores leading and repeated whitespace, so no empty names reach
# find().
@directories = split ' ', join(' ', @directories);
@directories = qw(.) unless scalar @directories;

# An explicit "--overhead 0" is honoured; only a missing option falls back
# to the default.
$overhead = $default_overhead_size unless defined $overhead;

# A page size of 0 (or none at all) selects the default; a negative one
# cannot produce a meaningful page count.
$page_size = $default_page_size unless $page_size;
pod2usage(-message => "page_size must be a positive number", -exitval => 2)
    if $page_size < 0;

print "Using overhead size of $overhead and page size of $page_size\n" if $debug;

find(\&process_file, @directories);
# File::Find callback: print the estimated page count of a single file.
#
# find() calls this once per directory entry with $_ set to the entry's
# name and $File::Find::name set to its path. The sub reads the file-level
# settings $modified_days, $all, $overhead, $page_size, $debug and $total,
# and adds to $total_pages when a total was requested.
#
# Output is one line per reported file: the path, the size (debug mode
# only) and the page count, separated by tabs. In debug mode, files
# modified less than two days ago are printed in red.
sub process_file {
    # Only regular files are counted.
    return unless -f $_;

    # Age in days since last modification; "_" reuses the stat done by -f.
    my $age_in_days = -M _;

    # Honour the "modified within N days" filter when one was given.
    if ($modified_days) {
        return unless $age_in_days < $modified_days;
    }

    # Unless every file was requested, keep only the known text extensions.
    # \z anchors at the true end of the name ($ would also match before a
    # trailing newline).
    unless ($all) {
        return unless /\.(?:html|htm|asp|cgi|fft|pl|tmpl|tpl|php|txt)\z/;
    }

    # -s yields an empty string for zero-length files; normalise it to 0 so
    # the debug column shows a number.
    my $size = (-s _) || 0;

    # Subtract the overhead (size of template code) and divide by the
    # approximate number of characters printed on a page. Every reported
    # file counts as at least one page.
    my $page_count = ($size - $overhead) / $page_size;
    $page_count = 1 if $page_count < 1;

    # If the user requested a total, add the page count to the total.
    $total_pages += $page_count if $total;

    # Colour is switched on only around the line actually printed and
    # switched off again before the newline, so it cannot leak into later
    # output or the user's terminal.
    my $highlight = $debug && $age_in_days < 2;

    print color('red') if $highlight;
    print "$File::Find::name";
    print "\t$size" if $debug;
    print "\t$page_count";
    print color('reset') if $highlight;
    print "\n";

    return;
}
# After the directory walk: emit the grand total, with thousands
# separators, when --total was given.
if ($total) {
    my $summary = "Total pages: " . commify($total_pages) . "\n";
    print $summary;
}
# Insert thousands separators into a number: 1234567 becomes "1,234,567".
#
# Takes the number (or numeric string) as its only argument and returns the
# formatted string. Digits after a decimal point are left ungrouped.
sub commify {
    my ($number) = @_;

    # Work on the reversed string so digits can be grouped from the right.
    # The negative lookahead skips groups that, once reversed, sit before
    # the decimal point, i.e. the fractional digits.
    my $backwards = reverse $number;
    $backwards =~ s/(\d\d\d)(?=\d)(?!\d*\.)/$1,/g;

    my $formatted = reverse $backwards;
    return $formatted;
}
__END__
=head1 NAME

page_count - Count the approximate page length of files in a directory

=head1 SYNOPSIS

Use page_count to get a list of files and their sizes in pages.

    page_count

Used by itself it will run in the current directory (and its subdirectories) and
will count the pages for most commonly changed files (html, htm, fft, txt, asp, etc.)

=head1 OPTIONS

=over 8

=item B<-days> I<number of days>

Process pages changed within the number of days specified

=item B<-dir> I<directory_name(s)>

Process a specific directory and its subdirectories. If you enclose the names in quotes ("), you can specify a space-separated list of directories.
If omitted, searches the current directory.

=item B<-help>

Prints out a short help message

=item B<-total>

Adds a total of all page sizes to the end of the report

=item B<-overhead> I<characters_in_overhead>

Override the template size default

=item B<-page_size> I<characters_in_printed_page>

Override the page size default

=item B<-debug>

Print out debugging messages

=item B<-all>

Calculate the size of all files, not just regular text files

=item B<-man>

Print out a more detailed man page about the program

=back

=head1 AUTHOR

Written by Marco Wise (marco.wise@stanford.edu)

=head1 COPYRIGHT

2006 Board of Trustees, Leland Stanford Junior University

=cut
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment