# Created November 30, 2011 20:37
# Count "pages" in a file, given a template size and an estimated number of characters per page.
use File::Find;
use Getopt::Long;
use Pod::Usage;
use Term::ANSIColor;
use strict;
# Defaults applied when -overhead / -page_size are not supplied.
my $default_overhead_size = 2000;
my $default_page_size     = 2500;

# Command-line state, filled in by GetOptions below.
my $help = 0;
my $man  = 0;
my $modified_days;          # only report files modified within this many days
my @directories = ();       # directories to scan (default: current directory)
my $overhead;               # undef means "not given", so an explicit 0 is honoured
my $page_size = 0;
my $debug = 0;
my $total = 0;              # true when a grand total was requested
my $total_pages = 0;        # running total, updated by process_file
my $all = 0;                # true to count every file regardless of extension

# Bail out with a usage message (exit status 2) on unknown or malformed
# options instead of carrying on with a partially parsed command line.
GetOptions(
    'days|d=f'      => \$modified_days,    # compared against -M, so numeric
    'dir=s'         => \@directories,
    'help|?'        => \$help,
    'man'           => \$man,
    'overhead|o=i'  => \$overhead,
    'page_size|p=i' => \$page_size,
    'debug'         => \$debug,
    'total'         => \$total,
    'all'           => \$all,
) or pod2usage(2);

pod2usage(1) if $help;
# The option is spelled -exitval; the previous misspelling was not recognised
# by Pod::Usage, so --man exited with a failure status.
pod2usage(-exitval => 0, -verbose => 2) if $man;

# A single quoted -dir argument may hold several space-separated directories.
@directories = split(/ /, join(' ', @directories));
@directories = ('.') unless @directories;

# Fall back to the defaults. An overhead of 0 is a legitimate request; a page
# size of 0 (or less) is not, because it is used as a divisor in process_file.
$overhead  = $default_overhead_size unless defined $overhead;
$page_size = $default_page_size     unless $page_size > 0;

print "Using overhead size of $overhead and page size of $page_size\n" if $debug;

# Walk every requested directory tree, reporting on each file found.
find(\&process_file, @directories);
# process_file - File::Find callback that reports the page count of one file.
#
# Called by find() with $_ set to the current entry and $File::Find::name set
# to its path. Reads the file-level settings $modified_days, $all, $overhead,
# $page_size, $debug and $total, and adds to $total_pages when $total is set.
# Prints one line per reported file: name, size (debug only) and page count.
sub process_file {
    my $file = $_;

    # don't do anything unless it's a file
    return unless -f $file;

    # Age of the file in days, as reported by -M.
    my $age_in_days = -M $file;

    # Honour -days: skip files that were not modified recently enough.
    return if $modified_days && $age_in_days >= $modified_days;

    # return unless the file has one of the following extensions
    unless ($all) {
        return unless $file =~ /.*\.(html|htm|asp|cgi|fft|pl|tmpl|tpl|php|txt)$/;
    }

    # calculate the size of the file; -s yields a false value for an empty
    # file, so normalise that to a numeric 0
    my $size = (-s $file) || 0;

    # subtract the overhead (size of template code) and divide by the
    # approximate number of characters printed on a page
    my $page_count = ($size - $overhead) / $page_size;

    # every file counts as at least one page
    $page_count = 1 if $page_count < 1;

    # if the user requested a total, add the page count to the total
    $total_pages += $page_count if $total;

    # In debug mode, files changed within the last two days are shown in red.
    # The colour is reset straight after the row so that it never leaks into
    # later output or the user's terminal.
    my $highlight = $debug && $age_in_days < 2;

    print color('red') if $highlight;
    print "$File::Find::name";
    print "\t$size" if $debug;
    print "\t$page_count\n";
    print color('reset') if $highlight;

    return;
}
# Once the directory walk is finished, report the accumulated page total
# (formatted with thousands separators) if a total was requested.
if ($total) {
    print "Total pages: ", commify($total_pages), "\n";
}
# commify - insert thousands separators into a number.
#
# Takes a number (or numeric string) as its only argument and returns a string
# with a comma between each group of three integer digits. Digits after a
# decimal point are left untouched, e.g. 1234.5678 becomes "1,234.5678".
sub commify {
    my ($number) = @_;

    # Work on the reversed string so that groups of three can be counted from
    # the least-significant digit with a plain left-to-right substitution.
    my $reversed = scalar reverse $number;

    # (?=\d)      - only add a comma when another digit follows, so the result
    #               never starts with a comma.
    # (?!\d*\.)   - skip digits that are followed (in the reversed string) by
    #               a decimal point, i.e. the fractional part of the number.
    $reversed =~ s/(\d\d\d)(?=\d)(?!\d*\.)/$1,/g;

    return scalar reverse $reversed;
}

=head1 NAME

page_count - Count the approximate page length of files in a directory

=head1 DESCRIPTION

Use page_count to get a list of files and their sizes in pages.
Used by itself, it will run in the current directory (and its subdirectories) and
will count the pages for the most commonly changed files (html, htm, fft, txt, asp, etc.).


=head1 OPTIONS

=over 8

=item B<-days> I<number of days>

Process only pages changed within the number of days specified.

=item B<-dir> I<directory_name(s)>

Process a specific directory and its subdirectories. If you enclose the names in quotes ("), you can specify a space-separated list of directories.
If omitted, the current directory is searched.

=item B<-help>

Prints out a short help message.

=item B<-total>

Adds a total of all page counts to the end of the report.

=item B<-overhead> I<characters_in_overhead>

Override the template size default (2000 characters).

=item B<-page_size> I<characters_in_printed_page>

Override the page size default (2500 characters).

=item B<-debug>

Print out debugging messages.

=item B<-all>

Calculate the size of all files, not just regular text files.

=item B<-man>

Print out a more detailed manual page about the program.

=back


=head1 AUTHOR

Written by Marco Wise.

2006 Board of Trustees, Leland Stanford Junior University

=cut
