nanis/build-fedfun-1997-2011.pl

## build-fedfun-1997-2011.pl
#!/usr/bin/env perl

# Author: A. Sinan Unur
# Copyright (C) 2013
# License: This is free software. You can use it under the terms
# of Artistic License 2.0 <http://opensource.org/licenses/Artistic-2.0>
# In particular:
# (14) Disclaimer of Warranty: THE PACKAGE IS PROVIDED BY THE COPYRIGHT
# HOLDER AND CONTRIBUTORS "AS IS' AND WITHOUT ANY EXPRESS OR IMPLIED
# WARRANTIES. THE IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
# PARTICULAR PURPOSE, OR NON-INFRINGEMENT ARE DISCLAIMED TO THE EXTENT
# PERMITTED BY YOUR LOCAL LAW. UNLESS REQUIRED BY LAW, NO COPYRIGHT
# HOLDER OR CONTRIBUTOR WILL BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING IN ANY WAY OUT OF THE
# USE OF THE PACKAGE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

use autodie;
use strict; use warnings;
use feature 'say';

use File::Slurp qw( read_dir );

my @files = sort {
    substr($a, 0, 4) cmp substr($b, 0, 4)
} grep /[.]txt\z/i, read_dir '.';

# Fix inconsistencies across years

my %fixes = (
    'Airports' => 'Air Transportation', # 2002 and 2003
    'Corrections' => 'Correction', # 1997 and 1998
    'Housing and Community Development (' =>
        'Housing and Community Development', # 2002
    'Housing & Community Development' =>
        'Housing and Community Development', # 1997 - 2001
    'Local Libraries' => 'Libraries', # 2002 and 2003
    'National Defense and International' =>
        'Nat Defense/International Relations', # 2002 and 2003
    'Nat Defense/International Relat' =>
        'Nat Defense/International Relations', # 1997 - 2001
    'Other Government Adminstration' =>
        'Other Government Administration', # 1997 and 1998
    'Police Protection - Officers' =>
        'Police', # 2002 and 2003
    'Postal Service (Federal)' =>
        'Postal Service', # 2002 and 2003
    'Social Insurance Administration (St' =>
        'Social Insurance Administration', # 2002
    'Space Research and Technology' =>
        'Space Research & Technology', # 2003
    'Space Research and Technology (Fede' =>
        'Space Research & Technology', # 2002
    'Streets and Highways' => 'Highways', # 2002 and 2003
    'TOTAL-ALL FUNCTIONS' => 'TOTAL - ALL FUNCTIONS', # 1997 - 2000
    'Water Transport and Terminals' =>
        'Water Transport & Terminals', # 2002 and 2003
    'Welfare' => 'Public Welfare', # 2002 and 2003
);

# ftp://ftp.bls.gov/pub/special.requests/cpi/cpiai.txt
my %cpi = (
    1997 => 160.5,
    1998 => 163,
    1999 => 166.6,
    2000 => 172.2,
    2001 => 177.1,
    2002 => 179.9,
    2003 => 184,
    2004 => 188.9,
    2005 => 195.3,
    2006 => 201.6,
    2007 => 207.342,
    2008 => 215.303,
    2009 => 214.537,
    2010 => 218.056,
    2011 => 224.939,
);

# 2011 dollars
my $base = $cpi{2011};
$_ = $base/$_ for values %cpi;

my $fixer = sub {
    my ($keys, $fixes) = @_;
    return sub {
        my $str = shift;
        for my $k (@$keys) {
            $str =~ s/\A\Q$k\E\z/$fixes->{$k}/
                and last;
        }
        return $str;
    }
}->([sort keys %fixes], \%fixes);

my %data;

for my $file (@files) {
    open my $in, '<', $file;

    my ($year) = ($file =~ /\A([0-9]{4})/);

    while (my $line = <$in>) {
        next unless $line =~ /\ATOTAL/ .. $line =~ /\A\s+\z/;
        next unless $line =~ /\S/;

        chomp $line;
        $line =~ s/[",]//g;

        unless (
            $line =~ m{
                \A
                (?<govfun> [^0-9]+)
                [0-9]+ \s+
                [0-9]+ \s+
                (?<payroll> [0-9]+)
            }x
        ) {
            warn "Unable to parse:\n$line\n";
        }

        my ($govfun, $payroll) = @+{qw(govfun payroll)};

        $govfun =~ s/\s+\z//;
        $govfun =~ s/\s+/ /;
        $govfun = $fixer->($govfun);

        $data{$govfun}{$year} = 0 + sprintf('%.0f', $payroll * $cpi{$year});
    }
}

my @govfun = sort keys %data;
say join("\t", '', @govfun);

for my $year (1997 .. 2011) {
    my @row = map $data{$_}{$year}, @govfun;
    say join ("\t", $year, @row);
}
	#!/usr/bin/env perl

	# Author: A. Sinan Unur
	# Copyright (C) 2013
	# License: This is free software. You can use it under the terms
	# of Artistic License 2.0 <http://opensource.org/licenses/Artistic-2.0>
	# In particular:
	# (14) Disclaimer of Warranty: THE PACKAGE IS PROVIDED BY THE COPYRIGHT
	# HOLDER AND CONTRIBUTORS "AS IS' AND WITHOUT ANY EXPRESS OR IMPLIED
	# WARRANTIES. THE IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
	# PARTICULAR PURPOSE, OR NON-INFRINGEMENT ARE DISCLAIMED TO THE EXTENT
	# PERMITTED BY YOUR LOCAL LAW. UNLESS REQUIRED BY LAW, NO COPYRIGHT
	# HOLDER OR CONTRIBUTOR WILL BE LIABLE FOR ANY DIRECT, INDIRECT,
	# INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING IN ANY WAY OUT OF THE
	# USE OF THE PACKAGE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

	use autodie;
	use strict; use warnings;
	use feature 'say';

	use File::Slurp qw( read_dir );

	my @files = sort {
	substr($a, 0, 4) cmp substr($b, 0, 4)
	} grep /[.]txt\z/i, read_dir '.';

	# Fix inconsistencies across years

	my %fixes = (
	'Airports' => 'Air Transportation', # 2002 and 2003
	'Corrections' => 'Correction', # 1997 and 1998
	'Housing and Community Development (' =>
	'Housing and Community Development', # 2002
	'Housing & Community Development' =>
	'Housing and Community Development', # 1997 - 2001
	'Local Libraries' => 'Libraries', # 2002 and 2003
	'National Defense and International' =>
	'Nat Defense/International Relations', # 2002 and 2003
	'Nat Defense/International Relat' =>
	'Nat Defense/International Relations', # 1997 - 2001
	'Other Government Adminstration' =>
	'Other Government Administration', # 1997 and 1998
	'Police Protection - Officers' =>
	'Police', # 2002 and 2003
	'Postal Service (Federal)' =>
	'Postal Service', # 2002 and 2003
	'Social Insurance Administration (St' =>
	'Social Insurance Administration', # 2002
	'Space Research and Technology' =>
	'Space Research & Technology', # 2003
	'Space Research and Technology (Fede' =>
	'Space Research & Technology', # 2002
	'Streets and Highways' => 'Highways', # 2002 and 2003
	'TOTAL-ALL FUNCTIONS' => 'TOTAL - ALL FUNCTIONS', # 1997 - 2000
	'Water Transport and Terminals' =>
	'Water Transport & Terminals', # 2002 and 2003
	'Welfare' => 'Public Welfare', # 2002 and 2003
	);

	# ftp://ftp.bls.gov/pub/special.requests/cpi/cpiai.txt
	my %cpi = (
	1997 => 160.5,
	1998 => 163,
	1999 => 166.6,
	2000 => 172.2,
	2001 => 177.1,
	2002 => 179.9,
	2003 => 184,
	2004 => 188.9,
	2005 => 195.3,
	2006 => 201.6,
	2007 => 207.342,
	2008 => 215.303,
	2009 => 214.537,
	2010 => 218.056,
	2011 => 224.939,
	);

	# 2011 dollars
	my $base = $cpi{2011};
	$_ = $base/$_ for values %cpi;

	my $fixer = sub {
	my ($keys, $fixes) = @_;
	return sub {
	my $str = shift;
	for my $k (@$keys) {
	$str =~ s/\A\Q$k\E\z/$fixes->{$k}/
	and last;
	}
	return $str;
	}
	}->([sort keys %fixes], \%fixes);

	my %data;

	for my $file (@files) {
	open my $in, '<', $file;

	my ($year) = ($file =~ /\A([0-9]{4})/);

	while (my $line = <$in>) {
	next unless $line =~ /\ATOTAL/ .. $line =~ /\A\s+\z/;
	next unless $line =~ /\S/;

	chomp $line;
	$line =~ s/[",]//g;

	unless (
	$line =~ m{
	\A
	(?<govfun> [^0-9]+)
	[0-9]+ \s+
	[0-9]+ \s+
	(?<payroll> [0-9]+)
	}x
	) {
	warn "Unable to parse:\n$line\n";
	}

	my ($govfun, $payroll) = @+{qw(govfun payroll)};

	$govfun =~ s/\s+\z//;
	$govfun =~ s/\s+/ /;
	$govfun = $fixer->($govfun);

	$data{$govfun}{$year} = 0 + sprintf('%.0f', $payroll * $cpi{$year});
	}
	}

	my @govfun = sort keys %data;
	say join("\t", '', @govfun);

	for my $year (1997 .. 2011) {
	my @row = map $data{$_}{$year}, @govfun;
	say join ("\t", $year, @row);
	}