rupertl/mts-list-unpack.pl

## mts-list-unpack.pl
#!/usr/bin/env perl
##############################################################################
# mts-list-unpack.pl: creates a tab separated file from the MTS D6 driver list
#
# usage: mts-list-unpack.pl D6.0-LIST.TXT > unpacked.tsv
#
# This script will take the MTS D6.0 list of components and unpack it
# to a tab separated file, one line per subcomponent. It should work
# with any version of perl >= 5.10.1
##############################################################################

use strict;
use warnings;

main();

sub main
{
    my $entries = read_file();

    # $entries is a hash ref where the keys are the
    # component/subcomponent and the values are hash refs containing
    # each column from the file.

    print_tsv($entries);
}

sub read_file
{
    # Read the driver list file from stdin/command line args and create
    # the hash $entries.

    my $entries = {};

    # Each entry has 3 detail lines (or in some cases 2)
    # and then 0 or more description lines
    my $line = get_line();
    while ($line)
    {
        my $entry = {};

        my $line2 = get_line();
        my $line3 = get_line();

        my $no_line_3 = extract_entry_details($entry, $line, $line2, $line3);

        $entries->{$entry->{component}} = $entry;

        if ($no_line_3)
        {
            # No line 3 and no description so start the next entry with line 3
            $line = $line3;
        }
        else
        {
            # Read optional description
            $line = get_line();
            while ($line && ! is_entry_first_line($line))
            {
                add_entry_description($entry, $line);
                $line = get_line();
            }
        }
    }

    return $entries;
}

sub get_line
{
    # Read a line, skipping over page headers and blank lines, remving
    # and carriage control also.

    while (my $line = <>)
    {
        next if is_page_header($line);
        $line = remove_carriage_control_and_crlf($line);
        next if $line eq '';    # some blank lines appear after headers
        return $line;
    }
}

sub remove_carriage_control_and_crlf
{
    my $line = shift;

    chomp $line;
    $line =~ s/\r//;
    return substr($line, 1);
}


sub is_page_header
{
    # This is hard codes for the D6.0 file but could be adapted for
    # previous distributions.

    my $line = shift;

    my @headers = ("                                                                                               MON AUG 05/96   18:52:43   Page",
                   "  Num   R       Component Name         Subname     Type G Seq ID S                          Location                           File",
                   "_________ _________________________ ______________ ____ _ ______ _ ___________________________________________________________ ____",
                   "         6250 tape-file             FS Name              Ver FType LRECL Size DevT Inst  Person   Local Per      Date        Time",
                   "         ______________ ________________________________ ___ _____ _____ ____ ____ ____ _________ _________ ______________ ________",
                   "         1600 tape-file       Disk Name",
                   "         ______________ _____________________");

    for (@headers)
    {
        return 1 if $line =~ /$_/;
    }
}

sub extract_entry_details
{
    # Read up to 3 lines and extract fields into $entry. Returns 1 if
    # there is no line 3.

    my ($entry, $line1, $line2, $line3) = @_;

    extract_line_1($entry, $line1);
    extract_line_2($entry, $line2);
    return extract_line_3($entry, $line3);
}

sub extract_line_1
{
    my ($entry, $line1) = @_;

    if ($line1 =~ qr|^(\d{4}/\d{3})|)
    {
        $entry->{component} = $1;

        (
         $entry->{component_major},
         undef,                 # ignore the / between component major & minor
         $entry->{component_minor},
         $entry->{revision_level},
         $entry->{name},
         $entry->{subname},
         $entry->{type},
         $entry->{goodness},
         $entry->{seq_id},
         $entry->{save_control},
         $entry->{location},
         $entry->{file},
        )
            = map { s/^\s*//; $_ } # remove leading spaces
              unpack("A4 A1 A3 A2 A26 A15 A5 A2 A7 A2 A60 A4", $line1);
    }
    else
    {
        die "Line 1 of entry does not match specification";
    }
}

sub extract_line_2
{
    my ($entry, $line2) = @_;

    $line2 = substr($line2, 10);

    (
     $entry->{tape_6250},
     $entry->{file_6250},
     $entry->{fs_name},
     $entry->{ver},
     $entry->{ftype},
     $entry->{lrecl},
     $entry->{size},
     $entry->{devt},
     $entry->{inst},
     $entry->{person},
     $entry->{local_person},
     $entry->{date},
     $entry->{time},
    )
        = map { s/^\s*//; $_ }
          unpack("A7 A6 A33 A4 A6 A6 A5 A5 A5 A10 A10 A15 A9", $line2);

    $entry->{date} = convert_mts_date($entry->{date});
}

sub extract_line_3
{
    my ($entry, $line3) = @_;

    if (length($line3) == 45 && $line3 !~ /[a-z]/)
    {
        # It's a line 3

        $line3 = substr($line3, 10);

        (
         $entry->{tape_1600},
         $entry->{file_1600},
         $entry->{disk_name},
        )
            = map { s/^\s*//; $_ }
              unpack("A7 A6 A21", $line3);

        if ($entry->{disk_name} =~ /^(.*)(@.*)$/)
        {
            # Separate the disk name modifier (eg @UM)
            $entry->{disk_name} = $1;
            $entry->{disk_name_modifier} = $2;
        }
    }
    elsif (is_entry_first_line($line3))
    {
        # Found a line 1 again so this entry ends here
        return 1;
    }
    else
    {
        # If there is no line 3, treat as a description line
        $line3 =~ s/^\s*//;
        $entry->{description} = $line3;
    }

    return 0;
}

sub add_entry_description
{
    # Build up the description field from one or more lines

    my ($entry, $line) = @_;

    $line =~ s/^\s*//;
    if (defined($entry->{description}))
    {
        $entry->{description} .= "\n$line";
    }
    else
    {
        $entry->{description} = $line;
    }
}

sub is_entry_first_line
{
    my $line = shift;

    return $line =~ qr|^\d{4}/\d{3}|;
}

sub convert_mts_date
{
    my $date = shift;
    return unless $date;

    # Convert a MTS date like MAY  14, 1978 or SEP.  7, 1981 to DD-MMM-YYYY

    if ($date =~ /([A-Z]{3})\S*\s*(\d*),\s*(\d+)/)
    {
        return "$2-$1-$3";
    }
    else
    {
        die "Could not convert date $date";
    }
}

sub print_tsv
{
    # Print one entry per line, tab separated

    my $entries = shift;

    my @fields = qw(component_major component_minor revision_level
                    name subname type goodness seq_id save_control
                    location file tape_6250 file_6250 fs_name ver
                    ftype lrecl size devt inst person local_person
                    date time tape_1600 file_1600 disk_name
                    disk_name_modifier description);

    print join("\t", @fields), "\n";

    for my $component (sort keys %$entries)
    {
        my $entry = $entries->{$component};
        $entry->{description} =~ s/\n/ /g if $entry->{description};
        print join("\t", map
                   {
                       # Print empty string if item not set
                       defined($entry->{$_}) ? $entry->{$_} : '';
                   }
                   @fields), "\n";
    }
}
	#!/usr/bin/env perl
	##############################################################################
	# mts-list-unpack.pl: creates a tab separated file from the MTS D6 driver list
	#
	# usage: mts-list-unpack.pl D6.0-LIST.TXT > unpacked.tsv
	#
	# This script will take the MTS D6.0 list of components and unpack it
	# to a tab separated file, one line per subcomponent. It should work
	# with any version of perl >= 5.10.1
	##############################################################################

	use strict;
	use warnings;

	main();

	sub main
	{
	my $entries = read_file();

	# $entries is a hash ref where the keys are the
	# component/subcomponent and the values are hash refs containing
	# each column from the file.

	print_tsv($entries);
	}

	sub read_file
	{
	# Read the driver list file from stdin/command line args and create
	# the hash $entries.

	my $entries = {};

	# Each entry has 3 detail lines (or in some cases 2)
	# and then 0 or more description lines
	my $line = get_line();
	while ($line)
	{
	my $entry = {};

	my $line2 = get_line();
	my $line3 = get_line();

	my $no_line_3 = extract_entry_details($entry, $line, $line2, $line3);

	$entries->{$entry->{component}} = $entry;

	if ($no_line_3)
	{
	# No line 3 and no description so start the next entry with line 3
	$line = $line3;
	}
	else
	{
	# Read optional description
	$line = get_line();
	while ($line && ! is_entry_first_line($line))
	{
	add_entry_description($entry, $line);
	$line = get_line();
	}
	}
	}

	return $entries;
	}

	sub get_line
	{
	# Read a line, skipping over page headers and blank lines, remving
	# and carriage control also.

	while (my $line = <>)
	{
	next if is_page_header($line);
	$line = remove_carriage_control_and_crlf($line);
	next if $line eq ''; # some blank lines appear after headers
	return $line;
	}
	}

	sub remove_carriage_control_and_crlf
	{
	my $line = shift;

	chomp $line;
	$line =~ s/\r//;
	return substr($line, 1);
	}


	sub is_page_header
	{
	# This is hard codes for the D6.0 file but could be adapted for
	# previous distributions.

	my $line = shift;

	my @headers = (" MON AUG 05/96 18:52:43 Page",
	" Num R Component Name Subname Type G Seq ID S Location File",
	"_________ _________________________ ______________ ____ _ ______ _ ___________________________________________________________ ____",
	" 6250 tape-file FS Name Ver FType LRECL Size DevT Inst Person Local Per Date Time",
	" ______________ ________________________________ ___ _____ _____ ____ ____ ____ _________ _________ ______________ ________",
	" 1600 tape-file Disk Name",
	" ______________ _____________________");

	for (@headers)
	{
	return 1 if $line =~ /$_/;
	}
	}

	sub extract_entry_details
	{
	# Read up to 3 lines and extract fields into $entry. Returns 1 if
	# there is no line 3.

	my ($entry, $line1, $line2, $line3) = @_;

	extract_line_1($entry, $line1);
	extract_line_2($entry, $line2);
	return extract_line_3($entry, $line3);
	}

	sub extract_line_1
	{
	my ($entry, $line1) = @_;

	if ($line1 =~ qr\|^(\d{4}/\d{3})\|)
	{
	$entry->{component} = $1;

	(
	$entry->{component_major},
	undef, # ignore the / between component major & minor
	$entry->{component_minor},
	$entry->{revision_level},
	$entry->{name},
	$entry->{subname},
	$entry->{type},
	$entry->{goodness},
	$entry->{seq_id},
	$entry->{save_control},
	$entry->{location},
	$entry->{file},
	)
	= map { s/^\s*//; $_ } # remove leading spaces
	unpack("A4 A1 A3 A2 A26 A15 A5 A2 A7 A2 A60 A4", $line1);
	}
	else
	{
	die "Line 1 of entry does not match specification";
	}
	}

	sub extract_line_2
	{
	my ($entry, $line2) = @_;

	$line2 = substr($line2, 10);

	(
	$entry->{tape_6250},
	$entry->{file_6250},
	$entry->{fs_name},
	$entry->{ver},
	$entry->{ftype},
	$entry->{lrecl},
	$entry->{size},
	$entry->{devt},
	$entry->{inst},
	$entry->{person},
	$entry->{local_person},
	$entry->{date},
	$entry->{time},
	)
	= map { s/^\s*//; $_ }
	unpack("A7 A6 A33 A4 A6 A6 A5 A5 A5 A10 A10 A15 A9", $line2);

	$entry->{date} = convert_mts_date($entry->{date});
	}

	sub extract_line_3
	{
	my ($entry, $line3) = @_;

	if (length($line3) == 45 && $line3 !~ /[a-z]/)
	{
	# It's a line 3

	$line3 = substr($line3, 10);

	(
	$entry->{tape_1600},
	$entry->{file_1600},
	$entry->{disk_name},
	)
	= map { s/^\s*//; $_ }
	unpack("A7 A6 A21", $line3);

	if ($entry->{disk_name} =~ /^(.)(@.)$/)
	{
	# Separate the disk name modifier (eg @UM)
	$entry->{disk_name} = $1;
	$entry->{disk_name_modifier} = $2;
	}
	}
	elsif (is_entry_first_line($line3))
	{
	# Found a line 1 again so this entry ends here
	return 1;
	}
	else
	{
	# If there is no line 3, treat as a description line
	$line3 =~ s/^\s*//;
	$entry->{description} = $line3;
	}

	return 0;
	}

	sub add_entry_description
	{
	# Build up the description field from one or more lines

	my ($entry, $line) = @_;

	$line =~ s/^\s*//;
	if (defined($entry->{description}))
	{
	$entry->{description} .= "\n$line";
	}
	else
	{
	$entry->{description} = $line;
	}
	}

	sub is_entry_first_line
	{
	my $line = shift;

	return $line =~ qr\|^\d{4}/\d{3}\|;
	}

	sub convert_mts_date
	{
	my $date = shift;
	return unless $date;

	# Convert a MTS date like MAY 14, 1978 or SEP. 7, 1981 to DD-MMM-YYYY

	if ($date =~ /([A-Z]{3})\S\s(\d),\s(\d+)/)
	{
	return "$2-$1-$3";
	}
	else
	{
	die "Could not convert date $date";
	}
	}

	sub print_tsv
	{
	# Print one entry per line, tab separated

	my $entries = shift;

	my @fields = qw(component_major component_minor revision_level
	name subname type goodness seq_id save_control
	location file tape_6250 file_6250 fs_name ver
	ftype lrecl size devt inst person local_person
	date time tape_1600 file_1600 disk_name
	disk_name_modifier description);

	print join("\t", @fields), "\n";

	for my $component (sort keys %$entries)
	{
	my $entry = $entries->{$component};
	$entry->{description} =~ s/\n/ /g if $entry->{description};
	print join("\t", map
	{
	# Print empty string if item not set
	defined($entry->{$_}) ? $entry->{$_} : '';
	}
	@fields), "\n";
	}
	}