Created
September 14, 2014 04:01
-
-
Save rupertl/f3736dcbf7ef2b094208 to your computer and use it in GitHub Desktop.
mts-list-unpack.pl
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env perl | |
############################################################################## | |
# mts-list-unpack.pl: creates a tab separated file from the MTS D6 driver list | |
# | |
# usage: mts-list-unpack.pl D6.0-LIST.TXT > unpacked.tsv | |
# | |
# This script will take the MTS D6.0 list of components and unpack it | |
# to a tab separated file, one line per subcomponent. It should work | |
# with any version of perl >= 5.10.1 | |
############################################################################## | |
use strict; | |
use warnings; | |
main(); | |
sub main | |
{ | |
my $entries = read_file(); | |
# $entries is a hash ref where the keys are the | |
# component/subcomponent and the values are hash refs containing | |
# each column from the file. | |
print_tsv($entries); | |
} | |
sub read_file | |
{ | |
# Read the driver list file from stdin/command line args and create | |
# the hash $entries. | |
my $entries = {}; | |
# Each entry has 3 detail lines (or in some cases 2) | |
# and then 0 or more description lines | |
my $line = get_line(); | |
while ($line) | |
{ | |
my $entry = {}; | |
my $line2 = get_line(); | |
my $line3 = get_line(); | |
my $no_line_3 = extract_entry_details($entry, $line, $line2, $line3); | |
$entries->{$entry->{component}} = $entry; | |
if ($no_line_3) | |
{ | |
# No line 3 and no description so start the next entry with line 3 | |
$line = $line3; | |
} | |
else | |
{ | |
# Read optional description | |
$line = get_line(); | |
while ($line && ! is_entry_first_line($line)) | |
{ | |
add_entry_description($entry, $line); | |
$line = get_line(); | |
} | |
} | |
} | |
return $entries; | |
} | |
sub get_line | |
{ | |
# Read a line, skipping over page headers and blank lines, remving | |
# and carriage control also. | |
while (my $line = <>) | |
{ | |
next if is_page_header($line); | |
$line = remove_carriage_control_and_crlf($line); | |
next if $line eq ''; # some blank lines appear after headers | |
return $line; | |
} | |
} | |
sub remove_carriage_control_and_crlf | |
{ | |
my $line = shift; | |
chomp $line; | |
$line =~ s/\r//; | |
return substr($line, 1); | |
} | |
sub is_page_header | |
{ | |
# This is hard codes for the D6.0 file but could be adapted for | |
# previous distributions. | |
my $line = shift; | |
my @headers = (" MON AUG 05/96 18:52:43 Page", | |
" Num R Component Name Subname Type G Seq ID S Location File", | |
"_________ _________________________ ______________ ____ _ ______ _ ___________________________________________________________ ____", | |
" 6250 tape-file FS Name Ver FType LRECL Size DevT Inst Person Local Per Date Time", | |
" ______________ ________________________________ ___ _____ _____ ____ ____ ____ _________ _________ ______________ ________", | |
" 1600 tape-file Disk Name", | |
" ______________ _____________________"); | |
for (@headers) | |
{ | |
return 1 if $line =~ /$_/; | |
} | |
} | |
sub extract_entry_details | |
{ | |
# Read up to 3 lines and extract fields into $entry. Returns 1 if | |
# there is no line 3. | |
my ($entry, $line1, $line2, $line3) = @_; | |
extract_line_1($entry, $line1); | |
extract_line_2($entry, $line2); | |
return extract_line_3($entry, $line3); | |
} | |
sub extract_line_1 | |
{ | |
my ($entry, $line1) = @_; | |
if ($line1 =~ qr|^(\d{4}/\d{3})|) | |
{ | |
$entry->{component} = $1; | |
( | |
$entry->{component_major}, | |
undef, # ignore the / between component major & minor | |
$entry->{component_minor}, | |
$entry->{revision_level}, | |
$entry->{name}, | |
$entry->{subname}, | |
$entry->{type}, | |
$entry->{goodness}, | |
$entry->{seq_id}, | |
$entry->{save_control}, | |
$entry->{location}, | |
$entry->{file}, | |
) | |
= map { s/^\s*//; $_ } # remove leading spaces | |
unpack("A4 A1 A3 A2 A26 A15 A5 A2 A7 A2 A60 A4", $line1); | |
} | |
else | |
{ | |
die "Line 1 of entry does not match specification"; | |
} | |
} | |
sub extract_line_2 | |
{ | |
my ($entry, $line2) = @_; | |
$line2 = substr($line2, 10); | |
( | |
$entry->{tape_6250}, | |
$entry->{file_6250}, | |
$entry->{fs_name}, | |
$entry->{ver}, | |
$entry->{ftype}, | |
$entry->{lrecl}, | |
$entry->{size}, | |
$entry->{devt}, | |
$entry->{inst}, | |
$entry->{person}, | |
$entry->{local_person}, | |
$entry->{date}, | |
$entry->{time}, | |
) | |
= map { s/^\s*//; $_ } | |
unpack("A7 A6 A33 A4 A6 A6 A5 A5 A5 A10 A10 A15 A9", $line2); | |
$entry->{date} = convert_mts_date($entry->{date}); | |
} | |
sub extract_line_3 | |
{ | |
my ($entry, $line3) = @_; | |
if (length($line3) == 45 && $line3 !~ /[a-z]/) | |
{ | |
# It's a line 3 | |
$line3 = substr($line3, 10); | |
( | |
$entry->{tape_1600}, | |
$entry->{file_1600}, | |
$entry->{disk_name}, | |
) | |
= map { s/^\s*//; $_ } | |
unpack("A7 A6 A21", $line3); | |
if ($entry->{disk_name} =~ /^(.*)(@.*)$/) | |
{ | |
# Separate the disk name modifier (eg @UM) | |
$entry->{disk_name} = $1; | |
$entry->{disk_name_modifier} = $2; | |
} | |
} | |
elsif (is_entry_first_line($line3)) | |
{ | |
# Found a line 1 again so this entry ends here | |
return 1; | |
} | |
else | |
{ | |
# If there is no line 3, treat as a description line | |
$line3 =~ s/^\s*//; | |
$entry->{description} = $line3; | |
} | |
return 0; | |
} | |
sub add_entry_description | |
{ | |
# Build up the description field from one or more lines | |
my ($entry, $line) = @_; | |
$line =~ s/^\s*//; | |
if (defined($entry->{description})) | |
{ | |
$entry->{description} .= "\n$line"; | |
} | |
else | |
{ | |
$entry->{description} = $line; | |
} | |
} | |
sub is_entry_first_line | |
{ | |
my $line = shift; | |
return $line =~ qr|^\d{4}/\d{3}|; | |
} | |
sub convert_mts_date | |
{ | |
my $date = shift; | |
return unless $date; | |
# Convert a MTS date like MAY 14, 1978 or SEP. 7, 1981 to DD-MMM-YYYY | |
if ($date =~ /([A-Z]{3})\S*\s*(\d*),\s*(\d+)/) | |
{ | |
return "$2-$1-$3"; | |
} | |
else | |
{ | |
die "Could not convert date $date"; | |
} | |
} | |
sub print_tsv | |
{ | |
# Print one entry per line, tab separated | |
my $entries = shift; | |
my @fields = qw(component_major component_minor revision_level | |
name subname type goodness seq_id save_control | |
location file tape_6250 file_6250 fs_name ver | |
ftype lrecl size devt inst person local_person | |
date time tape_1600 file_1600 disk_name | |
disk_name_modifier description); | |
print join("\t", @fields), "\n"; | |
for my $component (sort keys %$entries) | |
{ | |
my $entry = $entries->{$component}; | |
$entry->{description} =~ s/\n/ /g if $entry->{description}; | |
print join("\t", map | |
{ | |
# Print empty string if item not set | |
defined($entry->{$_}) ? $entry->{$_} : ''; | |
} | |
@fields), "\n"; | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment