Created
October 23, 2016 23:14
-
-
Save gmcharlt/f7e3bcb3680a5bf748aacd7e21c13175 to your computer and use it in GitHub Desktop.
A small Perl script that converts MARC21 records from MARC-8 to UTF-8.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/perl
# Convert MARC21 records from MARC-8 to UTF-8
#
# Usage:
#   marc_marc8toutf8.pl < MARC8.mrc > UTF8.mrc
#
# Every record read from STDIN is passed to convert_record_to_utf8() and then
# written to STDOUT in USMARC (ISO 2709) form.
use strict;
use warnings;
use MARC::Charset qw/ marc8_to_utf8 /;
use MARC::Batch;

# Input is read as raw bytes; no decoding layer is applied to STDIN.
binmode STDIN, ':bytes';

# Output goes through Perl's UTF-8 layer.
# NOTE(review): this assumes as_usmarc() hands back character data once the
# subfields have been converted -- confirm against the MARC::Record docs.
binmode STDOUT, ':utf8';

my $batch = MARC::Batch->new('USMARC', *STDIN);

while (my $marc = $batch->next()) {
    convert_record_to_utf8($marc);

    # A failed write (full disk, closed pipe) must not go unnoticed, otherwise
    # the output file would be silently truncated.
    print $marc->as_usmarc()
        or die "cannot write record to STDOUT: $!\n";
}

# Buffered write errors only surface when the handle is closed.
close STDOUT
    or die "cannot flush STDOUT: $!\n";
# Convert a single MARC record, in place, from MARC-8 to UTF-8.
#
# Parameters:
#   $marc - record object providing leader() and fields(); each field must
#           provide is_control_field(), subfields(), tag(), indicator() and
#           replace_with().
#
# Behaviour:
#   * If leader position 9 is already 'a' (the record claims to be UTF-8),
#     the record is left untouched.
#   * Otherwise leader position 9 is set to 'a' and the data of every
#     subfield of every non-control field is run through marc8_to_utf8().
#   * If marc8_to_utf8() yields an undefined value for a subfield, a warning
#     is issued and the original data is kept rather than being replaced by
#     undef (which would silently drop the content).
#   * Non-control fields that report no subfields are skipped.
#     NOTE(review): MARC::Field->new is believed to reject an empty subfield
#     list -- confirm against the MARC::Field documentation.
#
# Returns: nothing meaningful; the record is modified through its reference.
sub convert_record_to_utf8 {
    my $marc = shift;

    my $ldr = $marc->leader();

    # We claim to already be UTF-8: nothing to do.
    return if substr($ldr, 9, 1) eq 'a';

    substr($ldr, 9, 1) = 'a';
    $marc->leader($ldr);

    # Convert contents of variable data subfields from MARC-8 to UTF-8.
    FIELD:
    foreach my $field ($marc->fields()) {
        next FIELD if $field->is_control_field();

        my @converted_sfs;
        foreach my $sf ($field->subfields()) {
            my ($code, $marc8) = @{$sf};

            my $utf8 = marc8_to_utf8($marc8);
            if (!defined $utf8) {
                # Keep the source data instead of losing the subfield content.
                warn sprintf(
                    "could not convert field %s subfield %s from MARC-8; "
                        . "keeping original data\n",
                    $field->tag(), $code
                );
                $utf8 = $marc8;
            }

            push @converted_sfs, $code, $utf8;
        }

        # Nothing to rebuild the field from.
        next FIELD unless @converted_sfs;

        $field->replace_with(
            MARC::Field->new(
                $field->tag(),
                $field->indicator(1),
                $field->indicator(2),
                @converted_sfs
            )
        );
    }

    return;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment