Created
July 28, 2015 14:38
-
-
Save Dyrcona/b5c1df5ed41ebef50e8e to your computer and use it in GitHub Desktop.
A small script to fix DVD and possibly Blu-ray MARC records that say they are laserdisc in the 007.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/perl | |
# --------------------------------------------------------------- | |
# Copyright © 2015 Merrimack Valley Library Consortium | |
# Jason Stephenson <jstephenson@mvlc.org> | |
# | |
# This program is free software; you can redistribute it and/or modify | |
# it under the terms of the GNU General Public License as published by | |
# the Free Software Foundation; either version 2 of the License, or | |
# (at your option) any later version. | |
# | |
# This program is distributed in the hope that it will be useful, | |
# but WITHOUT ANY WARRANTY; without even the implied warranty of | |
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
# GNU General Public License for more details. | |
# --------------------------------------------------------------- | |
# This script attempts to "fix" records that indicate they are | |
# laserdiscs to make them DVD or Blu-ray. It first loads the list of | |
# records that indicate they are laserdisc and then searches through | |
# the MARC of each to look for clues. It looks for certain strings in | |
# the subfields of 538, 347, and 300 tags in that order. If the | |
# correct values are found, the record is updated in Evergreen. | |
# | |
# Any records that cannot be fixed, either because the appropriate | |
# information is not found, or they have more than 007 tag, are added | |
# to a pre-made bucket so that a cataloger can have a look at these | |
# records and fix them by hand if necessary. | |
# | |
# The criteria and behavior of this script were determined after a | |
# couple of meetings of cataloging staff at MVLC. The criteria were | |
# determined to be good enough for our situation. They may not be | |
# good enough for yours, so use caution and try this in a test | |
# database, first. | |
use strict; | |
use warnings; | |
use feature 'state'; | |
use DBI; | |
use MARC::Record; | |
use MARC::File::XML; | |
use OpenILS::Utils::Normalize; | |
# You will want to make a bucket so you have a place to track the | |
# rejects. Change this constant to match the id of the bucket so | |
# created. | |
use constant BUCKETID => 222544; | |
binmode(STDERR, 'utf8'); | |
binmode(STDOUT, 'utf8'); | |
# Find all not deleted records with a video recording format of 'g' | |
# for laserdisc. | |
my $query = <<EOQUERY; | |
select distinct bre.id, bre.marc | |
from biblio.record_entry bre | |
join metabib.real_full_rec mrfr | |
on mrfr.record = bre.id | |
where not bre.deleted | |
and mrfr.tag = '007' | |
and substring(mrfr.value from '^v...(.)') = 'g' | |
EOQUERY | |
# If you need to use specific username and database parameters to | |
# communicate with your PostgreSQL database, you will need to alter | |
# the next line. | |
my $dbh = DBI->connect('DBI:Pg:') || die "He's dead, Jim."; | |
my $sth = $dbh->prepare($query); | |
if ($sth->execute()) { | |
while (my $data = $sth->fetchrow_hashref) { | |
# Set to 1 if we have fixed the record. | |
my $fixed = 0; | |
# Get a MARC::Record from the marc field. | |
my $record; | |
eval { | |
$record = MARC::Record->new_from_xml($data->{marc}, 'UTF-8'); | |
}; | |
# If there was a fatal error with the record, report it and go | |
# on to the next one. | |
if ($@) { | |
my $note = substr($@, 0, index($@, ' at /usr')); | |
add_record_to_bucket($data->{id}, $note); | |
import MARC::File::XML; # Resets the SAX parser. | |
next; | |
} | |
# We need a list of the 007s in the record. | |
my @fields_007 = $record->field('007'); | |
# If a record has more than 1 007 fields, we leave it for a | |
# human to look at. | |
my $count_007 = scalar(@fields_007); | |
if ($count_007 > 1) { | |
add_record_to_bucket($data->{id}, "Has $count_007 007s"); | |
next; | |
} | |
# We will use 538, 347, and 300 tags in that order in the | |
# attempt to determine if the record is for a Blu-ray or DVD. | |
my @fields_538 = $record->field('538'); | |
my @fields_347 = $record->field('347'); | |
my @fields_300 = $record->field('300'); | |
# Probably too much code repitition in the following blocks. | |
# I should probably factor the inner if statements into a | |
# single function, but I'm feeling lazy and what I have works. | |
for my $field (@fields_538) { | |
my $sf_value = $field->subfield('a'); | |
if ($sf_value) { | |
my $vr_format = get_vr_format($sf_value); | |
if ($vr_format) { | |
fix_007($fields_007[0], $vr_format); | |
$fixed = 1; | |
last; | |
} | |
} | |
} | |
unless ($fixed) { | |
for my $field (@fields_347) { | |
my $sf_value = $field->subfield('b'); | |
if ($sf_value) { | |
my $vr_format = get_vr_format($sf_value); | |
if ($vr_format) { | |
fix_007($fields_007[0], $vr_format); | |
$fixed = 1; | |
last; | |
} | |
} | |
} | |
} | |
unless ($fixed) { | |
for my $field (@fields_300) { | |
my $sf_value = $field->subfield('a'); | |
if ($sf_value) { | |
my $vr_format = get_vr_format($sf_value); | |
if ($vr_format) { | |
fix_007($fields_007[0], $vr_format); | |
$fixed = 1; | |
last; | |
} | |
} | |
# Also check subfield e. | |
$sf_value = $field->subfield('e'); | |
if ($sf_value) { | |
my $vr_format = get_vr_format($sf_value); | |
if ($vr_format) { | |
fix_007($fields_007[0], $vr_format); | |
$fixed = 1; | |
last; | |
} | |
} | |
} | |
} | |
# If we have fixed the record, update its marc in the | |
# database. If not, add it to the bucket so catalogers can | |
# look at it. | |
if ($fixed) { | |
update_record($data->{id}, $record); | |
} else { | |
add_record_to_bucket($data->{id}); | |
} | |
} | |
} | |
# Add a record to a bucket with an optional note. | |
sub add_record_to_bucket { | |
my ($record, $note) = @_; | |
state $pos = 0; | |
state $bucket_item_sth = $dbh->prepare(<<CBREBI | |
insert into container.biblio_record_entry_bucket_item | |
(bucket, target_biblio_record_entry, pos) | |
VALUES | |
(?,?,?) | |
returning id | |
CBREBI | |
); | |
state $bucket_item_note_sth = $dbh->prepare(<<CBREBIN | |
insert into container.biblio_record_entry_bucket_item_note | |
(item, note) | |
VALUES | |
(?,?) | |
CBREBIN | |
); | |
$bucket_item_sth->bind_param(1, BUCKETID); | |
$bucket_item_sth->bind_param(2, $record); | |
$bucket_item_sth->bind_param(3, $pos++); | |
if ($bucket_item_sth->execute()) { | |
my $bucket_item = $bucket_item_sth->fetchrow_arrayref(); | |
if ($note) { | |
$bucket_item_note_sth->bind_param(1, $bucket_item->[0]); | |
$bucket_item_note_sth->bind_param(2, $note); | |
$bucket_item_note_sth->execute(); | |
} | |
} | |
} | |
# Check if a field value has information about the actual video | |
# format. If it does, return the appropriate video recording format | |
# code, otherwise return undef. | |
sub get_vr_format { | |
my $subfield_value = shift; | |
my $vr_format; | |
if ($subfield_value =~ /\bblu-ray\b/i) { | |
$vr_format = 's'; | |
} elsif ($subfield_value =~ /\bdvd\b/i) { | |
$vr_format = 'v'; | |
} | |
return $vr_format; | |
} | |
# Replace the vr_format code in the 007. | |
sub fix_007 { | |
my ($field, $vr_format) = @_; | |
my $data = $field->data(); | |
substr($data, 4, 1) = $vr_format; | |
$field->update($data); | |
} | |
# Update the marc for the record in the database. | |
sub update_record { | |
my ($id, $record) = @_; | |
my $marc = OpenILS::Utils::Normalize::clean_marc($record); | |
state $bre_sth = $dbh->prepare(<<BRE | |
update biblio.record_entry | |
set marc = ? | |
where id = ? | |
BRE | |
); | |
$bre_sth->bind_param(1, $marc); | |
$bre_sth->bind_param(2, $id); | |
return $bre_sth->execute(); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment