Skip to content

Instantly share code, notes, and snippets.

@Dyrcona
Created July 28, 2015 14:38
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Dyrcona/b5c1df5ed41ebef50e8e to your computer and use it in GitHub Desktop.
Save Dyrcona/b5c1df5ed41ebef50e8e to your computer and use it in GitHub Desktop.
A small script to fix DVD and possibly Blu-ray MARC records that say they are laserdisc in the 007.
#!/usr/bin/perl
# ---------------------------------------------------------------
# Copyright © 2015 Merrimack Valley Library Consortium
# Jason Stephenson <jstephenson@mvlc.org>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# ---------------------------------------------------------------
# This script attempts to "fix" records that indicate they are
# laserdiscs to make them DVD or Blu-ray. It first loads the list of
# records that indicate they are laserdisc and then searches through
# the MARC of each to look for clues. It looks for certain strings in
# the subfields of 538, 347, and 300 tags in that order. If the
# correct values are found, the record is updated in Evergreen.
#
# Any records that cannot be fixed, either because the appropriate
# information is not found, or they have more than 007 tag, are added
# to a pre-made bucket so that a cataloger can have a look at these
# records and fix them by hand if necessary.
#
# The criteria and behavior of this script were determined after a
# couple of meetings of cataloging staff at MVLC. The criteria were
# determined to be good enough for our situation. They may not be
# good enough for yours, so use caution and try this in a test
# database, first.
use strict;
use warnings;
use feature 'state';
use DBI;
use MARC::Record;
use MARC::File::XML;
use OpenILS::Utils::Normalize;
# You will want to make a bucket so you have a place to track the
# rejects. Change this constant to match the id of the bucket so
# created.
use constant BUCKETID => 222544;
binmode(STDERR, 'utf8');
binmode(STDOUT, 'utf8');
# Find all not deleted records with a video recording format of 'g'
# for laserdisc.
my $query = <<EOQUERY;
select distinct bre.id, bre.marc
from biblio.record_entry bre
join metabib.real_full_rec mrfr
on mrfr.record = bre.id
where not bre.deleted
and mrfr.tag = '007'
and substring(mrfr.value from '^v...(.)') = 'g'
EOQUERY
# If you need to use specific username and database parameters to
# communicate with your PostgreSQL database, you will need to alter
# the next line.
my $dbh = DBI->connect('DBI:Pg:') || die "He's dead, Jim.";
my $sth = $dbh->prepare($query);
if ($sth->execute()) {
while (my $data = $sth->fetchrow_hashref) {
# Set to 1 if we have fixed the record.
my $fixed = 0;
# Get a MARC::Record from the marc field.
my $record;
eval {
$record = MARC::Record->new_from_xml($data->{marc}, 'UTF-8');
};
# If there was a fatal error with the record, report it and go
# on to the next one.
if ($@) {
my $note = substr($@, 0, index($@, ' at /usr'));
add_record_to_bucket($data->{id}, $note);
import MARC::File::XML; # Resets the SAX parser.
next;
}
# We need a list of the 007s in the record.
my @fields_007 = $record->field('007');
# If a record has more than 1 007 fields, we leave it for a
# human to look at.
my $count_007 = scalar(@fields_007);
if ($count_007 > 1) {
add_record_to_bucket($data->{id}, "Has $count_007 007s");
next;
}
# We will use 538, 347, and 300 tags in that order in the
# attempt to determine if the record is for a Blu-ray or DVD.
my @fields_538 = $record->field('538');
my @fields_347 = $record->field('347');
my @fields_300 = $record->field('300');
# Probably too much code repitition in the following blocks.
# I should probably factor the inner if statements into a
# single function, but I'm feeling lazy and what I have works.
for my $field (@fields_538) {
my $sf_value = $field->subfield('a');
if ($sf_value) {
my $vr_format = get_vr_format($sf_value);
if ($vr_format) {
fix_007($fields_007[0], $vr_format);
$fixed = 1;
last;
}
}
}
unless ($fixed) {
for my $field (@fields_347) {
my $sf_value = $field->subfield('b');
if ($sf_value) {
my $vr_format = get_vr_format($sf_value);
if ($vr_format) {
fix_007($fields_007[0], $vr_format);
$fixed = 1;
last;
}
}
}
}
unless ($fixed) {
for my $field (@fields_300) {
my $sf_value = $field->subfield('a');
if ($sf_value) {
my $vr_format = get_vr_format($sf_value);
if ($vr_format) {
fix_007($fields_007[0], $vr_format);
$fixed = 1;
last;
}
}
# Also check subfield e.
$sf_value = $field->subfield('e');
if ($sf_value) {
my $vr_format = get_vr_format($sf_value);
if ($vr_format) {
fix_007($fields_007[0], $vr_format);
$fixed = 1;
last;
}
}
}
}
# If we have fixed the record, update its marc in the
# database. If not, add it to the bucket so catalogers can
# look at it.
if ($fixed) {
update_record($data->{id}, $record);
} else {
add_record_to_bucket($data->{id});
}
}
}
# Add a record to a bucket with an optional note.
sub add_record_to_bucket {
my ($record, $note) = @_;
state $pos = 0;
state $bucket_item_sth = $dbh->prepare(<<CBREBI
insert into container.biblio_record_entry_bucket_item
(bucket, target_biblio_record_entry, pos)
VALUES
(?,?,?)
returning id
CBREBI
);
state $bucket_item_note_sth = $dbh->prepare(<<CBREBIN
insert into container.biblio_record_entry_bucket_item_note
(item, note)
VALUES
(?,?)
CBREBIN
);
$bucket_item_sth->bind_param(1, BUCKETID);
$bucket_item_sth->bind_param(2, $record);
$bucket_item_sth->bind_param(3, $pos++);
if ($bucket_item_sth->execute()) {
my $bucket_item = $bucket_item_sth->fetchrow_arrayref();
if ($note) {
$bucket_item_note_sth->bind_param(1, $bucket_item->[0]);
$bucket_item_note_sth->bind_param(2, $note);
$bucket_item_note_sth->execute();
}
}
}
# Check if a field value has information about the actual video
# format. If it does, return the appropriate video recording format
# code, otherwise return undef.
sub get_vr_format {
my $subfield_value = shift;
my $vr_format;
if ($subfield_value =~ /\bblu-ray\b/i) {
$vr_format = 's';
} elsif ($subfield_value =~ /\bdvd\b/i) {
$vr_format = 'v';
}
return $vr_format;
}
# Replace the vr_format code in the 007.
sub fix_007 {
my ($field, $vr_format) = @_;
my $data = $field->data();
substr($data, 4, 1) = $vr_format;
$field->update($data);
}
# Update the marc for the record in the database.
sub update_record {
my ($id, $record) = @_;
my $marc = OpenILS::Utils::Normalize::clean_marc($record);
state $bre_sth = $dbh->prepare(<<BRE
update biblio.record_entry
set marc = ?
where id = ?
BRE
);
$bre_sth->bind_param(1, $marc);
$bre_sth->bind_param(2, $id);
return $bre_sth->execute();
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment