Skip to content

Instantly share code, notes, and snippets.

@phochste
Created April 14, 2016 08:00
Show Gist options
  • Save phochste/feb5b2933a556b860e2c1562fcef118d to your computer and use it in GitHub Desktop.
Save phochste/feb5b2933a556b860e2c1562fcef118d to your computer and use it in GitHub Desktop.
#
# Usage:
# marc_map('852a',holding.$append)
# holding(-logfile => '/mnt/scratch/lludss/holding_error.txt')
#
package Catmandu::Fix::holding;
use strict;
use Catmandu::Util qw(:io is_string);
use POSIX qw(strftime);
use Data::Dumper;
use Parse::RecDescent;
use Moo;
# Path of the file that logme() appends its messages to. When it is
# undefined, logme() returns without writing anything.
has logfile => ( is => 'ro' );
# Parse::RecDescent parser for holding statements; built on first access
# by _build_parser (lazy attribute with the default builder name).
has parser => ( is => 'rw' , lazy => 1 , builder => 1);
# Translate the fix-style argument list (`-logfile => PATH`) into the
# `logfile` constructor argument. Every other option is discarded.
around BUILDARGS => sub {
    my $orig  = shift;
    my $class = shift;
    my %opts  = @_;

    return $orig->($class, logfile => $opts{'-logfile'});
};
{
    # Grammar text read from the DATA section. The DATA handle can only be
    # consumed once per process, so the text is cached here; otherwise a
    # second instance of this fix would build its parser from an empty
    # grammar.
    my $grammar_text;

    # Builder for the lazy `parser` attribute.
    #
    # Returns a Parse::RecDescent parser compiled from the grammar stored in
    # this file's DATA section. Dies when the grammar is missing or when
    # Parse::RecDescent cannot compile it.
    sub _build_parser {
        my ($self) = @_;

        # Slurp the whole DATA section on first use only.
        $grammar_text //= do { local $/; <DATA> };

        die __PACKAGE__ . ": no grammar found in the DATA section\n"
            unless defined $grammar_text && length $grammar_text;

        my $parser = Parse::RecDescent->new($grammar_text)
            or die __PACKAGE__ . ": failed to compile the holding grammar\n";

        return $parser;
    }
}
# Catmandu fix entry point.
#
# Records without a true `holding` field are returned untouched; all other
# records are handed to default_holding() and its result is returned.
sub fix {
    my ($self, $data) = @_;

    if ($data->{holding}) {
        $data = $self->default_holding($data);
    }

    return $data;
}
# Rewrite the raw holding statements of a record into a normalised form.
#
# Input:  $data->{holding} - an array ref of holding statements (as produced
#         by a mapping with `$append`) or a single statement as plain string.
# Output: the same record with
#           holding     - all held years, space separated, newest first
#           holding_txt - "Print available for/from <ranges>", followed by
#                         " (current)" when the holding is flagged as running
# When no year can be extracted, a message is passed to logme(), `holding`
# becomes the empty string and `holding_txt` carries only the optional
# " (current)" suffix.
sub default_holding {
    my ($self, $data) = @_;

    my $identifier = $data->{_id} // '';
    my $curryear   = (localtime)[5] + 1900;

    # Accept a single statement as well as a list of statements; a plain
    # string used to die under strict refs.
    my $raw      = $data->{holding};
    my @holdings = ref $raw eq 'ARRAY' ? @$raw : ($raw);

    my $holding = join(";", @holdings);

    # "laatste ..." (Dutch: "latest ...") phrases are replaced by the current year.
    $holding =~ s{laatste\s*\d*\s*\S+}{$curryear}g;

    # The holding counts as running ("lopend") as soon as one statement
    # does not start with the '#' marker.
    my $is_lopend = 0;
    for my $statement (@holdings) {
        $is_lopend = 1 unless $statement =~ /^\s*#/;
    }

    # The parser yields an array ref of [start] or [start, end] pairs, or
    # nothing useful when the text could not be interpreted.
    my $res    = $self->parser->startrule($holding);
    my @parsed = ref $res eq 'ARRAY' ? @$res : ();

    # Expand every parsed range into the individual years it covers.
    my %seen;
    for my $range (@parsed) {
        next if ref $range ne 'ARRAY' || !@$range;
        my ($start, $end) = @$range;
        $end = $start    unless defined $end;
        $end = $curryear if $end eq 'NOW';    # open-ended range
        $seen{$_} = 1 for $start .. $end;
    }

    my @years  = sort { $a <=> $b } keys %seen;
    my @ranges = _collapse_years(@years);

    my $years = join(" ", reverse @years);
    my $range = join("; ", @ranges);

    $self->logme("$identifier : failed to interpret '$holding'")
        unless length $range;

    $data->{holding} = $years;

    # A single year that is not running reads "for <year>"; everything
    # else reads "from <ranges>".
    my $preposition = (@years == 1 && !$is_lopend) ? 'for' : 'from';
    $data->{holding_txt}
        = length $range ? "Print available $preposition $range" : '';
    $data->{holding_txt} .= " (current)" if $is_lopend;

    return $data;
}

# Collapse an ascending list of distinct years into range labels:
# a run of consecutive years becomes "first-last", an isolated year "year".
sub _collapse_years {
    my @years = @_;

    my @ranges;
    my ($start, $prev);
    for my $year (@years) {
        if (!defined $start) {
            $start = $prev = $year;
            next;
        }
        if ($year - $prev > 1) {
            # Gap found: close the run that ended at $prev.
            push @ranges, $start == $prev ? "$start" : "$start-$prev";
            $start = $year;
        }
        $prev = $year;
    }
    push @ranges, ($start == $prev ? "$start" : "$start-$prev")
        if defined $start;

    return @ranges;
}
# Append one timestamped line, tagged with the process id, to the
# configured log file. Returns undef without writing anything when no
# log file was configured.
sub logme {
    my ($self, $msg) = @_;

    my $path = $self->logfile;
    return undef unless defined $path;

    my $out       = io($path, mode=>'a');
    my $timestamp = localtime;

    $out->print("$timestamp [$$] : $msg\n");
    $out->close();
}
1;
__DATA__
# Parse::RecDescent grammar for holding statements.
#
# startrule   one or more items separated by ';'; returns the list of item results.
# item        alternatives are tried in order and return:
#               holding '-' holding [junk]  => [ start_year, end_year ]
#               holding '-' junk            => [ start_year ]
#               holding '-'                 => [ start_year, 'NOW' ]  (open-ended)
#               holding                     => [ start_year ]
#             The final <resync> alternative presumably skips text up to the
#             next ';' so one bad item does not abort the parse -- confirm
#             against the Parse::RecDescent documentation.
# holding     optional '#' marker, remark and volume around a publication
#             year (with or without parentheses); returns only the 4-digit year.
# publication_year accepts years starting with 16, 17, 18, 19 or 20.
startrule: item(s /;/)
{ $return = $item[1]; }
item: holding '-' holding junk(?)
{ $return = [ $item[1], $item[3] ]; }
|
holding '-' junk
{ $return = [ $item[1] ]; }
|
holding '-'
{ $return = [ $item[1] , 'NOW' ]; }
|
holding
{ $return = [ $item[1] ]; }
|
<resync:[^;]*>
junk: /[^;]+/
holding: stop(?) remark(?) volume(?) '(' publication_year except_or_range_year(?) ')' issue(?)
{ $return = $item{publication_year} }
|
stop(?) remark(?) publication_year except_or_range_year(?) issue(?)
{ $return = $item{publication_year} }
volume: /[^#(;]+/
issue: /[\w\.]+([-\/,:][\w\.]+)*/
stop: /#/
except_or_range_year: /[-\/]\s*\d+/
publication_year: /(16|17|18|19|20)\d{2}/
remark: /[^#:]+/ ':'
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment