Created
February 17, 2011 16:32
-
-
Save nichtich/832052 to your computer and use it in GitHub Desktop.
Make use of VIAF authority records
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/perl | |
=head1 NAME | |
viaflookup.pl - Make use of VIAF authority records | |
=head1 VERSION | |
Version 0.1 - 2011-02-17 | |
=cut | |
use strict; | |
use LWP::Simple; | |
use Data::Dumper; | |
use CGI qw(escape param header); | |
use JSON; | |
=head1 DESCRIPTION | |
The Virtual International Authority File (VIAF) combines authority files | |
from more than a dozen libraries and countries. It is provided by OCLC at | |
L<http://www.viaf.org>. | |
See L<http://www.oclc.org/developer/services/viaf> for a description of the | |
VIAF API. You can directly access VIAF records by their identifier or by any
identifier of the combined authority files. | |
This document and script describe and implement the use of VIAF.
=head2 Authority files | |
Authority files in VIAF are called "authority schemes". They are | |
identified by an uppercase scheme identifier, for instance C<DNB>. | |
Scheme URIs are of form <http://viaf.org/authorityScheme/XXX> where | |
C<XXX> is the scheme identifier. To make things complicated, the | |
identifier is sometimes used uppercase and sometimes lowercase.
A list of institutions that provide authority files in VIAF can be | |
found at the VIAF homepage L<http://viaf.org/>. Institutions are | |
also identified by uppercase identifiers that in most cases map | |
to the identifiers of their authority files (but not always). | |
For each institution with identifier C<XXX> (not for each authority
file!) there is an icon, for instance for C<SELIBR> located at
L<http://viaf.org/viaf/images/flags/SELIBR.png>
=head2 Authority records | |
Authority records are identified by their scheme and a local identifier | |
within this scheme. VIAF uses identifiers composed of scheme identifier
and local identifier, combined with a vertical bar. For instance
C<LC|n 50034593> identifies a Library of Congress name authority record.
You can create some URLs from this identifier: | |
=over | |
=item L<http://viaf.org/processed/LC%7Cn%2050034593> | |
The source record that VIAF used for mapping in MARCXML format. | |
=item L<http://viaf.org/viaf/sourceID/LC%7Cn%2050034593> | |
An HTTP 302 redirect to the mapped VIAF record.
=back | |
These URLs are not suitable as permanent linked data URIs for several | |
reasons (bugs in the encoding of characters that don't belong in URIs, | |
no content negotiation for RDF serialization etc.). A local identifier | |
does not make a proper URI. | |
Most authority files should define their own, clean, strict, and
resolvable URIs for authority records. If there is such a URI,
you should be able to construct it from the local identifier. | |
Depending on the identifier structure, the institution may need to | |
define some normalization, for instance as described here for LCCN: | |
L<http://www.loc.gov/marc/lccn-namespace.html#normalization> | |
=cut | |
# This hash contains both schemes and institutions, keyed by their
# uppercase identifier. Recognised fields per entry:
#
#   name    - human-readable name (always present)
#   short   - alternative short identifier
#   records - URI template for authority records; '$1', '$2', ... are
#             replaced by the capture groups of 'pattern'. If there is
#             a 'records' entry, the entry is a scheme.
#   pattern - regular expression a (filtered) local identifier must
#             match before a record URI is built
#   filter  - code reference that normalizes a local identifier before
#             it is matched against 'pattern'
#   uri     - URI template of the entry itself (only used for VIAF)
my $schemes = {
    BAV => {
        name => 'Vatican Library',
    },
    BNE => {
        name => 'Biblioteca Nacional de España',
    },
    BNF => {
        name    => 'Bibliothèque nationale de France',
        records => 'http://catalogue.bnf.fr/ark:/12148/cb$1',
    },
    DNB => {
        name    => 'Gemeinsame Normdatei',
        records => 'http://d-nb.info/gnd/$1',
    },
    EGAXA => {
        name => 'Bibliotheca Alexandrina',
    },
    ICCU => {
        name => 'Italian National Catalog',
    },
    JPG => {
        name => 'Getty ULAN',
    },
    JPGRI => {
        name => 'Getty Research Institute',
    },
    LAC => {
        name => 'Library and Archives Canada',
    },
    LC => {
        name  => 'Library of Congress Authorities',
        short => 'LOC',

        # LCCN normalization,
        # see http://www.loc.gov/marc/lccn-namespace.html#normalization
        #
        # The identifier is taken from the first argument if one is
        # given, otherwise from $_. The normalized identifier is
        # returned; neither the argument nor $_ is modified.
        filter => sub {
            my $lccn = @_ ? $_[0] : $_;
            return $lccn unless defined $lccn;

            $lccn =~ s/ //g;       # remove all blanks
            $lccn =~ s{/.*}{}s;    # remove forward slash and what follows

            # Always remove the hyphen; left-fill the part after it
            # with zeros when it is shorter than six characters.
            if ( $lccn =~ /^([^-]+)-(.*)$/ ) {
                my ( $prefix, $serial ) = ( $1, $2 );
                if ( length($serial) < 6 ) {
                    $serial = ( '0' x ( 6 - length($serial) ) ) . $serial;
                }
                $lccn = $prefix . $serial;
            }

            return $lccn;
        },
        pattern => qr/^([a-z]*\d+)$/,
        records => 'info:lccn/$1',
    },
    NKC => {
        name => 'National Library of the Czech Republic',
    },
    NLA => {
        name => 'National Library of Australia',
    },
    NLIlat => {
        name => 'National Library of Israel',
    },
    NLIara => {
        name => 'National Library of Israel',
    },
    NLIcyr => {
        name => 'National Library of Israel',
    },
    NLIheb => {
        name => 'National Library of Israel',
    },
    NSZL => {
        name => 'National Széchényi Library (Hungary)',
    },
    NUKAT => {
        name => 'NUKAT, Poland',
    },
    PTBNP => {
        name => 'Biblioteca Nacional de Portugal',
    },
    RERO => {
        name => 'RERO (Switzerland)',
    },
    SELIBR => {
        name    => 'National Library of Sweden',
        records => 'http://libris.kb.se/auth/$1',
    },
    SWNL => {
        name => 'Swiss National Library',
    },
    VIAF => {
        name => 'Virtual International Authority File',
        uri  => 'http://viaf.org/viaf/$1/',
    },
};
=head2 VIAF records | |
VIAF records are a special kind of authority records, that contain | |
mappings to other authority records. You can get VIAF records in | |
different formats (VIAF-XML, MARCXML, UnimarcXML, RDF, JSON). See | |
L<http://www.oclc.org/developer/services/viaf> for API documentation. | |
=head2 Making use of VIAF
VIAF provides a large amount of information. Some typical queries are: | |
=over | |
=item Find authority records for a person | |
Given a name you want to know whether and which authority records | |
exist, so you can create links to an authority. Linking to authorities | |
is best practice in cataloging, so this is an important query. | |
In VIAF you can either search by name per SRU or per a simple REST | |
API. To only find authority records you better use the latter. Here | |
is an example query: | |
L<http://viaf.org/viaf/AutoSuggest?query=Emma%20Goldman> | |
The result is a JSON document that echoes the normalized C<query> and | |
gives a (possibly empty) ordered list of VIAF records as C<result>. Each | |
VIAF record contains the full name of a person as C<term> and local | |
authority record identifiers. The scheme is used in lowercase. | |
=cut | |
# Request parameters, read via CGI's param().
my $name   = param('name');
my $id     = param('id');        # not used yet
my $format = param('format');    # TODO: seealso, rdf, etc.

# Search VIAF for the given name via the AutoSuggest API and print the
# authority record identifiers of every VIAF record that was found.
# Problems are reported on STDERR instead of dying inside decode_json.
my ( $url, $json );
if ( defined $name and length $name ) {
    $url = 'http://viaf.org/viaf/AutoSuggest?query=' . escape($name);

    # print "URL:$url\n";
    my $response = get($url);    # undef if the request failed
    if ( defined $response ) {
        $json = eval { decode_json($response) };
        warn "Could not parse response of $url: $@" unless defined $json;
    }
    else {
        warn "Could not retrieve $url\n";
    }
}
else {
    warn "Missing parameter 'name'\n";
}

# 'result' is missing or null when nothing was found
if ( ref $json eq 'HASH' and ref $json->{result} eq 'ARRAY' ) {

    #print Dumper($json);
    foreach my $record ( @{ $json->{result} } ) {
        handle_record($record);
    }
}
# handle_record($record)
#
# Prints the authority record identifiers contained in one entry of an
# AutoSuggest result. $record is a hash reference; every key except
# 'term' is treated as a lowercase scheme identifier and its value as
# the local identifier. Each pair is printed as "SCHEME|local", followed
# by " = URI" if $schemes has a 'records' URI template for the scheme
# and the local identifier matches the scheme's pattern. Keys are
# processed in sorted order, so the output is reproducible. An empty
# line terminates the record.
sub handle_record {
    my ($record) = @_;
    return unless ref $record eq 'HASH';

    foreach my $key ( sort keys %$record ) {
        next if $key eq 'term';

        my $local  = $record->{$key};
        my $prefix = uc $key;
        print "$prefix|$local";

        my $scheme = $schemes->{$prefix};
        if ( $scheme and $scheme->{records} ) {

            # Normalize the local identifier with the scheme's filter,
            # if it has one. The filter operates on $_.
            my $normalized = $local;
            if ( ref $scheme->{filter} eq 'CODE' ) {
                $normalized = do { local $_ = $local; $scheme->{filter}->() };
            }

            my $pattern = $scheme->{pattern} || qr/^(\d+)$/;
            if ( $normalized =~ $pattern ) {

                # Collect all capture groups of the match ($1, $2, ...);
                # groups that did not take part in the match are undef.
                my @captures = map {
                    defined $-[$_]
                      ? substr( $normalized, $-[$_], $+[$_] - $-[$_] )
                      : undef
                } 1 .. $#+;

                # Fill in all $N placeholders of the template in a single
                # pass, so captured text is never re-interpreted.
                my $uri = $scheme->{records};
                $uri =~ s{\$([1-9]\d*)}{
                    my $value = $captures[ $1 - 1 ];
                    defined $value ? $value : '';
                }gex;

                print " = $uri";
            }
        }
        print "\n";
    }
    print "\n";
}
# TODO: extract link to WorldCat Identities, Wikipedia, and DBPedia | |
# | |
# Example: | |
# http://viaf.org/viaf/39377930/ | |
# http://www.worldcat.org/wcidentities/lccn-n50-34593 | |
# http://wikipedia.org/wiki/Emma_Goldman | |
# http://dbpedia.org/resource/Emma_Goldman |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
The VIAF developer documentation has been moved, it seems - now available at
http://www.oclc.org/developer/develop/web-services/virtual-international-authority-file-viaf.en.html
The OCLC has also published a nice developer handbook, available:
http://www.oclc.org/developer/develop/web-services.en.html
The developer handbook does not mention VIAF, but it may still be useful for working with the OCLC web APIs.
VIAF in the OCLC API Explorer:
https://platform.worldcat.org/api-explorer/VIAF
It seems that the "Jane Austen" resource ID from the example in the API explorer has been updated,
Regarding the SRU syntax used in the API SRUSearch function:
http://www.loc.gov/standards/sru/
Raw VIAF data, in RDF, MARC-21, and plain text formats:
http://viaf.org/viaf/data/