Skip to content

Instantly share code, notes, and snippets.

@ericleasemorgan
Created February 13, 2014 21:27
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ericleasemorgan/8984187 to your computer and use it in GitHub Desktop.
Save ericleasemorgan/8984187 to your computer and use it in GitHub Desktop.
given a (CrossRef) DOI, parse link header of HTTP request to get fulltext URLs
sub extracter {
# given a (CrossRef) DOI, parse link header of HTTP request to get fulltext URLs
# see also: https://prospect.crossref.org/splash/
# Eric Lease Morgan <emorgan@nd.edu>
# February 12, 2014 - first cut
# require
use HTTP::Request;
use LWP::UserAgent;
# get input
my $doi = shift;
# initialize
my %links = ();
my $pdf = '';
my $xml = '';
my $generic = '';
# build an HTTP request and send it; redirection comes for free
my $request = HTTP::Request->new( GET => $doi );
$request->header( Accept => "text/turtle" );
my $user_agent = LWP::UserAgent->new;
my $response = $user_agent->request( $request );
# process each link in the links header
foreach my $link ( split /, /, $response->header( 'link' ) ) {
# (re-)initialize
my $pointer = 0;
my %attributes = ();
# process each part of a link
foreach my $part ( split /;/, $link ) {
# remove extra spaces
$part =~ s/^ +//;
$part =~ s/ +$//;
# increment
$pointer++;
# skip the first part, the data URL
next if ( $pointer == 1 );
# process subsequent parts
my ( $name, $value ) = split /=/, $part;
$value =~ s/^\W+//;
$value =~ s/\W+$//;
$attributes{ $name } = $value;
}
# associate mime types with urls
if ( $attributes{ 'type' } eq 'application/pdf' ) { $pdf = $attributes{ 'anchor' } }
elsif ( $attributes{ 'type' } eq 'application/xml' ) { $xml = $attributes{ 'anchor' } }
elsif ( ! $attributes{ 'type' } ) { $generic = $attributes{ 'anchor' } }
}
# stuff the result into a data structure
$links{ $doi } = { 'pdf' => $pdf, 'xml' => $xml, 'generic' => $generic };
# done
return \%links;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment