Skip to content

Instantly share code, notes, and snippets.

@EdWarga
Last active June 6, 2018 15:28
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save EdWarga/be7b47018707507c69f47b53b1c45d72 to your computer and use it in GitHub Desktop.
Save EdWarga/be7b47018707507c69f47b53b1c45d72 to your computer and use it in GitHub Desktop.
Code Examples to support proposal for the 2018 Open Repositories conference. See files below for example scripts created for repository management activities. These are simple ad hoc examples created by a non-developer library professional. They have been coded using BaseX software and may require that software to run.
(:code snippet originally created for: Anderson, C., Stringer-Hye, & Warga, E. (2017). graphs-without-ontologies: Data and Code for 2015 VIVO Conference Presentation. XQuery, Heard Library. Retrieved from https://github.com/HeardLibrary/graphs-without-ontologies
copied from: https://github.com/HeardLibrary/graphs-without-ontologies/blob/master/XQuery/get-OAI-data.xq
This script will harvest metadata from an OAI-PMH content provider. Update the base URL, setSpec, and metadata format to target the repository, collection, and particular metadata format desired.
I have been using this script to harvest the DSpace METS records for items in test collections to perform quality control checks. See the qualityCheckQueries.xqy file for queries that check certain aspects of item metadata stored in the PREMIS section of the DSpace METS documents.
:)
xquery version "3.1";
declare namespace oai = "http://www.openarchives.org/OAI/2.0/";
(: Retrieves metadata records for an entire OAI-PMH collection :)
(: Adds records to BaseX database:)
(:~
 : Sends the initial OAI-PMH request and collects every page of the result.
 :
 : The request URL is the plain concatenation of the three arguments. If the
 : response carries a non-empty oai:resumptionToken, the remaining pages are
 : fetched through local:resume and appended after the first page.
 :
 : @param $base-url  first part of the request URL
 : @param $verb      query-string part appended to the base URL
 : @param $set-spec  query-string part appended after $verb
 : @return the first response document followed by any resumed pages
 :)
declare function local:request($base-url as xs:string, $verb as xs:string, $set-spec as xs:string) as document-node()*
{
  let $first-page := fn:doc(fn:concat($base-url, $verb, $set-spec))
  let $next-token := $first-page//oai:resumptionToken/text()
  return (
    $first-page,
    (: an absent or empty token means there is nothing left to fetch :)
    if (fn:exists($next-token))
    then local:resume($base-url, $next-token)
    else ()
  )
};
(:~
 : Fetches the page identified by an OAI-PMH resumption token and keeps
 : following tokens until a response no longer carries one.
 :
 : @param $base-url  first part of the request URL
 : @param $token     resumptionToken text taken from the previous response
 : @return the response for $token followed by all later pages, in order
 :)
declare function local:resume($base-url as xs:string, $token as xs:string) as document-node()*
{
  (: The ampersand must be written as an entity reference: a bare ampersand
     inside an XQuery string literal is a static error. :)
  let $verb := "?verb=ListRecords&amp;resumptionToken="
  (: Percent-encode the token so that reserved characters in it cannot alter
     the query string. :)
  let $request := $base-url || $verb || fn:encode-for-uri($token)
  let $response := fn:doc($request)
  let $new-token := $response//oai:resumptionToken/text()
  return
    if (fn:empty($new-token)) then
      $response
    else
      ($response, local:resume($base-url, $new-token))
};
(: Harvest every record of one OAI-PMH set and add each record to the "OAI"
   database. Replace the {INSERT_URL} and {SET_ID} placeholders before running.
   Ampersands in the string literals are written as entity references because a
   bare ampersand inside an XQuery string literal is a static error. :)
let $base-url := "{INSERT_URL}" (: base url :)
let $verb := "?verb=ListRecords&amp;metadataPrefix=mets"
let $set-spec := "&amp;set={SET_ID}" (: target collection :)
let $response := local:request($base-url, $verb, $set-spec)
for $record in $response//oai:record
return
  (: every record is added under the same database path, "path.xml" :)
  db:add("OAI", $record, "path.xml")
(:This script takes XML records for bibliographic citations, created from a CSV export of the faculty information system, and transforms them into MODS records for use in Zotero and other applications.:)
xquery version "3.1";
declare namespace mods="http://www.loc.gov/mods/v3";
declare namespace local="http://library.tamucc.edu/local";
(: Builds one MODS record per distinct citation ID found in the rows of the
   'JournalRaw' document (/csv/record). :)
let $record := doc('JournalRaw')/csv/record
for $ID in fn:distinct-values($record/ID)
(: $rows: every row that shares this citation ID.
   $first: the first such row; all citation-level fields are read from it. :)
let $rows := $record[ID/text() = $ID]
let $first := $rows[1]
let $title :=
  <mods:titleInfo>
    <mods:title>{$first/TITLE/text()}</mods:title>
  </mods:titleInfo>
(: One mods:name per author last-name column (element name contains both
   "AUTH" and "LNAME"). The matching first/middle-name columns are located by
   swapping "LNAME" for "FNAME"/"MNAME" in the column name, so that author 1 is
   never confused with author 11 or 21.
   NOTE(review): "middle" does not appear to be one of the namePart type values
   defined by MODS; confirm whether downstream consumers rely on it. :)
let $names :=
  for $LNAME in $first/*[contains(name(), "AUTH") and contains(name(), "LNAME")]
  let $column := name($LNAME)
  let $given-column := fn:replace($column, "LNAME", "FNAME")
  let $middle-column := fn:replace($column, "LNAME", "MNAME")
  return
    <mods:name type="personal">
      <mods:namePart type="family">{$LNAME/text()}</mods:namePart>
      <mods:namePart type="given">{$first/*[name() = $given-column]/text()}</mods:namePart>
      <mods:namePart type="middle">{$first/*[name() = $middle-column]/text()}</mods:namePart>
      <mods:role><mods:roleTerm authority="marcrelator">author</mods:roleTerm></mods:role>
    </mods:name>
(: One local:tamuccAuthor per distinct Email among the rows of this citation.
   Rows are matched on the exact Email value, so one address cannot match
   another address that merely contains it. :)
let $extension :=
  <mods:extension>
  {
    for $email in fn:distinct-values($rows/Email)
    let $person := $rows[Email = $email]
    return
      <local:tamuccAuthor>
        <local:firstName>{fn:distinct-values($person/First_Name/text())}</local:firstName>
        <local:lastName>{fn:distinct-values($person/Last_Name/text())}</local:lastName>
        <local:college>{fn:distinct-values($person/College/text())}</local:college>
        <local:department>{fn:distinct-values($person/Department/text())}</local:department>
      </local:tamuccAuthor>
  }
  </mods:extension>
let $genre := <mods:genre>article</mods:genre>
let $pubDate :=
  <mods:originInfo>
    <mods:dateIssued>{$first/DTY_PUB/text()}</mods:dateIssued>
  </mods:originInfo>
(: Host journal: title, publication date, volume, issue and page range. :)
let $journalInfo :=
  <mods:relatedItem type="host">
    <mods:titleInfo>
      <mods:title>{$first/JOURNAL_NAME/text()}</mods:title>
    </mods:titleInfo>
    {$pubDate}
    <mods:part>
      <mods:detail type="volume">
        <mods:number>{$first/VOLUME/text()}</mods:number>
      </mods:detail>
      <mods:detail type="issue">
        <mods:number>{$first/ISSUE/text()}</mods:number>
      </mods:detail>
      <mods:extent unit="page">
        <mods:start>{$first/START_PAGE/text()}</mods:start>
        <mods:end>{$first/END_PAGE/text()}</mods:end>
      </mods:extent>
    </mods:part>
  </mods:relatedItem>
let $URL :=
  <mods:location>
    <mods:url>{$first/WEB_ADDRESS/text()}</mods:url>
  </mods:location>
(:Build MODS Records:)
let $mods :=
  <mods:mods>
    <mods:identifier type="local">{$ID}</mods:identifier>
    {$genre}
    {$title}
    {$pubDate}
    {$journalInfo}
    {$URL}
    {$names}
    {$extension}
  </mods:mods>
return
  $mods
xquery version "3.1";
(:code snippet to query METS records from OAI-PMH harvest
The variables hold queries that identify items with no JPEG attached, items with 0-byte JPEGs, and items with more than one JPEG attached.
These queries satisfied quality testing requirements for the specific collection being checked.:)
declare namespace oai = "http://www.openarchives.org/OAI/2.0/";
declare namespace mods = "http://www.loc.gov/mods/v3";
declare namespace premis="http://www.loc.gov/standards/premis";
(: Quality-control queries over the "OAI" database. Each variable holds the
   identifiers of the records matching one check; change the final return to
   pick the check to run.
   The record, header and identifier steps carry the oai: prefix because OAI-PMH
   records are namespaced; unprefixed steps only match no-namespace elements.
   Predicates use .// so that each test looks only inside the record being
   tested rather than at the whole containing document. :)
let $records := db:open("OAI")//oai:record
(: records with no PREMIS object identifier at all :)
let $noJpeg := $records[empty(.//premis:objectIdentifierValue)]/oai:header/oai:identifier
(:let $noJpeg := $records[.//mods:dateAvailable[contains(text(),"2017-11-16")] and empty(.//premis:objectIdentifierValue)]/oai:header/oai:identifier:)
(: records with a PREMIS size of exactly "0" :)
let $emptyFile := $records[.//premis:size/text() = "0"]/oai:header/oai:identifier
(: every PREMIS size value in the database, for manual inspection :)
let $test := db:open("OAI")//premis:size/text()
(: records with more than one PREMIS object identifier :)
let $multipleJPEGS := $records[count(.//premis:objectIdentifierValue) > 1]/oai:header/oai:identifier
return $emptyFile
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment