Created
October 28, 2013 19:10
-
-
Save joewiz/7202715 to your computer and use it in GitHub Desktop.
Create a tab-separated TSV file - like a comma-separated CSV file - out of a collection of TEI documents, with XQuery
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
xquery version "3.0";

(:~
 : Produces a tab-separated listing (one row per article) from the TEI
 : documents stored in the milestones collection.
 :
 : Each row holds two cells:
 :   1. the whitespace-normalized text of the article's first tei:head
 :   2. a URL built as {site base URL}/{document name without .xml}/{xml:id}
 :
 : Rows are separated by a line feed and cells by a tab. Both separators are
 : written as character references so they survive editors and copy/paste
 : steps that collapse or rewrite literal whitespace.
 :
 : Requires eXist-db: util:document-name() comes from eXist's util module.
 :)

declare namespace tei="http://www.tei-c.org/ns/1.0";

let $site-base-url := 'http://history.state.gov/milestones'
let $milestones-files := collection('/db/cms/apps/tei-content/data/milestones')/tei:TEI
let $tab-delimited-cells :=
    for $file in $milestones-files
    (: The document name minus its '.xml' suffix becomes a URL path segment. :)
    let $filename := substring-before(util:document-name($file), '.xml')
    return
        (: Only divs that carry an xml:id and have no ana attribute are listed. :)
        for $article in $file//tei:div[@xml:id and not(@ana)]
        (: Take the first head only: normalize-space() accepts at most one
           item and would raise a type error on a div with several heads. :)
        let $title := normalize-space($article/tei:head[1])
        let $url := concat($site-base-url, '/', $filename, '/', $article/@xml:id)
        return
            (: &#9; is the tab character. :)
            string-join(($title, $url), '&#9;')
(: &#10; is the line feed character, giving one row per line. :)
let $newline-delimited-rows := string-join($tab-delimited-cells, '&#10;')
return
    $newline-delimited-rows
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment