Skip to content

Instantly share code, notes, and snippets.

@mathias-goebel
Created September 3, 2018 16:16
Show Gist options
  • Save mathias-goebel/828c504c564acd98c76894c4ac4d3adc to your computer and use it in GitHub Desktop.
Save mathias-goebel/828c504c564acd98c76894c4ac4d3adc to your computer and use it in GitHub Desktop.
wikidata to dracor
xquery version "3.1";
import module namespace functx="http://www.functx.com";
declare namespace tei="http://www.tei-c.org/ns/1.0";
(:~
 : Rebuilds the collection /db/gerdracorQ from the TEI files on disk and adds a
 : Wikidata identifier (<idno type="wikidata">) to the publicationStmt of every
 : document listed in the CSV file.
 :
 : CSV layout as consumed below: the first line is a header and is skipped;
 : field 1 is the TEI file name without the ".xml" extension; field 2 is a URL
 : whose part after "/wiki/" is taken as the Q identifier.
 :
 : Returns the root element of each document that received an idno.
 :)
let $path := "......../dlina/GerDraCor and Wikidata.csv"
let $rows :=
    (: split on LF or CRLF so no stray "\r" ends up in the last field;
       drop the header line and any blank lines (e.g. a trailing newline) :)
    (file:read-binary($path)
        => util:binary-to-string()
        => tokenize("\r?\n"))[position() gt 1][normalize-space(.) ne ""]
let $collection-uri := "/db/gerdracorQ"
(: $read-in is never referenced; it is bound only for its side effect of
   (re)creating and filling the collection before the rows are processed :)
let $read-in :=
    let $directory := "......../dlina/dracor-ger/tei/"
    return
        (
            (: only remove the collection when it exists, so that a first run
               against an empty database does not fail :)
            if (xmldb:collection-available($collection-uri))
            then xmldb:remove($collection-uri)
            else (),
            xmldb:create-collection("/db", "gerdracorQ"),
            xmldb:store-files-from-pattern($collection-uri, $directory, "*.xml")
        )
for $row in $rows
(: parse CSV -- plain comma split; quoted fields containing commas are not supported :)
let $fields :=
    for $field in tokenize($row, ",")
    return normalize-space($field)
let $filename := $fields[1] || ".xml"
let $wikidataUrl := $fields[2]
let $Q := substring-after($wikidataUrl, "/wiki/")
(: build the element in the TEI namespace, then rebind it without a prefix so
   it serializes like the surrounding TEI markup :)
let $idno := <tei:idno type="wikidata" xml:base="https://www.wikidata.org/wiki/">{$Q}</tei:idno>
    => functx:change-element-ns-deep("http://www.tei-c.org/ns/1.0", "")
let $tei := doc($collection-uri || "/" || $filename)/*
let $target := $tei//tei:publicationStmt
return
    (: skip rows without a Q id, without a stored document, or whose document
       has no publicationStmt, instead of inserting an empty idno or updating
       an empty target :)
    if ($Q ne "" and exists($target)) then
        let $do := update insert $idno into $target
        return
            $tei
    else
        ()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment