Skip to content

Instantly share code, notes, and snippets.

@WaxCylinderRevival
WaxCylinderRevival / frus-document-dates.xq
Last active October 6, 2016 00:07 — forked from joewiz/frus-document-dates.xq
Look through FRUS documents for dates
xquery version "3.1";
declare namespace tei="http://www.tei-c.org/ns/1.0";
import module namespace dates="http://xqdev.com/dateparser" at "xmldb:exist:///db/apps/twitter/modules/date-parser.xqm";
declare function local:get-date-candidate-field($div) {
let $head := normalize-space(string-join($div/tei:head[1]/node()[not(./self::tei:note)]))
let $source-note := normalize-space(string-join($div//tei:note[@type='source']))
let $body := normalize-space(string-join($div/tei:head/following-sibling::node()))
@WaxCylinderRevival
WaxCylinderRevival / date-to-epoch-time.xqm
Last active October 11, 2016 04:38
Function to convert certain ISO 8601-compliant formats [date(yyyy-mm-dd) or dateTime] to epoch/Unix time
module namespace epoch = "https://gist.github.com/WaxCylinderRevival/ns/xquery/date-to-epoch-time";
declare function epoch:date-to-epoch-time
( $dateString as xs:string? ) as xs:decimal? {
if (empty($dateString))
then ()
else
if (matches($dateString, '^\d{4}-\d{2}-\d{2}$'))
then (xs:dateTime(xs:date($dateString)) - xs:dateTime("1970-01-01T00:00:00-00:00")) div xs:dayTimeDuration('PT1S')
else
@WaxCylinderRevival
WaxCylinderRevival / find-date-mismatches.xq
Created October 7, 2016 12:44 — forked from joewiz/find-date-mismatches.xq
Find FRUS documents whose English dates do not match the date metadata
xquery version "3.1";
(:
Find cases such as this:
<dateline>
<placeName>Washington</placeName>,
<date when="1971-10-05">October 15, 1971</date>.
</dateline>
@WaxCylinderRevival
WaxCylinderRevival / current-date-yyyy-mm-dd
Created October 27, 2016 14:39
OxygenXML Code Template for current-date
${date(yyyy-MM-dd)}
@WaxCylinderRevival
WaxCylinderRevival / current-dateTime
Created October 27, 2016 14:40
OxygenXML Code Template for current-dateTime
${date(yyyy-MM-dd'T'HH:mm:ss.SSSXXX)}
@WaxCylinderRevival
WaxCylinderRevival / find-date-string.xqm
Last active October 18, 2017 18:54
Test of a local function that finds date strings matching a common FRUS date pattern and wraps each one in a date element
import module namespace functx="http://www.functx.com" at "http://www.xqueryfunctions.com/xq/functx-1.0-nodoc-2007-01.xq";
(: declare namespace date-processing='https://history.state.gov/ns/xquery/dates' :)
declare function local:find-date-strings
( $textWithDates as xs:string? ) as element()* {
for $textString in
$textWithDates => data() => serialize() => normalize-space() => functx:get-matches('((\d{1,2}[(st)(d)(nd)(rd)(th)]*\s+(January|February|March|April|May|June|July|August|September|October|November|December),*\s+\d{4})|((January|February|March|April|May|June|July|August|September|October|November|December)\s+\d{1,2}[(st)(nd)(d)(rd)(th)]*,\s+\d{4}))') => serialize() => functx:trim() => tokenize('\s\s+')
@WaxCylinderRevival
WaxCylinderRevival / analyze-string-for-dates.xqm
Last active October 27, 2017 00:25
Analyze input for date patterns common to FRUS dates, using fn:analyze-string
(: declare namespace dp='https://history.state.gov/ns/xquery/date-processing' :)
declare function local:find-date-strings
( $textWithDates as element()? ) as element()* {
let $textString :=
$textWithDates/node()[not(self::note)] => string-join(' ') => normalize-space() => analyze-string('((\d{1,2}(d|nd|rd|st|th)*\s+(January|February|March|April|May|June|July|August|September|October|November|December),*\s+\d{4})|((January|February|March|April|May|June|July|August|September|October|November|December)\s+\d{1,2}(d|nd|rd|st|th)*,*\s+\d{4}))','i')
for $match in data($textString/fn:match)
@WaxCylinderRevival
WaxCylinderRevival / analyze-text-for-date-patterns.xq
Last active October 30, 2017 04:53
analyze-text-for-date-patterns.xq
(: declare namespace dp='https://history.state.gov/ns/xquery/date-processing' :)
declare variable $local:regexes :=
map {
"month-regex" : "(?:January|February|March|April|May|June|July|August|September|October|November|December)",
"month-regex-fr" : "(?:janvier|février|fevrier|mart|avril|mai|juin|juillet|août|aout|septembre|octobre|novembre|décembre|decembre)",
"month-regex-sp" : "(?:enero|febrero|marzo|abril|mayo|junio|julio|agosto|septiembre|setiembre|octubre|noviembre|diciembre)",
"day-regex" : "(?:\d{1,2})(?:st|d|nd|rd|th)?",
"day-range-regex" : "(?:\d{1,2})(?:st|d|nd|rd|th)?\s*[-–—]\s*(?:\d{1,2})(?:st|d|nd|rd|th)?",
"year-regex" : "(?:\d{4})",
(: declare namespace dp='https://history.state.gov/ns/xquery/date-processing' :)
declare variable $local:regexes :=
map {
"month-regex" : "(January|February|March|April|May|June|July|August|September|October|November|December)",
"month-regex-fr" : "(janvier|février|fevrier|mart|avril|mai|juin|juillet|août|aout|septembre|octobre|novembre|décembre|decembre)",
"month-regex-sp" : "(enero|febrero|marzo|abril|mayo|junio|julio|agosto|septiembre|setiembre|octubre|noviembre|diciembre)",
"day-regex" : "(\d{1,2})(?:st|d|nd|rd|th)?",
"day-range-regex" : "(\d{1,2})(?:st|d|nd|rd|th)?\s*[-–—]\s*(\d{1,2})(?:st|d|nd|rd|th)?",
"year-regex" : "(\d{4})",
(: declare namespace dp='https://history.state.gov/ns/xquery/date-processing' :)
declare variable $local:regexes :=
map {
"month-regex" : "(January|February|March|April|May|June|July|August|September|October|November|December)",
"month-regex-fr" : "(janvier|février|fevrier|mart|avril|mai|juin|juillet|août|aout|septembre|octobre|novembre|décembre|decembre)",
"month-regex-sp" : "(enero|febrero|marzo|abril|mayo|junio|julio|agosto|septiembre|setiembre|octubre|noviembre|diciembre)",
"day-regex" : "(\d{1,2})(?:st|d|nd|rd|th)?",
"day-range-regex" : "(\d{1,2})(?:st|d|nd|rd|th)?\s*[-–—]\s*(\d{1,2})(?:st|d|nd|rd|th)?",
"year-regex" : "(\d{4})",