Skip to content

Instantly share code, notes, and snippets.

@joewiz
Created October 18, 2017 21:49
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save joewiz/67b9264d9442c45f00e1f1b108b06615 to your computer and use it in GitHub Desktop.
Save joewiz/67b9264d9442c45f00e1f1b108b06615 to your computer and use it in GitHub Desktop.
Simple date processing, with XQuery
xquery version "3.1";
module namespace dp = "http://history.state.gov/ns/xquery/date-processing";
(:~ Full English month names, wrapped in a single capturing group. :)
declare variable $dp:month-regex := "(January|February|March|April|May|June|July|August|September|October|November|December)";

(:~
 : Day of the month: one or two digits (captured), optionally followed by an
 : ordinal suffix such as "st", "d", "nd", "rd" or "th" (not captured).
 :)
declare variable $dp:day-regex := "(\d{1,2})(?:st|d|nd|rd|th)?";

(:~
 : The month and day patterns keyed by name. The entries reference the
 : variables above rather than repeating the literals, so the map cannot
 : drift out of step with them.
 :)
declare variable $dp:regexes :=
    map {
        "month": $dp:month-regex,
        "day": $dp:day-regex
    };
(:~
 : Scan a text for written-out English dates.
 :
 : Three patterns are assembled from the module-level $dp:month-regex and
 : $dp:day-regex and each is applied independently to the whole of $source:
 :
 :   1. day month year        e.g. "31st July 2015"
 :      groups: 1 = day, 2 = month, 3 = year
 :   2. month day year        e.g. "February 2d, 1865"
 :      groups: 1 = month, 2 = day, 3 = year
 :   3. month day-range year  e.g. "September 1-4, 1929"
 :      groups: 1 = month, 2 = first day, 3 = last day, 4 = year
 :
 : @param $source the text to search; passed unchanged to fn:analyze-string
 : @return a sequence of three fn:analyze-string results, one per pattern,
 :         in the order listed above
 :)
declare function dp:get-dates($source) {
    (: Two days joined by a hyphen, en dash or em dash. :)
    let $day-range-regex := $dp:day-regex || "[-–—]" || $dp:day-regex
    let $year-regex := "(\d{4})"
    (: The comma before the year is optional in every pattern. :)
    let $day-month-year-regex :=
        $dp:day-regex || '\s+' || $dp:month-regex || ',?\s+' || $year-regex
    let $month-day-year-regex :=
        $dp:month-regex || '\s+' || $dp:day-regex || ',?\s+' || $year-regex
    let $month-day-range-year-regex :=
        $dp:month-regex || '\s+' || $day-range-regex || ',?\s+' || $year-regex
    return
        (
            analyze-string($source, $day-month-year-regex),
            analyze-string($source, $month-day-year-regex),
            analyze-string($source, $month-day-range-year-regex)
        )
};
xquery version "3.1";
import module namespace dp = "http://history.state.gov/ns/xquery/date-processing" at "date-processing.xqm";
(: Sample markup holding one date in each supported form; some dates are
   split across inline elements, so only the string value is searched. :)
let $sample :=
    <head>The first date is <hi>February 2d, 1865</hi>. The next date is March 1, 2010. The <strong>third</strong> date is <hi rend="italic">31st July</hi> 2015.<note>This is a note with a date: September 1-4, 1929.</note></head>
return
    dp:get-dates(string($sample))
(:
<date-hit>
<type>single</type>
<string>February 2d, 1865</string>
<date>1865-02-02</date>
</date-hit>
<date-hit>
<type>range</type>
<string>September 1-4, 1929</string>
<date-start>1929-09-01</date-start>
<date-end>1929-09-04</date-end>
</date-hit>
:)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment