View get-groups-result.xml
<group> | |
<line level="0">The President left at 8:48 am</line> | |
<group> | |
<group> | |
<line level="1">-Administration recommendations on Capitol Hill</line> | |
</group> | |
<group> | |
<line level="1">-Improvements</line> | |
</group> | |
<group> |
View collection.xconf.xml
<collection xmlns="http://exist-db.org/collection-config/1.0"> | |
<index> | |
<!-- Old full text index configuration. Deprecated. --> | |
<fulltext default="none" attributes="false"/> | |
<!-- New full text index based on Lucene --> | |
<lucene> | |
<text qname="SPEECH"> | |
<ignore qname="SPEAKER"/> | |
</text> |
View tokenize-sentences.xq
xquery version "1.0"; | |
(: A naive approach to sentence tokenization inspired by http://stackoverflow.com/a/2103653/659732 | |
: | |
: Works well with edited text like newspapers. Parameters like punctuation can/should be edited; | |
: see the section below called "criteria". | |
: | |
: For a more sophisticated approach, see Tibor Kiss and Jan Strunk, "Unsupervised Multilingual | |
: Sentence Boundary Detection", Computational Linguistics, Volume 32, Issue 4, December 2006, | |
: pp. 485-525. Also, see these discussions of sentence tokenization: |
View trim-phrase-to-length.xq
xquery version "3.0"; | |
declare function local:trim-phrase-to-length($phrase, $length) { | |
(: if the phrase is already short enough, we're done :) | |
if (string-length($phrase) le $length) then | |
$phrase | |
(: the phrase is too long, so... :) | |
else | |
(: we will split the phrase into words and look for the longest possible arrangement within our length limit, | |
that doesn't end with boring words :) |
View oauth.xq
xquery version "3.0"; | |
module namespace oauth="http://history.state.gov/ns/xquery/oauth"; | |
(:~ A library module for signing and submitting OAuth requests such as the kind needed for the Twitter v1.1 API. | |
The EXPath Crypto library supplies the HMAC-SHA1 algorithm. The EXPath HTTP Client library makes the HTTP requests. | |
The OAuth standard requires a "nonce" parameter - a random string. Since there is no implementation-independent | |
nonce function in XQuery, we must rely on implementation-specific functions. For eXist-db we use util:uuid(). |
View highlight-matches.xq
xquery version "3.0"; | |
declare namespace fn="http://www.w3.org/2005/xpath-functions"; | |
(: Search within $nodes for matches to a regular expression $pattern and apply a $highlight function :) | |
declare function local:highlight-matches($nodes as node()*, $pattern as xs:string, $highlight as function(xs:string) as item()* ) { | |
for $node in $nodes | |
return | |
typeswitch ( $node ) | |
case element() return |
View http-download.xq
xquery version "3.1"; | |
import module namespace hc="http://expath.org/ns/http-client"; | |
import module namespace util="http://exist-db.org/xquery/util"; | |
import module namespace xmldb="http://exist-db.org/xquery/xmldb"; | |
(: downloads a file from a remote HTTP server at $file-url and saves it to an eXist-db $collection. | |
: we try hard to recognize XML files and save them with the correct mimetype so that eXist-db can | |
: efficiently index and query the files; if it doesn't appear to be XML, though, we just trust | |
: the response headers :) |
View html5-serialization-prolog.xq
xquery version "3.0"; | |
declare namespace output="http://www.w3.org/2010/xslt-xquery-serialization"; | |
declare option output:method "html5"; | |
declare option output:media-type "text/html"; |
View principal-officers-since-carter.xq
xquery version "3.0"; | |
(: Display a list of Principal Officers who are still alive who began serving on/after, or were serving as of, January 20, 1977. :) | |
let $all-people := collection('/db/cms/apps/principals-chiefs/data/')/person | |
let $principals := $all-people[.//role/@class='principal'] | |
let $cutoff-date := '1977-01-20' | |
let $since-cutoff := $principals//role[@class='principal'][event[@type=('appointed', 'appointterminated')]/@when ge $cutoff-date]/ancestor::person | |
let $still-living := $since-cutoff[death/@type = 'unknown' or death = '' or empty(death/node())] | |
return |
View fix-name-capitalization.xq
xquery version "3.0"; | |
declare namespace fn="http://www.w3.org/2005/xpath-functions"; | |
(: Fix problems with mis-capitalized names. For example: | |
Before: MACARTHUR, Douglas II | |
After: MacArthur, Douglas II | |
:) | |
declare function local:fix-name-capitalization($name as xs:string) { | |
(: |
OlderNewer