joewiz/list-abbreviations.xq

## list-abbreviations.xq
xquery version "3.1";

(: Lists the acronyms/abbreviations found in the text nodes of the context
 : document. The result is an <abbreviations> element holding one <abbr> per
 : distinct form, with a count attribute giving its number of occurrences. :)

(: Force oXygen to indent the output :)
declare namespace output="http://www.w3.org/2010/xslt-xquery-serialization";

declare option output:method "xml";
declare option output:indent "yes";

(: Derive the key under which a matched acronym is grouped.
 :
 : - A trailing slash is dropped. The match pattern allows a delimiter after
 :   the last capital, so "TV/radio" yields the hit "TV/"; without this step
 :   "TV/" would be listed separately from "TV".
 : - A trailing period is dropped unless the hit is a period-delimited
 :   acronym: "USSR" and "USSR." are treated as identical, while "U.S.S.R."
 :   and "U.S.S.R" stay different.
 : - A plural or possessive suffix is dropped, so "USIA", "USIA’s" and
 :   "USIAs" share one key.
 :
 : $match  the text of one hit from the acronym pattern
 : returns the normalised form used for grouping and display :)
declare function local:grouping-key($match as xs:string) as xs:string {
    let $without-slash := replace($match, '/$', '')
    let $without-period :=
        if (matches($without-slash, '^(\w\.){2,}')) then
            $without-slash
        else
            replace($without-slash, '\.$', '')
    return
        replace($without-period, "’?s$", "")
};

(: Assumed context: you're running this in oXygen's XPath/XQuery Builder pane
 : with an XML document open in the main editor :)
let $text := //text() => string-join(" ") => normalize-space()
(: Look for acronyms: Two or more upper case letters, which may be delimited
 : by periods or slashes; allow plural and possessive forms through, e.g.,
 : COMs, USIA’s :)
let $analysis := analyze-string($text, "([A-Z][\./]?){2,}(’?s)?")
let $hits := $analysis//fn:match
return
    <abbreviations>{
        for $hit in $hits
        group by $str := local:grouping-key($hit)
        (: After grouping, $hit holds every match that shares this key :)
        let $count := count($hit)
        (: Sort a period-delimited form by its letters with one period
         : appended, e.g. "U.S.S.R." sorts as "USSR." :)
        order by
            if (contains($str, '.')) then
                concat(replace($str, '\.', ''), '.')
            else
                $str
        return
            <abbr count="{$count}">{$str}</abbr>
    }</abbreviations>
	xquery version "3.1";

	(: Lists the acronyms/abbreviations found in the text nodes of the context
	 : document. The result is an <abbreviations> element holding one <abbr> per
	 : distinct form, with a count attribute giving its number of occurrences. :)

	(: Force oXygen to indent the output :)
	declare namespace output="http://www.w3.org/2010/xslt-xquery-serialization";

	declare option output:method "xml";
	declare option output:indent "yes";

	(: Derive the key under which a matched acronym is grouped.
	 :
	 : - A trailing slash is dropped. The match pattern allows a delimiter after
	 :   the last capital, so "TV/radio" yields the hit "TV/"; without this step
	 :   "TV/" would be listed separately from "TV".
	 : - A trailing period is dropped unless the hit is a period-delimited
	 :   acronym: "USSR" and "USSR." are treated as identical, while "U.S.S.R."
	 :   and "U.S.S.R" stay different.
	 : - A plural or possessive suffix is dropped, so "USIA", "USIA’s" and
	 :   "USIAs" share one key.
	 :
	 : $match  the text of one hit from the acronym pattern
	 : returns the normalised form used for grouping and display :)
	declare function local:grouping-key($match as xs:string) as xs:string {
	    let $without-slash := replace($match, '/$', '')
	    let $without-period :=
	        if (matches($without-slash, '^(\w\.){2,}')) then
	            $without-slash
	        else
	            replace($without-slash, '\.$', '')
	    return
	        replace($without-period, "’?s$", "")
	};

	(: Assumed context: you're running this in oXygen's XPath/XQuery Builder pane
	 : with an XML document open in the main editor :)
	let $text := //text() => string-join(" ") => normalize-space()
	(: Look for acronyms: Two or more upper case letters, which may be delimited
	 : by periods or slashes; allow plural and possessive forms through, e.g.,
	 : COMs, USIA’s :)
	let $analysis := analyze-string($text, "([A-Z][\./]?){2,}(’?s)?")
	let $hits := $analysis//fn:match
	return
	    <abbreviations>{
	        for $hit in $hits
	        group by $str := local:grouping-key($hit)
	        (: After grouping, $hit holds every match that shares this key :)
	        let $count := count($hit)
	        (: Sort a period-delimited form by its letters with one period
	         : appended, e.g. "U.S.S.R." sorts as "USSR." :)
	        order by
	            if (contains($str, '.')) then
	                concat(replace($str, '\.', ''), '.')
	            else
	                $str
	        return
	            <abbr count="{$count}">{$str}</abbr>
	    }</abbreviations>