(: Last active May 29, 2019 :)
xquery version "1.0-ml";
declare namespace p = "";
declare option xdmp:mapping "false";
(:~
 : Returns the z-score for the specified confidence value.
 :
 : Common confidence values (0.5, 0.8, 0.9, 0.95, 0.98, 0.99) are answered
 : from a lookup table. For any other value in range, the z-score is obtained
 : by solving a cubic fitted by polynomial regression of confidence
 : percentages to z-scores (reported fit: R^2 = 1):
 :
 :   confidence = 0.0481 z^3 - 0.4007 z^2 + 1.1354 z - 0.098
 :
 : local:solveCubicEquation() solves a x^3 + b x^2 + c x + d = 0, so the
 : coefficients are passed with the signs that make its root equal to -z;
 : math:fabs() then recovers z.
 :
 : @author Matthew Royal
 : @param $confidence confidence level as a fraction, 0.5 <= $confidence <= 1.0
 : @return the z-score (lookup values are xs:decimal, computed values xs:double)
 : @error local:INVALID-CONFIDENCE if $confidence is outside [0.5, 1.0]
 :)
declare function local:getZScore($confidence as xs:double) {
  if ($confidence ge 0.5 and $confidence le 1) then
    if ($confidence eq 0.5) then 0.674
    else if ($confidence eq 0.8) then 1.282
    else if ($confidence eq 0.9) then 1.645
    else if ($confidence eq 0.95) then 1.96
    else if ($confidence eq 0.98) then 2.326
    else if ($confidence eq 0.99) then 2.576
    else
      math:fabs(local:solveCubicEquation(0.0481, 0.4007, 1.1354, 0.098 + $confidence))
  else
    (: fn:error takes the error code (a QName) first and the message second;
       fn:string (not fn:string-join) converts the xs:double for the message. :)
    fn:error(
      xs:QName("local:INVALID-CONFIDENCE"),
      "ERROR: 0.5 <= $confidence <= 1.0, and input was " || fn:string($confidence)
    )
};
(:~
 : Solve a cubic polynomial for x, given coefficients a, b, c, d of
 :
 :   a x^3 + b x^2 + c x + d = 0
 :
 : using the closed-form (Cardano) expression
 :
 :   x = cbrt(q + sqrt(q^2 + s^3)) + cbrt(q - sqrt(q^2 + s^3)) - b/(3a)
 :
 : with q = -b^3/(27 a^3) + b c/(6 a^2) - d/(2a) and s = c/(3a) - b^2/(9 a^2).
 :
 : math:pow() cannot raise a negative number to a fractional power, so a
 : negative radicand is routed through the homegrown cube root local:sqrt3().
 : Both cube roots can be negative, so both are guarded.
 :
 : Only one real root is returned. When q^2 + s^3 is negative, math:sqrt()
 : yields NaN and so does this function.
 :
 : @author Matthew Royal
 : @param $a 1st coefficient (of x^3); must be non-zero
 : @param $b 2nd coefficient (of x^2)
 : @param $c 3rd coefficient (of x)
 : @param $d 4th coefficient (constant term)
 : @return a real root of the cubic, or NaN when the formula leaves the reals
 :)
declare function local:solveCubicEquation($a as xs:double, $b as xs:double, $c as xs:double, $d as xs:double) {
  let $big := ((-math:pow($b,3.0)) div (27.0 * math:pow($a,3.0))) + (($b * $c) div (6.0 * math:pow($a,2.0))) - ($d div (2.0 * $a))
  let $small := ($c div (3.0 * $a)) - (math:pow($b,2.0) div (9.0 * math:pow($a,2.0)))
  let $tail := ($b div (3.0 * $a))
  (: Shared by both cube-root radicands; computed once. :)
  let $root := math:sqrt(math:pow($big,2.0) + math:pow($small,3.0))
  let $plus := $big + $root
  let $minus := $big - $root
  (: Non-negative (and NaN) radicands keep using math:pow, as before;
     negative ones go through local:sqrt3 instead of producing NaN. :)
  let $first :=
    if ($plus lt 0) then local:sqrt3($plus)
    else math:pow($plus, (1.0 div 3.0))
  let $second := local:sqrt3($minus)
  let $x := $first + $second - $tail
  return $x
};
(:~
 : Compute the real cube root of a number.
 :
 : math:pow() cannot raise a negative base to a fractional power, so the
 : root is taken of the magnitude and the sign is re-applied afterwards
 : (the real cube root is an odd function).
 :
 : This replaces a digit-by-digit routine that parsed the decimal string
 : form of the number; that approach lost leading zeros inside 3-digit
 : groups, failed on doubles serialized in scientific notation, and hid
 : failures behind an empty catch block.
 :
 : NOTE: Does not generate complex numbers; only the real root is returned.
 :
 : @author Matthew Royal
 : @param $number value whose cube root is wanted (may be negative)
 : @return the real cube root of $number as xs:double
 :)
declare function local:sqrt3($number as xs:double) {
  if ($number lt 0) then
    -math:pow(-$number, 1.0 div 3.0)
  else
    (: Also reached for 0 and NaN, which math:pow handles directly. :)
    math:pow($number, 1.0 div 3.0)
};
(:~
 : Calculate the sample size for a given population and confidence.
 :
 : Computes the unbounded sample size z^2 * p * (1 - p) / e^2, where z is
 : the z-score for $confidence, p is fixed at 0.5 and e is
 : $confidenceInterval, then adjusts it for the finite population:
 : n / (1 + (n - 1) / population).
 :
 : @author Matthew Royal
 : @param $population number of items in the whole population
 : @param $confidence confidence level as a fraction (passed to local:getZScore)
 : @param $confidenceInterval margin of error as a fraction (e.g. 0.05)
 : @return the (fractional) sample size; callers round up as needed
 :)
declare function local:getSampleSize($population as xs:integer, $confidence as xs:double, $confidenceInterval as xs:double) {
  (: Kept under its own name rather than rebinding $confidence. :)
  let $zScore := local:getZScore($confidence)
  let $choicePickedPercent := 0.5 (: default for computing sample size :)
  let $sampleSize :=
    (math:pow($zScore, 2) * $choicePickedPercent * (1 - $choicePickedPercent))
      div math:pow($confidenceInterval, 2)
  let $sampleSizePopulation :=
    $sampleSize div (1 + (($sampleSize - 1) div $population))
  return $sampleSizePopulation
};
(:~
 : Sample the documents matching $query and report how many of the sampled
 : documents satisfy $function.
 :
 : The population is xdmp:estimate() of the query; the sample size comes
 : from local:getSampleSize(). When $function is supplied, that many
 : documents are fetched in random score order, $function is applied once
 : to the whole sequence, and the TRUE results are counted and scaled up to
 : the population estimate. Without $function only the summary lines are
 : returned.
 :
 : @param $query query selecting the population
 : @param $confidence confidence level as a fraction (e.g. 0.95)
 : @param $confidenceInterval margin of error as a fraction (e.g. 0.05)
 : @param $function optional predicate; receives the sampled documents and
 :        is expected to return one boolean per document, in the same order
 : @param $numDocuments optional cap on matching URIs listed; when absent or
 :        not positive, the rounded-up sample size is used
 : @return a sequence of report strings, followed by matching document URIs
 :         when $function is supplied
 :)
declare function local:computeSample($query as cts:query, $confidence as xs:double, $confidenceInterval as xs:double, $function as xdmp:function?, $numDocuments as xs:integer?) {
  let $queryEstimate := xdmp:estimate(cts:search(/, $query))
  let $sampleSize := local:getSampleSize($queryEstimate, $confidence, $confidenceInterval)
  (: Summary lines shared by both branches. :)
  let $summary := (
    "Query Estimate: " || $queryEstimate,
    "Sample size: " || $sampleSize
      || " " || $confidence * 100
      || "% +/-" || $confidenceInterval * 100 || "% confidence"
  )
  return
    if (fn:exists($function)) then
      (: Kept as xs:double so the divisions below behave as double division. :)
      let $sampleCeil := math:ceil($sampleSize)
      let $documents := cts:search(/, $query, ("score-random"))[1 to xs:integer($sampleCeil)]
      let $bools := xdmp:apply($function, $documents)
      let $results := fn:count(
        for $bool in $bools where $bool return $bool
      )
      return (
        $summary,
        "XQuery function returned TRUE in " || (($results div $sampleCeil) * 100.0)
          || "% of the cases (" || $results || " of " || xs:integer($sampleCeil) || "), "
          || "scaling to an estimated ~" || math:ceil(($results div $sampleCeil) * $queryEstimate)
          || " documents for the entire population " || $queryEstimate || ".",
        let $numDocuments := (if (fn:exists($numDocuments) and $numDocuments gt 0) then $numDocuments else xs:int($sampleCeil))
        (: Pair each document with its boolean by position. :)
        let $affectedDocs := $documents[let $p := fn:position() return $bools[$p] eq fn:true()]/fn:base-uri()
        return (
          "Matching URIs (Up to "||$numDocuments||"):",
          $affectedDocs[1 to $numDocuments]
        )
      )
    else (
      $summary
    )
};
(: Example run: among current, non-deleted 2015 company-directory records,
   estimate how many contain an employee name of 60 or more characters. :)
let $query :=
  cts:element-query( xs:QName("p:companyDirectory"),
    (: NOTE(review): the combinator wrapping these sub-queries was lost from
       the source; cts:and-query is assumed - confirm against the original. :)
    cts:and-query((
      cts:element-query(xs:QName("p:employeeName"), cts:and-query(()) ),
      cts:element-value-query(xs:QName("p:deleted"), "false"),
      cts:element-value-query(xs:QName("p:isCurrentVersion"), "true"),
      cts:element-value-query(xs:QName("p:hireYear"), "2015")
    ))
  )
(: Predicate handed to local:computeSample: one boolean per sampled document,
   TRUE when any employeeName in it is at least 60 characters long. :)
let $function := function ($documents as item()*) as xs:boolean* {
  for $doc in $documents
  let $employeeNames := $doc//p:employeeName/fn:string()
  return fn:exists(
    for $employeeName in $employeeNames
    where fn:string-length($employeeName) ge 60
    return $employeeName
  )
}
return local:computeSample($query, 0.95, 0.05, $function, 10)
(: Revision note: updated the default number of listed documents from "5" to the rounded-up sample size. :)

