Created
February 16, 2017 19:05
-
-
Save tonyahowe/baf9ac9196917efa17a6fb5ddf2d7ae6 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
xquery version "3.0"; | |
module namespace app="http://exist-db.org/apps/"; | |
import module namespace templates="http://exist-db.org/xquery/templates"; | |
import module namespace config="http://exist-db.org/apps/NiC/config" at "config.xqm"; | |
import module namespace tei2="http://exist-db.org/xquery/app/tei2html" at "tei2html.xql"; | |
import module namespace kwic="http://exist-db.org/xquery/kwic" at "resource:org/exist/xquery/lib/kwic.xql"; | |
(:~ | |
: This is a sample templating function. It will be called by the templating module if | |
: it encounters an HTML element with an attribute: data-template="app:test" or class="app:test" (deprecated). | |
: The function has to take 2 default parameters. Additional parameters are automatically mapped to | |
: any matching request or function parameter. | |
: | |
: @param $node the HTML node with the attribute which triggered this call | |
: @param $model a map containing arbitrary data - used to pass information between template calls | |
:) | |
declare namespace tei="http://www.tei-c.org/ns/1.0"; | |
declare namespace functx = "http://www.functx.com"; | |
(:~
 : Sample templating function: emits a paragraph stamped with the current
 : date and time.
 :
 : @param $node the HTML node that triggered the call (not read here)
 : @param $model the templating model (not read here)
 : @return a p element
 :)
declare function app:test($node as node(), $model as map(*)) {
    let $now := current-dateTime()
    return
        <p>Template output generated by function app:test at {$now}. The templating
function was triggered by the class attribute <code>class="app:test"</code>.</p>
};
(:~
 : Tell whether $arg contains at least one of the given search strings.
 :
 : @param $arg the string to inspect
 : @param $searchStrings the candidate substrings
 : @return true if any candidate occurs in $arg, false otherwise
 :       (also false when no candidates are given)
 :)
declare function functx:contains-any-of
  ( $arg as xs:string?,
    $searchStrings as xs:string* ) as xs:boolean {
    exists($searchStrings[contains($arg, .)])
};
(:~
 : Count the non-overlapping occurrences of $pattern in $arg.
 :
 : Adapted from FunctX: $pattern is passed through functx:escape-for-regex()
 : before it is used as the tokenize() regex, and $arg is escaped twice.
 : That escaping is kept exactly as it was, because callers in this module
 : pass patterns such as '\(' that only match the escaped input.
 :
 : tokenize() returns the empty sequence for an empty or zero-length input,
 : which made the former "count(...) - 1" yield -1; that case now returns 0.
 :
 : @param $arg the string to search
 : @param $pattern the text to count
 : @return the number of matches, never negative
 :)
declare function functx:number-of-matches
  ( $arg as xs:string?,
    $pattern as xs:string ) as xs:integer {
    let $haystack := functx:escape-for-regex(functx:escape-for-regex($arg))
    let $needle := functx:escape-for-regex($pattern)
    let $pieces := count(tokenize($haystack, $needle))
    return
        if ($pieces gt 0) then $pieces - 1 else 0
};
(:~
 : Backslash-escape the regular-expression metacharacters in $arg.
 :
 : @param $arg the string to escape
 : @return the escaped string
 :)
declare function functx:escape-for-regex
  ( $arg as xs:string? ) as xs:string {
    let $metachars := '(\.|\[|\]|\\|\||\-|\^|\$|\?|\*|\+|\{|\}|\(|\))'
    let $escaped := '\\$1'
    return
        replace($arg, $metachars, $escaped)
};
(:~
 : Put every TEI document of the headnotes collection into the model under
 : the key "headnotes", ordered by the value of app:work-title().
 :
 : @param $node the template node (not read here)
 : @param $model the incoming model (not read here)
 : @return a map with the single key "headnotes"
 :)
declare
    %templates:wrap
function app:list-headnotes($node as node(), $model as map(*)) {
    let $sorted :=
        for $entry in collection($config:headnotes)/tei:TEI
        order by app:work-title($entry)
        return $entry
    return
        map { "headnotes" := $sorted }
};
(:~
 : Look up the node with the given id in the headnotes collection and
 : expose it to nested templates as "headnote".
 :
 : @param $node the template node (not read here)
 : @param $model the incoming model (not read here)
 : @param $id the id to resolve
 : @return a map with the single key "headnote"
 :)
declare
    %templates:wrap
function app:headnote($node as node(), $model as map(*), $id as xs:string?) {
    map {
        "headnote" := collection($config:headnotes)//id($id)
    }
};
(:~
 : Render a link to the current headnote, labelled with its title.
 : The link target is the template node's href, followed by the document's
 : xml:id and, when $type is given, a dot and $type.
 :
 : @param $node the template node; its href attribute is the link prefix
 : @param $model the model; "headnote" is read from it
 : @param $type optional suffix appended after a dot
 : @return an XHTML a element
 :)
declare function app:headnote-title($node as node(), $model as map(*), $type as xs:string?) {
    let $tei := $model("headnote")/ancestor-or-self::tei:TEI
    let $extension :=
        if ($type) then
            "." || $type
        else
            ()
    return
        <a xmlns="http://www.w3.org/1999/xhtml" href="{$node/@href}{$tei/@xml:id}{$extension}">{ app:headnote-title($tei) }</a>
};
(:~
 : Return the text nodes of the first title in the headnote's titleStmt.
 :
 : @param $headnote a TEI root element
 : @return the text node children of the first tei:title
 :)
declare function app:headnote-title($headnote as element(tei:TEI)) {
    let $title-stmt := $headnote/tei:teiHeader/tei:fileDesc/tei:titleStmt
    return
        $title-stmt/tei:title[1]/text()
};
(:~
 : List works: put every TEI document of the data collection into the model
 : under the key "works", ordered by the value of app:work-title().
 :
 : @param $node the template node (not read here)
 : @param $model the incoming model (not read here)
 : @return a map with the single key "works"
 :)
declare
    %templates:wrap
function app:list-works($node as node(), $model as map(*)) {
    let $sorted :=
        for $entry in collection($config:data)/tei:TEI
        order by app:work-title($entry)
        return $entry
    return
        map { "works" := $sorted }
};
(:~
 : Look up the node with the given id in the data collection and expose it
 : to nested templates as "work".
 :
 : @param $node the template node (not read here)
 : @param $model the incoming model (not read here)
 : @param $id the id to resolve
 : @return a map with the single key "work"
 :)
declare
    %templates:wrap
function app:work($node as node(), $model as map(*), $id as xs:string?) {
    map {
        "work" := collection($config:data)//id($id)
    }
};
(:~
 : Render a link to the current work, labelled with its title.
 : The link target is the template node's href, followed by the document's
 : xml:id and, when $type is given, a dot and $type.
 :
 : @param $node the template node; its href attribute is the link prefix
 : @param $model the model; "work" is read from it
 : @param $type optional suffix appended after a dot
 : @return an XHTML a element
 :)
declare function app:work-title($node as node(), $model as map(*), $type as xs:string?) {
    let $tei := $model("work")/ancestor-or-self::tei:TEI
    let $extension :=
        if ($type) then
            "." || $type
        else
            ()
    return
        <a xmlns="http://www.w3.org/1999/xhtml" href="{$node/@href}{$tei/@xml:id}{$extension}">{ app:work-title($tei) }</a>
};
(:~
 : Return the text nodes of the first title in the work's titleStmt.
 :
 : @param $work a TEI root element
 : @return the text node children of the first tei:title
 :)
declare function app:work-title($work as element(tei:TEI)) {
    let $title-stmt := $work/tei:teiHeader/tei:fileDesc/tei:titleStmt
    return
        $title-stmt/tei:title[1]/text()
};
(:~
 : Build a readable author line from all tei:surname elements of the work.
 : One or two names are joined with " and "; three or more are listed with
 : commas and a final ", and ".
 :
 : @param $node the template node (not read here)
 : @param $model the model; "work" is read from it
 : @return the joined surnames as a string
 :)
declare function app:work-author($node as node(), $model as map(*)) {
    let $names := $model("work")//tei:surname
    let $total := count($names)
    return
        if ($total gt 2) then
            string-join(subsequence($names, 1, $total - 1), ', ')
                || ', and '
                || $names[$total]
        else
            string-join($names, ' and ')
};
(:~
 : Return the status attribute value(s) of tei:availability in the current work.
 :
 : @param $node the template node (not read here)
 : @param $model the model; "work" is read from it
 : @return the status value(s) as strings
 :)
declare function app:work-status($node as node(), $model as map(*)) {
    $model("work")//tei:availability/@status/string()
};
(:~
 : Return the title attribute value(s) of every tei:date in the current work.
 :
 : @param $node the template node (not read here)
 : @param $model the model; "work" is read from it
 : @return the attribute value(s) as strings
 :)
declare function app:work-date($node as node(), $model as map(*)) {
    $model("work")//tei:date/@title/string()
};
(:~
 : Produce the attributes for a work's checkbox: a value attribute holding
 : the work's xml:id and, if that id is among $target-texts, checked="checked".
 :
 : @param $node the template node (not read here)
 : @param $model the model; "work" is read from it
 : @param $target-texts the ids that should be pre-selected
 : @return one or two attribute nodes
 :)
declare
    %templates:wrap
function app:checkbox($node as node(), $model as map(*), $target-texts as xs:string*) {
    let $work-id := $model("work")/@xml:id/string()
    return (
        attribute value { $work-id },
        if ($work-id = $target-texts) then
            attribute checked { "checked" }
        else
            ()
    )
};
(:~
 : Render the selected works as HTML and put the result into the model
 : under "works", ordered by work title.
 :
 : The previous body iterated over the id strings themselves and handed them
 : to app:work-title#1, which requires a tei:TEI element, so every call failed
 : with a type error. The ids are now resolved to the TEI documents of the
 : data collection first; the value "all" selects every document.
 :
 : @param $node the template node (not read here)
 : @param $model the incoming model (not read here)
 : @param $id not read here
 : @param $target-texts the xml:id of the work to show, or "all" (default)
 : @return a map with the single key "works"
 :)
declare
    %templates:default("target-texts", "all")
function app:display-checked($node as node(), $model as map(*), $id as xs:string?, $target-texts as xs:string?) {
    let $candidates := collection($config:data)/tei:TEI
    let $selected :=
        if ($target-texts = 'all') then
            $candidates
        else
            $candidates[@xml:id = $target-texts]
    return
        map {
            "works" :=
                for $work in $selected
                order by app:work-title($work)
                return
                    tei2:tei2html($work)
        }
};
(:~
 : Return the work types recorded for the current work in work-types.xml
 : as a sorted, comma-separated string.
 :
 : @param $node the template node (not read here)
 : @param $model the model; "work" is read from it
 : @return the joined type values
 :)
declare function app:work-type($node as node(), $model as map(*)) {
    let $tei := $model("work")/ancestor-or-self::tei:TEI
    let $work-id := $tei/@xml:id/string()
    let $registry := doc(concat($config:data-root, '/', 'work-types.xml'))
    let $sorted :=
        for $entry in $registry//item[id = $work-id]/value
        order by $entry
        return $entry
    return
        string-join($sorted, ', ')
};
(:~
 : Turn the template node into a link to the EPUB version of the current work.
 : The target is the node's href followed by the work's xml:id and ".epub".
 :
 : @param $node the template node; its href and children are reused
 : @param $model the model; "work" is read from it
 : @return an XHTML a element
 :)
declare function app:epub-link($node as node(), $model as map(*)) {
    let $work-id := $model("work")/@xml:id/string()
    let $label := $node/node()
    return
        <a xmlns="http://www.w3.org/1999/xhtml" href="{$node/@href}{$work-id}.epub">{ $label }</a>
};
(:~
 : Turn the template node into a link to the PDF version of the current work.
 : The target is the node's href followed by the work's xml:id and ".pdf".
 :
 : @param $node the template node; its href and children are reused
 : @param $model the model; "work" is read from it
 : @return an XHTML a element
 :)
declare function app:pdf-link($node as node(), $model as map(*)) {
    let $work-id := $model("work")/@xml:id/string()
    let $label := $node/node()
    return
        <a xmlns="http://www.w3.org/1999/xhtml" href="{$node/@href}{$work-id}.pdf">{ $label }</a>
};
(:~
 : Link to the XML source of the current work. If the collection
 : /db/apps/eXide is available the link opens the document in eXide,
 : otherwise it points at the document below /exist/rest.
 :
 : @param $node the template node; its children become the link label
 : @param $model the model; "work" is read from it
 : @return an XHTML a element opening in a new window
 :)
declare function app:xml-link($node as node(), $model as map(*)) {
    let $path := document-uri(root($model("work")))
    let $via-eXide := templates:link-to-app("http://exist-db.org/apps/eXide", "index.html?open=" || $path)
    let $via-rest := '/exist/rest' || $path
    let $href :=
        if (xmldb:collection-available('/db/apps/eXide')) then
            $via-eXide
        else
            $via-rest
    return
        <a xmlns="http://www.w3.org/1999/xhtml" href="{$href}" target="_blank">{ $node/node() }</a>
};
(:~
 : Copy the template element, appending all current request parameters to
 : its href as a query string.
 :
 : Changes against the previous version:
 :  - the separator is written as the entity reference for an ampersand
 :    (a bare ampersand is not allowed in an XQuery string literal);
 :  - parameter names and values are passed through encode-for-uri(), so
 :    values containing spaces, ampersands or equals signs no longer
 :    corrupt the link;
 :  - no dangling "?" is appended when the request has no parameters.
 :
 : @param $node the element to copy; its href is the link prefix
 : @param $model the templating model (not read here)
 : @return a copy of $node with the extended href
 :)
declare function app:copy-params($node as node(), $model as map(*)) {
    let $pairs :=
        for $name in request:get-parameter-names()
        for $value in request:get-parameter($name, ())
        return
            encode-for-uri($name) || "=" || encode-for-uri($value)
    let $query-string := string-join($pairs, "&amp;")
    return
        element { node-name($node) } {
            $node/@* except $node/@href,
            attribute href {
                if ($query-string) then
                    $node/@href || "?" || $query-string
                else
                    string($node/@href)
            },
            $node/node()
        }
};
(:~
 : Build the multi-select control listing every distinct work type found in
 : work-types.xml, preceded by an "All Work Types" option, and hand it to
 : templates:form-control().
 :
 : @param $node the template node (not read here)
 : @param $model the templating model, forwarded to templates:form-control()
 : @return the result of templates:form-control()
 :)
declare function app:work-types($node as node(), $model as map(*)) {
    let $available := distinct-values(doc(concat($config:data-root, '/', 'work-types.xml'))//value)
    return
        templates:form-control(
            <select multiple="multiple" name="work-types" class="form-control">
                <option value="all">All Work Types</option>
                {
                    for $label in $available
                    return
                        <option value="{$label}">{$label}</option>
                }
            </select>,
            $model
        )
};
(:~
 : Render the part of the current work identified by $id as HTML. When a
 : query is given, the text is first expanded with util:expand() so that
 : full-text matches in heads and paragraphs carry match markup.
 :
 : Changes against the previous version:
 :  - the two match tests are combined with the union operator instead of
 :    a comma, so a text matching both in a head and in a paragraph is
 :    expanded once instead of twice;
 :  - if the query matches nothing in the text, the unexpanded text is
 :    rendered instead of nothing at all.
 :
 : @param $node the template node (not read here)
 : @param $model the model; "work" is read from it
 : @param $id the id of the node to display
 : @param $query optional full-text query used for match highlighting
 : @return the result of tei2:tei2html() for each selected node
 :)
declare function app:view($node as node(), $model as map(*), $id as xs:string, $query as xs:string?) {
    for $text in $model("work")/id($id)
    let $matching :=
        if ($query) then
            $text[.//tei:head[ft:query(., $query)]] | $text[.//tei:p[ft:query(., $query)]]
        else
            ()
    let $display :=
        if (exists($matching)) then
            util:expand($matching, "add-exist-id=all")
        else
            $text
    return
        tei2:tei2html($display)
};
(:~
 : Placeholder: currently only emits an XML comment describing the
 : intended check.
 :
 : @param $node the template node (not read here)
 : @param $model the templating model (not read here)
 : @param $id not read here
 : @return a comment node
 :)
declare function app:are-headnotes($node as node(), $model as map(*), $id as xs:string) {
    comment { " here test for existence of headnotes in work-types.xml file " }
};
(:~
 : Render the node(s) with the given id, looked up from the "work" model
 : entry, as HTML.
 :
 : @param $node the template node (not read here)
 : @param $model the model; "work" is read from it
 : @param $id the id of the node to display
 : @return the result of tei2:tei2html() for each selected node
 :)
declare function app:view-headnote($node as node(), $model as map(*), $id as xs:string) {
    $model("work")/id($id) ! tei2:tei2html(.)
};
(:~
 : Emit one linked full-width image per tei:pb of the current work, using
 : the value of its facs attribute as the file name below ../images/.
 :
 : @param $node the template node (not read here)
 : @param $model the model; "work" is read from it
 : @return a sequence of p elements
 :)
declare function app:pageImages($node as node(), $model as map(*)) {
    for $page-break in $model("work")//tei:pb
    let $image := "../images/" || $page-break/@facs
    return
        <p><a href="{$image}"><img src="{$image}" width="100%"/></a></p>
};
(:~
 : List the source imprints of the current work. For every tei:imprint the
 : place, publisher and date are output, followed by the extent (as a link
 : when its type is "online") and any note.
 :
 : @param $node the template node (not read here)
 : @param $model the model; "work" is read from it
 : @return one p element per imprint
 :)
declare function app:sources($node as node(), $model as map(*)) {
    for $imprint in $model("work")//tei:imprint
    return
        <p><li class="list-unstyled"><small>
{$imprint//tei:pubPlace}: {$imprint//tei:publisher}. {$imprint//tei:date}.
{
    if ($imprint//tei:extent/@type = "online") then
        <a href="{$imprint//tei:extent}">Link.</a>
    else
        $imprint//tei:extent
}
{$imprint//tei:note}
</small></li></p>
};
(:~
 : Execute the query. The search results are not output immediately;
 : instead they are passed to nested templates through the model, under
 : the keys "hits" and "query".
 :
 : When no usable query can be built from the input, the hits and query
 : previously stored in the session are returned. Otherwise the search is
 : run, and its hits and query expression are stored in the session.
 :
 : @param $node the template node(s) (not read here)
 : @param $model the incoming model (not read here)
 : @param $query the raw user input
 : @param $mode the search mode, forwarded to app:create-query()
 : @param $scope "narrow" searches paragraphs and heads, anything else
 :        searches whole tei:text elements
 : @param $work-types selected work types, or "all"
 : @param $target-texts selected work ids, or "all"
 : @return a map with the keys "hits" and "query"
 :)
declare
    %templates:default("mode", "any")
    %templates:default("scope", "narrow")
    %templates:default("work-types", "all")
    %templates:default("target-texts", "all")
function app:query($node as node()*, $model as map(*), $query as xs:string?, $mode as xs:string, $scope as xs:string,
    $work-types as xs:string+, $target-texts as xs:string+) {
    let $expr := app:create-query($query, $mode)
    return
        if (empty($expr) or $expr = "") then
            (: nothing to search for: fall back to what the session holds :)
            map {
                "hits" := session:get-attribute("apps.NiC"),
                "query" := session:get-attribute("apps.NiC.query")
            }
        else
            (: ids of the works belonging to the selected work types :)
            let $ids-by-type := distinct-values(doc(concat($config:data-root, '/', 'work-types.xml'))//item[value = $work-types]/id)
            (: No individual works selected: search the works selected by type.
               Individual works selected: use them, narrowed to the selected
               types unless the type selection is "all". :)
            let $selection :=
                if ($target-texts = 'all' and $work-types = 'all') then
                    'all'
                else if ($target-texts = 'all') then
                    $ids-by-type
                else if ($work-types = "all") then
                    $target-texts
                else
                    $target-texts[. = $ids-by-type]
            let $documents :=
                if ($selection = 'all') then
                    collection($config:data-root)/tei:TEI
                else
                    collection($config:data-root)//tei:TEI[@xml:id = $selection]
            let $matches :=
                if ($scope eq 'narrow') then
                    ($documents//tei:p[ft:query(., $expr)], $documents//tei:head[ft:query(., $expr)])
                else
                    $documents//tei:text[ft:query(., $expr)]
            let $ranked :=
                for $match in $matches
                order by ft:score($match) descending
                return $match
            (: remember the result so that a later call without a query can reuse it :)
            let $store := (
                session:set-attribute("apps.NiC", $ranked),
                session:set-attribute("apps.NiC.query", $expr)
            )
            return
                (: Process nested templates :)
                map {
                    "hits" := $ranked,
                    "query" := $expr
                }
};
(:~
 : Expose the selected text ids to nested templates under "coursepacks".
 :
 : @param $node the template node(s) (not read here)
 : @param $model the incoming model (not read here)
 : @param $target-texts the selected ids; defaults to "all"
 : @return a map with the single key "coursepacks"
 :)
declare
    %templates:default("mode", "any")
    %templates:default("scope", "narrow")
    %templates:default("work-types", "all")
    %templates:default("target-texts", "all")
function app:coursepack($node as node()*, $model as map(*), $target-texts as xs:string+) {
    let $selected := $target-texts
    return
        map { "coursepacks" := $selected }
};
(:~
 : Render the coursepack entry (or entries) held in the model as HTML
 : inside a single div. Each entry is taken to be the base name of an XML
 : file directly below $config:data-root.
 :
 : The previous version built all URIs first and passed the whole sequence
 : to doc(), which accepts at most one URI and therefore failed as soon as
 : the model entry held more than one id. Entries are now resolved one by
 : one; for a single entry the output is unchanged.
 :
 : @param $node the template node (not read here)
 : @param $model the model; "coursepack" is read from it
 : @param $type not read here
 : @return a div wrapping the HTML of every resolved document
 :)
declare function app:coursepack-html($node as node(), $model as map(*), $type as xs:string?) {
    <div>{
        for $entry in $model("coursepack")
        let $uri := $config:data-root || "/" || $entry || ".xml"
        return
            tei2:tei2html(doc($uri)/tei:TEI)
    }</div>
};
(:~
 : Store the file uploaded in the request field "file" in the review
 : collection and report the outcome.
 :
 : Changes against the previous version:
 :  - a request without an uploaded file is reported instead of being
 :    passed on to xmldb:store();
 :  - any directory part is stripped from the client-supplied file name
 :    before it is used as a resource name;
 :  - success is only reported if the login succeeded and xmldb:store()
 :    returned a path.
 :
 : NOTE(review): the account name and password are hard-coded here; they
 : should come from configuration and must have write access to the
 : collection.
 :
 : @param $node the template node (not read here)
 : @param $model the templating model (not read here)
 : @return a results element containing a message
 :)
declare function app:upload($node as node(), $model as map(*)) {
    let $collection := '/db/apps/NiC/inReview/'
    let $filename := request:get-uploaded-file-name('file')
    return
        if (empty($filename) or $filename = '') then
            <results>
                <message>No file was uploaded.</message>
            </results>
        else
            (: keep only the last path segment of whatever the client sent :)
            let $resource-name := replace($filename, '^.*[/\\]', '')
            let $logged-in := xmldb:login($collection, 'public', 'public')
            let $stored :=
                if ($logged-in) then
                    xmldb:store($collection, $resource-name, request:get-uploaded-file-data('file'))
                else
                    ()
            return
                <results>
                    {
                        if (exists($stored)) then
                            <message>File {$resource-name} has been stored at collection={$collection}.</message>
                        else
                            <message>File {$resource-name} could not be stored at collection={$collection}.</message>
                    }
                </results>
};
(:~
 : Helper function: create a Lucene query (XML syntax) from the user input.
 :
 : In mode "any", input containing an operator used in standard Lucene or
 : regex searches is handed to app:parse-lucene() and app:lucene2xml().
 : In every other case the query element is built directly for the
 : selected mode (any, all, phrase, near-unordered, near-ordered, fuzzy,
 : wildcard or regex).
 :
 : Changes against the previous version:
 :  - the ampersand in the operator list is written as an entity
 :    reference (a bare ampersand is not allowed in a string literal);
 :  - a trailing integer is only removed from the query text in the modes
 :    that use it as a parameter (near-unordered, near-ordered, fuzzy).
 :    Before, it was dropped in every mode, so a search for a number, or
 :    for a phrase ending in one, silently lost that term.
 :
 : @param $query-string the raw user input
 : @param $mode the search mode
 : @return a query element
 :)
declare %private function app:create-query($query-string as xs:string?, $mode as xs:string) {
    let $input :=
        normalize-space(
            if ($query-string) then
                app:sanitize-lucene-query($query-string)
            else
                ''
        )
    let $operators := ('AND', 'OR', 'NOT', '+', '-', '!', '~', '^', '.', '?', '*', '|', '{','[', '(', '<', '@', '#', '&amp;')
    return
        if (functx:contains-any-of($input, $operators) and $mode eq 'any') then
            (: operator syntax present: let the query parser deal with it :)
            let $intermediate := app:parse-lucene($input)
            let $parsed := util:parse($intermediate)
            return
                app:lucene2xml($parsed/node(), $mode)
        else
            let $tokens := tokenize($input, '\s')
            let $last-item := $tokens[last()]
            let $last-is-number := $last-item castable as xs:integer
            (: only these modes interpret a trailing integer as slop / max-edits :)
            let $takes-number := $mode = ('near-unordered', 'near-ordered', 'fuzzy')
            let $text :=
                if ($takes-number and $last-is-number) then
                    string-join(subsequence($tokens, 1, count($tokens) - 1), ' ')
                else
                    string-join($tokens, ' ')
            return
                <query>
                    {
                        if ($mode eq 'any') then
                            for $term in tokenize($text, '\s')
                            return <term occur="should">{$term}</term>
                        else if ($mode eq 'all') then
                            <bool>
                                {
                                    for $term in tokenize($text, '\s')
                                    return <term occur="must">{$term}</term>
                                }
                            </bool>
                        else if ($mode eq 'phrase') then
                            <phrase>{$text}</phrase>
                        else if ($mode eq 'near-unordered') then
                            <near slop="{if ($last-is-number) then $last-item else 5}" ordered="no">{$text}</near>
                        else if ($mode eq 'near-ordered') then
                            <near slop="{if ($last-is-number) then $last-item else 5}" ordered="yes">{$text}</near>
                        else if ($mode eq 'fuzzy') then
                            <fuzzy max-edits="{if ($last-is-number and number($last-item) lt 3) then $last-item else 2}">{$text}</fuzzy>
                        else if ($mode eq 'wildcard') then
                            <wildcard>{$text}</wildcard>
                        else if ($mode eq 'regex') then
                            <regex>{$text}</regex>
                        else
                            ()
                    }</query>
};
(:~
 : Create the items of a bootstrap pagination element to navigate through
 : the hits: first/previous, a window of page numbers, next/last.
 :
 : Changes against the previous version:
 :  - the page count is ceiling(hits div per-page), at least 1. The old
 :    expression applied ceiling() to the hit count instead of to the
 :    quotient and added 1, which produced an extra, empty page whenever
 :    the number of hits was an exact multiple of $per-page;
 :  - the next/last links are enabled when $start + $per-page is less
 :    than or equal to the number of hits, so a final page holding a
 :    single hit is reachable;
 :  - the disabled fast-forward item now carries class="disabled" like the
 :    other disabled items;
 :  - the unused $middle variable is gone.
 :
 : @param $node the template node (not read here)
 : @param $model the model; "hits" is read from it
 : @param $start 1-based index of the first hit on the current page
 : @param $per-page number of hits per page
 : @param $min-hits minimum number of hits needed to show the pager;
 :        a negative value always shows it
 : @param $max-pages maximum number of page links shown at once
 : @return a sequence of li elements, or nothing
 :)
declare
    %templates:wrap
    %templates:default('start', 1)
    %templates:default("per-page", 10)
    %templates:default("min-hits", 0)
    %templates:default("max-pages", 10)
function app:paginate($node as node(), $model as map(*), $start as xs:int, $per-page as xs:int, $min-hits as xs:int,
    $max-pages as xs:int) {
    let $total := count($model("hits"))
    return
        if ($min-hits lt 0 or $total ge $min-hits) then
            let $page-count := max((1, xs:integer(ceiling($total div $per-page))))
            let $current := xs:integer(ceiling($start div $per-page))
            let $has-next := $start + $per-page le $total
            return (
                if ($start = 1) then (
                    <li class="disabled">
                        <a><i class="glyphicon glyphicon-fast-backward"/></a>
                    </li>,
                    <li class="disabled">
                        <a><i class="glyphicon glyphicon-backward"/></a>
                    </li>
                ) else (
                    <li>
                        <a href="?start=1"><i class="glyphicon glyphicon-fast-backward"/></a>
                    </li>,
                    <li>
                        <a href="?start={max( ($start - $per-page, 1 ) ) }"><i class="glyphicon glyphicon-backward"/></a>
                    </li>
                ),
                (
                    (: window of at most $max-pages page links around the current page :)
                    let $lower := max(($current - ($max-pages idiv 2), 1))
                    let $upper := min(($lower + $max-pages - 1, $page-count))
                    let $lower := max(($upper - $max-pages + 1, 1))
                    for $i in $lower to $upper
                    return
                        if ($i = $current) then
                            <li class="active"><a href="?start={max( (($i - 1) * $per-page + 1, 1) )}">{$i}</a></li>
                        else
                            <li><a href="?start={max( (($i - 1) * $per-page + 1, 1)) }">{$i}</a></li>
                ),
                if ($has-next) then (
                    <li>
                        <a href="?start={$start + $per-page}"><i class="glyphicon glyphicon-forward"/></a>
                    </li>,
                    <li>
                        <a href="?start={max( (($page-count - 1) * $per-page + 1, 1))}"><i class="glyphicon glyphicon-fast-forward"/></a>
                    </li>
                ) else (
                    <li class="disabled">
                        <a><i class="glyphicon glyphicon-forward"/></a>
                    </li>,
                    <li class="disabled">
                        <a><i class="glyphicon glyphicon-fast-forward"/></a>
                    </li>
                )
            )
        else
            ()
};
(:~
 : Create a span holding the number of items in the current search result.
 :
 : @param $node the template node(s) (not read here)
 : @param $model the model; "hits" is read from it
 : @return an XHTML span with id "hit-count"
 :)
declare function app:hit-count($node as node()*, $model as map(*)) {
    let $total := count($model("hits"))
    return
        <span xmlns="http://www.w3.org/1999/xhtml" id="hit-count">{ $total }</span>
};
(:~
 : Output the current page of search results. Each hit yields a reference
 : row (running number and a link to the work) followed by the kwic
 : summary of the hit.
 :
 : NOTE(review): the link passed to kwic embeds the "query" model entry
 : as-is. If that entry is the query element built by app:create-query(),
 : its string value is the text of all terms run together - confirm that
 : the target page copes with that.
 :
 : @param $node the template node(s) (not read here)
 : @param $model the model; "hits" and "query" are read from it
 : @param $start 1-based index of the first hit to show
 : @param $per-page number of hits to show
 : @return table rows for the selected hits
 :)
declare
    %templates:wrap
    %templates:default("start", 1)
    %templates:default("per-page", 10)
function app:show-hits($node as node()*, $model as map(*), $start as xs:integer, $per-page as xs:integer) {
    for $hit at $position in subsequence($model("hits"), $start, $per-page)
    let $link-id := $hit/ancestor-or-self::tei:TEI/@xml:id/string()
    let $document := $hit/ancestor::tei:TEI
    (: pad the hit with its nearest siblings to give kwic more context :)
    let $padded :=
        <hit>{($hit/preceding-sibling::*[1], $hit, $hit/following-sibling::*[1])}</hit>
    let $reference :=
        <tr class="reference">
            <td colspan="3">
                <span class="number">{$start + $position - 1}</span>
                <a href="{$document/@xml:id}.html">{app:work-title($document)}</a>
            </td>
        </tr>
    (: prefer the hit's own xml:id, fall back to the internal node id :)
    let $anchor := ($hit/@xml:id, util:node-id($hit))[1]
    let $settings := <config width="120" table="yes" link="{$link-id}.html?query={$model('query')}#{$anchor}"/>
    return
        ($reference, kwic:summarize($padded, $settings, app:filter#2))
};
(:~
 : Callback function called from the kwic module. Text inside a tei:head
 : is wrapped in parentheses with a trailing colon; other text gets a
 : space appended (mode "before") or prepended (any other mode).
 :
 : @param $node the node being summarised
 : @param $mode the mode passed in by the caller
 : @return the text to use in the summary
 :)
declare %private function app:filter($node as node(), $mode as xs:string) as xs:string? {
    let $in-head := exists($node/parent::tei:head)
    return
        if (not($in-head) and $mode eq 'before') then
            concat($node, ' ')
        else if (not($in-head)) then
            concat(' ', $node)
        else
            concat('(', $node, ':) ')
};
(:~
 : Emit an XHTML base element whose href is the request context path
 : followed by the application root with its leading "/db/" removed.
 :
 : @param $node the template node (not read here)
 : @param $model the templating model (not read here)
 : @return a base element
 :)
declare function app:base($node as node(), $model as map(*)) {
    let $relative-root := substring-after($config:app-root, "/db/")
    let $prefix := request:get-context-path()
    return
        <base xmlns="http://www.w3.org/1999/xhtml" href="{$prefix}/{$relative-root}/"/>
};
(:~
 : Crude way to avoid the most common errors with paired expressions and
 : apostrophes in a user-supplied Lucene query.
 :
 : Apostrophes are doubled and colons are replaced by spaces (Lucene
 : fields are not supported). For quotation marks, parentheses, brackets,
 : braces and angle brackets the occurrences are counted; if the count is
 : odd, all characters of that kind are removed.
 :
 : Fix: the quotation-mark test was inverted. It kept the string when the
 : count was odd and blanked the quotes when it was even, the opposite of
 : what its comment and the four checks after it do.
 :
 : TODO: check the order of pairs, not just their number.
 : TODO: notify the user if the query has been modified.
 :
 : @param $query-string the raw user input
 : @return the sanitised query string
 :)
declare %private function app:sanitize-lucene-query($query-string as xs:string) as xs:string {
    (: escape apostrophes :)
    let $query-string := replace($query-string, "'", "''")
    (: remove colons: Lucene fields are not supported :)
    let $query-string := translate($query-string, ":", " ")
    (: uneven number of quotation marks: delete all quotation marks :)
    let $query-string :=
        if (functx:number-of-matches($query-string, '"') mod 2 eq 0)
        then $query-string
        else replace($query-string, '"', ' ')
    (: uneven number of parentheses: delete all parentheses :)
    let $query-string :=
        if ((functx:number-of-matches($query-string, '\(') + functx:number-of-matches($query-string, '\)')) mod 2 eq 0)
        then $query-string
        else translate($query-string, '()', ' ')
    (: uneven number of brackets: delete all brackets :)
    let $query-string :=
        if ((functx:number-of-matches($query-string, '\[') + functx:number-of-matches($query-string, '\]')) mod 2 eq 0)
        then $query-string
        else translate($query-string, '[]', ' ')
    (: uneven number of braces: delete all braces :)
    let $query-string :=
        if ((functx:number-of-matches($query-string, '{') + functx:number-of-matches($query-string, '}')) mod 2 eq 0)
        then $query-string
        else translate($query-string, '{}', ' ')
    (: uneven number of angle brackets: delete all angle brackets :)
    let $query-string :=
        if ((functx:number-of-matches($query-string, '<') + functx:number-of-matches($query-string, '>')) mod 2 eq 0)
        then $query-string
        else translate($query-string, '<>', ' ')
    return $query-string
};
(:~
 : Translate a Lucene search string to an intermediate string mimicking
 : XML syntax, with some additions for later parsing of boolean operators.
 : The result is meant to be parsed as XML with util:parse().
 : Based on Ron Van den Branden,
 : https://rvdb.wordpress.com/2010/08/04/exist-lucene-to-xml-syntax/
 :
 : The function applies one rewrite at a time and recurses on the result
 : until no rule matches; then underscores are turned into quotation marks
 : and the string is wrapped in a query element.
 :
 : Fix: the two patterns that match the symbolic AND operator contained a
 : bare ampersand, which is not allowed in an XQuery string literal; they
 : now use the entity reference. The patterns are otherwise unchanged.
 :
 : TODO: the following cases are not covered:
 :   1) a near query with first/term children, e.g.
 :      query / near slop=10 / (first end=4: snake, term: fillet)
 :      as opposed to the same with snake and fillet swapped;
 :   2) w(..)+d and w[uiaeo]+d are not treated correctly as regex.
 :
 : NOTE(review): boost suffixes are not turned into a boost attribute; the
 : phrase rule writes any number following the closing quotation mark into
 : slop. The commented-out boost variants of the original have been dropped.
 :
 : @param $string the (partially rewritten) Lucene query string
 : @return the intermediate XML-like string
 :)
declare %private function app:parse-lucene($string as xs:string) {
    (: replace all symbolic booleans with lexical counterparts :)
    if (matches($string, '[^\\](\|{2}|&amp;{2}|!) ')) then
        let $rewritten :=
            replace(
                replace(
                    replace(
                        $string,
                        '&amp;{2} ', 'AND '),
                    '\|{2} ', 'OR '),
                '! ', 'NOT ')
        return app:parse-lucene($rewritten)
    (: replace all booleans with empty AND, OR and NOT elements :)
    else if (matches($string, '[^<](AND|OR|NOT) ')) then
        let $rewritten := replace($string, '(AND|OR|NOT) ', '<$1/>')
        return app:parse-lucene($rewritten)
    (: replace all '+' modifiers in token-initial position with an AND element :)
    else if (matches($string, '(^|[^\w"])\+[\w"(]')) then
        let $rewritten := replace($string, '(^|[^\w"])\+([\w"(])', '$1<AND type=_+_/>$2')
        return app:parse-lucene($rewritten)
    (: replace all '-' modifiers in token-initial position with a NOT element :)
    else if (matches($string, '(^|[^\w"])-[\w"(]')) then
        let $rewritten := replace($string, '(^|[^\w"])-([\w"(])', '$1<NOT type=_-_/>$2')
        return app:parse-lucene($rewritten)
    (: replace parentheses with a bool element; NB: regex also uses parentheses! :)
    else if (matches($string, '(^|[\W-[\\]]|>)\(.*?[^\\]\)(\^(\d+))?(<|\W|$)')) then
        let $rewritten := replace($string, '(^|\W|>)\((.*?)\)(<|\W|$)', '$1<bool>$2</bool>$3')
        return app:parse-lucene($rewritten)
    (: replace quoted phrases with a near element carrying a slop attribute :)
    else if (matches($string, '(^|\W|>)(").*?\2([~^]\d+)?(<|\W|$)')) then
        let $rewritten := replace($string, '(^|\W|>)(")(.*?)\2([~^](\d+))?(<|\W|$)', '$1<near slop=_$5_>$3</near>$6')
        return app:parse-lucene($rewritten)
    (: wrap fuzzy search strings in a fuzzy element carrying max-edits :)
    else if (matches($string, '[\w-[<>]]+?~[\d.]*')) then
        let $rewritten := replace($string, '([\w-[<>]]+?)~([\d.]*)', '<fuzzy max-edits=_$2_>$1</fuzzy>')
        return app:parse-lucene($rewritten)
    (: nothing left to rewrite: wrap the resulting string in a query element :)
    else
        concat('<query>', replace(normalize-space($string), '_', '"'), '</query>')
};
(:~
 : Transform the intermediary structures in the search query generated
 : through app:parse-lucene() and util:parse() into full-fledged boolean
 : expressions employing XML query syntax.
 : Based on Ron Van den Branden,
 : https://rvdb.wordpress.com/2010/08/04/exist-lucene-to-xml-syntax/
 :
 : Changes against the previous version:
 :  - the ampersand in the regex-detection pattern is written as an
 :    entity reference (a bare ampersand is not allowed in a string
 :    literal);
 :  - children are mapped with the simple map operator instead of a path
 :    step, because the text branch can return plain strings and a path
 :    step must not mix nodes and atomic values;
 :  - two trailing branches that both produced 'should' are merged.
 :
 : NOTE(review): a boost suffix on a token is stripped from the term text
 : and discarded; the commented-out code that wrote it to a boost
 : attribute has been dropped.
 :
 : @param $node the item to transform
 : @param $mode the search mode; "regex" forces regex elements
 : @return the transformed item(s)
 :)
declare %private function app:lucene2xml($node as item(), $mode as xs:string) {
    typeswitch ($node)
        case element(query) return
            element { node-name($node) } {
                element bool {
                    $node/node() ! app:lucene2xml(., $mode)
                }
            }
        (: the operator markers only steer the occur values of their neighbours :)
        case element(AND) return ()
        case element(OR) return ()
        case element(NOT) return ()
        case element() return
            let $name :=
                if (($node/self::phrase | $node/self::near)[not(@slop > 0)])
                then 'phrase'
                else node-name($node)
            let $previous := $node/preceding-sibling::*[1]
            let $next := $node/following-sibling::*[1]
            return
                element { $name } {
                    $node/@*,
                    if (($next | $previous)[self::AND or self::OR or self::NOT or self::bool]) then
                        attribute occur {
                            if ($previous[self::AND]) then
                                'must'
                            else if ($previous[self::NOT]) then
                                'not'
                            else if ($node[self::bool] and $next[self::AND]) then
                                'must'
                            else
                                'should'
                        }
                    else
                        (),
                    $node/node() ! app:lucene2xml(., $mode)
                }
        case text() return
            if ($node/parent::*[self::query or self::bool]) then
                for $tok at $p in tokenize($node, '\s+')[normalize-space()]
                (: Switch to a regex element when the token contains characters used in
                   regular expressions. Wildcard and regex searches cannot be told apart
                   reliably, so wildcard elements are never produced here. :)
                let $el-name :=
                    if (matches($tok, '((^|[^\\])[.?*+()\[\]\\^|{}#@&amp;<>~]|\$$)') or $mode eq 'regex')
                    then 'regex'
                    else 'term'
                return
                    element { $el-name } {
                        attribute occur {
                            (: the first token directly follows AND :)
                            if ($p = 1 and $node/preceding-sibling::*[1][self::AND]) then
                                'must'
                            (: the first token directly follows NOT :)
                            else if ($p = 1 and $node/preceding-sibling::*[1][self::NOT]) then
                                'not'
                            (: the first token is directly followed by an untyped AND :)
                            else if ($p = 1 and $node/following-sibling::*[1][self::AND][not(@type)]) then
                                'must'
                            else
                                'should'
                        },
                        (: regex searches have to be lower-cased; a boost suffix is cut off :)
                        lower-case(normalize-space(replace($tok, '(.*?)(\^(\d+))(\W|$)', '$1')))
                    }
            else
                normalize-space($node)
        default return
            $node
};
(: | |
declare | |
%templates:wrap | |
%templates:default('target-texts', 'all') | |
function app:test-show-selected($node as node()*, $model as map(*), $target-texts as xs:string+) { | |
let $collection := $config:data | |
let $all-documents := collection($collection)[tei:TEI] ! substring-after(base-uri(), '/db/apps/NiC/data') | |
let $target-texts := 'all' | |
let $selected-docs := request:get-parameter('checked', ()) | |
let $documents-to-show := if (exists($selected-docs)) then $all-documents[. = $selected-docs] else $all-documents | |
let $hello := 'howdy' | |
let $loc := | |
<tr class="reference"> | |
<td colspan="3"> | |
<a href=".html">{$hello}</a> | |
</td> | |
</tr> | |
return | |
if (exists($selected-docs)) then | |
for $selected in $selected-docs | |
return | |
tei2:tei2html(doc(concat($collection, $selected))/tei:TEI) | |
else ($loc) | |
}; | |
:) | |
(: not sure what this is--can I cut? :) | |
(: declare | |
%templates:default("mode", "any") | |
%templates:default("scope", "narrow") | |
%templates:default("work-types", "all") | |
%templates:default("target-texts", "all") | |
function app:test-query($node as node()*, $model as map(*), $query as xs:string?, $mode as xs:string, $scope as xs:string, | |
$work-types as xs:string+, $target-texts as xs:string+) { | |
let $queryExpr := app:create-query($query, $mode) | |
return | |
if (empty($queryExpr) or $queryExpr = "") then | |
let $cached := session:get-attribute("apps.NiC") | |
return | |
map { | |
"hits" := $cached, | |
"query" := session:get-attribute("apps.NiC.query") | |
} | |
else | |
(:Get the work ids of the work types selected.:) | |
let $target-text-ids := distinct-values(doc(concat($config:data-root, '/', 'work-types.xml'))//item[value = $work-types]/id) | |
(:If no individual works have been selected, search in the works with ids selected by type; | |
if individual works have been selected, then neglect that no selection has been done in works according to type.:) | |
let $target-texts := | |
if ($target-texts = 'all' and $work-types = 'all') | |
then 'all' | |
else | |
if ($target-texts = 'all') | |
then $target-text-ids | |
else | |
if ($work-types = "all") then $target-texts else ($target-texts[. = $target-text-ids]) | |
let $context := | |
if ($target-texts = 'all') | |
then collection($config:data-root)/tei:TEI | |
else collection($config:data-root)//tei:TEI[@xml:id = $target-texts] | |
let $hits := | |
if ($scope eq 'narrow') | |
then | |
for $hit in ($context//tei:p[ft:query(., $queryExpr)], $context//tei:head[ft:query(., $queryExpr)]) | |
order by ft:score($hit) descending | |
return $hit | |
else | |
for $hit in $context//tei:text[ft:query(., $queryExpr)] | |
order by ft:score($hit) descending | |
return $hit | |
let $store := ( | |
session:set-attribute("apps.NiC", $hits), | |
session:set-attribute("apps.NiC.query", $queryExpr) | |
) | |
return | |
(: Process nested templates :) | |
map { | |
"hits" := $hits, | |
"query" := $queryExpr | |
} | |
}; | |
:) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment