Skip to content

Instantly share code, notes, and snippets.

View emchateau's full-sized avatar

Emmanuel Château-Dutier emchateau

View GitHub Profile
xquery version "3.0";
(: find the shortest and longest article and get the average word count of a collection of TEI XML articles :)
declare namespace tei="http://www.tei-c.org/ns/1.0";
(: in our case, 'articles' are TEI divs that have @xml:id attributes and no child divs;
we filter out the foreword since it is not a full article. :)
let $milestone-articles := collection('/db/cms/apps/tei-content/data/milestones')//tei:div[@xml:id and not(.//tei:div)][@xml:id ne 'foreword']
let $article-infos :=
xquery version "3.0";
declare namespace fn="http://www.w3.org/2005/xpath-functions";
(: Fix problems with mis-capitalized names. For example:
Before: MACARTHUR, Douglas II
After: MacArthur, Douglas II
:)
declare function local:fix-name-capitalization($name as xs:string) {
(:
xquery version "1.0";
(: A naive approach to sentence tokenization inspired by http://stackoverflow.com/a/2103653/659732
:
: Works well with edited text like newspapers. Parameters like punctuation can/should be edited;
: see the section below called "criteria".
:
: For a more sophisticated approach, see Tibor Kiss and Jan Strunk, "Unsupervised Multilingual
: Sentence Boundary Detection", Computational Linguistics, Volume 32, Issue 4, December 2006,
: pp. 485-525. Also, see these discussions of sentence tokenization:
javascript:(function({
var INSTAPAPER=true,w=window,d=document,pageSelectedTxt=w.getSelection?w.getSelection():(d.getSelection)?d.getSelection():(d.selection?d.selection.createRange().text:0),pageTitle=d.title,pageUri=w.location.href,tmplt="";
tmplt="From ["+pageTitle+"]("+pageUri+"):\n\n";
if(pageSelectedTxt!="") {
pageSelectedTxt=">%20"+pageSelectedTxt;
pageSelectedTxt=pageSelectedTxt.replace(/(\r\n|\n+|\r)/gm,"\n");
pageSelectedTxt=pageSelectedTxt.replace(/\n/g,"\n>%20\n>%20");
w.location.href="nvalt://make/?txt="+encodeURIComponent(tmplt+pageSelectedTxt)+"&title="+encodeURIComponent(pageTitle)
}
else {
<div>
<h2>{$heading} - {count($actions/generate)}</h2>
<p>Actions are processes that generate a new item from an existing item.</p>
<div>
{$partial("action1.xml","action",$actions/generate )}
</div>
</div>
# Write a Debian hd-media boot image onto a USB pen drive using diskutil and gzip.
#
# Requirements:
# - pen drive formatted in FAT
# - boot.img.gz downloaded from: http://ftp.debian.org/debian/dists/Debian6.0.2/main/installer-amd64/current/images/hd-media/
# - netinstall iso image http://cdimage.debian.org/debian-cd/6.0.2.1/amd64/iso-cd/
# - pen drive is on /dev/disk1 -> check with "diskutil list"
#
# WARNING: the gzip step writes directly to /dev/disk1 and overwrites whatever
# is on that device. Confirm the device node before running.

# Unmount the drive first so the device can be written to directly.
diskutil unmountDisk /dev/disk1
# Decompress the boot image to stdout and redirect it onto the device.
gzip -dc boot.img.gz >/dev/disk1
# Eject the drive once the image has been written.
diskutil eject /dev/disk1
# Follow-up steps (no commands for these appear here):
# mount pen drive again
# add iso image to the pen drive
@emchateau
emchateau / tokenizeToElements.xq
Last active August 29, 2015 14:10 — forked from xquery/gist:1536327
This function tokenizes a string and returns each token in an XML element
xquery version "1.0";
(:~
 : Splits a comma-separated string into tokens and emits one <text/> element
 : per token. Each element's content is the literal prefix "add some text: "
 : followed by the token itself.
 :)
let $input := "test,test,test,test,test"
for $token in tokenize($input, ',')
return
    <text>add some text: {$token}</text>
<sales-qty-by-product>{
for $pname in distinct-values(
doc("sales-records.xml")/*/record/product-name)
order by $pname
return
<product name="{$pname}">{
sum(
doc("sales-records.xml")/*/record[
product-name eq $pname ]/qty)
}</product>
xquery version "3.1";
(: parse tweets using XQuery 3.1's JSON support
: see http://www.w3.org/TR/xpath-functions-31/#json
: sample JSON from https://dev.twitter.com/rest/reference/get/statuses/user_timeline
:)
let $json := json-doc('/db/user_timeline.json')
let $tweets := $json?*
return
xquery version "3.1";
(: Utility functions for JSON as derived via XQuery 3.1's parse-json and json-doc functions.
: See http://www.w3.org/TR/xpath-functions-31/#json :)
module namespace ju = "http://joewiz.org/ns/xquery/json-util";
declare namespace output="http://www.w3.org/2010/xslt-xquery-serialization";
(: Get the data type for a piece of JSON data :)