Skip to content

Instantly share code, notes, and snippets.

@joewiz
Last active July 27, 2017 19:29
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save joewiz/7957ac170be66e3fedbff74b9d9a71de to your computer and use it in GitHub Desktop.
Save joewiz/7957ac170be66e3fedbff74b9d9a71de to your computer and use it in GitHub Desktop.
Get GitHub zip archive
xquery version "3.1";
import module namespace hd = "http://joewiz.org/ns/xquery/http-download" at "http-download.xqm";
import module namespace unzip = "http://joewiz.org/ns/xquery/unzip" at "unzip.xqm";
(:~
 : Downloads the zip archive at $archive-url into a temporary collection,
 : unpacks it into $destination, and removes the temporary collection again.
 :
 : @param $archive-url the URL of the zip archive to download
 : @param $destination the database collection the archive is unpacked into
 : @return the name of the downloaded file without its ".zip" suffix
 : @error local:download-failed if hd:download does not return the path of a stored resource
 :)
declare function local:download-and-unpack-zip-archive($archive-url as xs:string, $destination as xs:string) as xs:string {
    let $temp-collection := "/db/http-download-temp"
    let $download-temp-folder :=
        if (xmldb:collection-available($temp-collection)) then
            $temp-collection
        else
            xmldb:create-collection("/db", "http-download-temp")
    return
        try {
            let $downloaded-file := hd:download($archive-url, $download-temp-folder)
            return
                (: hd:download returns an <error> element instead of a path when the
                   server does not answer with status 200; fail with a clear message
                   rather than letting unzip:unzip raise a type error :)
                if (not($downloaded-file instance of xs:string)) then
                    error(
                        xs:QName("local:download-failed"),
                        concat("Could not download ", $archive-url),
                        $downloaded-file
                    )
                else
                    let $unzip := unzip:unzip($downloaded-file, $destination)
                    let $cleanup := xmldb:remove($temp-collection)
                    return
                        tokenize($downloaded-file, "/")[. ne ''][last()] => substring-before(".zip")
        }
        catch * {
            (: do not leave the temporary collection behind when something failed,
               then re-raise the original error for the caller :)
            if (xmldb:collection-available($temp-collection)) then
                xmldb:remove($temp-collection)
            else
                (),
            error($err:code, string($err:description), $err:value)
        }
};
(:~
 : Downloads and unpacks the archive at $archive-url beneath $parent-collection,
 : then renames the unpacked collection to $destination-collection-name.
 :
 : @param $archive-url the URL of the zip archive
 : @param $parent-collection the collection the archive is unpacked into
 : @param $destination-collection-name the new name for the unpacked collection
 : @return a result element reporting the source URL and the final collection path
 :)
declare function local:get-github-archive($archive-url, $parent-collection, $destination-collection-name) {
    let $unpacked-name := local:download-and-unpack-zip-archive($archive-url, $parent-collection)
    let $renamed :=
        xmldb:rename(
            concat($parent-collection, "/", $unpacked-name),
            $destination-collection-name
        )
    let $final-path := concat($parent-collection, "/", $destination-collection-name)
    return
        <result>Successfully downloaded { $archive-url } to { $final-path }</result>
};
(: Example run: fetch the master branch archive of scta-texts/summahalensis and
   install it as a "summahalensis" collection under the test collection. :)
local:get-github-archive(
    (: archive URL :)
    "https://github.com/scta-texts/summahalensis/archive/master.zip",
    (: parent collection :)
    "/db/apps/scta-data/test/test",
    (: final collection name :)
    "summahalensis"
)
xquery version "3.1";
module namespace hd="http://joewiz.org/ns/xquery/http-download";
import module namespace hc="http://expath.org/ns/http-client";
import module namespace util="http://exist-db.org/xquery/util";
import module namespace xmldb="http://exist-db.org/xquery/xmldb";
(:~
 : Downloads a file from a remote HTTP server at $file-url and saves it to the
 : eXist-db $collection.
 :
 : We try hard to recognize XML files and save them with the correct mime type so
 : that eXist-db can efficiently index and query the files; if the file doesn't
 : appear to be XML, though, we just trust the response headers.
 :
 : @param $file-url the URL of the file to download
 : @param $collection the database collection the file is stored in
 : @return the result of xmldb:store on success; otherwise an error element
 :         wrapping the response head returned by the HTTP client
 :)
declare function hd:download($file-url as xs:string, $collection as xs:string) as item()* {
    let $request := <hc:request href="{$file-url}" method="GET"/>
    let $response := hc:send-request($request)
    let $head := $response[1]
    (: These sample responses from the EXPath HTTP client reveal where the response code,
       media-type, and filename can be found:

        <hc:response xmlns:hc="http://expath.org/ns/http-client" status="200" message="OK">
            <hc:header name="connection" value="close"/>
            <hc:header name="transfer-encoding" value="chunked"/>
            <hc:header name="content-type" value="application/zip"/>
            <hc:header name="content-disposition" value="attachment; filename=xqjson-master.zip"/>
            <hc:header name="date" value="Sat, 06 Jul 2013 05:59:04 GMT"/>
            <hc:body media-type="application/zip"/>
        </hc:response>

        <hc:response xmlns:hc="http://expath.org/ns/http-client" status="200" message="OK">
            <hc:header name="date" value="Sat, 06 Jul 2013 06:26:34 GMT"/>
            <hc:header name="server" value="GitHub.com"/>
            <hc:header name="content-type" value="text/plain; charset=utf-8"/>
            <hc:header name="status" value="200 OK"/>
            <hc:header name="content-disposition" value="inline"/>
            <hc:header name="content-transfer-encoding" value="binary"/>
            <hc:header name="etag" value="&quot;a6782b6125583f16632fa103a828fdd6&quot;"/>
            <hc:header name="vary" value="Accept-Encoding"/>
            <hc:header name="cache-control" value="private"/>
            <hc:header name="keep-alive" value="timeout=10, max=50"/>
            <hc:header name="connection" value="Keep-Alive"/>
            <hc:body media-type="text/plain"/>
        </hc:response>
    :)
    return
        (: check to ensure the remote server indicates success :)
        if ($head/@status = '200') then
            (: try to get the filename from the content-disposition header,
               otherwise construct it from the $file-url :)
            let $content-disposition := ($head/hc:header[@name = 'content-disposition']/@value)[1]
            let $filename :=
                if (contains($content-disposition, 'filename=')) then
                    (: the value may be quoted and may be followed by further
                       parameters: keep only the bare file name :)
                    replace(
                        substring-after($content-disposition, 'filename='),
                        '^\s*"?([^";]*)"?.*$',
                        '$1'
                    )
                else
                    (: use whatever comes after the final / as the file name :)
                    replace($file-url, '^.*/([^/]*)$', '$1')
            (: override the stated media type if the file is known to be .xml :)
            let $media-type := $head/hc:body/@media-type
            let $mime-type :=
                if (ends-with($file-url, '.xml') and $media-type = 'text/plain') then
                    'application/xml'
                else
                    $media-type
            (: content-transfer-encoding is reported as a response header, not on hc:body :)
            let $content-transfer-encoding := $head/hc:header[@name = 'content-transfer-encoding']/@value
            let $body := $response[2]
            (: if the file is XML and the payload is binary, convert the binary to a string;
               the instance-of guard avoids a type error when the client already decoded
               the body to text :)
            let $file :=
                if (
                    ends-with($file-url, '.xml')
                    and $content-transfer-encoding = 'binary'
                    and $body instance of xs:base64Binary
                ) then
                    util:binary-to-string($body)
                else
                    $body
            return
                xmldb:store($collection, $filename, $file, $mime-type)
        else
            <error>
                <message>Oops, something went wrong:</message>
                {$head}
            </error>
};
xquery version "3.0";
(:~
: A module for unzipping files stored in the database
:
: @author Joe Wicentowski
:)
module namespace unzip = "http://joewiz.org/ns/xquery/unzip";
import module namespace compression = "http://exist-db.org/xquery/compression";
import module namespace functx = "http://www.functx.com";
import module namespace xmldb = "http://exist-db.org/xquery/xmldb";
(:~
 : Helper function of unzip:mkcol(): creates the first of $components as a child
 : collection of $collection, then descends into it for the remaining components.
 : Recursion stops at the end of the sequence or at the first empty component.
 :
 : @param $collection the collection in which the next component is created
 : @param $components the remaining collection names, outermost first
 : @return the results of the xmldb:create-collection calls, outermost first
 :)
declare %private function unzip:mkcol-recursive($collection, $components) as xs:string* {
    if (empty($components)) then
        ()
    else
        let $current := $components[1]
        let $remaining := subsequence($components, 2)
        let $created := xmldb:create-collection($collection, $current)
        return (
            $created,
            if ($remaining[1]) then
                unzip:mkcol-recursive(concat($collection, "/", $current), $remaining)
            else
                ()
        )
};
(:~
 : Creates the collection hierarchy described by $path beneath $collection.
 : The path is split on "/" and each component is passed through xmldb:encode
 : before the collections are created.
 :
 : @param $collection the collection the hierarchy is created under
 : @param $path the "/"-separated path to create
 : @return the results of the underlying xmldb:create-collection calls
 :)
declare %private function unzip:mkcol($collection, $path) as xs:string* {
    let $encoded-components :=
        for $component in tokenize($path, "/")
        return
            xmldb:encode($component)
    return
        unzip:mkcol-recursive($collection, $encoded-components)
};
(:~
 : Creates the collection hierarchy for a database path: the portion of $path
 : after "/db/" is created beneath the "/db" collection.
 :
 : @param $path the database path of the collection to create
 : @return the results of the underlying xmldb:create-collection calls
 :)
declare %private function unzip:mkcol($path) as xs:string* {
    let $path-below-db := substring-after($path, "/db/")
    return
        unzip:mkcol('/db', $path-below-db)
};
(:~
 : Entry filter handed to compression:unzip that accepts every zip entry.
 :
 : @param $path the path of the zip entry (ignored)
 : @param $data-type the type of the zip entry (ignored)
 : @param $param the filter parameters (ignored)
 : @return always true()
 :)
declare %private function unzip:allow-all-entries-through(
    $path as xs:string,
    $data-type as xs:string,
    $param as item()*
) as xs:boolean {
    true()
};
(:~
 : Entry handler handed to compression:unzip that stores one zip entry in the
 : database. Folder entries are created as collections; any other entry is
 : stored as a resource, creating its parent collection first if necessary.
 :
 : @param $path the path of the entry inside the zip file
 : @param $data-type the type of the entry; 'folder' is handled as a collection
 : @param $data the content of the entry
 : @param $param parameter elements; the one named "unzip-base-collection"
 :        supplies, via its value attribute, the collection entries are stored under
 : @return an entry element on success, or an error element if storing the resource failed
 :)
declare %private function unzip:store-entry($path as xs:string, $data-type as xs:string, $data as item()?, $param as item()*) as element() {
    let $base-collection := $param[@name="unzip-base-collection"]/@value
    return
        if ($data-type = 'folder') then
            let $created := unzip:mkcol($base-collection, $path)
            return
                element entry {
                    attribute path { $path },
                    attribute data-type { $data-type }
                }
        else
            (: anything that is not a folder is treated as a resource :)
            let $parent-path := functx:substring-before-last($path, '/')
            let $target-collection := concat($base-collection, '/', xmldb:encode($parent-path))
            let $raw-filename :=
                if (contains($path, '/')) then
                    functx:substring-after-last($path, '/')
                else
                    $path
            let $target-filename := xmldb:encode($raw-filename)
            return
                try {
                    let $ensure-collection :=
                        if (not(xmldb:collection-available($target-collection))) then
                            unzip:mkcol($target-collection)
                        else
                            ()
                    let $stored := xmldb:store($target-collection, $target-filename, $data)
                    return
                        element entry {
                            attribute path { $path },
                            attribute data-type { $data-type }
                        }
                }
                catch * {
                    (: report the failure as data so the remaining entries are still processed :)
                    element error {
                        attribute path { $path },
                        concat('Error storing ', $path, ': ', $err:code, $err:value, $err:description)
                    }
                }
};
(:~
 : Entry handler handed to compression:unzip that only describes a zip entry,
 : without storing anything.
 :
 : @param $path the path of the entry inside the zip file
 : @param $data-type the type of the entry
 : @param $data the content of the entry (ignored)
 : @param $param the handler parameters (ignored)
 : @return an entry element carrying the path and data type as attributes
 :)
declare %private function unzip:list-entry($path as xs:string, $data-type as xs:string, $data as item()?, $param as item()*) as element(entry) {
    element entry {
        attribute path { $path },
        attribute data-type { $data-type }
    }
};
(:~
 : Lists the contents of a zip file stored in the database.
 :
 : @param $resource the full db path to the zip file
 : @return an entries element holding one entry element per item in the zip file,
 :         with the number of entries in its count attribute
 : @error unzip if $resource is not available as a binary document
 :)
declare function unzip:list($resource as xs:string) as element(entries) {
    let $zip-binary :=
        if (not(util:binary-doc-available($resource))) then
            error(xs:QName('unzip'), concat($resource, ' does not exist or is not a valid binary file'))
        else
            util:binary-doc($resource)
    let $listed :=
        compression:unzip(
            $zip-binary,
            (: entry filter and its parameters :)
            unzip:allow-all-entries-through#3, (),
            (: entry handler and its parameters :)
            unzip:list-entry#4, ()
        )
    return
        element entries {
            attribute count { count($listed) },
            $listed
        }
};
(:~
 : Unzips a zip file into the collection that contains the zip file itself.
 :
 : @param $zip-file the full db path to the zip file
 : @return the entries element produced by the two-argument unzip:unzip, listing
 :         each stored entry and an error for each entry that could not be stored
 :)
declare function unzip:unzip($zip-file as xs:string) as element(entries) {
    (: everything before the last "/" is the collection holding the zip file :)
    unzip:unzip($zip-file, functx:substring-before-last($zip-file, '/'))
};
(:~
 : Unzips a zip file, storing its contents into $target-collection. The target
 : collection is created first if it is not available.
 :
 : @param $zip-file the full db path to the zip file
 : @param $target-collection where the zip file contents will be stored
 : @return an entries element listing an entry for each stored item and an error
 :         for each item that could not be stored, with both counts as attributes
 : @error unzip if $zip-file is not available as a binary document
 :)
declare function unzip:unzip($zip-file as xs:string, $target-collection as xs:string) as element(entries) {
    let $zip-binary :=
        if (not(util:binary-doc-available($zip-file))) then
            error(xs:QName('unzip'), concat($zip-file, ' does not exist or is not a valid binary file'))
        else
            util:binary-doc($zip-file)
    let $base-collection :=
        if (not(xmldb:collection-available($target-collection))) then
            (: the last item returned is the path of the innermost collection created :)
            unzip:mkcol($target-collection)[last()]
        else
            $target-collection
    let $store-params := <param name="unzip-base-collection" value="{$base-collection}"/>
    let $outcomes :=
        compression:unzip(
            $zip-binary,
            (: entry filter and its parameters :)
            unzip:allow-all-entries-through#3, (),
            (: entry handler and its parameters :)
            unzip:store-entry#4, $store-params
        )
    let $stored-count := count($outcomes[self::entry])
    let $failed-count := count($outcomes[self::error])
    return
        <entries target-collection="{$target-collection}" count-stored="{$stored-count}" count-unable-to-store="{$failed-count}">{$outcomes}</entries>
};
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment