Last active
July 27, 2017 19:29
-
-
Save joewiz/7957ac170be66e3fedbff74b9d9a71de to your computer and use it in GitHub Desktop.
Get GitHub zip archive
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
xquery version "3.1"; | |
import module namespace hd = "http://joewiz.org/ns/xquery/http-download" at "http-download.xqm"; | |
import module namespace unzip = "http://joewiz.org/ns/xquery/unzip" at "unzip.xqm"; | |
(:~
 : Downloads the zip archive at $archive-url into a temporary collection,
 : unzips it into $destination, and removes the temporary collection again.
 :
 : @param $archive-url the URL of the zip archive to fetch
 : @param $destination the database collection passed to unzip:unzip() as the target
 : @return the part of the downloaded file's name that precedes ".zip"
 : @error local:download-failed when hd:download() does not return the path of a stored resource
 :)
declare function local:download-and-unpack-zip-archive($archive-url as xs:string, $destination as xs:string) as xs:string {
    let $download-temp-folder :=
        if (xmldb:collection-available("/db/http-download-temp")) then
            "/db/http-download-temp"
        else
            xmldb:create-collection("/db", "http-download-temp")
    let $downloaded-file := hd:download($archive-url, $download-temp-folder)
    return
        if ($downloaded-file instance of xs:string) then
            let $unzip := unzip:unzip($downloaded-file, $destination)
            (: remove the very collection we downloaded into, not a second hard-coded copy of its path :)
            let $cleanup := xmldb:remove($download-temp-folder)
            return
                tokenize($downloaded-file, "/")[. ne ''][last()] => substring-before(".zip")
        else
            (: hd:download() hands back an error element instead of a path when the server
               does not answer with status 200; clean up and fail with a readable message
               rather than letting unzip:unzip() reject the value with a type error :)
            let $cleanup := xmldb:remove($download-temp-folder)
            return
                error(xs:QName("local:download-failed"), "Unable to download " || $archive-url, $downloaded-file)
};
(:~
 : Downloads and unpacks a zip archive below $parent-collection, then renames the
 : collection named after the archive to $destination-collection-name.
 :
 : @param $archive-url URL of the zip archive
 : @param $parent-collection collection the archive is unpacked into
 : @param $destination-collection-name new name given to the unpacked collection
 : @return a result element reporting the URL and the final collection path
 :)
declare function local:get-github-archive($archive-url, $parent-collection, $destination-collection-name) {
    let $unpacked-name := local:download-and-unpack-zip-archive($archive-url, $parent-collection)
    let $renamed := xmldb:rename($parent-collection || "/" || $unpacked-name, $destination-collection-name)
    let $final-path := $parent-collection || "/" || $destination-collection-name
    return
        <result>Successfully downloaded { $archive-url } to { $final-path }</result>
};
(: Query body: fetch one GitHub archive and store it under a fixed collection name :)
let $source-url := "https://github.com/scta-texts/summahalensis/archive/master.zip"
let $parent := "/db/apps/scta-data/test/test"
let $target-name := "summahalensis"
return
    local:get-github-archive($source-url, $parent, $target-name)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
xquery version "3.1"; | |
module namespace hd="http://joewiz.org/ns/xquery/http-download"; | |
import module namespace hc="http://expath.org/ns/http-client"; | |
import module namespace util="http://exist-db.org/xquery/util"; | |
import module namespace xmldb="http://exist-db.org/xquery/xmldb"; | |
(:~
 : Downloads a file from a remote HTTP server at $file-url and saves it to an eXist-db $collection.
 : We try hard to recognize XML files and save them with the correct mimetype so that eXist-db can
 : efficiently index and query the files; if it doesn't appear to be XML, though, we just trust
 : the response headers.
 :
 : @param $file-url the URL of the file to download
 : @param $collection the database collection in which the file is stored
 : @return the result of xmldb:store() when the server answers with status 200,
 :         otherwise an error element wrapping the response head
 :)
declare function hd:download($file-url as xs:string, $collection as xs:string) as item()* {
    let $request := <hc:request href="{$file-url}" method="GET"/>
    let $response := hc:send-request($request)
    let $head := $response[1]
    (: These sample responses from the EXPath HTTP client reveal where the response code, media-type, and filename can be found:

        <hc:response xmlns:hc="http://expath.org/ns/http-client" status="200" message="OK">
            <hc:header name="connection" value="close"/>
            <hc:header name="transfer-encoding" value="chunked"/>
            <hc:header name="content-type" value="application/zip"/>
            <hc:header name="content-disposition" value="attachment; filename=xqjson-master.zip"/>
            <hc:header name="date" value="Sat, 06 Jul 2013 05:59:04 GMT"/>
            <hc:body media-type="application/zip"/>
        </hc:response>

        <hc:response xmlns:hc="http://expath.org/ns/http-client" status="200" message="OK">
            <hc:header name="date" value="Sat, 06 Jul 2013 06:26:34 GMT"/>
            <hc:header name="server" value="GitHub.com"/>
            <hc:header name="content-type" value="text/plain; charset=utf-8"/>
            <hc:header name="status" value="200 OK"/>
            <hc:header name="content-disposition" value="inline"/>
            <hc:header name="content-transfer-encoding" value="binary"/>
            <hc:header name="etag" value="&quot;a6782b6125583f16632fa103a828fdd6&quot;"/>
            <hc:header name="vary" value="Accept-Encoding"/>
            <hc:header name="cache-control" value="private"/>
            <hc:header name="keep-alive" value="timeout=10, max=50"/>
            <hc:header name="connection" value="Keep-Alive"/>
            <hc:body media-type="text/plain"/>
        </hc:response>
    :)
    return
        (: check to ensure the remote server indicates success :)
        if ($head/@status = '200') then
            (: try to get the filename from the content-disposition header, otherwise construct from the $file-url :)
            let $content-disposition := string(($head/hc:header[@name = 'content-disposition']/@value)[1])
            let $filename :=
                if (contains($content-disposition, 'filename=')) then
                    (: drop optional surrounding double quotes and any parameters that follow the file name :)
                    replace(substring-after($content-disposition, 'filename='), '^\s*"?([^";]+)"?.*$', '$1')
                else
                    (: use whatever comes after the final / as the file name :)
                    replace($file-url, '^.*/([^/]*)$', '$1')
            (: override the stated media type if the file is known to be .xml :)
            let $media-type := $head/hc:body/@media-type
            let $mime-type :=
                if (ends-with($file-url, '.xml') and $media-type = 'text/plain') then
                    'application/xml'
                else
                    $media-type
            (: if the file is XML and the payload is binary, we need to convert the binary to string;
               content-transfer-encoding is reported as a header, not as an attribute of hc:body :)
            let $content-transfer-encoding := $head/hc:header[@name = 'content-transfer-encoding']/@value
            let $body := $response[2]
            let $file :=
                if (
                    ends-with($file-url, '.xml')
                    and $content-transfer-encoding = 'binary'
                    (: only decode when the client really delivered a binary item :)
                    and $body instance of xs:base64Binary
                ) then
                    util:binary-to-string($body)
                else
                    $body
            return
                xmldb:store($collection, $filename, $file, $mime-type)
        else
            <error>
                <message>Oops, something went wrong:</message>
                {$head}
            </error>
};
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
xquery version "3.0"; | |
(:~ | |
: A module for unzipping files stored in the database | |
: | |
: @author Joe Wicentowski | |
:) | |
module namespace unzip = "http://joewiz.org/ns/xquery/unzip"; | |
import module namespace compression = "http://exist-db.org/xquery/compression"; | |
import module namespace functx = "http://www.functx.com"; | |
import module namespace xmldb = "http://exist-db.org/xquery/xmldb"; | |
(:~
 : Helper function of unzip:mkcol(): creates the collections named by $components,
 : each one nested inside the previous one, starting beneath $collection.
 :
 : @param $collection the collection in which the first component is created
 : @param $components the collection names still to be created, outermost first
 : @return the values returned by the xmldb:create-collection() calls, in creation order
 :)
declare %private function unzip:mkcol-recursive($collection, $components) as xs:string* {
    if (empty($components)) then
        ()
    else
        let $name := $components[1]
        let $rest := subsequence($components, 2)
        let $child := concat($collection, "/", $name)
        return (
            xmldb:create-collection($collection, $name),
            (: descend only while the next component has a true effective boolean value,
               i.e. it exists and, for strings, is not empty :)
            if ($rest[1]) then
                unzip:mkcol-recursive($child, $rest)
            else
                ()
        )
};
(:~
 : Helper function to recursively create a collection hierarchy: creates the
 : slash-separated steps of $path beneath $collection, passing each step through
 : xmldb:encode() first.
 :
 : @param $collection the collection beneath which the hierarchy is created
 : @param $path the slash-separated hierarchy to create
 : @return whatever unzip:mkcol-recursive() returns for the encoded steps
 :)
declare %private function unzip:mkcol($collection, $path) as xs:string* {
    let $encoded-steps :=
        for $step in tokenize($path, "/")
        return
            xmldb:encode($step)
    return
        unzip:mkcol-recursive($collection, $encoded-steps)
};
(:~
 : Helper function to recursively create a collection hierarchy from a full
 : database path: the part of $path that follows "/db/" is created beneath /db.
 :
 : @param $path the database path whose collections are to be created
 : @return whatever the two-argument unzip:mkcol() returns
 :)
declare %private function unzip:mkcol($path) as xs:string* {
    let $path-below-db := substring-after($path, "/db/")
    return
        unzip:mkcol('/db', $path-below-db)
};
(:~
 : Helper function to allow all zip entries through: an entry filter that
 : ignores its arguments and accepts every entry.
 :
 : @param $path unused
 : @param $data-type unused
 : @param $param unused
 : @return always true
 :)
declare %private function unzip:allow-all-entries-through($path as xs:string, $data-type as xs:string, $param as item()*) as xs:boolean {
    fn:true()
};
(:~
 : Helper function to store zip file data: stores a single zip entry below the
 : base collection named in $param.
 :
 : @param $path the entry's path inside the zip file
 : @param $data-type 'folder' for folder entries; every other value is handled as a resource
 : @param $data the entry's content
 : @param $param expected to contain an element whose name attribute is "unzip-base-collection"
 :        and whose value attribute holds the collection to store into
 : @return an entry element when the entry was handled, or an error element describing
 :         why it could not be stored
 :)
declare %private function unzip:store-entry($path as xs:string, $data-type as xs:string, $data as item()?, $param as item()*) as element() {
    let $unzip-base-collection := $param[@name="unzip-base-collection"]/@value
    return
        (: defensive: an entry path with a ".." step could point outside the base collection,
           so such entries are reported as errors instead of being stored :)
        if (tokenize($path, '/') = '..') then
            <error path="{$path}">{concat('Error storing ', $path, ': entry path must not contain ".." steps')}</error>
        else if ($data-type = 'folder') then
            let $mkcol := unzip:mkcol($unzip-base-collection, $path)
            return
                <entry path="{$path}" data-type="{$data-type}"/>
        else (: if ($data-type = 'resource') :)
            let $resource-collection := concat($unzip-base-collection, '/', xmldb:encode(functx:substring-before-last($path, '/')))
            let $resource-filename := if (contains($path, '/')) then functx:substring-after-last($path, '/') else $path
            let $resource-filename := xmldb:encode($resource-filename)
            return
                try {
                    let $collection-check := if (xmldb:collection-available($resource-collection)) then () else unzip:mkcol($resource-collection)
                    let $store := xmldb:store($resource-collection, $resource-filename, $data)
                    return
                        <entry path="{$path}" data-type="{$data-type}"/>
                }
                catch * {
                    (: join the error details with spaces; $err:value may hold any number of items,
                       so only its nodes and atomic values are stringified :)
                    let $details :=
                        string-join(
                            (
                                $err:code ! string(.),
                                $err:value[. instance of node() or . instance of xs:anyAtomicType] ! string(.),
                                $err:description
                            ),
                            ' '
                        )
                    return
                        <error path="{$path}">{concat('Error storing ', $path, ': ', $details)}</error>
                }
};
(:~
 : Helper function to list zip file contents: describes one zip entry without
 : storing anything.
 :
 : @param $path the entry's path inside the zip file
 : @param $data-type the entry's data type as reported by the caller
 : @param $data unused
 : @param $param unused
 : @return an entry element carrying the path and data type as attributes
 :)
declare %private function unzip:list-entry($path as xs:string, $data-type as xs:string, $data as item()?, $param as item()*) as element(entry) {
    element entry {
        attribute path { $path },
        attribute data-type { $data-type }
    }
};
(:~
 : Lists the contents of a zip file without storing them.
 :
 : @param $resource the full db path to the zip file
 : @return an entries element holding one entry element per item reported by
 :         compression:unzip(), with the total in its count attribute
 : @error unzip if $resource is not available as a binary document
 :)
declare function unzip:list($resource as xs:string) as element(entries) {
    if (not(util:binary-doc-available($resource))) then
        error(xs:QName('unzip'), concat($resource, ' does not exist or is not a valid binary file'))
    else
        let $listed :=
            compression:unzip(
                util:binary-doc($resource),
                (: let every entry through; no filter parameters :)
                unzip:allow-all-entries-through#3, (),
                (: describe each entry; no data parameters :)
                unzip:list-entry#4, ()
            )
        return
            <entries count="{count($listed)}">{$listed}</entries>
};
(:~
 : Unzips a zip file into the collection that contains it, i.e. the part of
 : $zip-file before its last slash.
 :
 : @param $zip-file the full db path to the zip file
 : @return the entries element produced by the two-argument unzip:unzip()
 :)
declare function unzip:unzip($zip-file as xs:string) as element(entries) {
    unzip:unzip($zip-file, functx:substring-before-last($zip-file, '/'))
};
(:~
 : Unzips a zip file and stores its contents into $target-collection, creating
 : that collection first when it is not available.
 :
 : @param $zip-file the full db path to the zip file
 : @param $target-collection where the zip file contents will be stored
 : @return an entries element wrapping one entry element per stored item and one error
 :         element per item that could not be stored, with both counts as attributes
 : @error unzip if $zip-file is not available as a binary document
 :)
declare function unzip:unzip($zip-file as xs:string, $target-collection as xs:string) as element(entries) {
    if (not(util:binary-doc-available($zip-file))) then
        error(xs:QName('unzip'), concat($zip-file, ' does not exist or is not a valid binary file'))
    else
        let $archive := util:binary-doc($zip-file)
        let $base-collection :=
            if (xmldb:collection-available($target-collection)) then
                $target-collection
            else
                (: the last value returned by unzip:mkcol() is used as the base collection :)
                unzip:mkcol($target-collection)[last()]
        let $store-params := <param name="unzip-base-collection" value="{$base-collection}"/>
        let $outcome :=
            compression:unzip(
                $archive,
                unzip:allow-all-entries-through#3, (),
                unzip:store-entry#4, $store-params
            )
        let $stored := count($outcome[self::entry])
        let $failed := count($outcome[self::error])
        return
            <entries target-collection="{$target-collection}" count-stored="{$stored}" count-unable-to-store="{$failed}">{$outcome}</entries>
};
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment