Created
August 22, 2016 21:55
-
-
Save joewiz/ed624a43cb6f9e15cc44b6e30553ad5e to your computer and use it in GitHub Desktop.
Convert YAML to XML, with XQuery
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
xquery version "3.0"; | |
(: doesn't support YAML indentation yet - just a start :) | |
(:~
 : Strips one pair of matching surrounding quotes from a YAML scalar.
 :
 : A value wrapped in single quotes or in double quotes is returned without
 : the quotes. An empty quoted scalar yields the zero-length string. Any other
 : value, including one whose opening and closing quote characters differ,
 : is returned unchanged.
 :
 : @param $value the raw scalar text taken from a YAML line
 : @return the scalar without its enclosing quotes, or $value as-is
 :)
declare function local:process-yaml-value($value) {
    (: The back-reference \1 forces the closing quote to be the same character
       as the opening one; .* rather than .+ lets an empty quoted scalar match. :)
    let $quoted := "^(['""])(.*)\1$"
    return
        if (matches($value, $quoted)) then
            replace($value, $quoted, "$2")
        else
            $value
};
(:~
 : Collects the consecutive "- item" lines at the front of a line sequence.
 :
 : Each line of the form "- text" becomes a value element whose content is
 : the text after the dash, passed through local:process-yaml-value. Collection
 : stops at the first line that is not a list item, or when no lines remain.
 :
 : @param $line  the line to inspect first
 : @param $lines the lines that follow $line
 : @return zero or more value elements, in input order
 :)
declare function local:process-yaml-value-list($line, $lines) {
    let $item-pattern := "^- (.+)$"
    return
        if (not(matches($line, $item-pattern))) then
            (: not a list item (or no line at all): the list ends here :)
            ()
        else
            (
                element value {
                    local:process-yaml-value(replace($line, $item-pattern, "$1"))
                },
                local:process-yaml-value-list(head($lines), tail($lines))
            )
};
(:~
 : Converts a sequence of flat (non-indented) YAML lines into elements.
 :
 : Each line is classified, in this order:
 :   - blank or whitespace-only, or the "---" document marker: skipped
 :   - "name: value": an object element with a name and one value
 :   - "name:": an object element with a name and one value per
 :     immediately following "- item" line
 :   - anything else: an error element carrying the offending line
 :
 : The function calls itself once per remaining line and the call is not in
 : tail position, so recursion depth grows with the number of lines; very long
 : inputs may exhaust the stack on processors that do not optimise this.
 :
 : @param $line  the line to process first; the empty sequence ends processing
 : @param $lines the lines that follow $line
 : @return a sequence of object and error elements, in input order
 :)
declare function local:process-yaml-lines($line, $lines) {
    let $blank := "^\s*$"
    let $header := "^---$"
    let $name := "^([^:]+):$"
    let $name-value := "^([^:]+): (.+)$"
    return
        if (empty($line)) then
            ()
        else if (matches($line, $blank) or matches($line, $header)) then
            (: blank lines (e.g. the empty token left by a trailing newline)
               and document markers carry no data :)
            local:process-yaml-lines(head($lines), tail($lines))
        else if (matches($line, $name-value)) then
            let $match-groups := analyze-string($line, $name-value)//fn:group
            return
                (
                    element object {
                        element name { $match-groups[1]/string() },
                        element value { local:process-yaml-value($match-groups[2]/string()) }
                    },
                    local:process-yaml-lines(head($lines), tail($lines))
                )
        else if (matches($line, $name)) then
            let $match-groups := analyze-string($line, $name)//fn:group
            let $name-values :=
                element object {
                    element name { $match-groups[1]/string() },
                    local:process-yaml-value-list(head($lines), tail($lines))
                }
            (: every value element corresponds to one consumed "- item" line,
               so resume right after them :)
            let $value-count := count($name-values/value)
            let $next-lines := subsequence($lines, $value-count + 1)
            return
                (
                    $name-values,
                    local:process-yaml-lines(head($next-lines), tail($next-lines))
                )
        else
            (
                element error {
                    element message { "Sorry, this didn't match any of our patterns" },
                    element line { $line }
                },
                local:process-yaml-lines(head($lines), tail($lines))
            )
};
(:~
 : Converts a flat YAML document into an objects element.
 :
 : The text is split into lines and handed to local:process-yaml-lines; the
 : elements it returns become the children of the result.
 :
 : @param $yaml the YAML text
 : @return an objects element wrapping the converted lines
 :)
declare function local:yaml-to-xml($yaml as xs:string) {
    (: \r?\n rather than \n: with Windows (CRLF) input a bare \n split would
       leave a carriage return at the end of every line and break the
       end-anchored line patterns :)
    let $lines := tokenize($yaml, '\r?\n')
    return
        <objects>{ local:process-yaml-lines(head($lines), tail($lines)) }</objects>
};
(: Sample run: convert a small front-matter document, one YAML line per
   string, joined with newlines. :)
local:yaml-to-xml(
    string-join(
        (
            "---",
            "layout: post",
            "title: About HistoryAtState",
            "date: '2012-10-15T15:52:00-04:00'",
            "tags:",
            "- FRUS",
            "- U.S. Department of State",
            "tumblr_url: http://historyatstate.tumblr.com/post/33656405719/abouthistoryatstate",
            "---"
        ),
        "&#10;"
    )
)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<objects> | |
<object> | |
<name>layout</name> | |
<value>post</value> | |
</object> | |
<object> | |
<name>title</name> | |
<value>About HistoryAtState</value> | |
</object> | |
<object> | |
<name>date</name> | |
<value>2012-10-15T15:52:00-04:00</value> | |
</object> | |
<object> | |
<name>tags</name> | |
<value>FRUS</value> | |
<value>U.S. Department of State</value> | |
</object> | |
<object> | |
<name>tumblr_url</name> | |
<value>http://historyatstate.tumblr.com/post/33656405719/abouthistoryatstate</value> | |
</object> | |
</objects> |
@lcahlander Today I'd probably investigate using a tool like https://github.com/oxygenxml/oxygen-resources-converter to transform YAML into JSON, and then use XQuery's `parse-json` or `json-doc` for subsequent querying/transformation.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
If you use a YAML file of significant length, the stack overflows.