Skip to content

Instantly share code, notes, and snippets.

@joewiz
Last active August 30, 2023 23:19
Show Gist options
  • Save joewiz/7581205ab5be46eaa25fe223acda42c3 to your computer and use it in GitHub Desktop.
Save joewiz/7581205ab5be46eaa25fe223acda42c3 to your computer and use it in GitHub Desktop.
Convert CSV to XML, with XQuery
xquery version "3.1";
(: XQuery adaptation of https://github.com/digital-preservation/csv-tools/blob/master/csv-to-xml_v3.xsl.
See also the thread on basex-talk https://mailman.uni-konstanz.de/pipermail/basex-talk/2016-September/011272.html.
:)
(: Splits a single CSV record into its cell values.
   $row is one line of CSV text, without its line terminator.
   Returns one xs:string per cell, in order; empty cells are returned as
   zero-length strings so that positions stay aligned with the header row.
   Quoted cells have their surrounding double quotes removed and each doubled
   quote inside them collapsed to a single double quote.
   Limitation: a quoted cell that spans several lines is not supported, since
   the function only ever sees one line at a time. :)
declare function local:get-cells($row as xs:string) as xs:string* {
  (: workaround for lack of lookahead support: append comma to end of row :)
  let $string-to-analyze := $row || ","
  let $analyze := fn:analyze-string($string-to-analyze, '(("[^"]*")+|[^,]*),')
  (: Walk the matches rather than the group elements: string() of a missing
     group is "", so an empty cell is still emitted even if the processor
     represents a zero-length capture with no fn:group element. :)
  for $match in $analyze/fn:match
  let $cell := string($match/fn:group[@nr = "1"])
  return
    (: Only well-formed quoted cells (one or more adjacent "..." segments)
       are unquoted; anything else is passed through untouched. :)
    if (matches($cell, '^("[^"]*")+$')) then
      replace(substring($cell, 2, string-length($cell) - 2), '""', '"')
    else
      $cell
};
(: Main query: converts the CSV text in $csv into a csv element holding one
   row element per data line, with one child element per cell named after the
   matching header (runs of whitespace in a header become "_"). :)
let $csv := 'Author,Title,ISBN,Binding,Year Published
Jeannette Walls,The Glass Castle,074324754X,Paperback,2006
James Surowiecki,The Wisdom of Crowds,9780385503860,Paperback,2005
Lawrence Lessig,The Future of Ideas,9780375505782,Paperback,2002
"Larry Bossidy, Ram Charan, Charles Burck",Execution,9780609610572,Hardcover,2002
Kurt Vonnegut,Slaughterhouse-Five,9780791059258,Paperback,1999'
(: Accept CRLF as well as LF so no stray carriage return ends up in the last
   cell, and drop empty lines (e.g. a trailing newline) so they do not become
   spurious rows. :)
let $lines := tokenize($csv, '\r?\n')[. ne '']
let $header-row := fn:head($lines)
let $body-rows := fn:tail($lines)
(: A header that cannot be used as an element name falls back to a positional
   name instead of raising an error in the element constructor below. The "!"
   keeps the query from failing when there is no header row at all. :)
let $headers :=
  for $header at $position in $header-row ! local:get-cells(.)
  let $name := replace($header, '\s+', '_')
  return
    if ($name castable as xs:NCName) then $name else 'column_' || $position
return
  element csv {
    for $row in $body-rows
    let $cells := local:get-cells($row)
    return
      element row {
        for $cell at $count in $cells
        (: Rows with more cells than headers get positional names for the
           surplus cells rather than an empty-name type error. :)
        return element { ($headers[$count], 'column_' || $count)[1] } { $cell }
      }
  }
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment