aperezdc/lpegxml.lua

## lpegxml.lua
--
-- LPeg-based XML parser.
--
-- * Grammar term names are the same as in the XML 1.1
--   specification: http://www.w3.org/TR/xml11/
-- * Action functions are missing.
--
-- Copyright (C) 2012 Adrian Perez <aperez@igalia.com>
-- Distribute under terms of the MIT license.
--

local lpeg = require "lpeg"
local V, R, S, P = lpeg.V, lpeg.R, lpeg.S, lpeg.P
local grammar = { "document" }

do local _ENV = grammar
  -- Inside this block the environment is the "grammar" table, so every
  -- plain assignment below defines a grammar rule. Only local variables
  -- from the enclosing scope (V, R, S, P) are reachable; regular globals
  -- are not. Rules that are used before they are defined, or that are
  -- recursive, are referenced with V "name".

  -- S ::= (#x20 | #x9 | #xD | #xA)+
  SS = (S " \t\r\n")^1
  SSopt = SS ^ -1

  -- NameStartChar ::= ":" | [A-Z] | "_" | [a-z]
  NameStartChar = S ":_" + R ("az", "AZ")

  -- NameChar ::= NameStartChar | "-" | "." | [0-9]
  NameChar = NameStartChar + S "-." + R "09"

  -- Name ::= NameStartChar (NameChar)*
  Name = NameStartChar * NameChar^0

  -- Names ::= Name (#x20 Name)*
  Names = Name * (" " * Name)^0

  -- Nmtoken ::= (NameChar)+
  Nmtoken = NameChar^1

  -- Nmtokens ::= Nmtoken (#x20 Nmtoken)*
  Nmtokens = Nmtoken * (" " * Nmtoken)^0

  -- CharRef ::= '&#' [0-9]+ ';'
  --           | '&#x' [0-9a-fA-F]+ ';'
  CharRef = ("&#"  * (R "09")^1 * ";")
          + ("&#x" * (R ("09", "af", "AF"))^1 * ";")

  -- EntityRef   ::= '&' Name ';'
  -- PEReference ::= '%' Name ';'
  -- Reference   ::= EntityRef | CharRef
  --
  EntityRef   = "&" * Name * ";"
  PEReference = "%" * Name * ";"
  Reference   = EntityRef + CharRef

  -- EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"'
  --               | "'" ([^%&'] | PEReference | Reference)* "'"
  EntityValue = ('"' * ((1 - S '%&"') + PEReference + Reference)^0 * '"')
              + ("'" * ((1 - S "%&'") + PEReference + Reference)^0 * "'")

  -- AttValue ::= '"' ([^<&"] | Reference)* '"'
  --            | "'" ([^<&'] | Reference)* "'"
  AttValue = ('"' * ((1 - S '<&"') + Reference)^0 * '"')
           + ("'" * ((1 - S "<&'") + Reference)^0 * "'")

  -- SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
  SystemLiteral = ('"' * (1 - P '"')^0 * '"')
                + ("'" * (1 - P "'")^0 * "'")

  -- PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
  PubidChar = S " \r\n-'()+,./:=?;!*#@$_%" + R ("az", "AZ", "09")

  -- PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
  PubidLiteral = ('"' * PubidChar^0 * '"')
               + ("'" * (PubidChar - "'")^0 * "'")

  -- CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
  CharData = (1 - (S "<&" + "]]>"))^0

  -- Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
  Comment = "<!--"
          * ((1 - S "-") + ("-" * (1 - S "-")))^0
          * "-->"

  -- PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
  --
  -- Only the exact name "xml" (in any letter case) is excluded. The
  -- trailing "-NameChar" keeps targets that merely start with those
  -- letters (e.g. "xml-stylesheet") valid.
  PITarget = Name - (S "xX" * S "mM" * S "lL" * -NameChar)

  -- PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
  --
  -- The part after the target is optional as a whole, and the text that
  -- follows the white space may be empty (as in "<?target ?>").
  PI = "<?"
     * PITarget
     * (SS * (1 - P "?>")^0) ^ -1
     * "?>"

  -- CDSect  ::= CDStart CData CDEnd
  -- CDStart ::= '<![CDATA['
  -- CData   ::= (Char* - (Char* ']]>' Char*))
  -- CDEnd   ::= ']]>'
  CData  = (1 - P "]]>")^0
  CDSect = "<![CDATA[" * CData * "]]>"

  -- prolog ::= XMLDecl Misc* (doctypedecl Misc*)?
  prolog = (V "XMLDecl") ^ -1
         * (V "Misc")^0
         * (V "doctypedecl" * (V "Misc")^0) ^ -1

  -- Eq ::= S? '=' S?
  Eq = SSopt * "=" * SSopt

  -- SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no') '"'))
  SDDecl = SS
         * "standalone"
         * Eq
         * ( ("'" * (P "yes" + "no") * "'")
           + ('"' * (P "yes" + "no") * '"')
           )

  -- XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
  XMLDecl = "<?xml"
          * V "VersionInfo"
          * (V "EncodingDecl") ^ -1
          * SDDecl ^ -1
          * SSopt
          * "?>"

  -- VersionNum ::= '1.0' | '1.1'
  VersionNum = P "1.0"
             + P "1.1"

  -- VersionInfo ::= S 'version' Eq ("'" VersionNum "'" | '"' VersionNum '"')
  VersionInfo = SS
              * "version"
              * Eq
              * ( ("'" * VersionNum * "'")
                + ('"' * VersionNum * '"')
                )

  -- Misc ::= Comment | PI | S
  Misc = Comment + PI + SS

  -- doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? ('[' intSubset ']' S?)? '>'
  doctypedecl = "<!DOCTYPE"
              * SS
              * Name
              * (SS * V "ExternalID") ^ -1
              * SSopt
              * ( "["
                * V "intSubset"
                * "]"
                * SSopt
                ) ^ -1
              * ">"

  -- document ::= ( prolog element Misc* )
  document = prolog * V "element" * Misc^0

  -- DeclSep ::= PEReference | S
  DeclSep = PEReference + SS

  -- choice ::= '(' S? cp ( S? '|' S? cp )+ S? ')'
  -- seq    ::= '(' S? cp ( S? ',' S? cp )* S? ')'
  choice = "(" * SSopt * V "cp" * (SSopt * "|" * SSopt * V "cp")^1 * SSopt * ")"
  seq    = "(" * SSopt * V "cp" * (SSopt * "," * SSopt * V "cp")^0 * SSopt * ")"

  -- cp ::= (Name | choice | seq) ('?' | '*' | '+')?
  cp = (Name + choice + seq) * (S "?*+") ^ -1

  -- children ::= (choice | seq) ('?' | '*' | '+')?
  children = (choice + seq) * (S "?*+") ^ -1

  -- Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' | '(' S? '#PCDATA' S? ')'
  Mixed = "(" * SSopt * "#PCDATA" * (SSopt * "|" * SSopt * Name)^0 * SSopt * ")*"
        + "(" * SSopt * "#PCDATA" * SSopt * ")"

  -- contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
  contentspec = P "EMPTY" + P "ANY" + Mixed + children

  -- elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
  elementdecl = "<!ELEMENT" * SS * Name * SS * contentspec * SSopt * ">"

  -- EnumeratedType ::= NotationType | Enumeration
  -- NotationType   ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
  -- Enumeration    ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
  -- AttType        ::= StringType | TokenizedType | EnumeratedType
  -- StringType     ::= 'CDATA'
  -- TokenizedType  ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' | 'ENTITIES'
  --                  | 'NMTOKEN' | 'NMTOKENS'
  --
  NotationType = "NOTATION" * SS * "(" * SSopt * Name * (SSopt * "|" * SSopt * Name)^0 * SSopt * ")"
  Enumeration = "(" * SSopt * Nmtoken * (SSopt * "|" * SSopt * Nmtoken)^0 * SSopt * ")"

  -- PEG choice is ordered and does not backtrack into an alternative
  -- that already succeeded, so keywords that are prefixes of other
  -- keywords ("ID" / "IDREF" / "IDREFS", "NMTOKEN" / "NMTOKENS") have
  -- to be tried longest first.
  AttType = P "CDATA"
          + P "IDREFS"
          + P "IDREF"
          + P "ID"
          + P "ENTITIES"
          + P "ENTITY"
          + P "NMTOKENS"
          + P "NMTOKEN"
          + NotationType
          + Enumeration

  -- DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
  DefaultDecl = P "#REQUIRED"
              + P "#IMPLIED"
              + (((P "#FIXED" * SS) ^ -1) * AttValue)

  -- AttDef ::= S Name S AttType S DefaultDecl
  AttDef = SS * Name * SS * AttType * SS * DefaultDecl

  -- AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
  AttlistDecl = "<!ATTLIST" * SS * Name * AttDef^0 * SSopt * ">"

  -- ExternalID ::= 'SYSTEM' S SystemLiteral
  --              | 'PUBLIC' S PubidLiteral S SystemLiteral
  --
  ExternalID = "SYSTEM" * SS * SystemLiteral
             + "PUBLIC" * SS * PubidLiteral * SS * SystemLiteral

  -- NDataDecl ::= S 'NDATA' S Name
  NDataDecl = SS * "NDATA" * SS * Name

  -- EntityDecl ::= GEDecl | PEDecl
  -- GEDecl     ::= '<!ENTITY' S Name S EntityDef S? '>'
  -- PEDecl     ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
  -- EntityDef  ::= EntityValue | (ExternalID NDataDecl?)
  -- PEDef      ::= EntityValue | ExternalID
  --
  PEDef      = EntityValue + ExternalID
  EntityDef  = EntityValue + (ExternalID * NDataDecl ^ -1)
  GEDecl     = "<!ENTITY" * SS * Name * SS * EntityDef * SSopt * ">"
  PEDecl     = "<!ENTITY" * SS * "%" * SS * Name * SS * PEDef * SSopt * ">"
  EntityDecl = GEDecl + PEDecl

  -- PublicID ::= 'PUBLIC' S PubidLiteral
  PublicID = "PUBLIC" * SS * PubidLiteral

  -- NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
  NotationDecl = "<!NOTATION" * SS * Name * SS * (ExternalID + PublicID) * SSopt * ">"

  -- markupdecl ::= elementdecl | AttlistDecl | EntityDecl | NotationDecl | PI | Comment
  markupdecl = elementdecl
             + AttlistDecl
             + EntityDecl
             + NotationDecl
             + PI
             + Comment

  -- conditionalSect    ::= includeSect | ignoreSect
  -- includeSect        ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
  -- ignoreSect         ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
  -- ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
  -- Ignore             ::= Char* - (Char* ('<![' | ']]>') Char*)
  --
  -- "Ignore" may match the empty string, and so may "ignoreSectContents".
  -- A single "ignoreSectContents" already consumes everything a repetition
  -- of it could consume, so "ignoreSect" uses it once: LPeg rejects loops
  -- whose body can match the empty string.
  Ignore = (1 - (P "<![" + P "]]>"))^0
  ignoreSectContents = Ignore * ("<![" * V "ignoreSectContents" * "]]>" * Ignore)^0
  conditionalSect = ("<![" * SSopt * "INCLUDE" * SSopt * "[" * V "extSubsetDecl" * "]]>")
                  + ("<![" * SSopt * "IGNORE"  * SSopt * "[" * ignoreSectContents * "]]>")

  -- intSubset ::= (markupdecl | DeclSep)*
  intSubset = (markupdecl + DeclSep)^0

  -- extSubsetDecl ::= (markupdecl | conditionalSect | DeclSep)*
  extSubsetDecl = (markupdecl + conditionalSect + DeclSep)^0

  -- EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
  EncName = R ("AZ", "az") * ((R ("AZ", "az", "09") + S "._") + "-")^0

  -- EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'" )
  EncodingDecl = SS
               * "encoding"
               * Eq
               * ( '"' * EncName * '"'
                 + "'" * EncName * "'"
                 )

  -- TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
  TextDecl = "<?xml"
           * VersionInfo ^ -1
           * EncodingDecl
           * SSopt
           * "?>"

  -- extSubset ::= TextDecl? extSubsetDecl
  extSubset = TextDecl ^ -1
            * extSubsetDecl

  -- Attribute ::= Name Eq AttValue
  Attribute = Name * Eq * AttValue

  -- STag ::= '<' Name (S Attribute)* S? '>'
  STag = "<" * Name * (SS * Attribute)^0 * SSopt * ">"

  -- ETag ::= '</' Name S? '>'
  ETag = "</" * Name * SSopt * ">"

  -- EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
  EmptyElemTag = "<" * Name * (SS * Attribute)^0 * SSopt * "/>"

  -- element ::= EmptyElemTag | STag content ETag
  element = EmptyElemTag
          + (STag * V "content" * ETag)

  -- content ::= CharData? ((element | Reference | CDSect | PI | Comment) CharData?)*
  content = CharData ^ -1
          * ( ( element
              + Reference
              + CDSect
              + PI
              + Comment
              )
            * CharData ^ -1
            ) ^ 0

end -- _ENV = grammar


-- Compile the grammar and run it over the whole of standard input. The
-- matched text is handed to io.write; when the input does not match,
-- nothing is written. The pattern is kept in a local so that running
-- the script does not create a global variable.
local d = P (grammar) / io.write
d:match (io.read "*a")

## xmldom.lua
---
-- XML parsing and DOM tree representation.
--
-- @copyright 2012 Adrian Perez <aperez@igalia.com>
-- @license Distributed under terms of the MIT license.
--

local setmetatable, getmetatable = setmetatable, getmetatable
local tpack, tremove, tconcat = table.pack, table.remove, table.concat
local print, ipairs, pairs, assert, type = print, ipairs, pairs, assert, type
local openfile, tostring = io.open, tostring
local sprintf = string.format

local lpeg = require "lpeg"
local V, R, S, P, C = lpeg.V, lpeg.R, lpeg.S, lpeg.P, lpeg.C
local Cg, Ct = lpeg.Cg, lpeg.Ct

-- Module table. "xml_grammar" is the LPeg grammar table that gets filled
-- in further below; its first element names the start rule.
local _M = {}
_M.xml_grammar = { "document" }


local Object = {}

--- Clones an object.
--
-- The returned table delegates lookups of missing keys to the table on
-- which `clone()` was invoked. When a table is passed, its key/value
-- pairs are copied into the new object (n.b. this is the same as
-- calling @{Object:extend}).
--
-- @param t Table with additional attributes (optional).
-- @return New cloned object.
--
function Object:clone (t)
  local child = setmetatable ({}, { __index = self })
  if type (t) ~= "table" then
    return child
  end
  for key, value in pairs (t) do
    child[key] = value
  end
  return child
end

--- Alias for @{Object:clone}
--
Object.extend = Object.clone

--- Gets the prototype of an object.
--
-- The prototype is the table the object was cloned from, i.e. the
-- `__index` entry of its metatable.
-- @return A table (the prototype) or `nil` (for the base object).
--
function Object:prototype ()
  local mt = getmetatable (self)
  if mt then
    return mt.__index
  end
  return mt
end

--- Checks whether an object is derived from some other object.
--
-- **Note** that the whole prototype chain is walked, one link at a
-- time, so this may be slow for deep hierarchies.
--
-- @param obj Reference object.
-- @return Whether the object derives from the reference object.
--
function Object:derives (obj)
  local mt = getmetatable (self)
  -- A missing metatable or a missing __index marks the end of the chain.
  while mt and mt.__index do
    if mt.__index == obj then
      return true
    end
    mt = getmetatable (mt.__index)
  end
  return false
end


_M.Object = Object


local flatten_attributes
local outline
-- Prints one line per node (node type, flattened attributes, and the tag
-- name or node value), indenting two spaces per nesting level, and then
-- recurses into the children of the node.
--
-- e: node to print; d: nesting depth, defaults to 0.
function outline (e, d)
  local depth = d or 0
  local line = sprintf ("%s%s [%s] <%s>",
                        ("  "):rep (depth),
                        e.nodeType,
                        flatten_attributes (e.attributes),
                        e.tagName or e.nodeValue)
  print (line)

  if not e:hasChildNodes () then
    return
  end
  for _, child in ipairs (e.childNodes) do
    outline (child, depth + 1)
  end
end


---
-- XML DOM node class.
--
-- This loosely follows the [HTML DOM element API]
-- (https://developer.mozilla.org/en/DOM/element), but deviates from
-- it on purpose, to make more convenient to use and more Lua-like.
--
-- @type Node
--
local Node = Object:clone ({
  -- Values taken by the "nodeType" field.
  ELEMENT_NODE                = "element",
  TEXT_NODE                   = "text",
  CDATA_SECTION_NODE          = "cdata",
  COMMENT_NODE                = "comment",
  PROCESSING_INSTRUCTION_NODE = "processinginstruction",
  DOCTYPE_NODE                = "doctype",
  XML_DECLARATION_NODE        = "xmldeclaration",
  DOCUMENT_NODE               = "document",

  -- The fields below are left unset (nil) on the prototype and are only
  -- filled in on individual nodes:
  --
  --   nodeType, nodeValue, tagName, parentNode
  --   attributes, childNodes
  --
  --   nodeType == DOCTYPE_NODE:
  --     doctypePublicIdentifier, doctypeSystemIdentifier
  --
  --   nodeType == XML_DECLARATION_NODE:
  --     xmlVersion, xmlEncoding, xmlStandalone
})

---
-- Returns the number of child elements of a node.
--
-- Note: every entry of "childNodes" is counted, whatever its node type.
function Node:childElementCount ()
  local children = self.childNodes
  if children then
    return #children
  end
  return 0
end

---
-- Appends a child to a node.
--
-- @param child Node to be appended.
--
-- The "childNodes" list is created on first use; the child's
-- "parentNode" is pointed at this node.
function Node:appendChild (child)
  local children = self.childNodes
  if children == nil then
    children = {}
    self.childNodes = children
  end
  children[#children + 1] = child
  child.parentNode = self
end

---
-- Removes a child from a node.
--
-- @param child Node to be removed. If it is not a child, nothing is done.
--
-- On removal the child's "parentNode" is reset to nil.
function Node:removeChild (child)
  if child.parentNode ~= self then
    return
  end

  local siblings = self.childNodes
  if siblings == nil then
    return
  end

  for index, candidate in ipairs (siblings) do
    if candidate == child then
      tremove (siblings, index)
      child.parentNode = nil
      return
    end
  end
end

---
-- Checks whether a node has children.
--
-- @return `true` when "childNodes" exists and is not empty.
function Node:hasChildNodes ()
  local children = self.childNodes
  if children == nil then
    return false
  end
  return #children > 0
end


-- Characters which cannot appear literally in XML text, and the entity
-- references used to represent them.
local escape_replacements = {
  ["&"] = "&amp;";
  ["<"] = "&lt;";
}

-- Decides what to do with one "&..." match found by escape():
--
--  * a lone "&" is replaced by "&amp;";
--  * a complete reference ("&name;") is kept untouched, so that text
--    which already contains entity references is not escaped twice;
--  * an unterminated "&name" gets its ampersand escaped.
local function escape_match (capture)
  local replacement = escape_replacements[capture]
  if replacement then
    return replacement
  end
  if #capture > 2 and capture:sub (-1) == ";" then
    return capture
  end
  return escape_replacements["&"] .. capture:sub (2)
end

-- Escapes "&" and "<" in a string, leaving existing "&name;" entity
-- references alone. Returns exactly one value (the escaped string).
local function escape (text)
  -- Ampersands go first: the "&lt;" inserted by the second pass must
  -- not be escaped again.
  local escaped = text:gsub ("&%a*;?", escape_match)
                      :gsub ("<", escape_replacements)
  return escaped
end

local flatten_node

-- Serializes a document node by serializing each of its children, in
-- order, into "output" (an array of string pieces). A document that has
-- no "childNodes" list produces no output.
local function flatten_document (elt, output)
  local children = elt.childNodes
  if children == nil then
    return
  end
  for _, child in ipairs (children) do
    flatten_node (child, output)
  end
end

-- Appends a CDATA section holding the node value to "output", followed
-- by a newline.
local function flatten_cdata (elt, output)
  local function emit (piece)
    output[#output + 1] = piece
  end
  emit ("<![CDATA[")
  emit (elt.nodeValue)
  emit ("]]>\n")
end

-- Appends a comment to "output". The node value is surrounded by one
-- space on each side.
local function flatten_comment (elt, output)
  local opening = "<!-- " .. elt.nodeValue
  output[#output + 1] = opening .. " -->"
end

-- Appends a processing instruction to "output", followed by a newline.
-- The target is taken from "tagName" and the content from "nodeValue";
-- a nil content simply adds nothing.
local function flatten_proc_ins (elt, output)
  local function emit (piece)
    output[#output + 1] = piece
  end
  emit ("<?" .. elt.tagName .. " ")
  emit (elt.nodeValue)
  emit ("?>\n")
end

-- Appends the escaped value of a text node to "output".
local function flatten_text (elt, output)
  local escaped = escape (elt.nodeValue)
  output[#output + 1] = escaped
end

-- Appends an XML declaration to "output", followed by a newline. The
-- "standalone" pseudo-attribute is only written when "xmlStandalone"
-- is not nil.
local function flatten_xml_decl (elt, output)
  local function emit (piece)
    output[#output + 1] = piece
  end

  emit ("<?xml version=\"" .. elt.xmlVersion)
  emit ("\" encoding=\"" .. elt.xmlEncoding)

  local standalone = elt.xmlStandalone
  if standalone ~= nil then
    emit ("\" standalone=\"")
    if standalone then
      emit ("yes")
    else
      emit ("no")
    end
  end

  emit ("\"?>\n")
end

-- Appends a DOCTYPE declaration to "output", followed by a newline.
--
-- When "doctypeKind" is set it must be "SYSTEM" (only the system
-- identifier is written) or anything else, which is treated as "PUBLIC"
-- (public identifier followed by system identifier). Identifiers are
-- written inside double quotes.
local function flatten_doctype (elt, output)
  output[#output+1] = "<!DOCTYPE " .. elt.tagName
  if elt.doctypeKind ~= nil then
    -- The leading space separates the keyword from the document type
    -- name; without it the result would read "<!DOCTYPE htmlPUBLIC".
    output[#output+1] = " " .. elt.doctypeKind .. " \""
    if elt.doctypeKind == "SYSTEM" then
      output[#output+1] = elt.doctypeSystemIdentifier
    else
      output[#output+1] = elt.doctypePublicIdentifier
      output[#output+1] = "\" \""
      output[#output+1] = elt.doctypeSystemIdentifier
    end
    output[#output+1] = "\""
  end
  output[#output+1] = ">\n"
end

-- Serializes an attribute table as a space-separated list of
-- name="value" items.
--
-- Values are passed through escape() and, because they are written
-- inside double quotes, every double quote in a value is replaced by
-- "&quot;". Attributes are emitted sorted by name so that the result
-- does not depend on the traversal order of pairs().
--
-- attributes: table mapping names to values, or nil.
-- Returns the serialized attributes; an empty string for nil.
function flatten_attributes (attributes)
  if attributes == nil then
    return ""
  end

  local names = {}
  for name in pairs (attributes) do
    names[#names+1] = name
  end
  table.sort (names, function (a, b)
    return tostring (a) < tostring (b)
  end)

  local attr = {}
  for i, name in ipairs (names) do
    -- Assigning to a single local drops the match count of gsub.
    local value = escape (attributes[name]):gsub ("\"", "&quot;")
    attr[i] = name .. "=\"" .. value .. "\""
  end
  return tconcat (attr, " ")
end

-- Appends an element to "output": the start tag with its attributes,
-- the serialized children and the end tag. Elements without children
-- are written as empty-element tags.
local function flatten_element (elt, output)
  local function emit (piece)
    output[#output + 1] = piece
  end

  local name = elt.tagName
  emit ("<" .. name)

  local attr = flatten_attributes (elt.attributes)
  if #attr > 0 then
    emit (" " .. attr)
  end

  if not elt:hasChildNodes () then
    emit ("/>")
    return
  end

  emit (">")
  for _, child in ipairs (elt.childNodes) do
    flatten_node (child, output)
  end
  emit ("</" .. name .. ">")
end

-- Dispatch table: maps each node type to the function serializing it.
local flatteners = {}
flatteners[Node.DOCUMENT_NODE]               = flatten_document
flatteners[Node.CDATA_SECTION_NODE]          = flatten_cdata
flatteners[Node.COMMENT_NODE]                = flatten_comment
flatteners[Node.PROCESSING_INSTRUCTION_NODE] = flatten_proc_ins
flatteners[Node.TEXT_NODE]                   = flatten_text
flatteners[Node.XML_DECLARATION_NODE]        = flatten_xml_decl
flatteners[Node.DOCTYPE_NODE]                = flatten_doctype
flatteners[Node.ELEMENT_NODE]                = flatten_element

-- Serializes any node into "output" by dispatching on its node type.
-- Raises an error for node types without a registered serializer.
function flatten_node (treeish, output)
  local kind = treeish.nodeType
  local handler = flatteners[kind]
  assert (handler, "Unknown node type: " .. tostring (kind))
  handler (treeish, output)
end


---
-- Serializes a DOM tree (or subtree) as XML text.
--
-- @param treeish DOM tree or subtree (a @{Node}).
-- @param output Where to write the result (optional). A string is
--  taken as the name of a file, which is created or overwritten; any
--  other non-nil value must provide a `write` method, like file handles
--  do, and is left open.
-- @return The XML text when `output` is `nil`, nothing otherwise.
--
function _M.dump (treeish, output)
  local result = {}
  flatten_node (treeish, result)
  result[#result+1] = "\n"
  local text = tconcat (result)

  if output == nil then
    return text
  end

  if type (output) == "string" then
    local fd = assert (openfile (output, "wb"))
    local ok, err = fd:write (text)
    -- Close the file before reporting a failed write, so that the
    -- handle is not leaked.
    fd:close ()
    assert (ok, err)
  else
    assert (output:write (text))
  end
end


--
-- The "action" table contains parsing actions. Those functions pick the
-- relevant captures defined in the XML grammar and build up a DOM tree
-- composed out of "Node" instances.
--
local action = {}

-- Builds a comment node whose value is the text of the comment.
function action.comment (text)
  local node = Node:clone ()
  node.nodeType  = Node.COMMENT_NODE
  node.nodeValue = text
  return node
end

-- Builds a processing instruction node: the target is stored in
-- "tagName" and the content in "nodeValue".
function action.processing_instruction (name, content)
  local node = Node:clone ()
  node.nodeType  = Node.PROCESSING_INSTRUCTION_NODE
  node.tagName   = name
  node.nodeValue = content
  return node
end

-- Builds a CDATA section node holding the given content.
function action.cdata (content)
  local node = Node:clone ()
  node.nodeType  = Node.CDATA_SECTION_NODE
  node.nodeValue = content
  return node
end

-- Builds a text node holding the given content.
function action.text (content)
  local node = Node:clone ()
  node.nodeType  = Node.TEXT_NODE
  node.nodeValue = content
  return node
end

-- The "data" table is optional:
--
--   data[1] contains the type of reference: SYSTEM / PUBLIC
--   data[2] and data[3] contain the rest of information
--
-- The rest of contents of DOCTYPE declarations are completely
-- ignored -- yet they are parsed and validated by LPeg.
--
-- Builds a DOCTYPE node for the document type named "tag". When "data"
-- is given it must be a table: data[1] is the kind of external
-- identifier ("SYSTEM" or "PUBLIC") and the following items are the
-- identifiers themselves.
function action.doctype (tag, data)
  local node = Node:clone ()
  node.nodeType = Node.DOCTYPE_NODE
  node.tagName  = tag

  if not data then
    return node
  end

  assert (type (data) == "table",
          "argument #2 to action.doctype is not a table")

  local kind = data[1]
  node.doctypeKind = kind
  if kind ~= "SYSTEM" then
    assert (kind == "PUBLIC",
            "DOCTYPE kind has to be either PUBLIC or SYSTEM")
    node.doctypePublicIdentifier = data[2]
    node.doctypeSystemIdentifier = data[3]
  else
    node.doctypeSystemIdentifier = data[2]
  end
  return node
end

-- Builds an XML declaration node.
--
-- version:    version number, as a string.
-- encoding:   encoding name; nil or "" select the default, "utf-8".
-- standalone: boolean value of the "standalone" pseudo-attribute; nil
--             or "" mean that it was not specified.
--
-- A grammar whose optional parts produce no capture when absent passes
-- the standalone flag in the position of a missing encoding. Encodings
-- are strings and the flag is a boolean, so that case is detected and
-- the arguments are put back in their places.
function action.xml_declaration (version, encoding, standalone)
  if type (encoding) == "boolean" and standalone == nil then
    encoding, standalone = nil, encoding
  end

  -- Explicit tests instead of "x and y or z": "false" is a meaningful
  -- value for standalone (standalone="no") and must be preserved.
  if standalone == "" then
    standalone = nil
  end
  if encoding == nil or encoding == "" then
    encoding = "utf-8"
  end

  return Node:clone {
    nodeType      = Node.XML_DECLARATION_NODE;
    xmlStandalone = standalone;
    xmlEncoding   = encoding;
    xmlVersion    = version;
  }
end

-- Builds the document node. Its children are the nodes of the prolog,
-- the root element and the nodes found after the root element, in that
-- order. "prolog" and "epilog" are arrays of nodes.
function action.document (prolog, rootnode, epilog)
  local doc = Node:clone ()
  doc.nodeType = Node.DOCUMENT_NODE

  local function adopt (nodes)
    for _, node in ipairs (nodes) do
      doc:appendChild (node)
    end
  end

  adopt (prolog)
  doc:appendChild (rootnode)
  adopt (epilog)
  return doc
end

-- Builds an element node without children. The extra arguments are the
-- attributes, as alternating names and values.
function action.empty_element_tag (tag, ...)
  local items = tpack (...)
  local attributes = {}

  local index = 1
  while index <= items.n do
    attributes[items[index]] = items[index + 1]
    index = index + 2
  end

  local node = Node:clone ()
  node.nodeType   = Node.ELEMENT_NODE
  node.tagName    = tag
  node.attributes = attributes
  return node
end

-- Builds an element node with content.
--
-- stag: name found in the start tag.
-- arg:  array with the attributes, as alternating names and values.
-- ...:  the child nodes, followed by the name found in the end tag.
--
-- Raises an error when the names of the start and end tags differ.
function action.element_tag (stag, arg, ...)
  local nodes = tpack (...)
  local etag = nodes[nodes.n]
  assert (stag == etag,
          "Start tag does not match end tag (" .. stag .. ", " .. etag .. ")")
  -- Drop the end tag name, leaving only the child nodes.
  tremove (nodes)

  local attributes = {}
  local index, last = 1, arg.n or #arg
  while index <= last do
    attributes[arg[index]] = arg[index + 1]
    index = index + 2
  end

  local elt = Node:clone ()
  elt.nodeType   = Node.ELEMENT_NODE
  elt.tagName    = stag
  elt.attributes = attributes

  for _, node in ipairs (nodes) do
    elt:appendChild (node)
  end
  return elt
end


-- Entity references which are replaced while parsing, keyed by the
-- complete text of the reference.
local entity_refs = { ["&lt;"] = "<", ["&amp;"] = "&" }

-- Returns the replacement text for a known entity reference; any other
-- reference is returned unchanged.
function action.entity_reference (name)
  local replacement = entity_refs[name]
  if replacement then
    return replacement
  end
  return name
end

-- Debugging aid: prints every capture it receives, one per line, with
-- its position, and returns all of them packed in a table. Values are
-- printed between ">" and "<" markers, which are rendered in bold using
-- ANSI escape sequences.
--
-- Nodes are shown as "<nodeType[tagName]>". Other tables are formatted
-- with lib.ml.tstring() when a global "lib" providing it exists, and
-- with tostring() otherwise.
function action.captures (...)
  local args = { ... }
  local FMT = "[%i] \27[1;1m>\27[0;0m%s\27[1;1m<\27[0;0m"
  local FN  = "<%s[%s]>"
  for i, v in ipairs (args) do
    if type (v) == "table" then
      if v.nodeType then
        v = FN:format (v.nodeType, v.tagName)
      else
        -- "lib" is not defined by this module: only use it if present.
        local ml = type (lib) == "table" and lib.ml
        if type (ml) == "table" and type (ml.tstring) == "function" then
          v = ml.tstring (v)
        else
          v = tostring (v)
        end
      end
    end
    print (FMT:format (i, v))
  end
  return { ... }
end


--
-- XML grammar using LPeg. It uses the functions in the "action" table to
-- build a DOM tree.
--
-- Creates the function which turns the digits of a numeric character
-- reference into the character itself. It is defined outside of the
-- grammar block because global functions cannot be reached from inside.
--
-- The character is encoded with utf8.char() where that is available;
-- otherwise only code points below 128 are decoded. References which
-- cannot be decoded are kept verbatim.
local function char_ref_decoder (prefix, base)
  return function (digits)
    local codepoint = tonumber (digits, base)
    if codepoint then
      if utf8 then
        local ok, char = pcall (utf8.char, codepoint)
        if ok then
          return char
        end
      elseif codepoint < 128 then
        return string.char (codepoint)
      end
    end
    return prefix .. digits .. ";"
  end
end

local decode_decimal_ref = char_ref_decoder ("&#", 10)
local decode_hex_ref     = char_ref_decoder ("&#x", 16)

do local _ENV = _M.xml_grammar
  -- Inside this block the environment is the grammar table, so every
  -- plain assignment below defines a grammar rule. Only local variables
  -- of the enclosing scope are reachable; regular globals are not. Rules
  -- that are used before they are defined, or that are recursive, are
  -- referenced with V "name".

  -- S ::= (#x20 | #x9 | #xD | #xA)+
  SS = (S " \t\r\n")^1
  SSopt = SS ^ -1

  -- NameStartChar ::= ":" | [A-Z] | "_" | [a-z]
  NameStartChar = S ":_" + R ("az", "AZ")

  -- NameChar ::= NameStartChar | "-" | "." | [0-9]
  NameChar = NameStartChar + S "-." + R "09"

  -- Name ::= NameStartChar (NameChar)*
  Name = NameStartChar * NameChar^0

  -- Names ::= Name (#x20 Name)*
  Names = Name * (" " * Name)^0

  -- Nmtoken ::= (NameChar)+
  Nmtoken = NameChar^1

  -- Nmtokens ::= Nmtoken (#x20 Nmtoken)*
  Nmtokens = Nmtoken * (" " * Nmtoken)^0

  -- CharRef ::= '&#' [0-9]+ ';'
  --           | '&#x' [0-9a-fA-F]+ ';'
  --
  -- The reference produces the character it stands for as its capture;
  -- without a capture it would vanish from text and attribute values.
  CharRef = ("&#"  * C( (R "09")^1 ) * ";") / decode_decimal_ref
          + ("&#x" * C( (R ("09", "af", "AF"))^1 ) * ";") / decode_hex_ref

  -- EntityRef ::= '&' Name ';'
  --
  -- There is no explicit capture, so the action receives the complete
  -- text of the reference (e.g. "&amp;").
  EntityRef = "&" * Name * ";"
            / action.entity_reference

  -- PEReference ::= '%' Name ';'
  PEReference = "%" * Name * ";"

  -- Reference ::= EntityRef | CharRef
  Reference = EntityRef + CharRef

  -- EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"'
  --               | "'" ([^%&'] | PEReference | Reference)* "'"
  EntityValue = ('"' * ((1 - S '%&"') + PEReference + Reference)^0 * '"')
              + ("'" * ((1 - S "%&'") + PEReference + Reference)^0 * "'")

  -- AttValue ::= '"' ([^<&"] | Reference)* '"'
  --            | "'" ([^<&'] | Reference)* "'"
  --
  -- The pieces (plain characters and replaced references) are collected
  -- in a table and joined into a single string.
  AttValue = ( ('"' * Ct( (C(1 - S '<&"')^1 + Reference)^0 ) * '"')
             + ("'" * Ct( (C(1 - S "<&'")^1 + Reference)^0 ) * "'")
             ) / tconcat

  -- SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
  SystemLiteral = ('"' * C( (1 - P '"')^0 ) * '"')
                + ("'" * C( (1 - P "'")^0 ) * "'")

  -- PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
  PubidChar = S " \r\n-'()+,./:=?;!*#@$_%" + R ("az", "AZ", "09")

  -- PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
  PubidLiteral = ('"' * C( PubidChar^0 ) * '"')
               + ("'" * C( (PubidChar - "'")^0 ) * "'")

  -- CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
  --
  -- At least one character is required here: optional character data is
  -- expressed where the rule is used.
  CharData = C( (1 - (S "<&" + "]]>"))^1 )

  -- Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
  Comment = "<!--"
          * C( ((1 - S "-") + ("-" * (1 - S "-")))^0 )
          * "-->"
          / action.comment

  -- PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
  --
  -- Only the exact name "xml" (in any letter case) is excluded. The
  -- trailing "-NameChar" keeps targets that merely start with those
  -- letters (e.g. "xml-stylesheet") valid.
  PITarget = Name - (S "xX" * S "mM" * S "lL" * -NameChar)

  -- PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
  --
  -- The part after the target is optional as a whole. When it is
  -- present the text after the white space is captured, even if empty.
  PI = "<?"
     * C( PITarget )
     * (SS * C( (1 - P "?>")^0 )) ^ -1
     * "?>"
     / action.processing_instruction

  -- CDSect  ::= CDStart CData CDEnd
  -- CDStart ::= '<![CDATA['
  -- CData   ::= (Char* - (Char* ']]>' Char*))
  -- CDEnd   ::= ']]>'
  CData  = (1 - P "]]>")^0
  CDSect = "<![CDATA["
         * C( CData )
         * "]]>"
         / action.cdata

  -- prolog ::= XMLDecl Misc* (doctypedecl Misc*)?
  prolog = (V "XMLDecl") ^ -1
         * (V "Misc")^0
         * (V "doctypedecl" * (V "Misc")^0) ^ -1

  -- Eq ::= S? '=' S?
  Eq = SSopt * "=" * SSopt

  -- SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no') '"'))
  --
  -- Produces a boolean: true for "yes", false for "no".
  SDDecl = SS
         * "standalone"
         * Eq
         * ( ("'" * C(P "yes" + "no") * "'")
           + ('"' * C(P "yes" + "no") * '"')
           )
         / function (v) return v == "yes" end

  -- VersionNum ::= '1.0' | '1.1'
  VersionNum = P "1.0"
             + P "1.1"

  -- VersionInfo ::= S 'version' Eq ("'" VersionNum "'" | '"' VersionNum '"')
  VersionInfo = SS
              * "version"
              * Eq
              * ( ("'" * C( VersionNum ) * "'")
                + ('"' * C( VersionNum ) * '"')
                )

  -- XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
  --
  -- The action takes its arguments by position, so the optional parts
  -- produce an empty string when they are absent. Otherwise the value
  -- of "standalone" would take the place of a missing encoding.
  XMLDecl = "<?xml"
          * Cg( VersionInfo )
          * Cg( V "EncodingDecl" + lpeg.Cc "" )
          * Cg( SDDecl + lpeg.Cc "" )
          * SSopt
          * "?>"
          / action.xml_declaration

  -- Misc ::= Comment | PI | S
  Misc = Comment + PI + SS

  -- doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? ('[' intSubset ']' S?)? '>'
  --
  -- The contents of the internal subset are matched but their captures
  -- are discarded (that is what "/ 0" does), so the action only ever
  -- receives the name and the optional external identifier.
  doctypedecl = "<!DOCTYPE"
              * SS
              * C( Name )
              * (SS * V "ExternalID") ^ -1
              * SSopt
              * ( "["
                * (V "intSubset" / 0)
                * "]"
                * SSopt
                ) ^ -1
              * ">"
              / action.doctype

  -- document ::= ( prolog element Misc* )
  document = Ct( prolog )
           * Cg( V "element" )
           * Ct( Misc^0 )
           / action.document

  -- DeclSep ::= PEReference | S
  DeclSep = PEReference + SS

  -- choice ::= '(' S? cp ( S? '|' S? cp )+ S? ')'
  -- seq    ::= '(' S? cp ( S? ',' S? cp )* S? ')'
  choice = "(" * SSopt * V "cp" * (SSopt * "|" * SSopt * V "cp")^1 * SSopt * ")"
  seq    = "(" * SSopt * V "cp" * (SSopt * "," * SSopt * V "cp")^0 * SSopt * ")"

  -- cp ::= (Name | choice | seq) ('?' | '*' | '+')?
  cp = (Name + choice + seq) * (S "?*+") ^ -1

  -- children ::= (choice | seq) ('?' | '*' | '+')?
  children = (choice + seq) * (S "?*+") ^ -1

  -- Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' | '(' S? '#PCDATA' S? ')'
  Mixed = "(" * SSopt * "#PCDATA" * (SSopt * "|" * SSopt * Name)^0 * SSopt * ")*"
        + "(" * SSopt * "#PCDATA" * SSopt * ")"

  -- contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
  contentspec = P "EMPTY" + P "ANY" + Mixed + children

  -- elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
  elementdecl = "<!ELEMENT" * SS * Name * SS * contentspec * SSopt * ">"

  -- EnumeratedType ::= NotationType | Enumeration
  -- NotationType   ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
  -- Enumeration    ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
  -- AttType        ::= StringType | TokenizedType | EnumeratedType
  -- StringType     ::= 'CDATA'
  -- TokenizedType  ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' | 'ENTITIES'
  --                  | 'NMTOKEN' | 'NMTOKENS'
  --
  NotationType = "NOTATION" * SS * "(" * SSopt * Name * (SSopt * "|" * SSopt * Name)^0 * SSopt * ")"
  Enumeration = "(" * SSopt * Nmtoken * (SSopt * "|" * SSopt * Nmtoken)^0 * SSopt * ")"

  -- PEG choice is ordered and does not backtrack into an alternative
  -- that already succeeded, so keywords that are prefixes of other
  -- keywords ("ID" / "IDREF" / "IDREFS", "NMTOKEN" / "NMTOKENS") have
  -- to be tried longest first.
  AttType = P "CDATA"
          + P "IDREFS"
          + P "IDREF"
          + P "ID"
          + P "ENTITIES"
          + P "ENTITY"
          + P "NMTOKENS"
          + P "NMTOKEN"
          + NotationType
          + Enumeration

  -- DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
  DefaultDecl = P "#REQUIRED"
              + P "#IMPLIED"
              + (((P "#FIXED" * SS) ^ -1) * AttValue)

  -- AttDef ::= S Name S AttType S DefaultDecl
  AttDef = SS * Name * SS * AttType * SS * DefaultDecl

  -- AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
  AttlistDecl = "<!ATTLIST" * SS * Name * AttDef^0 * SSopt * ">"

  -- ExternalID ::= 'SYSTEM' S SystemLiteral
  --              | 'PUBLIC' S PubidLiteral S SystemLiteral
  --
  -- Both alternatives are packed into a table: the kind of identifier
  -- followed by the literals. The parentheses are needed because "/"
  -- binds tighter than "+".
  ExternalID = ( C( P "SYSTEM" ) * SS * SystemLiteral
               + C( P "PUBLIC" ) * SS * PubidLiteral * SS * SystemLiteral
               ) / tpack

  -- NDataDecl ::= S 'NDATA' S Name
  NDataDecl = SS * "NDATA" * SS * Name

  -- EntityDecl ::= GEDecl | PEDecl
  -- GEDecl     ::= '<!ENTITY' S Name S EntityDef S? '>'
  -- PEDecl     ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
  -- EntityDef  ::= EntityValue | (ExternalID NDataDecl?)
  -- PEDef      ::= EntityValue | ExternalID
  --
  PEDef      = EntityValue + ExternalID
  EntityDef  = EntityValue + (ExternalID * NDataDecl ^ -1)
  GEDecl     = "<!ENTITY" * SS * Name * SS * EntityDef * SSopt * ">"
  PEDecl     = "<!ENTITY" * SS * "%" * SS * Name * SS * PEDef * SSopt * ">"
  EntityDecl = GEDecl + PEDecl

  -- PublicID ::= 'PUBLIC' S PubidLiteral
  PublicID = "PUBLIC" * SS * PubidLiteral

  -- NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
  NotationDecl = "<!NOTATION" * SS * Name * SS * (ExternalID + PublicID) * SSopt * ">"

  -- markupdecl ::= elementdecl | AttlistDecl | EntityDecl | NotationDecl | PI | Comment
  markupdecl = elementdecl
             + AttlistDecl
             + EntityDecl
             + NotationDecl
             + PI
             + Comment

  -- conditionalSect    ::= includeSect | ignoreSect
  -- includeSect        ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
  -- ignoreSect         ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
  -- ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
  -- Ignore             ::= Char* - (Char* ('<![' | ']]>') Char*)
  --
  -- "Ignore" may match the empty string, and so may "ignoreSectContents".
  -- A single "ignoreSectContents" already consumes everything a repetition
  -- of it could consume, so "ignoreSect" uses it once: LPeg rejects loops
  -- whose body can match the empty string.
  Ignore = (1 - (P "<![" + P "]]>"))^0
  ignoreSectContents = Ignore * ("<![" * V "ignoreSectContents" * "]]>" * Ignore)^0
  conditionalSect = ("<![" * SSopt * "INCLUDE" * SSopt * "[" * V "extSubsetDecl" * "]]>")
                  + ("<![" * SSopt * "IGNORE"  * SSopt * "[" * ignoreSectContents * "]]>")

  -- intSubset ::= (markupdecl | DeclSep)*
  intSubset = (markupdecl + DeclSep)^0

  -- extSubsetDecl ::= (markupdecl | conditionalSect | DeclSep)*
  extSubsetDecl = (markupdecl + conditionalSect + DeclSep)^0

  -- EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
  EncName = R ("AZ", "az") * ((R ("AZ", "az", "09") + S "._") + "-")^0

  -- EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'" )
  EncodingDecl = SS
               * "encoding"
               * Eq
               * ( '"' * C( EncName ) * '"'
                 + "'" * C( EncName ) * "'"
                 )

  -- TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
  TextDecl = "<?xml"
           * VersionInfo ^ -1
           * EncodingDecl
           * SSopt
           * "?>"

  -- extSubset ::= TextDecl? extSubsetDecl
  extSubset = TextDecl ^ -1
            * extSubsetDecl

  -- Attribute ::= Name Eq AttValue
  --
  -- Produces two captures: the name and the value.
  Attribute = C( Name )
            * Eq
            * Cg( AttValue )

  -- STag ::= '<' Name (S Attribute)* S? '>'
  --
  -- Produces the name, and a table with the names and values of the
  -- attributes.
  STag = "<"
       * C( Name )
       * Ct( (SS * Attribute)^0 )
       * SSopt
       * ">"

  -- ETag ::= '</' Name S? '>'
  ETag = "</"
       * C( Name )
       * SSopt
       * ">"

  -- EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
  EmptyElemTag = "<"
               * C( Name )
               * Cg( (SS * Attribute)^0 )
               * SSopt
               * "/>"
               / action.empty_element_tag

  -- STag content ETag: an element with content. The action receives the
  -- captures of the start tag, the child nodes and the end tag name.
  ElemTag = STag
          * Cg( V "content" )
          * ETag
          / action.element_tag

  -- element ::= EmptyElemTag | STag content ETag
  element = ElemTag + EmptyElemTag

  -- A run of character data and references becomes a single text node.
  textContent = (Ct( (Reference + CharData)^1 ) / tconcat)
              / action.text

  -- content ::= CharData? ((element | Reference | CDSect | PI | Comment) CharData?)*
  --
  -- References are handled as part of "textContent".
  content = textContent ^ -1
          * ( ( element
              + CDSect
              + PI
              + Comment
              )
            * textContent ^ -1
            ) ^ 0

end -- _ENV = _M.xml_grammar


---
-- Parsing and dumping.
-- @section
--

---
-- Generates an outline out of a DOM tree. The outline is written
-- to the standard output stream
--
-- @param treeish DOM tree or subtree (a @{Node}).
-- @function outline
--
_M.outline = outline

-- Pattern compiled from the grammar table; this is what parse() matches
-- the input against.
_M.xml_peg = P (_M.xml_grammar)
-- Prototype of the nodes which make up DOM trees.
_M.Node    = Node

---
-- Escapes certain characters using entity references.
--
-- @param text Text to be escaped.
-- @function escape
--
_M.escape = escape

---
-- Parses XML input into a DOM tree.
--
-- Matching is anchored at the start of the input.
--
-- @param input Input XML string.
-- @return A DOM tree @{Node}, or `nil` plus an error message when the
--  input does not match the grammar.
--
function _M.parse (input)
  local tree = _M.xml_peg:match (input)
  if tree == nil then
    -- Follow the "nil, message" convention so callers can wrap the call
    -- in assert() and get a meaningful error.
    return nil, "input is not a well-formed XML document"
  end
  return tree
end

---
-- Compiles the XML grammar using an arbitrary production as initial rule.
--
-- @param prodname Name of the production to start matching from.
-- @return LPeg pattern for that production.
--
function _M.P (prodname)
  local grammar = _M.xml_grammar
  local old = grammar[1]
  grammar[1] = prodname
  -- Compile under pcall so the original initial rule is always restored,
  -- also when the production name is unknown and compilation raises.
  local ok, peg = pcall (P, grammar)
  grammar[1] = old
  if not ok then
    error (peg, 0)
  end
  return peg
end

return _M
	--
	-- LPeg-based XML parser.
	--
	-- * Grammar term names are the same as in the XML 1.1
	-- specification: http://www.w3.org/TR/xml11/
	-- * Action functions are missing.
	--
	-- Copyright (C) 2012 Adrian Perez <aperez@igalia.com>
	-- Distribute under terms of the MIT license.
	--

	local lpeg = require "lpeg"
	local V, R, S, P = lpeg.V, lpeg.R, lpeg.S, lpeg.P
	local grammar = { "document" }

	do local _ENV = grammar

	-- S ::= (#x20 | #x9 | #xD | #xA)+
	-- SS is mandatory white space, SSopt its optional form (S?).
	SS = (S " \t\r\n")^1
	SSopt = SS ^ -1

	-- NameStartChar ::= ":" | [A-Z] | "_" | [a-z]
	NameStartChar = S ":_" + R ("az", "AZ")

	-- NameChar ::= NameStartChar | "-" | "." | [0-9]
	NameChar = NameStartChar + S "-." + R "09"

	-- Name ::= NameStartChar (NameChar)*
	Name = NameStartChar * NameChar^0

	-- Names ::= Name (#x20 Name)*
	Names = Name * (" " * Name)^0

	-- Nmtoken ::= (NameChar)+
	Nmtoken = NameChar^1

	-- Nmtokens ::= Nmtoken (#x20 Nmtoken)*
	Nmtokens = Nmtoken * (" " * Nmtoken)^0

	-- CharRef ::= '&#' [0-9]+ ';'
	--           | '&#x' [0-9a-fA-F]+ ';'
	-- The decimal form is tried first; it fails on the "x" of a hexadecimal
	-- reference, after which the second alternative is tried.
	CharRef = ("&#" * (R "09")^1 * ";")
	+ ("&#x" * (R ("09", "af", "AF"))^1 * ";")

	-- EntityRef ::= '&' Name ';'
	-- PEReference ::= '%' Name ';'
	-- Reference ::= EntityRef | CharRef
	--
	EntityRef = "&" * Name * ";"
	PEReference = "%" * Name * ";"
	Reference = EntityRef + CharRef

	-- EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"'
	--               | "'" ([^%&'] | PEReference | Reference)* "'"
	EntityValue = ('"' * ((1 - S '%&"') + PEReference + Reference)^0 * '"')
	+ ("'" * ((1 - S "%&'") + PEReference + Reference)^0 * "'")

	-- AttValue ::= '"' ([^<&"] | Reference)* '"'
	--            | "'" ([^<&'] | Reference)* "'"
	AttValue = ('"' * ((1 - S '<&"') + Reference)^0 * '"')
	+ ("'" * ((1 - S "<&'") + Reference)^0 * "'")

	-- SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
	SystemLiteral = ('"' * (1 - P '"')^0 * '"')
	+ ("'" * (1 - P "'")^0 * "'")

	-- PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
	PubidChar = S " \r\n-'()+,./:=?;!*#@$_%" + R ("az", "AZ", "09")

	-- PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
	-- PubidChar includes the apostrophe, hence the subtraction in the
	-- single-quoted form.
	PubidLiteral = ('"' * PubidChar^0 * '"')
	+ ("'" * (PubidChar - "'")^0 * "'")

	-- CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
	-- Stops before "<", "&" or the sequence "]]>"; may match nothing.
	CharData = (1 - (S "<&" + "]]>"))^0

	-- Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
	-- A "-" inside the body must be followed by a character other than "-".
	Comment = "<!--"
	* ((1 - S "-") + ("-" * (1 - S "-")))^0
	* "-->"

	-- PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
	-- Only the exact name "xml" (in any letter case) is excluded.  The
	-- trailing -NameChar keeps longer targets that merely start with those
	-- letters, such as "xml-stylesheet", valid.
	PITarget = Name - (S "xX" * S "mM" * S "lL" * -NameChar)

	-- PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
	-- The white space plus instruction text is optional as a whole and the
	-- text itself may be empty: "<?t?>", "<?t ?>" and "<?t x?>" all match.
	PI = "<?"
		* PITarget
		* (SS * (1 - P "?>")^0) ^ -1
		* "?>"

	-- CDSect ::= CDStart CData CDEnd
	-- CDStart ::= '<![CDATA['
	-- CData ::= (Char* - (Char* ']]>' Char*))
	-- CDEnd ::= ']]>'
	CData = (1 - P "]]>")^0
	CDSect = "<![CDATA[" * CData * "]]>"

	-- prolog ::= XMLDecl Misc* (doctypedecl Misc*)?
	-- XMLDecl is optional in this implementation.  The rules named through
	-- V are defined further below, so they are referenced by name.
	prolog = (V "XMLDecl") ^ -1
	* (V "Misc")^0
	* (V "doctypedecl" * (V "Misc")^0) ^ -1

	-- Eq ::= S? '=' S?
	Eq = SSopt * "=" * SSopt

	-- SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no') '"'))
	SDDecl = SS
	* "standalone"
	* Eq
	* ( ("'" * (P "yes" + "no") * "'")
	+ ('"' * (P "yes" + "no") * '"')
	)

	-- XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
	XMLDecl = "<?xml"
	* V "VersionInfo"
	* (V "EncodingDecl") ^ -1
	* SDDecl ^ -1
	* SSopt
	* "?>"

	-- VersionNum ::= '1.0' | '1.1'
	VersionNum = P "1.0"
	+ P "1.1"

	-- VersionInfo ::= S 'version' Eq ("'" VersionNum "'" | '"' VersionNum '"')
	VersionInfo = SS
	* "version"
	* Eq
	* ( ("'" * VersionNum * "'")
	+ ('"' * VersionNum * '"')
	)

	-- Misc ::= Comment | PI | S
	Misc = Comment + PI + SS

	-- doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? ('[' intSubset ']' S?)? '>'
	doctypedecl = "<!DOCTYPE"
	* SS
	* Name
	* (SS * V "ExternalID") ^ -1
	* SSopt
	* ( "["
	* V "intSubset"
	* "]"
	* SSopt
	) ^ -1
	* ">"

	-- document ::= ( prolog element Misc* )
	-- Initial rule of the grammar (see the grammar table constructor).
	document = prolog * V "element" * Misc^0

	-- DeclSep ::= PEReference | S
	DeclSep = PEReference + SS

	-- choice ::= '(' S? cp ( S? '|' S? cp )+ S? ')'
	-- seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
	-- The separator is a plain "|": a backslash in front of it is not a
	-- valid Lua escape sequence and stops the file from compiling.
	-- "cp" is defined after these rules, so it is referenced through V.
	choice = "(" * SSopt * V "cp" * (SSopt * "|" * SSopt * V "cp")^1 * SSopt * ")"
	seq = "(" * SSopt * V "cp" * (SSopt * "," * SSopt * V "cp")^0 * SSopt * ")"

	-- cp ::= (Name | choice | seq) ('?' | '*' | '+')?
	cp = (Name + choice + seq) * (S "?*+") ^ -1

	-- children ::= (choice | seq) ('?' | '*' | '+')?
	children = (choice + seq) * (S "?*+") ^ -1

	-- Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' | '(' S? '#PCDATA' S? ')'
	Mixed = "(" * SSopt * "#PCDATA" * (SSopt * "|" * SSopt * Name)^0 * SSopt * ")*"
		+ "(" * SSopt * "#PCDATA" * SSopt * ")"

	-- contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
	contentspec = P "EMPTY" + P "ANY" + Mixed + children

	-- elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
	elementdecl = "<!ELEMENT" * SS * Name * SS * contentspec * SSopt * ">"

	-- EnumeratedType ::= NotationType | Enumeration
	-- NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
	-- Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
	-- AttType ::= StringType | TokenizedType | EnumeratedType
	-- StringType ::= 'CDATA'
	-- TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' | 'ENTITIES'
	--                 | 'NMTOKEN' | 'NMTOKENS'
	--
	NotationType = "NOTATION" * SS * "(" * SSopt * Name * (SSopt * "|" * SSopt * Name)^0 * SSopt * ")"
	Enumeration = "(" * SSopt * Nmtoken * (SSopt * "|" * SSopt * Nmtoken)^0 * SSopt * ")"
	-- Ordered choice commits to the first alternative that matches, so every
	-- keyword must come before the keywords it is a prefix of; otherwise
	-- "IDREF" is consumed as "ID" and the rest of the declaration fails.
	AttType = P "CDATA"
		+ P "IDREFS"
		+ P "IDREF"
		+ P "ID"
		+ P "ENTITIES"
		+ P "ENTITY"
		+ P "NMTOKENS"
		+ P "NMTOKEN"
		+ NotationType
		+ Enumeration

	-- DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
	DefaultDecl = P "#REQUIRED"
	+ P "#IMPLIED"
	+ (((P "#FIXED" * SS) ^ -1) * AttValue)

	-- AttDef ::= S Name S AttType S DefaultDecl
	AttDef = SS * Name * SS * AttType * SS * DefaultDecl

	-- AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
	AttlistDecl = "<!ATTLIST" * SS * Name * AttDef^0 * SSopt * ">"

	-- ExternalID ::= 'SYSTEM' S SystemLiteral
	--              | 'PUBLIC' S PubidLiteral S SystemLiteral
	--
	ExternalID = "SYSTEM" * SS * SystemLiteral
	+ "PUBLIC" * SS * PubidLiteral * SS * SystemLiteral

	-- NDataDecl ::= S 'NDATA' S Name
	NDataDecl = SS * "NDATA" * SS * Name

	-- EntityDecl ::= GEDecl | PEDecl
	-- GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
	-- PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
	-- EntityDef ::= EntityValue | (ExternalID NDataDecl?)
	-- PEDef ::= EntityValue | ExternalID
	--
	PEDef = EntityValue + ExternalID
	EntityDef = EntityValue + (ExternalID * NDataDecl ^ -1)
	GEDecl = "<!ENTITY" * SS * Name * SS * EntityDef * SSopt * ">"
	PEDecl = "<!ENTITY" * SS * "%" * SS * Name * SS * PEDef * SSopt * ">"
	EntityDecl = GEDecl + PEDecl

	-- PublicID ::= 'PUBLIC' S PubidLiteral
	PublicID = "PUBLIC" * SS * PubidLiteral

	-- NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
	-- ExternalID is tried first; its PUBLIC form needs a system literal, so a
	-- bare public identifier falls through to PublicID.
	NotationDecl = "<!NOTATION" * SS * Name * SS * (ExternalID + PublicID) * SSopt * ">"

	-- markupdecl ::= elementdecl | AttlistDecl | EntityDecl | NotationDecl | PI | Comment
	markupdecl = elementdecl
	+ AttlistDecl
	+ EntityDecl
	+ NotationDecl
	+ PI
	+ Comment

	-- conditionalSect ::= includeSect | ignoreSect
	-- includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
	-- ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
	-- ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
	-- Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
	--
	-- Ignore is a (possibly empty) run of characters holding neither
	-- delimiter, and a nested section is closed by the full "]]>".
	Ignore = (1 - (P "<![" + P "]]>"))^0
	ignoreSectContents = Ignore * ("<![" * V "ignoreSectContents" * "]]>" * Ignore)^0
	-- ignoreSectContents can match the empty string, and LPeg rejects loops
	-- over such patterns; a single occurrence accepts the same input as the
	-- starred form of the production.
	conditionalSect = ("<![" * SSopt * "INCLUDE" * SSopt * "[" * V "extSubsetDecl" * "]]>")
		+ ("<![" * SSopt * "IGNORE" * SSopt * "[" * ignoreSectContents * "]]>")

	-- intSubset ::= (markupdecl | DeclSep)*
	intSubset = (markupdecl + DeclSep)^0

	-- extSubsetDecl ::= (markupdecl | conditionalSect | DeclSep)*
	extSubsetDecl = (markupdecl + conditionalSect + DeclSep)^0

	-- EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
	EncName = R ("AZ", "az") * ((R ("AZ", "az", "09") + S "._") + "-")^0

	-- EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'" )
	EncodingDecl = SS
	* "encoding"
	* Eq
	* ( '"' * EncName * '"'
	+ "'" * EncName * "'"
	)

	-- TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
	TextDecl = "<?xml"
	* VersionInfo ^ -1
	* EncodingDecl
	* SSopt
	* "?>"

	-- extSubset ::= TextDecl? extSubsetDecl
	extSubset = TextDecl ^ -1
	* extSubsetDecl

	-- Attribute ::= Name Eq AttValue
	Attribute = Name * Eq * AttValue

	-- STag ::= '<' Name (S Attribute)* S? '>'
	STag = "<" * Name * (SS * Attribute)^0 * SSopt * ">"

	-- ETag ::= '</' Name S? '>'
	ETag = "</" * Name * SSopt * ">"

	-- EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
	EmptyElemTag = "<" * Name * (SS * Attribute)^0 * SSopt * "/>"

	-- element ::= EmptyElemTag | STag content ETag
	-- "content" is defined below and contains elements itself, hence V.
	element = EmptyElemTag
	+ (STag * V "content" * ETag)

	-- content ::= CharData? ((element | Reference | CDSect | PI | Comment) CharData?)*
	content = CharData ^ -1
	* ( ( element
	+ Reference
	+ CDSect
	+ PI
	+ Comment
	)
	* CharData ^ -1
	) ^ 0

	end -- _ENV = grammar


	-- Validate standard input against the grammar.  The grammar defines no
	-- captures, so io.write receives (and echoes) the whole matched text.
	-- Kept local: a bare assignment would create the global "d".
	local d = P (grammar) / io.write
	if not d:match (io.read "*a") then
		io.stderr:write ("input does not match the XML grammar\n")
	end
	---
	-- XML parsing and DOM tree representation.
	--
	-- @copyright 2012 Adrian Perez <aperez@igalia.com>
	-- @license Distributed under terms of the MIT license.
	--

	local setmetatable, getmetatable = setmetatable, getmetatable
	local tpack, tremove, tconcat = table.pack, table.remove, table.concat
	local print, ipairs, pairs, assert, type = print, ipairs, pairs, assert, type
	local openfile, tostring = io.open, tostring
	local sprintf = string.format

	local lpeg = require "lpeg"
	local V, R, S, P, C = lpeg.V, lpeg.R, lpeg.S, lpeg.P, lpeg.C
	local Cg, Ct = lpeg.Cg, lpeg.Ct

	-- Module table.  The grammar starts out holding only the name of its
	-- initial rule; the productions are added further below.
	local _M = {}
	_M.xml_grammar = { "document" }


	local Object = {}

	--- Clones an object.
	--
	-- The new object looks up missing attributes in the table on which
	-- `clone()` was called. When a table is passed, its key/value pairs are
	-- copied into the new object; any other kind of argument is ignored.
	--
	-- @param t Table with additional attributes (optional).
	-- @return New cloned object.
	--
	function Object:clone (t)
		local copy = setmetatable ({}, { __index = self })
		if type (t) == "table" then
			for key, value in pairs (t) do
				copy[key] = value
			end
		end
		return copy
	end

	--- Alias for @{Object:clone}
	--
	Object.extend = Object.clone

	--- Gets the prototype of an object.
	--
	-- The prototype is the `__index` entry of the object's metatable.
	-- @return The prototype, or `nil` for an object without metatable.
	--
	function Object:prototype ()
		local meta = getmetatable (self)
		if not meta then
			return meta
		end
		return meta.__index
	end

	--- Checks whether an object is derived from some other object.
	--
	-- Walks the chain of prototypes one by one until either the reference
	-- object or the end of the chain is found.
	--
	-- @param obj Reference object.
	-- @return Whether the object derives from the reference object.
	--
	function Object:derives (obj)
		local meta = getmetatable (self)
		-- A missing metatable or __index marks the end of the chain
		while meta and meta.__index do
			local parent = meta.__index
			if parent == obj then
				return true
			end
			meta = getmetatable (parent)
		end
		return false
	end


	-- Export of the base prototype object.
	_M.Object = Object




	-- Forward declarations: flatten_attributes is defined further below and
	-- outline calls itself.
	local flatten_attributes
	local outline

	-- Prints one line per node (node type, attributes, tag name or value),
	-- indenting every nesting level by one more space.
	--
	-- e: node to print; d: current depth (defaults to 0).
	function outline (e, d)
		d = d or 0
		local padding = (" "):rep (d)
		local attr = flatten_attributes (e.attributes)
		local label = e.tagName or e.nodeValue
		print (("%s%s [%s] <%s>"):format (padding, e.nodeType, attr, label))

		if not e:hasChildNodes () then
			return
		end
		for _, child in ipairs (e.childNodes) do
			outline (child, d + 1)
		end
	end


	---
	-- XML DOM node class.
	--
	-- This loosely follows the [HTML DOM element API]
	-- (https://developer.mozilla.org/en/DOM/element), but deviates from
	-- it on purpose, to make more convenient to use and more Lua-like.
	--
	-- The fields set to `nil` below have no effect at run time; they list
	-- the attributes a node may carry.
	--
	-- @type Node
	--
	local Node = Object:clone
	{
		-- Values for the nodeType field
		ELEMENT_NODE = "element";
		TEXT_NODE = "text";
		CDATA_SECTION_NODE = "cdata";
		COMMENT_NODE = "comment";
		PROCESSING_INSTRUCTION_NODE = "processinginstruction";
		DOCTYPE_NODE = "doctype";
		XML_DECLARATION_NODE = "xmldeclaration";
		DOCUMENT_NODE = "document";

		nodeType = nil;
		nodeValue = nil;
		tagName = nil;
		parentNode = nil;

		attributes = nil;
		childNodes = nil;

		-- nodeType == DOCTYPE_NODE
		-- doctypeKind is "SYSTEM" or "PUBLIC" when an external identifier
		-- was given (set by action.doctype, read when dumping).
		doctypeKind = nil;
		doctypePublicIdentifier = nil;
		doctypeSystemIdentifier = nil;

		-- nodeType == XML_DECLARATION_NODE
		xmlVersion = nil;
		xmlEncoding = nil;
		xmlStandalone = nil;
	}

	---
	-- Returns the length of the child list of a node, or zero when the
	-- node has no child list.
	--
	-- NOTE(review): children of every node type are counted, not only
	-- element nodes; confirm that this is what callers expect.
	--
	function Node:childElementCount ()
		local children = self.childNodes
		if not children then
			return 0
		end
		return #children
	end

	---
	-- Appends a child to a node.
	--
	-- The child list is created on first use, and the child's `parentNode`
	-- is pointed at this node.
	--
	-- @param child Node to be appended.
	--
	function Node:appendChild (child)
		local children = self.childNodes
		if children == nil then
			children = {}
			self.childNodes = children
		end
		children[#children + 1] = child
		child.parentNode = self
	end

	---
	-- Removes a child from a node.
	--
	-- On success the child's `parentNode` is cleared.
	--
	-- @param child Node to be removed. If it is not a child, nothing is done.
	--
	function Node:removeChild (child)
		local children = self.childNodes
		if child.parentNode ~= self or children == nil then
			return
		end

		-- Scan the list up to its first empty slot
		local index = 1
		while children[index] ~= nil do
			if children[index] == child then
				tremove (children, index)
				child.parentNode = nil
				return
			end
			index = index + 1
		end
	end

	---
	-- Checks whether a node has children.
	--
	-- @return `true` when the node has a non-empty child list.
	--
	function Node:hasChildNodes ()
		local children = self.childNodes
		if children == nil then
			return false
		end
		return #children > 0
	end


	-- Characters that cannot appear literally in XML text, together with
	-- the entity references that replace them.
	local escape_replacements = {
		["&"] = "&amp;";
		["<"] = "&lt;";
	}

	-- Replacement callback for escape(). Receives either "<" or an
	-- ampersand followed by whatever could be the body of a reference.
	local function escape_match (capture)
		if capture == "<" then
			return escape_replacements["<"]
		end
		-- Text that already is a well-formed entity or character reference
		-- is kept, so references stored in node values survive a dump.
		if capture:find ("^&[%a_:][%w_:%.%-]*;$")
			or capture:find ("^&#%d+;$")
			or capture:find ("^&#x%x+;$")
		then
			return capture
		end
		-- Anything else is a bare ampersand: escape just that character.
		return escape_replacements["&"] .. capture:sub (2)
	end

	-- Escapes "<" and bare "&" characters of a string using entity
	-- references. Only the escaped string is returned.
	local function escape (text)
		local escaped = text
			:gsub ("&[#%w_:%.%-]*;?", escape_match)
			:gsub ("<", escape_match)
		return escaped
	end

	-- Forward declaration; the dispatcher is defined after the flatteners.
	local flatten_node

	-- Serializes a document node by serializing its children in order.
	local function flatten_document (elt, output)
		local children = elt.childNodes
		for _, node in ipairs (children) do
			flatten_node (node, output)
		end
	end

	-- Serializes a CDATA section, followed by a line break.
	local function flatten_cdata (elt, output)
		local function emit (piece)
			output[#output + 1] = piece
		end
		emit ("<![CDATA[")
		emit (elt.nodeValue)
		emit ("]]>\n")
	end

	-- Serializes a comment; the text is padded with one space on each side.
	local function flatten_comment (elt, output)
		local markup = "<!-- " .. elt.nodeValue .. " -->"
		output[#output + 1] = markup
	end

	-- Serializes a processing instruction, followed by a line break. A
	-- missing instruction text simply contributes nothing.
	local function flatten_proc_ins (elt, output)
		local function emit (piece)
			output[#output + 1] = piece
		end
		emit ("<?" .. elt.tagName .. " ")
		emit (elt.nodeValue)
		emit ("?>\n")
	end

	-- Serializes a text node, passing its value through escape().
	local function flatten_text (elt, output)
		local escaped = escape (elt.nodeValue)
		output[#output + 1] = escaped
	end

	-- Serializes the XML declaration. The standalone part is written only
	-- when the node carries a value for it.
	local function flatten_xml_decl (elt, output)
		local function emit (piece)
			output[#output + 1] = piece
		end
		emit ("<?xml version=\"" .. elt.xmlVersion)
		emit ("\" encoding=\"" .. elt.xmlEncoding)
		local standalone = elt.xmlStandalone
		if standalone ~= nil then
			emit ("\" standalone=\"")
			if standalone then
				emit ("yes")
			else
				emit ("no")
			end
		end
		emit ("\"?>\n")
	end

	-- Serializes a document type declaration: the name and, when present,
	-- the external identifier. An internal subset is never written.
	local function flatten_doctype (elt, output)
		-- Quotes an identifier, switching to single quotes when the text
		-- itself contains a double quote. Missing identifiers are written
		-- as empty literals.
		local function quoted (literal)
			literal = literal or ""
			local mark = literal:find ('"', 1, true) and "'" or '"'
			return mark .. literal .. mark
		end

		output[#output+1] = "<!DOCTYPE " .. elt.tagName
		if elt.doctypeKind ~= nil then
			-- The keyword has to be separated from the name by a space
			output[#output+1] = " " .. elt.doctypeKind
			if elt.doctypeKind ~= "SYSTEM" then
				output[#output+1] = " " .. quoted (elt.doctypePublicIdentifier)
			end
			output[#output+1] = " " .. quoted (elt.doctypeSystemIdentifier)
		end
		output[#output+1] = ">\n"
	end

	-- Serializes an attribute table as space separated name="value" pairs.
	-- Returns an empty string when there is no attribute table. The order
	-- of the pairs is the (unspecified) iteration order of pairs().
	function flatten_attributes (attributes)
		if attributes == nil then
			return ""
		end
		local attr = {}
		for name, value in pairs (attributes) do
			-- Values are always written between double quotes, so on top of
			-- the text escaping a double quote inside the value must not
			-- appear literally.
			local text = escape (value):gsub ('"', "&quot;")
			attr[#attr+1] = name .. "=\"" .. text .. "\""
		end
		return tconcat (attr, " ")
	end

	-- Serializes an element: start tag with attributes, children and end
	-- tag, or the empty-element form when there are no children.
	local function flatten_element (elt, output)
		local function emit (piece)
			output[#output + 1] = piece
		end

		emit ("<" .. elt.tagName)

		local attr = flatten_attributes (elt.attributes)
		if attr ~= "" then
			emit (" " .. attr)
		end

		if not elt:hasChildNodes () then
			emit ("/>")
			return
		end

		emit (">")
		for _, node in ipairs (elt.childNodes) do
			flatten_node (node, output)
		end
		emit ("</" .. elt.tagName .. ">")
	end

	-- Serializer for each node type.
	local flatteners = {}
	flatteners[Node.DOCUMENT_NODE] = flatten_document
	flatteners[Node.CDATA_SECTION_NODE] = flatten_cdata
	flatteners[Node.COMMENT_NODE] = flatten_comment
	flatteners[Node.PROCESSING_INSTRUCTION_NODE] = flatten_proc_ins
	flatteners[Node.TEXT_NODE] = flatten_text
	flatteners[Node.XML_DECLARATION_NODE] = flatten_xml_decl
	flatteners[Node.DOCTYPE_NODE] = flatten_doctype
	flatteners[Node.ELEMENT_NODE] = flatten_element

	-- Serializes any node by dispatching on its nodeType; raises an error
	-- for node types without a serializer.
	function flatten_node (treeish, output)
		local kind = treeish.nodeType
		local handler = flatteners[kind]
		if not handler then
			-- Level 0 keeps the message free of position information
			error ("Unknown node type: " .. tostring (kind), 0)
		end
		handler (treeish, output)
	end


	---
	-- Serializes a DOM tree as XML text, terminated by a line break.
	--
	-- @param treeish DOM tree or subtree (a @{Node}).
	-- @param output Where to write the result (optional): a file name,
	--  which is opened in binary write mode and closed afterwards, or any
	--  object with a `write` method, which is left open.
	-- @return The serialized text when no output is given; nothing otherwise.
	--
	function _M.dump (treeish, output)
		local result = {}
		flatten_node (treeish, result)
		result[#result+1] = "\n"
		local text = tconcat (result)

		if output == nil then
			return text
		end

		-- The serialized pieces are what has to be written; "output" is
		-- only the destination.
		if type (output) == "string" then
			local fd = assert (openfile (output, "wb"))
			fd:write (text)
			fd:close ()
		else
			output:write (text)
		end
	end


	--
	-- Parsing actions, attached to the grammar productions further below.
	-- Each one receives the captures of its production and returns either a
	-- "Node" instance or a plain value that a later action consumes.
	--
	local action = {}

	-- Builds a comment node holding the comment text.
	function action.comment (text)
		local fields = {
			nodeType = Node.COMMENT_NODE,
			nodeValue = text,
		}
		return Node:clone (fields)
	end

	-- Builds a processing instruction node: the target goes into tagName
	-- and the instruction text into nodeValue.
	function action.processing_instruction (name, content)
		local fields = {
			nodeType = Node.PROCESSING_INSTRUCTION_NODE,
			tagName = name,
			nodeValue = content,
		}
		return Node:clone (fields)
	end

	-- Builds a CDATA section node holding the section text.
	function action.cdata (content)
		local fields = {
			nodeType = Node.CDATA_SECTION_NODE,
			nodeValue = content,
		}
		return Node:clone (fields)
	end

	-- Builds a text node holding the given text.
	function action.text (content)
		local fields = {
			nodeType = Node.TEXT_NODE,
			nodeValue = content,
		}
		return Node:clone (fields)
	end

	-- Builds a DOCTYPE node named after "tag".
	--
	-- The "data" table is optional. When given:
	--
	--   data[1] is the kind of external identifier: SYSTEM / PUBLIC
	--   SYSTEM: data[2] is the system identifier
	--   PUBLIC: data[2] is the public and data[3] the system identifier
	--
	-- Nothing else from the declaration is stored in the node.
	--
	function action.doctype (tag, data)
		local node = Node:clone {
			nodeType = Node.DOCTYPE_NODE,
			tagName = tag,
		}
		if not data then
			return node
		end

		assert (type (data) == "table",
			"argument #2 to action.doctype is not a table")
		node.doctypeKind = data[1]

		if node.doctypeKind == "SYSTEM" then
			node.doctypeSystemIdentifier = data[2]
			return node
		end

		assert (node.doctypeKind == "PUBLIC",
			"DOCTYPE kind has to be either PUBLIC or SYSTEM")
		node.doctypePublicIdentifier = data[2]
		node.doctypeSystemIdentifier = data[3]
		return node
	end

	-- Builds the XML declaration node.
	--
	-- version:    version string.
	-- encoding:   encoding name; an empty or missing value means "utf-8".
	-- standalone: boolean from the standalone declaration. Anything that is
	--             not a boolean (the grammar passes an empty string when the
	--             declaration is absent) leaves xmlStandalone unset.
	function action.xml_declaration (version, encoding, standalone)
		-- An "x and y or z" expression cannot be used here: it would turn
		-- the legitimate value false (standalone="no") into nil.
		if type (standalone) ~= "boolean" then
			standalone = nil
		end
		if encoding == nil or encoding == "" then
			encoding = "utf-8"
		end
		return Node:clone {
			nodeType = Node.XML_DECLARATION_NODE;
			xmlStandalone = standalone;
			xmlEncoding = encoding;
			xmlVersion = version;
		}
	end

	-- Builds the document node. Its children are, in this order, the nodes
	-- of the prolog list, the root element and the nodes of the epilog list.
	function action.document (prolog, rootnode, epilog)
		local doc = Node:clone { nodeType = Node.DOCUMENT_NODE }

		local function adopt (nodes)
			for _, node in ipairs (nodes) do
				doc:appendChild (node)
			end
		end

		adopt (prolog)
		doc:appendChild (rootnode)
		adopt (epilog)
		return doc
	end

	-- Builds an element node without children. The arguments after the tag
	-- name are read as consecutive name, value pairs and become the
	-- attributes of the element.
	function action.empty_element_tag (tag, ...)
		local att = {}
		local count = select ("#", ...)

		for i = 1, count, 2 do
			-- select() returns the arguments from position i onwards
			local name, value = select (i, ...)
			att[name] = value
		end

		return Node:clone {
			nodeType = Node.ELEMENT_NODE,
			tagName = tag,
			attributes = att,
		}
	end

	-- Builds an element node out of a start tag, content and end tag.
	--
	-- stag: name found in the start tag.
	-- arg:  table of consecutive attribute name, value pairs.
	-- ...:  captures of the content, followed by the end tag name.
	--
	-- Raises an error when start and end tag names differ.
	function action.element_tag (stag, arg, ...)
		local child = tpack (...)
		local etag = child[child.n]
		assert (stag == etag,
			"Start tag does not match end tag (" .. tostring (stag) ..
			", " .. tostring (etag) .. ")")

		local att = {}
		for i = 1, arg.n or #arg, 2 do
			att[arg[i]] = arg[i+1]
		end

		local elt = Node:clone {
			nodeType = Node.ELEMENT_NODE;
			tagName = stag;
			attributes = att;
		}

		-- Everything before the end tag name is content. Only nodes are
		-- adopted: for content without captures the grammar passes the
		-- matched (empty) text instead, which is not a child.
		for i = 1, child.n - 1 do
			local node = child[i]
			if type (node) == "table" then
				elt:appendChild (node)
			end
		end

		return elt
	end


	-- Replacement text for the entity references decoded while parsing.
	-- The keys are complete references: the EntityRef production has no
	-- capture of its own, so the action receives the whole matched text.
	local entity_refs = {
		["&lt;"] = "<";
		["&amp;"] = "&";
	}

	-- Returns the replacement text of a known entity reference; any other
	-- reference is returned unchanged.
	function action.entity_reference (name)
		return entity_refs[name] or name
	end

	-- Debugging aid: prints every capture it receives, one per line, and
	-- returns all of them packed in a new table. Printing stops at the
	-- first nil capture.
	function action.captures (...)
		local args = { ... }
		-- The value is shown between bold ">" and "<" markers (ANSI escape
		-- sequences), so leading and trailing white space stays visible.
		local FMT = "[%i] \27[1;1m>\27[0;0m%s\27[1;1m<\27[0;0m"
		local FN = "<%s[%s]>"
		for i, v in ipairs (args) do
			if type (v) == "table" then
				if v.nodeType then
					v = FN:format (v.nodeType, tostring (v.tagName))
				else
					-- No table pretty-printer is available in this module
					v = tostring (v)
				end
			end
			print (FMT:format (i, tostring (v)))
		end
		return { ... }
	end


	--
	-- XML grammar using LPeg. It uses the functions in the "action" table to
	-- build a DOM tree.
	--
	do local _ENV = _M.xml_grammar

	-- S ::= (#x20 | #x9 | #xD | #xA)+
	-- SS is mandatory white space, SSopt its optional form (S?).
	SS = (S " \t\r\n")^1
	SSopt = SS ^ -1

	-- NameStartChar ::= ":" | [A-Z] | "_" | [a-z]
	NameStartChar = S ":_" + R ("az", "AZ")

	-- NameChar ::= NameStartChar | "-" | "." | [0-9]
	NameChar = NameStartChar + S "-." + R "09"

	-- Name ::= NameStartChar (NameChar)*
	Name = NameStartChar * NameChar^0

	-- Names ::= Name (#x20 Name)*
	Names = Name * (" " * Name)^0

	-- Nmtoken ::= (NameChar)+
	Nmtoken = NameChar^1

	-- Nmtokens ::= Nmtoken (#x20 Nmtoken)*
	Nmtokens = Nmtoken * (" " * Nmtoken)^0

	-- CharRef ::= '&#' [0-9]+ ';'
	--           | '&#x' [0-9a-fA-F]+ ';'
	-- This rule defines no capture of its own.
	CharRef = ("&#" * (R "09")^1 * ";")
	+ ("&#x" * (R ("09", "af", "AF"))^1 * ";")

	-- EntityRef ::= '&' Name ';'
	-- The active definition has no inner capture, so the action receives
	-- the whole matched text; the commented-out variant would pass only
	-- the name.
	--EntityRef = "&" * C( Name ) * ";"
	-- / action.entity_reference
	EntityRef = "&" * Name * ";"
	/ action.entity_reference

	-- PEReference ::= '%' Name ';'
	PEReference = "%" * Name * ";"

	-- Reference ::= EntityRef | CharRef
	Reference = EntityRef + CharRef

	-- EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"'
	--               | "'" ([^%&'] | PEReference | Reference)* "'"
	EntityValue = ('"' * ((1 - S '%&"') + PEReference + Reference)^0 * '"')
	+ ("'" * ((1 - S "%&'") + PEReference + Reference)^0 * "'")

	-- AttValue ::= '"' ([^<&"] | Reference)* '"'
	--            | "'" ([^<&'] | Reference)* "'"
	-- The pieces of the value are collected in a table and joined into one
	-- string. Character references have no capture of their own; they are
	-- captured here as written, otherwise they would silently disappear
	-- from the value.
	AttValue = ( ('"' * Ct( (C(1 - S '<&"')^1 + EntityRef + C( CharRef ))^0 ) * '"')
		+ ("'" * Ct( (C(1 - S "<&'")^1 + EntityRef + C( CharRef ))^0 ) * "'")
		) / tconcat

	-- SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
	-- Captures the text between the quotes.
	SystemLiteral = ('"' * C( (1 - P '"')^0 ) * '"')
	+ ("'" * C( (1 - P "'")^0 ) * "'")

	-- PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
	PubidChar = S " \r\n-'()+,./:=?;!*#@$_%" + R ("az", "AZ", "09")

	-- PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
	-- Captures the text between the quotes.
	PubidLiteral = ('"' * C( PubidChar^0 ) * '"')
	+ ("'" * C( (PubidChar - "'")^0 ) * "'")

	-- CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
	-- Unlike the production, at least one character is required here.
	CharData = C( (1 - (S "<&" + "]]>"))^1 )

	-- Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
	-- Captures the text between the delimiters for action.comment.
	Comment = "<!--"
	* C( ((1 - S "-") + ("-" * (1 - S "-")))^0 )
	* "-->"
	/ action.comment

	-- PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
	-- Only the exact name "xml" (in any letter case) is excluded.  The
	-- trailing -NameChar keeps longer targets that merely start with those
	-- letters, such as "xml-stylesheet", valid.
	PITarget = Name - (S "xX" * S "mM" * S "lL" * -NameChar)

	-- PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
	-- Captures the target and, when there is any, the instruction text.
	-- White space and text are optional as a whole, so "<?t?>" matches too.
	PI = "<?"
		* C( PITarget )
		* (SS * C( (1 - P "?>")^1 ) ^ -1) ^ -1
		* "?>"
		/ action.processing_instruction

	-- CDSect ::= CDStart CData CDEnd
	-- CDStart ::= '<![CDATA['
	-- CData ::= (Char* - (Char* ']]>' Char*))
	-- CDEnd ::= ']]>'
	-- Captures the text of the section for action.cdata.
	CData = (1 - P "]]>")^0
	CDSect = "<![CDATA["
	* C( CData )
	* "]]>"
	/ action.cdata

	-- prolog ::= XMLDecl Misc* (doctypedecl Misc*)?
	-- XMLDecl is optional in this implementation.  The rules named through
	-- V are defined further below, so they are referenced by name.
	prolog = (V "XMLDecl") ^ -1
	* (V "Misc")^0
	* (V "doctypedecl" * (V "Misc")^0) ^ -1

	-- Eq ::= S? '=' S?
	Eq = SSopt * "=" * SSopt

	-- SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no') '"'))
	-- Produces a boolean: true for "yes", false for "no".
	SDDecl = SS
	* "standalone"
	* Eq
	* ( ("'" * C(P "yes" + "no") * "'")
	+ ('"' * C(P "yes" + "no") * '"')
	)
	/ function (v) return v == "yes" end

	-- VersionNum ::= '1.0' | '1.1'
	VersionNum = P "1.0"
	+ P "1.1"

	-- VersionInfo ::= S 'version' Eq ("'" VersionNum "'" | '"' VersionNum '"')
	-- Captures the version number.
	VersionInfo = SS
	* "version"
	* Eq
	* ( ("'" * C( VersionNum ) * "'")
	+ ('"' * C( VersionNum ) * '"')
	)

	-- XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
	-- Each part is wrapped in an anonymous group. A group whose pattern
	-- yields no capture produces the matched text instead, so an absent
	-- optional part reaches action.xml_declaration as an empty string.
	XMLDecl = "<?xml"
	* Cg( VersionInfo )
	* Cg( (V "EncodingDecl") ^ -1 )
	* Cg( SDDecl ^ -1 )
	* SSopt
	* "?>"
	/ action.xml_declaration

	-- Misc ::= Comment | PI | S
	Misc = Comment + PI + SS

	-- doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? ('[' intSubset ']' S?)? '>'
	-- Captures the name and, when present, the external identifier.
	-- The internal subset is matched but its captures are dropped ("/ 0"
	-- produces no values): comments, processing instructions or literals
	-- captured inside it would otherwise reach action.doctype as if they
	-- were the external identifier.
	doctypedecl = "<!DOCTYPE"
		* SS
		* C( Name )
		* (SS * V "ExternalID") ^ -1
		* SSopt
		* ( "["
			* (V "intSubset" / 0)
			* "]"
			* SSopt
			) ^ -1
		* ">"
		/ action.doctype

	-- document ::= ( prolog element Misc* )
	-- Initial rule. action.document receives a table with the prolog
	-- captures, the root element, and a table with the trailing captures.
	document = Ct( prolog )
	* Cg( V "element" )
	* Ct( Misc^0 )
	/ action.document

	-- DeclSep ::= PEReference | S
	DeclSep = PEReference + SS

	-- choice ::= '(' S? cp ( S? '|' S? cp )+ S? ')'
	-- seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
	-- The separator is a plain "|": a backslash in front of it is not a
	-- valid Lua escape sequence and stops the file from compiling.
	-- "cp" is defined after these rules, so it is referenced through V.
	choice = "(" * SSopt * V "cp" * (SSopt * "|" * SSopt * V "cp")^1 * SSopt * ")"
	seq = "(" * SSopt * V "cp" * (SSopt * "," * SSopt * V "cp")^0 * SSopt * ")"

	-- cp ::= (Name | choice | seq) ('?' | '*' | '+')?
	cp = (Name + choice + seq) * (S "?*+") ^ -1

	-- children ::= (choice | seq) ('?' | '*' | '+')?
	children = (choice + seq) * (S "?*+") ^ -1

	-- Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' | '(' S? '#PCDATA' S? ')'
	Mixed = "(" * SSopt * "#PCDATA" * (SSopt * "|" * SSopt * Name)^0 * SSopt * ")*"
		+ "(" * SSopt * "#PCDATA" * SSopt * ")"

	-- contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
	contentspec = P "EMPTY" + P "ANY" + Mixed + children

	-- elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
	elementdecl = "<!ELEMENT" * SS * Name * SS * contentspec * SSopt * ">"

	-- EnumeratedType ::= NotationType | Enumeration
	-- NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
	-- Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
	-- AttType ::= StringType | TokenizedType | EnumeratedType
	-- StringType ::= 'CDATA'
	-- TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' | 'ENTITIES'
	--                 | 'NMTOKEN' | 'NMTOKENS'
	--
	NotationType = "NOTATION" * SS * "(" * SSopt * Name * (SSopt * "|" * SSopt * Name)^0 * SSopt * ")"
	Enumeration = "(" * SSopt * Nmtoken * (SSopt * "|" * SSopt * Nmtoken)^0 * SSopt * ")"
	-- Ordered choice commits to the first alternative that matches, so every
	-- keyword must come before the keywords it is a prefix of; otherwise
	-- "IDREF" is consumed as "ID" and the rest of the declaration fails.
	AttType = P "CDATA"
		+ P "IDREFS"
		+ P "IDREF"
		+ P "ID"
		+ P "ENTITIES"
		+ P "ENTITY"
		+ P "NMTOKENS"
		+ P "NMTOKEN"
		+ NotationType
		+ Enumeration

	-- DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
	DefaultDecl = P "#REQUIRED"
	+ P "#IMPLIED"
	+ (((P "#FIXED" * SS) ^ -1) * AttValue)

	-- AttDef ::= S Name S AttType S DefaultDecl
	AttDef = SS * Name * SS * AttType * SS * DefaultDecl

	-- AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
	AttlistDecl = "<!ATTLIST" * SS * Name * AttDef^0 * SSopt * ">"

	-- ExternalID ::= 'SYSTEM' S SystemLiteral
	--              | 'PUBLIC' S PubidLiteral S SystemLiteral
	--
	-- Produces one table: the keyword followed by the literal(s).
	-- The parentheses are required: "/" binds tighter than "+", so without
	-- them only the PUBLIC alternative would be packed, and a SYSTEM
	-- identifier would reach action.doctype as loose strings.
	ExternalID = ( C( P "SYSTEM" ) * SS * SystemLiteral
		+ C( P "PUBLIC" ) * SS * PubidLiteral * SS * SystemLiteral
		) / tpack

	-- NDataDecl ::= S 'NDATA' S Name
	NDataDecl = SS * "NDATA" * SS * Name

	-- EntityDecl ::= GEDecl | PEDecl
	-- GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
	-- PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
	-- EntityDef ::= EntityValue | (ExternalID NDataDecl?)
	-- PEDef ::= EntityValue | ExternalID
	--
	PEDef = EntityValue + ExternalID
	EntityDef = EntityValue + (ExternalID * NDataDecl ^ -1)
	GEDecl = "<!ENTITY" * SS * Name * SS * EntityDef * SSopt * ">"
	PEDecl = "<!ENTITY" * SS * "%" * SS * Name * SS * PEDef * SSopt * ">"
	EntityDecl = GEDecl + PEDecl

	-- PublicID ::= 'PUBLIC' S PubidLiteral
	PublicID = "PUBLIC" * SS * PubidLiteral

	-- NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
	-- ExternalID is tried first; its PUBLIC form needs a system literal, so a
	-- bare public identifier falls through to PublicID.
	NotationDecl = "<!NOTATION" * SS * Name * SS * (ExternalID + PublicID) * SSopt * ">"

	-- markupdecl ::= elementdecl | AttlistDecl | EntityDecl | NotationDecl | PI | Comment
	markupdecl = elementdecl
	+ AttlistDecl
	+ EntityDecl
	+ NotationDecl
	+ PI
	+ Comment

	-- conditionalSect ::= includeSect | ignoreSect
	-- includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
	-- ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
	-- ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
	-- Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
	--
	-- Ignore is a (possibly empty) run of characters holding neither
	-- delimiter, and a nested section is closed by the full "]]>".
	Ignore = (1 - (P "<![" + P "]]>"))^0
	ignoreSectContents = Ignore * ("<![" * V "ignoreSectContents" * "]]>" * Ignore)^0
	-- ignoreSectContents can match the empty string, and LPeg rejects loops
	-- over such patterns; a single occurrence accepts the same input as the
	-- starred form of the production.
	conditionalSect = ("<![" * SSopt * "INCLUDE" * SSopt * "[" * V "extSubsetDecl" * "]]>")
		+ ("<![" * SSopt * "IGNORE" * SSopt * "[" * ignoreSectContents * "]]>")

	-- intSubset ::= (markupdecl | DeclSep)*
	intSubset = (markupdecl + DeclSep)^0

	-- extSubsetDecl ::= (markupdecl | conditionalSect | DeclSep)*
	extSubsetDecl = (markupdecl + conditionalSect + DeclSep)^0

	-- EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
	EncName = R ("AZ", "az") * ((R ("AZ", "az", "09") + S "._") + "-")^0

	-- EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'" )
	-- Captures the encoding name.
	EncodingDecl = SS
	* "encoding"
	* Eq
	* ( '"' * C( EncName ) * '"'
	+ "'" * C( EncName ) * "'"
	)

	-- TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
	TextDecl = "<?xml"
	* VersionInfo ^ -1
	* EncodingDecl
	* SSopt
	* "?>"

	-- extSubset ::= TextDecl? extSubsetDecl
	extSubset = TextDecl ^ -1
	* extSubsetDecl

	-- Attribute ::= Name Eq AttValue
	-- Captures the attribute name, then the grouped captures of AttValue.
	Attribute = C( Name )
	* Eq
	* Cg( AttValue )

	-- STag ::= '<' Name (S Attribute)* S? '>'
	-- Captures the tag name, then a single table collecting every capture
	-- made by the attribute list.
	STag = "<"
	* C( Name )
	* Ct( (SS * Attribute)^0 )
	* SSopt
	* ">"

	-- ETag ::= '</' Name S? '>'
	-- Captures the name found in the closing tag.
	ETag = "</"
	* C( Name )
	* SSopt
	* ">"

	-- EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
	-- Captures the tag name followed by the attribute captures; all of them
	-- are handed to action.empty_element_tag as one flat argument list.
	EmptyElemTag = "<"
		* C( Name )
		* Cg( (SS * Attribute)^0 )
		* SSopt
		* "/>"
		/ action.empty_element_tag

	-- STag content ETag
	-- The captures of "content" are passed through as they are.  Wrapping
	-- them in an anonymous Cg() makes LPeg substitute the whole (empty) match
	-- when the content yields no capture at all, so "<a></a>" would hand an
	-- empty string to action.element_tag where a child node is expected.
	ElemTag = STag
		* V "content"
		* ETag
		/ action.element_tag

	-- element ::= EmptyElemTag | STag content ETag
	element = ElemTag + EmptyElemTag

	-- Run of character data and references, turned into a single text node:
	-- the captured pieces are collected in a table, joined into one string
	-- and handed to action.text. Character references have no capture of
	-- their own; they are captured here as written, otherwise they would
	-- silently disappear from the text.
	textContent = (Ct( (EntityRef + C( CharRef ) + CharData)^1 ) / tconcat)
		/ action.text

	-- content ::= CharData? ((element | Reference | CDSect | PI | Comment) CharData?)*
	-- Reference does not appear in the choice below: references are consumed
	-- as part of textContent instead.
	content = textContent ^ -1
		* ( ( element
				+ CDSect
				+ PI
				+ Comment
				)
			* textContent ^ -1
			) ^ 0

	end -- _ENV = _M.grammar


	---
	-- Parsing and dumping.
	-- @section
	--

	---
	-- Generates an outline out of a DOM tree. The outline is written
	-- to the standard output stream
	--
	-- @param treeish DOM tree or subtree (a @{Node}).
	-- @function outline
	--
	_M.outline = outline

	-- Pattern compiled once from the grammar table; _M.parse matches against it.
	_M.xml_peg = P (_M.xml_grammar)
	-- Export of the node class.
	_M.Node = Node

	---
	-- Escapes certain characters using entity references.
	--
	-- @param text Text to be escaped.
	-- @function escape
	--
	_M.escape = escape

	---
	-- Parses XML input into a DOM tree.
	--
	-- Matching is anchored at the start of the input.
	--
	-- @param input Input XML string.
	-- @return A DOM tree @{Node}, or `nil` plus an error message when the
	--  input does not match the grammar.
	--
	function _M.parse (input)
		local tree = _M.xml_peg:match (input)
		if tree == nil then
			-- Follow the "nil, message" convention so callers can wrap the
			-- call in assert() and get a meaningful error.
			return nil, "input is not a well-formed XML document"
		end
		return tree
	end

	---
	-- Compiles the XML grammar using an arbitrary production as initial rule.
	--
	-- @param prodname Name of the production to start matching from.
	-- @return LPeg pattern for that production.
	--
	function _M.P (prodname)
		local grammar = _M.xml_grammar
		local old = grammar[1]
		grammar[1] = prodname
		-- Compile under pcall so the original initial rule is always
		-- restored, also when the production name is unknown and
		-- compilation raises.
		local ok, peg = pcall (P, grammar)
		grammar[1] = old
		if not ok then
			error (peg, 0)
		end
		return peg
	end

	return _M