Skip to content

Instantly share code, notes, and snippets.

@Simn
Created April 19, 2012 22:53
Show Gist options
  • Save Simn/2424730 to your computer and use it in GitHub Desktop.
Save Simn/2424730 to your computer and use it in GitHub Desktop.
Port of the Neko C XML parser to Haxe
package haxe.xml;
using StringTools;
/**
	States of the character-driven state machine in `XmlParser.doParse`.
	The parser examines one character at a time and switches on the current state.
**/
enum State {
	/** Skipping `\n`, `\r`, `\t` and space; the first other character is handed to the pending "next" state. **/
	IGNORE_SPACES;
	/** Start of content: `<` opens markup, any other character starts a text run. **/
	BEGIN;
	/** Just after `<`: dispatches on `!`, `?`, `/` or the first character of an element name. **/
	BEGIN_NODE;
	/** Reading an element name up to the first character that is not a valid name character. **/
	TAG_NAME;
	/** Inside a start tag after its name: `/`, `>` or the beginning of an attribute name. **/
	BODY;
	/** Reading an attribute name. **/
	ATTRIB_NAME;
	/** Expecting the `=` between an attribute name and its value. **/
	EQUALS;
	/** Expecting the opening quote (`"` or `'`) of an attribute value. **/
	ATTVAL_BEGIN;
	/** Reading an attribute value up to the matching closing quote. **/
	ATTRIB_VAL;
	/** Just after the `>` of a start tag: the element's children are parsed by a recursive call. **/
	CHILDS;
	/** Reading the name in a closing tag `</name`. **/
	CLOSE;
	/** Expecting `>` after the `/` of a self-closing tag. **/
	WAIT_END;
	/** Expecting the `>` that ends a closing tag; the current parse call returns when it is found. **/
	WAIT_END_RET;
	/** Reading character data up to the next `<`. **/
	PCDATA;
	/** Reading a `<? ... ?>` header. **/
	HEADER;
	/** Reading a comment up to `-->`. **/
	COMMENT;
	/** Reading a DOCTYPE declaration up to the `>` that is outside any `[ ... ]` pair. **/
	DOCTYPE;
	/** Reading a CDATA section up to `]]>`. **/
	CDATA;
}
/**
	Hand-written XML parser driven by the `State` machine.

	The input is scanned one character code at a time. Nested elements are
	handled by `doParse` calling itself for the children of each element.
	All errors are reported by throwing a `String`.

	NOTE(review): relies on `StringTools.isEOF` via `using StringTools`; newer
	Haxe standard libraries may spell this `isEof` - confirm against the
	targeted Haxe version.
**/
class XmlParser
{
	/**
		Parses `str` and returns a newly created document node holding the
		parsed nodes as children.

		Throws a `String` describing the problem when the input is malformed,
		including when the input ends while an element is still open.
	**/
	static public function parse(str:String)
	{
		var doc = Xml.createDocument();
		doParse(str, 0, doc);
		return doc;
	}

	/**
		Parses the children of `parent`, starting at index `p` of `str`.

		Returns the index of the `>` that ends the closing tag of `parent`, or
		the index at which the input ended when `parent` is not an element.

		Throws a `String` on malformed input.
	**/
	static function doParse(str:String, ?p:Int = 0, ?parent:Xml):Int
	{
		var xml:Xml = null;       // element whose start tag is currently being read
		var state = BEGIN;
		var next = BEGIN;         // state to enter once IGNORE_SPACES has skipped whitespace
		var aname:String = null;  // name of the attribute whose value is being read
		var start = 0;            // index where the current token began
		var nsubs = 0;            // number of element/text/CDATA children added to parent so far
		var nbrackets = 0;        // nesting depth of [ ] inside a DOCTYPE
		var c = str.fastCodeAt(p);
		while (!c.isEOF())
		{
			// A `continue` inside a branch re-examines the same character in the
			// newly selected state; falling out of the switch advances to the next one.
			switch(state)
			{
				case IGNORE_SPACES:
					switch(c)
					{
						case
							'\n'.code,
							'\r'.code,
							'\t'.code,
							' '.code:
						default:
							state = next;
							continue;
					}
				case BEGIN:
					switch(c)
					{
						case '<'.code:
							state = IGNORE_SPACES;
							next = BEGIN_NODE;
						default:
							start = p;
							state = PCDATA;
							continue;
					}
				case PCDATA:
					if (c == '<'.code)
					{
						var child = Xml.createPCData(str.substr(start, p - start));
						parent.addChild(child);
						nsubs++;
						state = IGNORE_SPACES;
						next = BEGIN_NODE;
					}
				case CDATA:
					if (c == ']'.code && str.fastCodeAt(p + 1) == ']'.code && str.fastCodeAt(p + 2) == '>'.code)
					{
						var child = Xml.createCData(str.substr(start, p - start));
						parent.addChild(child);
						nsubs++;
						p += 2; // skip "]]"; the loop increment then skips '>'
						state = BEGIN;
					}
				case BEGIN_NODE:
					switch(c)
					{
						case '!'.code:
							if (str.fastCodeAt(p + 1) == '['.code)
							{
								p += 2;
								if (str.substr(p, 6).toUpperCase() != "CDATA[")
									throw("Expected <![CDATA[");
								p += 5; // now on the '[' that ends "CDATA["
								state = CDATA;
								start = p + 1;
							}
							else if (str.fastCodeAt(p + 1) == 'D'.code || str.fastCodeAt(p + 1) == 'd'.code)
							{
								if (str.substr(p + 2, 6).toUpperCase() != "OCTYPE")
									throw("Expected <!DOCTYPE");
								p += 8;
								state = DOCTYPE;
								start = p + 1;
							}
							else if (str.fastCodeAt(p + 1) != '-'.code || str.fastCodeAt(p + 2) != '-'.code)
								throw("Expected <!--");
							else
							{
								p += 2;
								state = COMMENT;
								start = p + 1;
							}
						case '?'.code:
							state = HEADER;
							start = p;
						case '/'.code:
							// A closing tag is only legal while an element is open. The
							// top-level parent is a document node, so test the node type
							// as well as null before CLOSE reads parent.nodeName.
							if (parent == null || parent.nodeType != Xml.Element)
								throw("Expected node name");
							start = p + 1;
							state = IGNORE_SPACES;
							next = CLOSE;
						default:
							state = TAG_NAME;
							start = p;
							continue;
					}
				case TAG_NAME:
					if (!is_valid_char(c))
					{
						if (p == start)
							throw("Expected node name");
						xml = Xml.createElement(str.substr(start, p - start));
						parent.addChild(xml);
						state = IGNORE_SPACES;
						next = BODY;
						continue;
					}
				case BODY:
					switch(c)
					{
						case '/'.code:
							state = WAIT_END;
							nsubs++;
						case '>'.code:
							state = CHILDS;
							nsubs++;
						default:
							state = ATTRIB_NAME;
							start = p;
							continue;
					}
				case ATTRIB_NAME:
					if (!is_valid_char(c))
					{
						if (start == p)
							throw("Expected attribute name");
						aname = str.substr(start, p - start);
						if (xml.exists(aname))
							throw("Duplicate attribute");
						state = IGNORE_SPACES;
						next = EQUALS;
						continue;
					}
				case EQUALS:
					switch(c)
					{
						case '='.code:
							state = IGNORE_SPACES;
							next = ATTVAL_BEGIN;
						default:
							throw("Expected =");
					}
				case ATTVAL_BEGIN:
					switch(c)
					{
						case '"'.code, '\''.code:
							state = ATTRIB_VAL;
							start = p; // remember the opening quote so the same one closes the value
						default:
							throw("Expected \"");
					}
				case ATTRIB_VAL:
					if (c == str.fastCodeAt(start))
					{
						var val = str.substr(start + 1, p - start - 1);
						xml.set(aname, val);
						state = IGNORE_SPACES;
						next = BODY;
					}
				case CHILDS:
					// Recurse for the element's content; the call returns on the '>'
					// of the matching closing tag.
					p = doParse(str, p, xml);
					start = p;
					state = BEGIN;
				case WAIT_END:
					switch(c)
					{
						case '>'.code:
							state = BEGIN;
						default:
							throw("Expected >");
					}
				case WAIT_END_RET:
					switch(c)
					{
						case '>'.code:
							if (nsubs == 0)
								parent.addChild(Xml.createPCData(""));
							return p;
						default:
							throw("Expected >");
					}
				case CLOSE:
					if (!is_valid_char(c))
					{
						if (start == p)
							throw("Expected node name");
						var v = str.substr(start, p - start);
						if (v != parent.nodeName)
							throw "Expected </" + parent.nodeName + ">";
						state = IGNORE_SPACES;
						next = WAIT_END_RET;
						continue;
					}
				case COMMENT:
					if (c == '-'.code && str.fastCodeAt(p + 1) == '-'.code && str.fastCodeAt(p + 2) == '>'.code)
					{
						parent.addChild(Xml.createComment(str.substr(start, p - start)));
						p += 2;
						state = BEGIN;
					}
				case DOCTYPE:
					if (c == '['.code)
						nbrackets++;
					else if (c == ']'.code)
						nbrackets--;
					else if (c == '>'.code && nbrackets == 0)
					{
						parent.addChild(Xml.createDocType(str.substr(start, p - start)));
						state = BEGIN;
					}
				case HEADER:
					if (c == '?'.code && str.fastCodeAt(p + 1) == '>'.code)
					{
						p++; // move onto '>'
						// `start` is on the opening '?', so this is the text between the two '?'.
						var prolog = str.substr(start + 1, p - start - 2);
						parent.addChild(Xml.createProlog(prolog));
						state = BEGIN;
					}
			}
			c = str.fastCodeAt(++p);
		}
		if (state == BEGIN)
		{
			start = p;
			state = PCDATA;
		}
		if (state == PCDATA)
		{
			// Running out of input is only acceptable outside any element;
			// inside one, its closing tag is missing.
			if (parent != null && parent.nodeType == Xml.Element)
				throw "Unclosed node <" + parent.nodeName + ">";
			if (p != start || nsubs == 0)
				parent.addChild(Xml.createPCData(str.substr(start, p - start)));
			return p;
		}
		throw "Unexpected end";
	}

	/**
		Tells whether character code `c` may appear in an element or attribute
		name: ASCII letters, digits, `:`, `.`, `_` and `-`.
	**/
	static inline function is_valid_char(c:Int):Bool
		return (c >= 'a'.code && c <= 'z'.code) || (c >= 'A'.code && c <= 'Z'.code) || (c >= '0'.code && c <= '9'.code) || c == ':'.code || c == '.'.code || c == '_'.code || c == '-'.code;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment