Skip to content

Instantly share code, notes, and snippets.

@yne yne/xml_parser.html
Created Dec 20, 2015

Embed
What would you like to do?
Minimalist (<100 line) regexp based XML parser+selector
<html>
<head>
<title>XMLparser</title>
<script>
/**
 * Minimal regexp-based XML parser with a small CSS-like selector engine.
 *
 * The constructor parses `xml` immediately. The resulting tree is made of
 * plain nodes shaped like `{name, attr, childNode, text?}` and is reachable
 * through `valueOf()` or queried with `find()`.
 *
 * Limitations: only elements, double-quoted attributes and text placed
 * directly before a closing tag are recognised. Comments, processing
 * instructions, CDATA sections and entities are not handled.
 *
 * @param {string} xml - XML source to parse.
 */
var XMLparser=function(xml){
    //parsing part

    // Names of the elements that are currently open, outermost first.
    this.stack=[];
    // Synthetic root; parsed top-level elements end up in its childNode list.
    this.obj={childNode:[]};

    /**
     * Walks down the tree following the last child at every level.
     * Performs `stack.length - i` steps, so `0` yields the element that is
     * currently open and `1` yields its parent.
     * @param {number} i - number of innermost levels to skip.
     * @returns {Object} the node reached.
     */
    this.getLastChild=function(i){
        var pos=this.obj;
        for(;i<this.stack.length;i++)
            pos=pos.childNode[pos.childNode.length-1];
        return pos;
    };

    /**
     * Opens a new element as the last child of the currently open element.
     * @param {string} name - tag name (may contain a namespace prefix).
     * @param {Object} attr - attribute name/value map.
     */
    this.append=function(name,attr){
        this.stack.push(name);
        this.getLastChild(1).childNode.push({attr:attr,name:name,childNode:[]});
    };

    /**
     * Closes the currently open element, storing `text` on it when non-empty.
     * @param {string} [text] - text found before the closing tag.
     * @returns {string|undefined} name of the element that was closed.
     */
    this.setText=function(text){//TODO:parse text
        if(text)this.getLastChild(0).text=text;
        return this.stack.pop();
    };

    /**
     * Parses what follows a tag name: attributes, then `>` or `/>`.
     * Values are double-quoted and may be empty; a backslash escapes the
     * character that follows it (so `\"` does not end the value). Values are
     * stored verbatim, escapes included.
     * @param {string} str - input starting right after the tag name.
     * @returns {{attr:Object,rest:string,short:boolean}|null} attributes,
     *   remaining input and self-closing flag, or null when the tag is not
     *   properly terminated.
     */
    this.parseAttr=function(str){
        var attr={};
        // The two alternatives are mutually exclusive, so matching stays linear.
        var pair=/^\s+([\w:-]+)="((?:[^"\\]|\\[\s\S])*)"/;
        var reg;
        while((reg=str.match(pair))){
            attr[reg[1]]=reg[2];
            str=str.slice(reg[0].length);
        }
        var t=str.match(/^\s*(\/)?>/);
        if(!t)return null;
        return {attr:attr,rest:str.slice(t[0].length),short:t[1]!==undefined};
    };

    /**
     * Parses `str` and grows the tree. Works iteratively, so the call stack
     * does not grow with the number of tags, and accepts multi-line input.
     * Problems are reported with console.error and stop the parsing; the
     * part of the tree built so far is kept.
     * @param {string} str - XML source.
     */
    this.parse=function(str){
        var openTag=/^\s*<([\w:]+)/;
        var closeTag=/^([\s\S]*?)<\/([\w:]+)>/;
        while(str){
            var open=str.match(openTag);
            if(open){
                var a=this.parseAttr(str.slice(open[0].length));
                if(!a)
                    return console.error('malformed tag('+open[1]+')');
                this.append(open[1],a.attr);
                if(a.short)this.setText();
                str=a.rest;
                continue;
            }
            var end=str.match(closeTag);
            if(!end){
                // Trailing whitespace is harmless; anything else is an error.
                if(/\S/.test(str))
                    return console.error('unexpected content without closing tag');
                break;
            }
            var c=this.setText(end[1]);
            if(c!==end[2])
                return console.error('missmatching closing tag('+c+' vs '+end[2]+')');
            str=str.slice(end[0].length);
        }
        if(this.stack.length)
            console.error('unclosed tag('+this.stack.join('>')+')');
    };

    /**
     * @returns {Array} the top-level elements of the parsed document.
     */
    this.valueOf=function(){
        return this.obj.childNode;
    };

    //selector part

    /**
     * Collects the children of `tag` (and their descendants when the step is
     * not direct) that satisfy one selector step.
     * @param {Object} tag - node whose children are inspected.
     * @param {{direct:boolean,name:string,attr:string}} cmd - selector step;
     *   `attr` is `''`, `name` or `name=value` (value made of word characters).
     * @returns {Array} matching nodes in document order.
     */
    this.tagMatch=function(tag,cmd){
        var tags=[],c=tag.childNode,attr=cmd.attr.match(/([\w:-]+)(=(\w+))?/);
        for(var t=0;t<c.length;t++){//for each child
            var child=c[t];
            if(cmd.name==='*'||child.name===cmd.name){//the name matches
                // Own-property check: names such as "constructor" must not
                // match through Object.prototype.
                var present=!!attr&&Object.prototype.hasOwnProperty.call(child.attr,attr[1]);
                if(!attr//no attribute constraint = ok
                 ||(present&&attr[3]===undefined)//no value constraint + attribute present = ok
                 ||(present&&child.attr[attr[1]]===attr[3]))//value constraint + attribute present + same value = ok
                    tags.push(child);
            }
            if(!cmd.direct&&child.childNode.length)//indirect step: go down
                tags=tags.concat(this.tagMatch(child,cmd));
        }
        return tags;
    };

    /**
     * Runs a selector against the parsed tree.
     * Supported syntax: `*`, `tag`, `[attr]`, `[attr=val]`, chained with `>`
     * (direct child) or a space (any descendant).
     * @param {string} str - selector.
     * @returns {Array} matching nodes, each listed once.
     */
    this.find=function(str){
        var sel,cmds=[],tags=[this.obj];
        if(!str.match(/^[ >]/))str=' '+str;
        while(str){
            if(!(sel=str.match(/([ >])([\w:\*]+)(\[.*?\])?(.*)/)))break;
            cmds.push({direct:sel[1]==='>',name:sel[2],attr:sel[3]?sel[3].slice(1,-1):''});
            str=sel[4];
        }
        for(var i=0;i<cmds.length;i++){
            var tmp=[];
            for(var j=0;j<tags.length;j++){
                var found=this.tagMatch(tags[j],cmds[i]);
                // Nested contexts can reach the same node more than once.
                for(var k=0;k<found.length;k++)
                    if(tmp.indexOf(found[k])===-1)tmp.push(found[k]);
            }
            tags=tmp;
        }
        return tags;
    };

    this.parse(xml);
};
</script>
</head>
<body class="container-fluid">
<h1>Description</h1>
<p>
Minimalist (&lt;100 lines) regexp-based XML parser+selector.<br/>
Slow (regexp-based) and incomplete, but enough to load an XML-like file on embedded JS devices.
<h2>Support</h2>
<ul>
<li>short tag <i>&lt;br/&gt;</i></li>
<li>namespace syntax <i>&lt;ns:tag/&gt;</i></li>
<li>selector : <i>*</i>, <i>tag</i>, <i>[attr=val]</i>, <i>[attr]</i>, <i>&gt;</i> or <i>(space)</i> child</li>
</ul>
</p>
<h1>Input</h1>
<textarea id="input" rows="8" cols="80" class="row-fluid">
<xml>
<ns:body onload="alert(\">\")">
<div class="test">
<br/>
</div>
</ns:body>
</xml>
</textarea>
<h1>Param</h1>
<div>
Selector : <input type="text" class="row-fluid" id="selector" value="*[class]>*"/>
</div>
<div>
<button onclick="parse()" class="btn" type="button">select</button>
</div>
<h1>Output</h1>
<p>see <code>console</code> for full output</p>
<textarea id="output" rows="5" cols="80" class="row-fluid"></textarea>
<script src="XMLparser.js"></script>
<script>
/**
 * Click handler of the demo page: parses the content of the #input textarea,
 * runs the selector typed in #selector against it and lists the names of the
 * matching elements (one per line) in the #output textarea.
 * The full result is also written to the console.
 */
function parse(){
    // Newlines and tabs are removed from the textarea content before parsing.
    var input=document.getElementById('input').value.replace(/[\n\t]/g,'');
    var xml=new XMLparser(input);
    var ret=xml.find(document.getElementById('selector').value);
    var output="";
    // Indexed loop: for...in would also visit any enumerable property added to arrays.
    for(var r=0;r<ret.length;r++)output+=ret[r].name+'\n';
    document.getElementById('output').value=output;
    console.debug(ret);
}
</script>
<style>input{height: auto !important;}</style>
</body>
</html>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.