Skip to content

Instantly share code, notes, and snippets.

@DigiTec
Last active August 29, 2015 14:13
A complete WebIDLLexer (without tests) which is compliant with the lexer rules for WebIDL Second Edition available here http://heycam.github.io/webidl/
// integer = /-?([1-9][0-9]*|0[Xx][0-9A-Fa-f]+|0[0-7]*)/
// Integer literal: an optional leading '-' followed by a decimal, hexadecimal
// or octal literal. When the sign is present, the token built by the sub-rule
// is updated in place: its text gets a '-' prefix and its value is negated.
INTEGER =
    sign:'-'? literal:( DECINT / HEXINT / OCTINT )
    {
        if (!sign) {
            return literal;
        }
        literal.string = '-' + literal.string;
        literal.value = -literal.value;
        return literal;
    }
// Decimal integer: one digit 1-9 followed by any number of digits 0-9.
DECINT =
    lead:[1-9] tail:[0-9]*
    {
        return token.createDecInteger(lead, tail);
    }
// Hexadecimal integer: '0', an 'x' or 'X' marker, then one or more hex digits.
// The marker is forwarded so the token text keeps the case that was written.
HEXINT =
    '0' marker:[Xx] digits:[0-9A-Fa-f]+
    {
        return token.createHexInteger(marker, digits);
    }
// Octal integer: '0' followed by zero or more digits 0-7 (a lone '0' matches here).
OCTINT =
    '0' digits:[0-7]*
    {
        return token.createOctInteger(digits);
    }
var webIdlParser = require('./WebIDL.js');
var fs = require('fs');
// Lex a WebIDL file and print the resulting token list as indented JSON.
// The file to read may be given as the first command-line argument; when it
// is omitted the original default, 'floats.webIDL', is used.
var inputPath = process.argv[2] || 'floats.webIDL';
// Passing 'utf8' makes readFile hand back a string directly.
fs.readFile(inputPath, 'utf8', function (err, text) {
    if (err) {
        // Read failures are fatal for this driver script.
        throw err;
    }
    var result = webIdlParser.parse(text);
    console.log(JSON.stringify(result, null, 4));
});
// other = /[^\t\n\r 0-9A-Za-z]/
// Any single character that is not tab, newline, carriage return, space,
// a digit or an ASCII letter.
OTHER =
    ch:[^\t\n\r 0-9A-Za-z]
    {
        return token.createOther(ch);
    }
/**
 * Builds a token of type 'string'.
 *
 * The token's `string` field is `val` wrapped in double quotes, and its
 * `value` field is `val` itself.
 *
 * @param val - text that goes between the double quotes
 * @returns a new token instance with `string`, `value` and `type` set
 */
function createString(val) {
    var strToken = new token();
    var quoted = '"' + val + '"';
    strToken.string = quoted;
    strToken.value = val;
    strToken.type = 'string';
    return strToken;
}
/**
 * Concatenates every element of an array into one string, in order.
 * Elements are appended with `+`, so non-string elements are converted
 * to strings. An empty array gives the empty string.
 *
 * @param ary - array of pieces to concatenate
 * @returns the concatenated string
 */
function flatten(ary) {
    var joined = "";
    ary.forEach(function (piece) {
        joined = joined + piece;
    });
    return joined;
}
// string = /"[^"]*"/
// String literal: a double quote, any run of characters other than '"',
// and a closing double quote. Only the text between the quotes is passed on.
STRING =
    '"' chars:[^"]* '"'
    {
        var body = token.flatten(chars);
        return token.createString(body);
    }
// Matches one or more consecutive `token`s.
start = token+
// One lexical token. The alternatives are tried in the order written and the
// first one that matches is used. FLOAT is listed before INTEGER; otherwise
// the leading digits of a float such as 1.5 would be consumed as an integer.
// OTHER is the last resort.
token =
FLOAT / INTEGER / IDENTIFIER / STRING / comment / WHITESPACE / OTHER
{
/**
 * Lexer token. A freshly constructed token has an empty `value` and the
 * type 'error'; the static factories below overwrite both and add a
 * `string` field holding the token's source text.
 */
function token() {
    this.value = '';
    this.type = 'error';
}

/**
 * Private helper shared by the factories: creates a token and fills in its
 * three fields.
 *
 * @param type - token type tag, e.g. 'integer' or 'ws'
 * @param string - source text of the token
 * @param value - interpreted value of the token
 * @returns the populated token
 */
function makeToken(type, string, value) {
    var tok = new token();
    tok.string = string;
    tok.value = value;
    tok.type = type;
    return tok;
}

// Static methods on token
Object.defineProperties(token, {
    // Single character that fits no other token class.
    createOther: {
        value: function createOther(val) {
            return makeToken('other', val, val);
        }
    },
    // `val` is the comment text without the leading slashes.
    createSingleLineComment: {
        value: function createSingleLineComment(val) {
            return makeToken('slcomment', '//' + val, val);
        }
    },
    // `val` is the comment text without the opening and closing delimiters.
    createMultiLineComment: {
        value: function createMultiLineComment(val) {
            return makeToken('mlcomment', '/*' + val + '*/', val);
        }
    },
    createWhiteSpace: {
        value: function createWhiteSpace(val) {
            return makeToken('ws', val, val);
        }
    },
    // `val` is the text between the quotes; the quotes are re-added to `string`.
    createString: {
        value: function createString(val) {
            return makeToken('string', '"' + val + '"', val);
        }
    },
    // `opt` is the optional leading underscore, `first` the first letter and
    // `rest` an array of the remaining characters.
    createIdent: {
        value: function createIdent(opt, first, rest) {
            var name = (opt || '') + first + token.flatten(rest);
            return makeToken('ident', name, name);
        }
    },
    // `first` is the leading digit, `rest` an array of the remaining digits.
    createDecInteger: {
        value: function createDecInteger(first, rest) {
            var text = first + token.flatten(rest);
            return makeToken('integer', text, parseInt(text, 10));
        }
    },
    // `hex` is the 'x' or 'X' marker as written; `rest` is the array of hex digits.
    createHexInteger: {
        value: function createHexInteger(hex, rest) {
            var digits = token.flatten(rest);
            return makeToken('integer', '0' + hex + digits, parseInt(digits, 16));
        }
    },
    // `rest` is the array of octal digits that follow the leading '0'.
    createOctInteger: {
        value: function createOctInteger(rest) {
            var text = '0' + token.flatten(rest);
            return makeToken('integer', text, parseInt(text, 8));
        }
    },
    // `val` is the complete float text, including any sign and exponent.
    createFloat: {
        value: function createFloat(val) {
            return makeToken('float', val, parseFloat(val));
        }
    },
    // Concatenates the elements of `ary` into a single string.
    flatten: {
        value: function flatten(ary) {
            return ary.reduce(function (prev, cur) {
                return prev + cur;
            }, "");
        }
    }
});
}
// integer = /-?([1-9][0-9]*|0[Xx][0-9A-Fa-f]+|0[0-7]*)/
// float = /-?(([0-9]+\.[0-9]*|[0-9]*\.[0-9]+)([Ee][+-]?[0-9]+)?|[0-9]+[Ee][+-]?[0-9]+)/
// identifier = /_?[A-Za-z][0-9A-Z_a-z-]*/
// string = /"[^"]*"/
// whitespace = /[\t\n\r ]+/
// comment = /\/\/.*|\/\*(.|\n)*?\*\//
// other = /[^\t\n\r 0-9A-Za-z]/
// Matches one or more consecutive `token`s.
start = token+
// One lexical token. The alternatives are tried in the order written and the
// first one that matches is used. FLOAT is listed before INTEGER; otherwise
// the leading digits of a float such as 1.5 would be consumed as an integer.
// OTHER is the last resort.
token =
FLOAT / INTEGER / IDENTIFIER / STRING / comment / WHITESPACE / OTHER
// Integer literal: an optional leading '-' followed by a decimal, hexadecimal
// or octal literal. When the sign is present, the token built by the sub-rule
// is updated in place: its text gets a '-' prefix and its value is negated.
INTEGER =
    sign:'-'? literal:( DECINT / HEXINT / OCTINT )
    {
        if (!sign) {
            return literal;
        }
        literal.string = '-' + literal.string;
        literal.value = -literal.value;
        return literal;
    }
// Decimal integer: one digit 1-9 followed by any number of digits 0-9.
DECINT =
    lead:[1-9] tail:[0-9]*
    {
        return token.createDecInteger(lead, tail);
    }
// Hexadecimal integer: '0', an 'x' or 'X' marker, then one or more hex digits.
// The marker is forwarded so the token text keeps the case that was written.
HEXINT =
    '0' marker:[Xx] digits:[0-9A-Fa-f]+
    {
        return token.createHexInteger(marker, digits);
    }
// Octal integer: '0' followed by zero or more digits 0-7 (a lone '0' matches here).
OCTINT =
    '0' digits:[0-7]*
    {
        return token.createOctInteger(digits);
    }
// Float literal: an optional leading '-' followed by either a number with a
// decimal point (FULLFLOAT) or digits with an exponent only (EPSILONONLY).
// The sub-rules return plain text; the token is built here from the full text.
FLOAT =
    sign:'-'? magnitude:( FULLFLOAT / EPSILONONLY )
    {
        var prefix = sign || '';
        return token.createFloat(prefix + magnitude);
    }
// A number containing a decimal point, optionally followed by an exponent.
// Returns the matched text as a string.
FULLFLOAT =
    mantissa:( FLOATSTART1 / FLOATSTART2 ) exponent:EPSILON?
    {
        var suffix = exponent || '';
        return mantissa + suffix;
    }
// Digits required before the point, optional after it (e.g. "1." or "1.5").
// Returns the matched text as a string.
FLOATSTART1 =
    whole:[0-9]+ '.' fraction:[0-9]*
    {
        var before = token.flatten(whole);
        var after = token.flatten(fraction);
        return before + '.' + after;
    }
// Digits optional before the point, required after it (e.g. ".5" or "1.5").
// Returns the matched text as a string.
FLOATSTART2 =
    whole:[0-9]* '.' fraction:[0-9]+
    {
        var before = token.flatten(whole);
        var after = token.flatten(fraction);
        return before + '.' + after;
    }
// Exponent suffix of a float: 'e' or 'E', an optional '+' or '-', then one or
// more digits. Returns the matched text as a string.
EPSILON =
    marker:[Ee] sign:[+-]? digits:[0-9]+
    {
        var signText = sign || '';
        return marker + signText + token.flatten(digits);
    }
// Float written without a decimal point: one or more digits followed directly
// by an exponent (e.g. "1e5"). Returns the matched text as a string.
EPSILONONLY =
    whole:[0-9]+ marker:[Ee] sign:[+-]? digits:[0-9]+
    {
        var mantissa = token.flatten(whole);
        var signText = sign || '';
        return mantissa + marker + signText + token.flatten(digits);
    }
// Identifier: an optional leading '_', one ASCII letter, then any run of
// letters, digits, '_' or '-'.
IDENTIFIER =
    underscore:'_'? head:[A-Za-z] tail:[0-9A-Z_a-z-]*
    {
        return token.createIdent(underscore, head, tail);
    }
// String literal: a double quote, any run of characters other than '"',
// and a closing double quote. Only the text between the quotes is passed on.
STRING =
    '"' chars:[^"]* '"'
    {
        var body = token.flatten(chars);
        return token.createString(body);
    }
// One or more tab, newline, carriage-return or space characters, kept as a
// single whitespace token.
WHITESPACE =
    run:[\t\n\r ]+
    {
        var blank = token.flatten(run);
        return token.createWhiteSpace(blank);
    }
// Either comment form: single-line (COMMENT1) or multi-line (COMMENT2).
comment =
COMMENT1 / COMMENT2
// Single-line comment: '//' followed by everything up to, but not including,
// the next newline. The text after the slashes is passed to the factory.
COMMENT1 =
    '//' body:[^\n]*
    {
        var commentText = token.flatten(body);
        return token.createSingleLineComment(commentText);
    }
// Multi-line comment: the opening delimiter, then any characters up to the
// first closing delimiter.
COMMENT2 =
    '/*' pairs:(!'*/' .)* '*/'
    {
        // Each element of pairs is a two-item array: the result of the
        // negative lookahead, then the matched character. Only the
        // character at index 1 is kept.
        var chars = pairs.map(function (pair) {
            return pair[1];
        });
        return token.createMultiLineComment(token.flatten(chars));
    }
// Any single character that is not tab, newline, carriage return, space,
// a digit or an ASCII letter.
OTHER =
    ch:[^\t\n\r 0-9A-Za-z]
    {
        return token.createOther(ch);
    }
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment