Created
April 6, 2012 21:43
-
-
Save leafstorm/2323188 to your computer and use it in GitHub Desktop.
PEG.js parser for DCPU-16 assembly language
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Parser for DCPU-16 assembly language.
 * This parser should accurately parse all of Notch's example code, and most
 * of the community's code.
 * It converts instructions to objects with fields {op, params, label}.
 * op is the lowercased opcode, params is the parameters passed to it (as
 * values), and label is the label prefixing the statement (null if the
 * statement is unlabelled).
 * Values have a type field and one or two others depending on the type.
 *
 * Copyright (C) 2012 Matthew Frazier.
 * Released under the MIT license, see LICENSE for details.
 */
//== Elements of programs ==// | |
// Start rule. A program is zero or more instructions, separated and
// surrounded by any amount of whitespace, line ends and comments.
// Returns the instructions as an array in source order. The array is empty
// when the input contains no instructions (blank or comment-only source),
// which previously was a parse error.
program =
    __ instructions:(i:instruction __ { return i; })* {
        return instructions;
    }
// One statement: an optional ":label" prefix, an opcode name and a
// comma-separated parameter list.
// Produces {label, op, params}: label is null when the statement has no
// label, op is the opcode name lowercased, params is the array of values.
instruction =
    prefix:label? __ opcode:mundaneName __ operands:paramList {
        var statement = {};
        // An unmatched optional is "" or null depending on the PEG.js
        // version; both are normalised to null.
        statement.label = prefix ? prefix : null;
        statement.op = opcode.toLowerCase();
        statement.params = operands;
        return statement;
    }
// A label definition: a colon immediately followed by a non-reserved name.
// Yields the name without the colon.
label =
    ':' ident:mundaneName {
        return ident;
    }
// One or more values separated by commas; whitespace, line ends and comments
// may appear on either side of each comma.
// Returns the values as an array, in source order.
paramList =
    head:value tail:(__ ',' __ next:value { return next; })* {
        tail.unshift(head);
        return tail;
    }
//== Values ==// | |
// Any operand form. The alternatives are tried in the order listed.
value =
    memoryRef
  / registerValue
  / stackRefValue
  / literalValue
  / stringData
// 0x00-0x07, 0x1b-0x1d: register
// Wraps a register name as {type: 'register', name}.
registerValue =
    reg:registerName {
        var result = {type: 'register'};
        result.name = reg;
        return result;
    }
// A bracketed memory operand. Whitespace, line ends and comments are allowed
// inside the brackets. Produces one of:
//   {type: 'registerRef', name}              e.g. [A]
//   {type: 'registerOffset', offset, name}   e.g. [0x2000+I] or [I+0x2000]
//   {type: 'literalRef', addr}               e.g. [0x1000] or [somelabel]
// offset and addr are either a number or a label name (a string).
//
// The top-level sequence is deliberately not wrapped in parentheses: newer
// PEG.js releases scope labels to the parenthesized group that declares
// them, so an action placed outside such a group cannot see `ref`.
memoryRef =
    '[' __ ref:(
        // 0x10-0x17: registerOffset, register written first ("[I + 0x2000]").
        // Tried before registerRef, which would otherwise consume the
        // register and leave the "+ offset" part unparsed.
        (name:stdRegisterName __ '+' __ offset:(numLiteral / mundaneName) {
            return {type: 'registerOffset', offset: offset, name: name};
        }) /
        // 0x08-0x0f: registerRef
        (name:stdRegisterName {
            return {type: 'registerRef', name: name};
        }) /
        // 0x10-0x17: registerOffset, offset written first ("[0x2000 + I]")
        (offset:(numLiteral / mundaneName) __ '+' __ name:stdRegisterName {
            return {type: 'registerOffset', offset: offset, name: name};
        }) /
        // 0x1e: literalRef
        (addr:(numLiteral / mundaneName) {
            return {type: 'literalRef', addr: addr};
        })
    ) __ ']' { return ref; }
// 0x18-0x1a: push, pop, peek
// The command name produced by stackCommand becomes the value's type.
stackRefValue =
    command:stackCommand {
        var result = {};
        result.type = command;
        return result;
    }
// 0x1f-0x3f: literal
// A bare number or a non-reserved name, wrapped as {type: 'literal', value}.
// value is a number for numeric literals and a string for names.
literalValue =
    literal:(numLiteral / mundaneName) {
        var result = {type: 'literal'};
        result.value = literal;
        return result;
    }
// not technically a value
// A quoted string used as an operand, wrapped as {type: 'data', data} where
// data is the decoded string contents.
stringData =
    text:stringLiteral {
        var result = {type: 'data'};
        result.data = text;
        return result;
    }
//== Names and things ==// | |
// A name with no built-in meaning: any identifier that is not a register
// name or a stack command. Used for opcodes and labels.
mundaneName = !magicName name:identifier { return name; }

// Names with a built-in meaning, which mundaneName must reject.
magicName = registerName / stackCommand

registerName = stdRegisterName / specialRegisterName

// The eight general-purpose registers, accepted in either case and returned
// lowercased. The negative lookahead rejects any following identifier
// character (letter, underscore or digit), so names that merely start with a
// register letter, such as "index" or "a1", are not taken for registers.
stdRegisterName = l:[AaBbCcXxYyZzIiJj] !(identLetter / digit) { return l.toLowerCase(); }

// PC, SP and O, written all-uppercase or all-lowercase; returned lowercased.
specialRegisterName = name:('PC' / 'pc' /
                            'SP' / 'sp' /
                            'O' / 'o'
                           ) !(identLetter / digit) { return name.toLowerCase(); }

// PUSH, POP and PEEK, written all-uppercase or all-lowercase; returned
// lowercased.
stackCommand = name:('PUSH' / 'push' /
                     'POP' / 'pop' /
                     'PEEK' / 'peek'
                    ) !(identLetter / digit) { return name.toLowerCase(); }

//== Various kinds of literals ==//

// An identifier starts with a letter or underscore; digits are allowed in
// every position after the first (e.g. "loop1", "data_2"). Returns the
// matched text as a string.
identifier "identifier" =
    first:identLetter rest:(identLetter / digit)* {
        return first + rest.join("");
    }
// Any numeric literal. The prefixed forms are tried first so that the
// leading "0" of "0b..." / "0x..." is not consumed as a decimal number.
numLiteral "numeric literal" =
    binLiteral
  / hexLiteral
  / decLiteral
// One or more decimal digits, returned as a number.
decLiteral "decimal literal" =
    ds:digit+ {
        var text = ds.join("");
        return parseInt(text, 10);
    }
// A lowercase "0x" prefix followed by one or more hex digits (either case),
// returned as a number.
hexLiteral "hex literal" =
    '0x' hs:hexit+ {
        var text = hs.join("");
        return parseInt(text, 16);
    }
// A lowercase "0b" prefix followed by one or more binary digits, returned as
// a number.
binLiteral "binary literal" =
    '0b' bs:bit+ {
        var text = bs.join("");
        return parseInt(text, 2);
    }
// A string in double or single quotes; yields the decoded contents.
stringLiteral "string literal" =
    dqString
  / sqString
// A double-quoted string. The body is any run (possibly empty) of characters
// other than '"' and backslash, mixed with backslash escapes.
// Returns the decoded text; the empty literal "" yields the empty string
// instead of being rejected.
dqString =
    '"' contents:(
        [^"\\] / generalEscape
    )* '"' { return contents.join(""); }
// A single-quoted string. The body is any run (possibly empty) of characters
// other than "'" and backslash, mixed with backslash escapes.
// Returns the decoded text; the empty literal '' yields the empty string
// instead of being rejected.
sqString =
    "'" contents:(
        [^'\\] / generalEscape
    )* "'" { return contents.join(""); }
//== String escapes ==// | |
// Any backslash escape accepted inside a string literal. Each alternative
// yields the character the escape stands for.
generalEscape =
    bsEscape
  / fsEscape
  / tabEscape
  / nlEscape
  / crEscape
  / hexEscape
  / sqEscape
  / dqEscape
// Two-character escapes: a backslash followed by one character. Each rule
// returns the single character the escape stands for.

// backslash
bsEscape =
    '\\\\' { return '\\'; }

// forward slash
fsEscape =
    '\\/' { return '/'; }

// horizontal tab
tabEscape =
    '\\t' { return '\t'; }

// line feed
nlEscape =
    '\\n' { return '\n'; }

// carriage return
crEscape =
    '\\r' { return '\r'; }
// "\xHH": a backslash, "x" and exactly two hex digits, decoded to the
// character with that code.
// The sequence is deliberately not wrapped in parentheses: newer PEG.js
// releases scope labels to the parenthesized group that declares them, so an
// action placed outside such a group cannot see `h` and `l`.
hexEscape =
    '\\x' h:hexit l:hexit {
        return String.fromCharCode(
            parseInt(h + l, 16)
        );
    }
// Escaped quote characters; each returns the bare quote.

// single quote
sqEscape =
    '\\\'' { return "'"; }

// double quote
dqEscape =
    '\\"' { return '"'; }
//== Lexical conventions ==// | |
// mostly borrowed from the PEG.js parser.pegjs code | |
// Skippable filler between tokens: any mix of whitespace, line ends and
// comments, including none at all.
__ =
    (
        whitespace
      / eol
      / comment
    )*
// Single-character classes used by identifiers and numeric literals.

// ASCII letter or underscore
identLetter "letter/underscore" =
    [a-zA-Z_]

// decimal digit
digit "digit" =
    [0-9]

// hexadecimal digit, either case
hexit "hex digit" =
    [0-9a-fA-F]

// binary digit
bit "bit" =
    [01]
// A comment starts at ";" and runs up to, but not including, the next line
// terminator (or to the end of input).
comment "comment" =
    ";"
    [^\n\r\u2028\u2029]*
// A single line terminator: CRLF (matched as one unit), LF, CR, U+2028 or
// U+2029.
eol "line end" =
    "\r\n"
  / [\n\r\u2028\u2029]
// One non-line-breaking whitespace character: space, tab, vertical tab, form
// feed, or one of the listed Unicode space / BOM code points.
whitespace "whitespace" =
    [ \t\v\f\u00A0\uFEFF\u1680\u180E\u2000-\u200A\u202F\u205F\u3000]
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
To parse assembly code, generate a parser using PEG.js. You can also paste this into http://pegjs.majda.cz/online and try assembly there.