Skip to content

Instantly share code, notes, and snippets.

@Floofies
Last active January 15, 2018 10:19
Show Gist options
  • Save Floofies/aca8d37ee348294a61d6af04e94a144b to your computer and use it in GitHub Desktop.
Save Floofies/aca8d37ee348294a61d6af04e94a144b to your computer and use it in GitHub Desktop.
Basic Lexical Scanner
// Character classes expressed as lists of inclusive [low, high] character-code pairs.
// ";" only.
const SEMICOLON = [[59, 59]];
// " " only.
const SPACE = [[32, 32]];
// "0" through "9".
const NUMERIC = [[48, 57]];
// "A" through "Z", then "a" through "z".
const ALPHA = [[65, 90], [97, 122]];
// Composite classes; the pairs are shared with (not copied from) the classes above.
const ALPHA_SPACE = SPACE.concat(ALPHA);
const ALPHA_NUMERIC = NUMERIC.concat(ALPHA);
const ALPHA_NUMERIC_SPACE = SPACE.concat(ALPHA_NUMERIC);
// Collects characters of `string` beginning at index `start` and stops just before
// the first position where any of the terminator strings listed in `nt` begins.
// If no terminator is found, everything from `start` to the end is returned.
// An empty terminator string never matches.
function scanToken(string, start = 0, nt = [" "]) {
	// True when one of the terminators occurs in `string` starting exactly at `pos`.
	const terminatorBeginsAt = (pos) => {
		for (const terminator of nt) {
			let offset = 0;
			while (offset < terminator.length && string[pos + offset] === terminator[offset]) {
				offset++;
			}
			if (terminator.length > 0 && offset === terminator.length) {
				return true;
			}
		}
		return false;
	};
	let token = "";
	let pos = start;
	while (pos < string.length && !terminatorBeginsAt(pos)) {
		token += string[pos];
		pos++;
	}
	return token;
}
// Collects characters of `string` beginning at index `start` and stops just before
// the first character whose char code lies inside one of `ranges`.
// `ranges` is a list of inclusive [low, high] char-code pairs; when a string is
// passed instead, it is converted to such a list with `getRanges`.
// If no character falls in a range, everything from `start` to the end is returned.
function scanRange(string, start = 0, ranges = SPACE) {
	const codeRanges = typeof ranges === "string" ? getRanges(ranges) : ranges;
	// True when `code` lies within at least one of the inclusive ranges.
	const isInAnyRange = (code) => {
		for (const bounds of codeRanges) {
			if (code >= bounds[0] && code <= bounds[1]) {
				return true;
			}
		}
		return false;
	};
	let token = "";
	for (let pos = start; pos < string.length; pos++) {
		const unit = string[pos];
		if (isInAnyRange(unit.charCodeAt(0))) {
			return token;
		}
		token += unit;
	}
	return token;
}
// Returns the unique char codes occurring in `string`, sorted in ascending numeric order.
// An empty string yields an empty array.
// Note: `Array.from` walks the string by code point while `charCodeAt(0)` reads a single
// UTF-16 unit, so a character outside the BMP contributes only its leading surrogate.
function getAlphabet(string) {
	const codes = Array.from(string, (symbol) => symbol.charCodeAt(0));
	// The comparator must return a negative/zero/positive number; a boolean comparator
	// never reports "less than", which leaves the array unsorted on modern engines.
	return [...new Set(codes)].sort((a, b) => a - b);
}
// Converts the characters of `string` into a list of inclusive [low, high] char-code
// ranges, merging runs of consecutive codes into a single pair.
// Relies on `getAlphabet` to supply the codes; consecutive codes are only merged when
// they arrive in ascending order. An empty string yields an empty array.
function getRanges(string) {
	const codes = getAlphabet(string);
	const codeRanges = [];
	for (const code of codes) {
		const current = codeRanges[codeRanges.length - 1];
		if (current !== undefined && code === current[1] + 1) {
			// Extend the open range by moving its upper bound, so every range stays a
			// two-element pair regardless of how long the contiguous run is.
			current[1] = code;
		} else {
			codeRanges.push([code, code]);
		}
	}
	return codeRanges;
}
// Finds the index of the `closer` character that balances an already-open `opener`.
// Scanning begins at `start` with a nesting depth of 1, so `start` should point just
// past the opener being matched. Each further `opener` raises the depth and each
// `closer` lowers it; the index of the closer that brings the depth to 0 is returned.
// Returns null when the string ends before the depth reaches 0.
// Comparison is per single character, and `closer` is tested before `opener`.
function findClosingPos(string, start = 0, opener = "{", closer = "}") {
	let openCount = 1;
	let index = start;
	while (index < string.length) {
		const current = string[index];
		if (current === closer) {
			openCount -= 1;
			if (openCount === 0) {
				return index;
			}
		} else if (current === opener) {
			openCount += 1;
		}
		index++;
	}
	return null;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment