Skip to content

Instantly share code, notes, and snippets.

@nchanged
Created May 21, 2019 07:31
Show Gist options
  • Save nchanged/5bae9efeaf07896c3bc1911a50b7bd22 to your computer and use it in GitHub Desktop.
Save nchanged/5bae9efeaf07896c3bc1911a50b7bd22 to your computer and use it in GitHub Desktop.
import { Context } from "./Context";
import { CharTypes, CharType } from "./chars";
import { OneCharToken } from "./OneCharToken";
import { CharCategory } from "./CharCategory";
import { TokenType } from "./TokenType";
/**
 * Options accepted by `tokenize`.
 */
export interface ITokenizeProps {
/** The source text to tokenize. */
str: string;
/**
 * Copied verbatim onto the tokenizer context by `tokenize`.
 * NOTE(review): none of the capture functions in this file read it — confirm where it is used.
 */
locations?: boolean;
/** Callback invoked once for every token the tokenizer emits. */
onToken?: (token: any) => void;
}
/**
 * Skips a comment that begins at `c.index`.
 *
 * The caller (`parseChar`) invokes this when the character at `c.index` is a
 * forward slash. The following character decides what happens:
 *  - an asterisk starts a block comment, skipped through its closing
 *    asterisk + slash pair;
 *  - another forward slash starts a line comment, skipped through the next
 *    end-of-line character;
 *  - anything else is not a comment, and `c.index` is left untouched.
 *
 * A comment that is still open when the input ends is consumed up to the end
 * of the input, so its contents are never handed back to the tokenizer.
 *
 * @param c tokenizer context; `c.index` is advanced past the comment
 * @returns `true` when a comment was consumed, `false` otherwise
 */
function captureComment(c: Context): boolean {
  const next = CharTypes[c.str.charCodeAt(c.index + 1)];

  if (next & CharType.ASTERISK) {
    // Start after the two-character opener so that its asterisk cannot also
    // act as the first half of the terminator (the input "/*/" is unterminated).
    for (let i = c.index + 2; i < c.length - 1; i++) {
      if (
        CharTypes[c.str.charCodeAt(i)] & CharType.ASTERISK &&
        CharTypes[c.str.charCodeAt(i + 1)] & CharType.FORWARD_SLASH
      ) {
        // Resume after BOTH terminator characters; stopping on the closing
        // slash would let it be re-read as the start of another comment.
        c.index = i + 2;
        return true;
      }
    }
    // Unterminated block comment: swallow the remainder of the input.
    c.index = c.length;
    return true;
  }

  if (next & CharType.FORWARD_SLASH) {
    for (let i = c.index + 2; i < c.length; i++) {
      if (CharTypes[c.str.charCodeAt(i)] & CharType.END_OF_LINE) {
        c.index = i + 1;
        return true;
      }
    }
    // Line comment on the last line with no trailing end-of-line character.
    c.index = c.length;
    return true;
  }

  // A lone slash is not a comment; let the caller decide what to do with it.
  return false;
}
/**
 * Emits a single `TokenType.VALID` token for text that started with the
 * literal `http`.
 *
 * `captureToken` calls this after it has read `http` and positioned `c.index`
 * on the character that ended that word. Everything from `c.index` up to (but
 * not including) the next character flagged `CharCategory.HTTP_TOKEN_END` is
 * appended to `"http"` and reported as one token. If the input ends first, the
 * token runs to the end of the input.
 *
 * @param c tokenizer context; `c.index` is moved to the terminating character
 *          (or to `c.length` when the input ran out)
 * @returns always `true` — a token is emitted on every call
 */
function captureHTTPToken(c: Context): boolean {
  let end = c.index;
  // Bound the scan by the input length: reading past it would append the
  // string "undefined" to the value and never reach a terminator.
  while (
    end < c.length &&
    !(CharTypes[c.str.charCodeAt(end)] & CharCategory.HTTP_TOKEN_END)
  ) {
    end++;
  }
  c.onToken({ type: TokenType.VALID, value: "http" + c.str.slice(c.index, end) });
  c.index = end;
  return true;
}
/**
 * Reads a word-like token that begins at `c.index`.
 *
 * Characters are consumed until one is flagged `CharCategory.TOKEN_END` or has
 * no entry in `CharTypes` (which is also what a read past the end of the
 * string yields). The collected text is emitted as a `TokenType.VALID` token
 * and `c.index` is left on the terminating character.
 *
 * The word `http` is special-cased: instead of being emitted on its own, the
 * scan is handed over to `captureHTTPToken`, starting at the terminator.
 *
 * @param c tokenizer context
 * @returns `true` once a token has been emitted, or whatever
 *          `captureHTTPToken` returns for the `http` case
 */
function captureToken(c: Context): boolean {
  const start = c.index;
  let pos = start;
  while (pos <= c.length) {
    const flags = CharTypes[c.str.charCodeAt(pos)];
    if (flags && !(flags & CharCategory.TOKEN_END)) {
      // Still inside the word.
      pos++;
      continue;
    }
    const value = c.str.slice(start, pos);
    if (value === "http") {
      // special treatment for a stupid http token
      c.index = pos;
      return captureHTTPToken(c);
    }
    c.onToken({ type: TokenType.VALID, value: value });
    c.index = pos;
    return true;
  }
}
/**
 * Reads a numeric token that begins at `c.index`.
 *
 * Digit characters accumulate into the token value and alphabetic characters
 * accumulate into a unit suffix. Scanning stops at:
 *  - a percent sign: a `TokenType.PERCENT` token carrying the digits is
 *    emitted and `c.index` moves past the sign;
 *  - a character flagged `CharCategory.TOKEN_END` or one with no `CharTypes`
 *    entry (including a read past the end of the string): a `TokenType.UNIT`
 *    token (with `unit`) is emitted if any alphabetic characters were seen,
 *    otherwise a `TokenType.DIGIT` token; `c.index` stays on the terminator.
 *
 * Any other character is skipped without being recorded.
 *
 * @param c tokenizer context
 * @returns `true` once a token has been emitted
 */
function captureDigit(c: Context): boolean {
  let digits = "";
  let suffix = "";
  let pos = c.index;
  while (pos <= c.length) {
    const ch = c.str[pos];
    const flags = CharTypes[c.str.charCodeAt(pos)];

    if (flags & CharType.DIGIT) digits += ch;

    if (!flags || flags & CharCategory.TOKEN_END) {
      // A non-empty suffix is exactly the case in which the token is a unit.
      c.onToken(
        suffix
          ? { type: TokenType.UNIT, value: digits, unit: suffix }
          : { type: TokenType.DIGIT, value: digits }
      );
      c.index = pos;
      return true;
    }

    if (flags & CharType.PERCENT) {
      c.index = pos + 1;
      c.onToken({ type: TokenType.PERCENT, value: digits });
      return true;
    }

    if (flags & CharType.ALPHABET) suffix += ch;
    pos++;
  }
}
/**
 * Classifies the character at `c.index` and hands it to the matching capture
 * routine. Checks are made in this order:
 *  1. forward slash      -> `captureComment`
 *  2. `OneCharToken` hit -> emit a token carrying only that type, advance by one
 *  3. token-start char   -> `captureToken`
 *  4. digit              -> `captureDigit`
 *
 * When a routine consumes input it is responsible for moving `c.index`; the
 * caller only advances the index itself when this function reports `false`.
 *
 * @param c tokenizer context
 * @returns `true` if the character was handled (and `c.index` moved by the
 *          handler), `false` if nothing matched
 */
function parseChar(c: Context): boolean {
  const code = c.str.charCodeAt(c.index);
  const flags = CharTypes[code];

  if (flags & CharType.FORWARD_SLASH) {
    return captureComment(c);
  }

  const oneCharType = OneCharToken[code];
  if (oneCharType) {
    c.onToken({ type: oneCharType });
    c.index++;
    return true;
  }

  if (flags & CharCategory.TOKEN_START) {
    return captureToken(c);
  }

  if (flags & CharType.DIGIT) {
    return captureDigit(c);
  }

  // Nothing recognised this character. Say so explicitly rather than falling
  // off the end and returning `undefined` from a function typed `boolean`.
  return false;
}
/**
 * Tokenizes `props.str`, reporting each token through `props.onToken`.
 *
 * A fresh context is built for every call and `parseChar` is applied
 * repeatedly; whenever it reports that nothing was recognised, the current
 * character is skipped.
 *
 * @param props the input string plus optional settings; when `onToken` is
 *              omitted the input is still scanned but tokens are discarded
 */
export function tokenize(props: ITokenizeProps): void {
  const c: Context = {
    index: 0,
    str: props.str,
    length: props.str.length,
    // `onToken` is optional on the props but the capture functions call it
    // unconditionally, so substitute a no-op instead of crashing on the first token.
    onToken: props.onToken || (() => {}),
    locations: props.locations
  };
  // `<=` is deliberate: the original loop also visits the one-past-the-end
  // position, where `parseChar` matches nothing and the index simply advances.
  while (c.index <= c.length) {
    if (!parseChar(c)) c.index++;
  }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment