Created
May 22, 2022 17:27
-
-
Save 1moita/650cff5404386376236367d607e051d0 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Reports whether `value` contains a whitespace character.
 * The lexer only ever passes a single character, so in practice this answers
 * "is this character whitespace?" (space, tab, newline, etc. as matched by `\s`).
 * @param {string} value - character to inspect
 * @returns {boolean} true when the regular expression `\s` matches
 */
const is_whitespace = (value) => /\s/.test(value)
/**
 * Reports whether a character may start an identifier: an ASCII letter
 * (A-Z, a-z) or an underscore. Only the first UTF-16 code unit is inspected.
 * @param {string} value - character to inspect
 * @returns {boolean} false for anything else, including the empty string
 */
const is_alpha = (value) => {
  const code = value.charCodeAt(0)
  const is_upper = code > 64 && code < 91
  const is_lower = code > 96 && code < 123
  // 95 is the code unit of '_'
  return is_upper || is_lower || code === 95
}
/**
 * Reports whether a character is an ASCII decimal digit (0-9).
 * Only the first UTF-16 code unit is inspected.
 * @param {string} value - character to inspect
 * @returns {boolean} false for anything else, including the empty string
 */
const is_numeric = (value) => {
  const code = value.charCodeAt(0)
  // 48..57 are the code units of '0'..'9'
  return code > 47 && code < 58
}
/**
 * Reports whether a character may continue an identifier: an ASCII digit,
 * an ASCII letter, or an underscore. Only the first UTF-16 code unit is inspected.
 * @param {string} value - character to inspect
 * @returns {boolean} false for anything else, including the empty string
 */
const is_alphanumeric = (value) => {
  const code = value.charCodeAt(0)
  const is_digit = code > 47 && code < 58
  const is_upper = code > 64 && code < 91
  const is_lower = code > 96 && code < 123
  // 95 is the code unit of '_'
  return is_digit || is_upper || is_lower || code === 95
}
// Reserved words: an identifier whose text appears in this list is emitted
// by the lexer with type 'keyword' instead of 'identifier'.
const keywords = [
  'use',
  'function',
  'end',
  'return',
  'var',
  'const',
  'if',
  'else',
  'elif',
  'for',
  'in',
  'of',
  'match'
]
module.exports = function(input) { | |
let token = [] | |
let position = 0 | |
const get_current = (value = 0) => position + value >= input.length | |
? '\0' : input[position + value] | |
const tokenize_double = (initial) => { | |
let result = get_current() == '=' | |
? initial + '=' | |
: ['+', '-', '.'].includes(get_current()) && get_current() == initial | |
? initial + get_current() | |
: initial | |
if(result != initial) position++ | |
return result | |
} | |
const tokenize_identifier = () => { | |
let begin = position | |
while(is_alphanumeric(get_current())) position++ | |
let literal = input.substring(begin, position) | |
let type = keywords.includes(literal) ? 'keyword' : 'identifier' | |
return { type, literal } | |
} | |
const tokenize_string = (symbol) => { | |
let begin = position | |
while(get_current() != '\0' && get_current() != '\n') { | |
if(get_current() == '\\') { | |
position++ | |
if(['\'', '\n', '"'].includes(get_current()) || is_alphanumeric(get_current())) { | |
position++ | |
} else { | |
throw new SyntaxError('deu merda 2') | |
} | |
} | |
if(get_current() == symbol) break | |
position++ | |
} | |
if(get_current() != symbol) throw new SyntaxError('deu merda') | |
position++ | |
return { type: 'string', literal: input.substring(begin, position - 1) } | |
} | |
const tokenize_number = () => { | |
let begin = position | |
let type = 'integer' | |
while(is_numeric(get_current()) || get_current() == '.') { | |
if(get_current() == '.') { | |
if(!is_numeric(get_current(1))) throw new SyntaxError('deu merda 3') | |
if(type == 'integer') type = 'float' | |
else throw new SyntaxError('deu merda 4') | |
} | |
position++ | |
} | |
return { type, literal: input.substring(begin, position) } | |
} | |
while(position <= input.length) { | |
while(is_whitespace(get_current())) { | |
if(get_current() == '\n') token.push({ type: 'symbol', literal: get_current() }) | |
position++ | |
} | |
let current = get_current() | |
switch(current) { | |
case '\0': | |
position++ | |
token.push({ type: 'eof', literal: get_current() }) | |
break | |
case '+': | |
case '-': | |
case '*': | |
case '/': | |
case '%': | |
case '=': | |
case '!': | |
case '>': | |
case '<': | |
position++ | |
token.push({ type: 'operator', literal: tokenize_double(current) }) | |
break | |
case '(': | |
case ')': | |
case '[': | |
case ']': | |
case '{': | |
case '}': | |
case ':': | |
case ';': | |
case ',': | |
position++ | |
token.push({ type: 'symbol', literal: current }) | |
break | |
case '.': | |
position++ | |
token.push({ type: 'symbol', literal: tokenize_double(current) }) | |
break | |
case '"': | |
case '\'': | |
position++ | |
token.push(tokenize_string(current)) | |
break | |
default: | |
if(is_numeric(get_current())) { | |
token.push(tokenize_number()) | |
} else if(is_alpha(get_current())) { | |
token.push(tokenize_identifier()) | |
} else { | |
position++ | |
token.push({ type: 'unknown', literal: current }) | |
} | |
break | |
} | |
} | |
return token | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment