Skip to content

Instantly share code, notes, and snippets.

@1moita
Created May 22, 2022 17:27
Show Gist options
  • Save 1moita/650cff5404386376236367d607e051d0 to your computer and use it in GitHub Desktop.
Save 1moita/650cff5404386376236367d607e051d0 to your computer and use it in GitHub Desktop.
/**
 * Reports whether `value` contains a whitespace character.
 *
 * The argument is matched against `/\s/`, so for a single-character string
 * this answers "is this character whitespace?" (space, tab, newline, ...).
 *
 * @param {string} value - text to inspect (normally one character)
 * @returns {boolean} true when a whitespace character is found
 */
const is_whitespace = (value) => {
  const whitespace_pattern = /\s/;
  return whitespace_pattern.test(value);
};
/**
 * Reports whether the first character of `value` is an ASCII letter or an
 * underscore (A-Z, a-z, `_`).
 *
 * Only the first UTF-16 code unit is examined. An empty string yields NaN
 * from `charCodeAt`, which fails every comparison, so the result is false.
 *
 * @param {string} value - text whose first character is classified
 * @returns {boolean}
 */
const is_alpha = (value) => {
  const code = value.charCodeAt();
  const is_upper = code >= 65 && code <= 90; // 'A'..'Z'
  const is_lower = code >= 97 && code <= 122; // 'a'..'z'
  const is_underscore = code === 95; // '_'
  return is_upper || is_lower || is_underscore;
};
/**
 * Reports whether the first character of `value` is an ASCII digit (0-9).
 *
 * Only the first UTF-16 code unit is examined. An empty string yields NaN
 * from `charCodeAt`, so the result is false.
 *
 * @param {string} value - text whose first character is classified
 * @returns {boolean}
 */
const is_numeric = (value) => {
  const code = value.charCodeAt();
  return code >= 48 && code <= 57; // '0'..'9'
};
/**
 * Reports whether the first character of `value` is an ASCII digit, an ASCII
 * letter, or an underscore (0-9, A-Z, a-z, `_`).
 *
 * Only the first UTF-16 code unit is examined. An empty string yields NaN
 * from `charCodeAt`, so the result is false.
 *
 * @param {string} value - text whose first character is classified
 * @returns {boolean}
 */
const is_alphanumeric = (value) => {
  const code = value.charCodeAt();
  if (code >= 48 && code <= 57) return true; // '0'..'9'
  if (code >= 65 && code <= 90) return true; // 'A'..'Z'
  if (code >= 97 && code <= 122) return true; // 'a'..'z'
  return code === 95; // '_'
};
// Reserved words of the language. An identifier whose spelling matches one of
// these entries is emitted by the tokenizer with type 'keyword' instead of
// 'identifier'. Kept as a plain array because lookups use `keywords.includes`.
const keywords = [
  'use', 'function', 'end', 'return',
  'var', 'const',
  'if', 'else', 'elif',
  'for', 'in', 'of',
  'match',
];
module.exports = function(input) {
let token = []
let position = 0
const get_current = (value = 0) => position + value >= input.length
? '\0' : input[position + value]
const tokenize_double = (initial) => {
let result = get_current() == '='
? initial + '='
: ['+', '-', '.'].includes(get_current()) && get_current() == initial
? initial + get_current()
: initial
if(result != initial) position++
return result
}
const tokenize_identifier = () => {
let begin = position
while(is_alphanumeric(get_current())) position++
let literal = input.substring(begin, position)
let type = keywords.includes(literal) ? 'keyword' : 'identifier'
return { type, literal }
}
const tokenize_string = (symbol) => {
let begin = position
while(get_current() != '\0' && get_current() != '\n') {
if(get_current() == '\\') {
position++
if(['\'', '\n', '"'].includes(get_current()) || is_alphanumeric(get_current())) {
position++
} else {
throw new SyntaxError('deu merda 2')
}
}
if(get_current() == symbol) break
position++
}
if(get_current() != symbol) throw new SyntaxError('deu merda')
position++
return { type: 'string', literal: input.substring(begin, position - 1) }
}
const tokenize_number = () => {
let begin = position
let type = 'integer'
while(is_numeric(get_current()) || get_current() == '.') {
if(get_current() == '.') {
if(!is_numeric(get_current(1))) throw new SyntaxError('deu merda 3')
if(type == 'integer') type = 'float'
else throw new SyntaxError('deu merda 4')
}
position++
}
return { type, literal: input.substring(begin, position) }
}
while(position <= input.length) {
while(is_whitespace(get_current())) {
if(get_current() == '\n') token.push({ type: 'symbol', literal: get_current() })
position++
}
let current = get_current()
switch(current) {
case '\0':
position++
token.push({ type: 'eof', literal: get_current() })
break
case '+':
case '-':
case '*':
case '/':
case '%':
case '=':
case '!':
case '>':
case '<':
position++
token.push({ type: 'operator', literal: tokenize_double(current) })
break
case '(':
case ')':
case '[':
case ']':
case '{':
case '}':
case ':':
case ';':
case ',':
position++
token.push({ type: 'symbol', literal: current })
break
case '.':
position++
token.push({ type: 'symbol', literal: tokenize_double(current) })
break
case '"':
case '\'':
position++
token.push(tokenize_string(current))
break
default:
if(is_numeric(get_current())) {
token.push(tokenize_number())
} else if(is_alpha(get_current())) {
token.push(tokenize_identifier())
} else {
position++
token.push({ type: 'unknown', literal: current })
}
break
}
}
return token
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment