Skip to content

Instantly share code, notes, and snippets.

@thoughtsunificator
Created September 29, 2021 16:33
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save thoughtsunificator/5fea511e211a8fad17035054a2d43e1a to your computer and use it in GitHub Desktop.
URI Tokenizer
// Tokenizer states
export const STATE_IDENTIFYING = "STATE_IDENTIFYING"
export const STATE_TOKENIZING_NORMAL = "STATE_TOKENIZING_NORMAL"
export const STATE_TOKENIZING_SPECIAL = "STATE_TOKENIZING_SPECIAL"
// Characters with structural meaning in a URI template
export const TOKEN_SEPARATOR = "/"
export const TOKEN_SPECIAL_PREFIX = "{"
export const TOKEN_SPECIAL_SUFFIX = "}"
// Token types emitted by tokenize()
export const TOKEN_TYPE_NORMAL = "normal"
export const TOKEN_TYPE_SPECIAL = "special"
export const TOKEN_TYPE_SEPARATOR = "separator"
/**
 * Split a URI into a flat list of tokens.
 *
 * A token is one of:
 *  - separator: a single "/" character
 *  - special:   a "{...}" placeholder, delimiters included in the buffer
 *  - normal:    any other run of characters
 *
 * An unterminated "{..." at the end of the input is emitted as a normal token.
 *
 * @param {string} text - The URI to tokenize.
 * @return {Array<{type: string, buffer: string, bufferIndex: number}>}
 *         Tokens in input order; bufferIndex is the index of the token's
 *         first character within text.
 */
export const tokenize = function(text) {
	const tokens = []
	const characters = [...text]
	let state = STATE_IDENTIFYING
	let token = {
		"type": null,
		"buffer": "",
		"bufferIndex": null
	}
	for (const [index, character] of characters.entries()) {
		if (state === STATE_IDENTIFYING) {
			if (character === TOKEN_SEPARATOR) {
				tokens.push({
					"type": TOKEN_TYPE_SEPARATOR,
					"buffer": character,
					"bufferIndex": index
				})
			} else if (index === characters.length - 1) {
				// Single trailing character: emit it as a normal token.
				// Fix: record bufferIndex here — it was previously left null.
				token.bufferIndex = index
				token.buffer += character
				token.type = TOKEN_TYPE_NORMAL
				tokens.push({...token})
			} else if (character === TOKEN_SPECIAL_PREFIX) {
				token.bufferIndex = index
				token.buffer += character
				state = STATE_TOKENIZING_SPECIAL
			} else {
				token.bufferIndex = index
				token.buffer += character
				state = STATE_TOKENIZING_NORMAL
			}
		} else if (state === STATE_TOKENIZING_NORMAL) {
			if (character === TOKEN_SEPARATOR) {
				// Close the pending normal token, then emit the separator.
				token.type = TOKEN_TYPE_NORMAL
				tokens.push({...token})
				tokens.push({
					"type": TOKEN_TYPE_SEPARATOR,
					"buffer": character,
					"bufferIndex": index
				})
				token.type = null
				token.buffer = ""
				token.bufferIndex = null
				state = STATE_IDENTIFYING
			} else if (index === characters.length - 1) {
				token.buffer += character
				token.type = TOKEN_TYPE_NORMAL
				tokens.push({...token})
			} else if (character === TOKEN_SPECIAL_PREFIX) {
				// Close the pending token before starting a "{...}" placeholder.
				// NOTE(review): the separator comparison below is unreachable —
				// separators never enter this state — kept for behavior parity.
				if (token.buffer === TOKEN_SEPARATOR) {
					token.type = TOKEN_TYPE_SEPARATOR
				} else {
					token.type = TOKEN_TYPE_NORMAL
				}
				tokens.push({...token})
				// reset token
				token.buffer = character
				token.type = null
				token.bufferIndex = index
				state = STATE_TOKENIZING_SPECIAL
			} else {
				token.buffer += character
			}
		} else if (state === STATE_TOKENIZING_SPECIAL) {
			if (character === TOKEN_SPECIAL_SUFFIX) {
				// Placeholder closed: emit it with delimiters included.
				token.buffer += character
				token.type = TOKEN_TYPE_SPECIAL
				tokens.push({...token})
				// reset token
				token.buffer = ""
				token.type = null
				token.bufferIndex = null
				state = STATE_IDENTIFYING
			} else if (index === characters.length - 1) {
				// Unterminated "{..." — downgrade to a normal token.
				token.buffer += character
				token.type = TOKEN_TYPE_NORMAL
				tokens.push({...token})
			} else {
				token.buffer += character
			}
		}
	}
	return tokens
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment