Skip to content

Instantly share code, notes, and snippets.

@thoughtsunificator
Created September 29, 2021 16:32
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save thoughtsunificator/f97dfc68106996081bbb21461f499358 to your computer and use it in GitHub Desktop.
Save thoughtsunificator/f97dfc68106996081bbb21461f499358 to your computer and use it in GitHub Desktop.
Tweet parser
// Tokenizer states used by `tokenize`.
// STATE_SEARCHING: not inside a token; scanning for a prefix character.
export const STATE_SEARCHING = "STATE_SEARCHING"
// STATE_TOKENIZING_HASHTAG: accumulating the characters of a hashtag token.
export const STATE_TOKENIZING_HASHTAG = "STATE_TOKENIZING_HASHTAG"
// STATE_TOKENIZING_MENTION: accumulating the characters of a mention token.
export const STATE_TOKENIZING_MENTION = "STATE_TOKENIZING_MENTION"
// Character that starts a hashtag token.
export const PREFIX_HASHTAG = "#"
// Character that starts a mention token.
export const PREFIX_MENTION = "@"
/**
 * Scans `text` for hashtag and mention tokens.
 *
 * A token starts at a prefix character (`PREFIX_HASHTAG` or `PREFIX_MENTION`)
 * and runs until a space, a newline, another occurrence of its own prefix, or
 * the end of the text. The other kind's prefix does not end a token, so
 * "@a#b" is a single mention token.
 *
 * @param {string} text - The text to scan.
 * @returns {{buffer: string, bufferIndex: number}[]} Tokens in order of
 *   appearance. `buffer` is the token text including its prefix and
 *   `bufferIndex` is the position of the prefix counted in Unicode code
 *   points (an index into `[...text]`, not a UTF-16 offset).
 */
export const tokenize = function(text) {
  const characters = [...text]
  const tokens = []
  let state = STATE_SEARCHING
  let token = null

  // Finishes the token in progress. A bare prefix with nothing after it is
  // not a hashtag or mention, so it is dropped instead of being emitted.
  const closeToken = () => {
    if (token !== null && token.buffer.length > 1) {
      tokens.push(token)
    }
    token = null
    state = STATE_SEARCHING
  }

  for (const [index, character] of characters.entries()) {
    if (state !== STATE_SEARCHING) {
      const ownPrefix = state === STATE_TOKENIZING_HASHTAG ? PREFIX_HASHTAG : PREFIX_MENTION
      const isDelimiter = character === ownPrefix || character === " " || character === "\n"
      if (!isDelimiter) {
        token.buffer += character
        continue
      }
      // The delimiter itself never belongs to the token. Fall through so that
      // a delimiting prefix character can immediately open the next token.
      closeToken()
    }
    if (character === PREFIX_HASHTAG) {
      state = STATE_TOKENIZING_HASHTAG
      token = { buffer: character, bufferIndex: index }
    } else if (character === PREFIX_MENTION) {
      state = STATE_TOKENIZING_MENTION
      token = { buffer: character, bufferIndex: index }
    }
  }
  // A token still open here runs up to the end of the text.
  closeToken()
  return tokens
}
/**
 * Splits the tokens found in `text` into hashtags and mentions.
 *
 * Each token returned by `tokenize` is routed by its first character; the
 * prefix is then removed from the token's `buffer` (the token object is
 * updated in place) while `bufferIndex` is left as reported by `tokenize`.
 * Tokens starting with neither prefix are ignored.
 *
 * @param {string} text - The text to parse.
 * @returns {{hashtags: object[], mentions: object[]}} The tokens grouped by kind.
 */
export const parse = function(text) {
  const hashtags = []
  const mentions = []
  const bucketByPrefix = new Map()
  // Mentions first so that the hashtag bucket wins if both prefixes were equal.
  bucketByPrefix.set(PREFIX_MENTION, mentions)
  bucketByPrefix.set(PREFIX_HASHTAG, hashtags)

  tokenize(text).forEach((entry) => {
    const bucket = bucketByPrefix.get(entry.buffer.charAt(0))
    if (bucket === undefined) {
      return
    }
    entry.buffer = entry.buffer.slice(1)
    bucket.push(entry)
  })

  return { hashtags, mentions }
}
/**
 * Returns `text` with every hashtag and mention wrapped in an anchor element:
 * hashtags link to `/hashtag/<name>` and mentions to `/profile/<name>`, where
 * `<name>` is the token without its prefix.
 *
 * NOTE(review): token text is interpolated into the markup without HTML
 * escaping, and text outside tokens is returned verbatim. If the result is
 * rendered as HTML, the input must be sanitized by the caller.
 *
 * @param {string} text - The text to transform.
 * @returns {string} The text with tokens replaced by links.
 */
export const replace = function(text) {
  // `tokenize` reports positions in code points, so slice the text as code
  // points too; UTF-16 based `substring` would drift after any astral
  // character such as an emoji.
  const characters = [...text]
  let output = ""
  let cursor = 0

  for (const token of tokenize(text)) {
    const prefix = token.buffer.charAt(0)
    let route
    if (prefix === PREFIX_HASHTAG) {
      route = "hashtag"
    } else if (prefix === PREFIX_MENTION) {
      route = "profile"
    } else {
      // Not a token this function knows how to link; leave the text as is.
      continue
    }
    const name = token.buffer.substring(1)
    output += characters.slice(cursor, token.bufferIndex).join("")
    output += `<a href='/${route}/${name}'>${token.buffer}</a>`
    cursor = token.bufferIndex + [...token.buffer].length
  }

  // Append whatever follows the last token (or the whole text if none).
  output += characters.slice(cursor).join("")
  return output
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment