Skip to content

Instantly share code, notes, and snippets.

@proguy914629bot
Created December 13, 2023 06:53
Show Gist options
  • Save proguy914629bot/5440451cb4522d81d04ea9e4e1e1925d to your computer and use it in GitHub Desktop.
Save proguy914629bot/5440451cb4522d81d04ea9e4e1e1925d to your computer and use it in GitHub Desktop.
Twemoji Parser for Python
import re
import typing
import emoji
class TwemojiParser:
"""
parser = TwemojiParser()
parser.parse_emoji("🤔") # {"url": "...", "indices": [...], "text": "...", "type": "emoji"}
"""
ASSET_TYPE = typing.Literal["png", "svg"]
def __init__(self):
# self.vs16_regex = re.compile("\uFE0F")
# self.zero_width_joiner = "\u200d"
...
# def remove_vs16s(self, raw_emoji: str) -> str:
# if self.zero_width_joiner not in raw_emoji:
# return re.sub(self.vs16_regex, '', raw_emoji)
# else:
# return raw_emoji
@staticmethod
def get_twemoji_url(codepoints: str, svg: bool) -> str:
if svg:
return f"https://twemoji.maxcdn.com/v/latest/svg/{codepoints}.svg"
else:
return f"https://twemoji.maxcdn.com/v/latest/72x72/{codepoints}.png"
def parse(self, text: str, *, svg: bool = False) -> list[dict[str, str | list[int]]]:
asset_type = "svg" if svg else "png"
emojis = emoji.emoji_list(text)
entities = []
for emoji_dict in emojis:
emoji_text = emoji_dict["emoji"]
codepoints = "-".join(
hex(ord(c))[2:] for c in emoji_text
) # "-".join(hex(ord(c))[2:] for c in self.remove_vs16s(emoji_text))
entities.append(
{
"url": self.get_twemoji_url(codepoints, asset_type) if codepoints else "",
"indices": [emoji_dict["match_start"], emoji_dict["match_end"]],
"text": emoji_text,
"type": "emoji",
}
)
return entities
def __call__(self, text: str, *, svg: bool = False) -> list[dict[str, str | list[int]]]:
return self.parse(text, svg=svg)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment