"""
A Twemoji Parser written in Python.

This was inspired by @twitter/twemoji-parser.
"""
import re | |
import typing | |
import emoji | |
class TwemojiParser: | |
""" | |
parser = TwemojiParser() | |
parser.parse_emoji("🤔") # {"url": "...", "indices": [...], "text": "...", "type": "emoji"} | |
""" | |
ASSET_TYPE = typing.Literal["png", "svg"] | |
def __init__(self): | |
# self.vs16_regex = re.compile("\uFE0F") | |
# self.zero_width_joiner = "\u200d" | |
... | |
# def remove_vs16s(self, raw_emoji: str) -> str: | |
# if self.zero_width_joiner not in raw_emoji: | |
# return re.sub(self.vs16_regex, '', raw_emoji) | |
# else: | |
# return raw_emoji | |
@staticmethod | |
def get_twemoji_url(codepoints: str, svg: bool) -> str: | |
if svg: | |
return f"https://twemoji.maxcdn.com/v/latest/svg/{codepoints}.svg" | |
else: | |
return f"https://twemoji.maxcdn.com/v/latest/72x72/{codepoints}.png" | |
def parse(self, text: str, *, svg: bool = False) -> list[dict[str, str | list[int]]]: | |
asset_type = "svg" if svg else "png" | |
emojis = emoji.emoji_list(text) | |
entities = [] | |
for emoji_dict in emojis: | |
emoji_text = emoji_dict["emoji"] | |
codepoints = "-".join( | |
hex(ord(c))[2:] for c in emoji_text | |
) # "-".join(hex(ord(c))[2:] for c in self.remove_vs16s(emoji_text)) | |
entities.append( | |
{ | |
"url": self.get_twemoji_url(codepoints, asset_type) if codepoints else "", | |
"indices": [emoji_dict["match_start"], emoji_dict["match_end"]], | |
"text": emoji_text, | |
"type": "emoji", | |
} | |
) | |
return entities | |
def __call__(self, text: str, *, svg: bool = False) -> list[dict[str, str | list[int]]]: | |
return self.parse(text, svg=svg) |