Skip to content

Instantly share code, notes, and snippets.

@blcarlson01
Created April 14, 2023 14:45
Show Gist options
  • Save blcarlson01/c0b748801e917d60e6e1ab964f2a4bd9 to your computer and use it in GitHub Desktop.
Save blcarlson01/c0b748801e917d60e6e1ab964f2a4bd9 to your computer and use it in GitHub Desktop.
ujson : https://pypi.org/project/ujson/
****
# Public API of this module.
# NOTE(review): "last_downloaded_timestamp" is exported here, but no definition
# of it is visible in this part of the file -- confirm it exists, otherwise
# a star-import of this module raises AttributeError.
__all__ = ("findall", "findall_list", "last_downloaded_timestamp", "replace", "replace_with_desc")
__version__ = "1.1.0"

import datetime
import functools
import logging
import os.path
import re
import sys
import json

# Attach a no-op handler to this module's logger.
logging.getLogger(__name__).addHandler(logging.NullHandler())

# Download endpoint
EMOJI_VERSION = "13.1"
URL = f"https://unicode.org/Public/emoji/{EMOJI_VERSION}/emoji-test.txt"

# This variable is updated automatically from scripts/download_codes.py
_LDT = datetime.datetime(2021, 7, 18, 19, 57, 25, 20304, tzinfo=datetime.timezone.utc)

# Load codes from file and compile regex pattern.
# The encoding is given explicitly so that decoding does not depend on the
# platform's locale default (JSON text is UTF-8).
with open(os.path.join(os.path.dirname(__file__), "codes.json"), encoding="utf-8") as f:
    # Mapping of ``{emoji: description}``; its keys are the codes to match.
    _CODE_TO_DESC = json.load(f)

# Longest alternatives go first: regex alternation takes the first branch that
# matches, so a multi-codepoint sequence must be tried before any shorter code
# that is a prefix of it.
_ESCAPED_CODES = sorted((re.escape(code) for code in _CODE_TO_DESC), key=len, reverse=True)
_EMOJI_PATTERN = re.compile("|".join(_ESCAPED_CODES))
def findall(string):
    """Collect the distinct emojis that occur in ``string``.

    :param string: Text to scan for emojis
    :type string: str
    :return: Mapping of ``{emoji: description}`` with one entry per
        distinct emoji found; empty when nothing matches
    :rtype: dict
    """
    # De-duplicate the raw matches before looking up descriptions.
    unique_emojis = set(_EMOJI_PATTERN.findall(string))
    return {emoji: _CODE_TO_DESC[emoji] for emoji in unique_emojis}
def findall_list(string, desc=True):
    """List every emoji occurrence in ``string``, duplicates included.

    :param string: Text to scan for emojis
    :type string: str
    :param desc: When true, return each emoji's description; when false,
        return the emoji itself
    :type desc: bool
    :return: One list item per match, in the order the matches appear
        in ``string``
    :rtype: list
    """
    matches = _EMOJI_PATTERN.findall(string)
    if not desc:
        # Caller asked for the raw emojis rather than their descriptions.
        return matches
    return [_CODE_TO_DESC[emoji] for emoji in matches]
def replace(string, repl=""):
    """Substitute ``repl`` for every emoji found in ``string``.

    :param string: Text to scan for emojis
    :type string: str
    :param repl: Replacement handed to the compiled pattern's ``sub``;
        the default empty string removes the emojis
    :type repl: str
    :return: Copy of ``string`` with each emoji match substituted
    :rtype: str
    """
    substituted = _EMOJI_PATTERN.sub(repl, string)
    return substituted
def replace_with_desc(string, sep=":"):
    """Replace emojis in ``string`` with their description.

    Each description is wrapped with ``sep`` immediately before and after it.

    The substitution is done in a single pass over the regex matches rather
    than by calling ``str.replace`` once per distinct emoji.  Sequential
    replacement is order-dependent: replacing a short emoji first corrupts
    any longer matched sequence that starts with it, leaving part of that
    sequence behind in the output.

    :param string: The input text to search
    :type string: str
    :param sep: String to put before and after the emoji description
    :type sep: str
    :return: New copy of ``string`` with replacements made and ``sep``
        immediately before and after each description
    :rtype: str
    """

    def _describe(match):
        # ``match.group(0)`` is exactly the emoji the pattern matched at this
        # position, so every occurrence gets the description of its own code.
        return sep + _CODE_TO_DESC[match.group(0)] + sep

    return _EMOJI_PATTERN.sub(_describe, string)
def set_emoji_pattern():
    """Populate the module-level pattern and code table if not yet set.

    Does nothing when ``_EMOJI_PAT`` is already set.  Otherwise loads the
    codes, compiles them into ``_EMOJI_PAT`` and rebuilds ``_CODE_TO_DESC``
    as a ``{code: description}`` mapping.

    NOTE(review): ``_EMOJI_PAT``, ``_load_codes_from_file`` and
    ``_compile_codes`` are not defined in the visible part of this file
    (the other functions here use ``_EMOJI_PATTERN``) -- confirm they
    exist before relying on this function.
    """
    global _EMOJI_PAT, _CODE_TO_DESC
    if _EMOJI_PAT is not None:
        return
    codes = _load_codes_from_file()
    _EMOJI_PAT = _compile_codes(codes)
    # ``codes`` maps each description to a list of codes; invert it so each
    # individual code points at its description.
    _CODE_TO_DESC = {
        code: description
        for description, code_list in codes.items()
        for code in code_list
    }
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment