Skip to content

Instantly share code, notes, and snippets.

Last active June 3, 2024 06:15
Show Gist options
  • Save msenol86/44082269be46aa446ccda9d02202e523 to your computer and use it in GitHub Desktop.
Save msenol86/44082269be46aa446ccda9d02202e523 to your computer and use it in GitHub Desktop.
import urllib.request
import re
EMOJI_TEST_FILENAME = "emoji-test.txt"
def download_latest_emoji_test_data():
    """Download the emoji test data and store it as EMOJI_TEST_FILENAME.

    The whole HTTP response body is read into memory and written to disk
    unchanged (binary mode), overwriting any existing file.

    NOTE(review): EMOJI_DATA_URL is not defined in the visible part of this
    file; it has to exist at module level before this function is called.

    :raises urllib.error.URLError: if the download fails
    :raises OSError: if the target file cannot be written
    """
    # Close the HTTP response deterministically instead of leaving it to GC.
    with urllib.request.urlopen(EMOJI_DATA_URL) as response:
        emoji_test_file = response.read()
    with open(EMOJI_TEST_FILENAME, "wb") as tmp_file:
        tmp_file.write(emoji_test_file)
def convert_unicode_chars_2(p_string_in_unicode):
    r"""Convert whitespace-separated hexadecimal code points into a string.

    :param p_string_in_unicode: u'1F469 200D 1F469 200D 1F467 200D 1F466'
    :return: u'\U0001f469\u200d\U0001f469\u200d\U0001f467\u200d\U0001f466'
    :raises ValueError: if a token is not a valid hexadecimal code point
    """
    # split() without an argument ignores leading/trailing/repeated
    # whitespace, so "" and "41  42" no longer fail on an empty token.
    return "".join(chr(int(a_char, base=16)) for a_char in p_string_in_unicode.split())
def get_normalize_short_names(p_string_in_unicode):
    """Turn a human-readable emoji name into an identifier-like short name.

    Examples:
        family: woman, woman, girl, girl -> family_woman_woman_girl_girl
        UP! button -> UP_button
        Japanese “free of charge” button -> Japanese_free_of_charge_button
        flag: Cocos (Keeling) Islands -> flag_Cocos_Keeling_Islands
        three o’clock -> three_oclock
        rescue worker’s helmet -> rescue_workers_helmet
        flag: São Tomé & Príncipe -> flag_São_Tomé_Príncipe

    :param p_string_in_unicode: input format
        men holding hands: dark skin tone, medium-dark skin tone
    :return: output format
        men_holding_hands_dark_skin_tone_medium-dark_skin_tone
    """
    # Remove all non-alphanumeric chars except space and dash.
    kept = "".join(c for c in p_string_in_unicode if c.isalnum() or c in (" ", "-"))
    # Dropping punctuation can leave runs of spaces (e.g. around "&");
    # collapse every run into a single underscore in one pass.
    return re.sub(r" +", "_", kept)
def load_emoji_lookup():
    """Build an emoji -> normalized short name lookup from the test data file.

    Reads EMOJI_TEST_FILENAME, keeps the rows whose status is
    ``fully-qualified`` or ``minimally-qualified`` and parses the comment
    part of each row (everything after the first ``#``), where the first
    token is the emoji itself and the remaining tokens form its name.

    If the file does not exist it is downloaded and ``None`` is returned, so
    the caller has to run the script again.

    NOTE(review): if the data file puts a version token (e.g. ``E1.0``)
    between the emoji and its name, that token is kept as part of the short
    name, exactly as the original parsing did -- confirm this is intended.

    :return: dict mapping emoji string to normalized short name, or None
        when the data file was missing
    """
    # Keep the try body minimal: only reading the file can raise
    # FileNotFoundError.
    try:
        with open(EMOJI_TEST_FILENAME, "r", encoding="utf8") as unicode_data:
            unicode_data_rows = unicode_data.readlines()
    except FileNotFoundError:
        print(EMOJI_TEST_FILENAME + " file not found. Downloading it ...")
        download_latest_emoji_test_data()
        print("File downloaded. Re-run the script")
        return None

    # The status column is padded with a variable number of spaces before
    # the '#', so match any amount of whitespace rather than exactly one.
    status_pattern = re.compile(r"(minimally-qualified|fully-qualified)\s+#")
    qualified_emojis = [a_line for a_line in unicode_data_rows if status_pattern.search(a_line)]
    print("Count of quailifed_emojis: " + str(len(qualified_emojis)))

    tmp_dict = dict()
    for an_emoji_row in qualified_emojis:
        # Split on the FIRST '#' only: the "keycap: #" row contains '#'
        # inside the comment, which would otherwise yield an empty emoji.
        comment_tokens = an_emoji_row.partition("#")[2].split(" ")
        emoji_in_unicode = comment_tokens[1].strip()
        emoji_shortname = " ".join(comment_tokens[2:]).strip()
        tmp_dict[emoji_in_unicode] = get_normalize_short_names(emoji_shortname)
    return tmp_dict
def is_contains_emoji(p_string_in_unicode):
    """Tell whether the text contains a character in one of the emoji ranges.

    Instead of searching all chars of a text in an emoji lookup dictionary
    this function just checks whether any char in the text falls inside a
    small set of code point ranges. It is much faster than a dictionary
    lookup for a large text. However it only tells whether a text contains
    an emoji; it does not return the found emojis.

    The ranges are deliberately broad, so some non-emoji characters inside
    them (for example typographic punctuation above U+200D) are reported as
    emoji too.

    :param p_string_in_unicode: text to inspect; None or "" gives False
    :return: True if at least one character is inside an emoji range
    """
    if not p_string_in_unicode:
        return False

    # Inclusive (low, high) code point bounds.
    emoji_ranges = (
        (ord(u"\U0001F300"), ord(u"\U0001FAF6")),  # 127744 .. 129782
        (126980, 127569),  # U+1F004 .. U+1F251
        (169, 174),  # U+00A9 .. U+00AE
        (8205, 12953),  # U+200D .. U+3299
    )
    return any(
        low <= ord(a_char) <= high
        for a_char in p_string_in_unicode
        for low, high in emoji_ranges
    )
def test_emoji_range_with(emoji_dict: dict):
    """Return the lookup entries that is_contains_emoji() fails to detect.

    Every key of ``emoji_dict`` is run through is_contains_emoji(); the
    result is the set of ``(key, value)`` items for which the check returned
    False. An empty set means the hard-coded ranges cover every key.

    :param emoji_dict: mapping of emoji string to short name
    :return: set of (emoji, short_name) tuples not covered by the ranges
    """
    # Detected items are always a subset of all items, so the symmetric
    # difference used previously is simply "items that were not detected".
    return {item for item in emoji_dict.items() if not is_contains_emoji(item[0])}
# Self-check: verify that the ranges used by is_contains_emoji() cover every
# emoji found in the test data file.
emoji_dict = load_emoji_lookup()
if emoji_dict is not None:
    # Entries with an empty key are not real emojis (presumably rows whose
    # emoji could not be extracted while parsing), so they are ignored.
    missing_emojis_list = [p_key for p_key in test_emoji_range_with(emoji_dict) if p_key[0] != ""]
    if missing_emojis_list:
        print("List of emojis which are not in the range: " + str(missing_emojis_list))
    else:
        # Only claim success when nothing is missing.
        print("Range values are correct and detects all emojis")
Copy link

msenol86 commented Jul 14, 2020

Works with Python 3. No extra packages required. Supports Emoji 14.0.

Copy link

tejasvi commented Jun 12, 2021

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment