Skip to content

Instantly share code, notes, and snippets.

@fulcrum6378
Last active July 3, 2023 19:28
Show Gist options
  • Save fulcrum6378/53e2d3390191073f607d6b32c892b842 to your computer and use it in GitHub Desktop.
Save fulcrum6378/53e2d3390191073f607d6b32c892b842 to your computer and use it in GitHub Desktop.
Lists every possible emoji using the two TXT files (emoji-sequences.txt and emoji-zwj-sequences.txt) available on the official Unicode website: https://unicode.org/Public/emoji/
import codecs
def uz(s: str, maxim: int) -> str:
    """Left-pad *s* with ``'0'`` characters until it is *maxim* long.

    Strings that are already *maxim* characters or longer are returned
    unchanged (nothing is ever truncated).

    Args:
        s: The text to pad, e.g. a hexadecimal code point such as ``'1F600'``.
        maxim: The minimum length of the returned string.

    Returns:
        *s* prefixed with as many zeros as needed to reach *maxim* characters.
    """
    # str.rjust pads in a single pass instead of rebuilding the string once
    # per missing character.
    return s.rjust(maxim, '0')
def ucToChar(unicode: str) -> str:
    """Convert a hexadecimal code point string into the character it names.

    Args:
        unicode: Hex digits of a code point without any prefix,
            e.g. ``'231A'`` or ``'1F600'``.

    Returns:
        The one-character string for that code point.

    Raises:
        UnicodeDecodeError: If *unicode* does not form a valid escape
            (non-hex characters, or a value beyond U+10FFFF).
    """
    # The 8-digit "\UXXXXXXXX" escape covers every code point, so zero-padding
    # to eight digits handles short and long inputs alike without a separate
    # 4-digit "\uXXXX" branch.
    escape = '\\U' + unicode.rjust(8, '0')
    return codecs.decode(bytes(escape, 'utf-8'), 'unicode_escape')
# Load every line of the two Unicode emoji data files into one list.
with open('emoji-sequences.txt', 'r', encoding='utf-8') as f:
    lines = f.read().split('\n')
with open('emoji-zwj-sequences.txt', 'r', encoding='utf-8') as f:
    lines.extend(f.read().split('\n'))

emojis: list = list()
for ln in lines:
    # Skip blank lines, full-line comments, and any line without a "# E"
    # marker (presumably the emoji-version trailer of a data line).
    if ln == '' or ln.startswith('#') or '# E' not in ln:
        continue

    # The first ';'-separated field holds the code point(s); drop its
    # trailing padding spaces.
    spl = ln.split(';')[0].rstrip(' ')

    if '..' not in spl and ' ' not in spl:
        # A single code point.
        try:
            emojis.append(ucToChar(spl))
        except UnicodeDecodeError as err:
            # Report the offending field while keeping the original cause.
            raise Exception("<" + spl + ">") from err  # ln
    elif ' ' in spl:
        # A sequence of space-separated code points forming one emoji.
        multi = [ucToChar(x) for x in spl.split(' ')]
        emojis.append(''.join(multi))
        if multi[-1] == '\ufe0f':  # Android compatibility (which adds 10 more kilobytes)
            # Also emit the sequence without its trailing U+FE0F.
            emojis.append(''.join(multi[:-1]))
    else:
        # An inclusive range "START..END" of code points.
        a, b = spl.split('..')
        # range() parses each bound once and simply yields nothing when
        # START > END, instead of looping until a decode error.
        for code in range(int(a, 16), int(b, 16) + 1):
            emojis.append(ucToChar(format(code, 'x')))

# Write all collected emojis to one file, separated by single spaces.
with open('emojis.txt', 'w', encoding='utf-8') as f:
    f.write(' '.join(emojis))
# For more info: https://www.unicode.org/reports/tr51/
# How-to: https://stackoverflow.com/questions/51585291/how-to-interpret-unicode-notation-in-python
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment