Skip to content

Instantly share code, notes, and snippets.

@alvations
Created March 9, 2020 15:18
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save alvations/a55679184ffd3f74bf4360079c4e93b0 to your computer and use it in GitHub Desktop.
Save alvations/a55679184ffd3f74bf4360079c4e93b0 to your computer and use it in GitHub Desktop.
import requests
from io import StringIO
# Download the Unicode 13.0 emoji-sequences data file and write every emoji it
# lists to emoji.txt, one emoji per line.
response = requests.get('https://unicode.org/Public/emoji/13.0/emoji-sequences.txt')
# Fail loudly on an HTTP error instead of trying to parse an error page as data.
response.raise_for_status()
# Pin the output encoding: the platform default may be a legacy code page that
# cannot represent emoji, which would raise UnicodeEncodeError on write.
with open('emoji.txt', 'w', encoding='utf-8') as fout:
    with StringIO(response.content.decode('utf8')) as fin:
        for line in fin:
            # Skip blank lines and comment lines.
            if not line.strip() or line.startswith('#'):
                continue
            # The first ';'-separated field holds the code point(s); a '..'
            # inside it denotes an inclusive range of code points.
            hexa = line.split(';')[0].split('..')
            if len(hexa) == 1:
                # A single entry may be a sequence of several space-separated
                # hexadecimal code points that together form one emoji.
                ch = ''.join(chr(int(h, 16)) for h in hexa[0].split())
                print(ch, file=fout)
            else:
                # Range entry: emit each code point in [start, end] as its
                # own single-character emoji.
                start, end = hexa
                for codepoint in range(int(start, 16), int(end, 16) + 1):
                    # Convert to the character; printing the bare int would
                    # write its decimal value instead of the emoji.
                    print(chr(codepoint), file=fout)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment