Skip to content

Instantly share code, notes, and snippets.

@dejurin
Created January 1, 2024 15:05
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save dejurin/27da1526ab9ea5fe291084997483622f to your computer and use it in GitHub Desktop.
Save dejurin/27da1526ab9ea5fe291084997483622f to your computer and use it in GitHub Desktop.
import os
import requests
import json
import re
from slugify import slugify
# Emoji release to process; main() passes it to get_test_file(), which
# inserts it into the unicode.org download URL.
EMOJI_VERSION = '15.1'
def main():
    """Download the emoji test data, convert it to JSON and report progress.

    Fetches ``emoji-test.txt`` for ``EMOJI_VERSION``, parses it, hands the
    result to ``write_files`` and finally prints the comment lines that were
    collected from the source file.
    """
    text = get_test_file(EMOJI_VERSION)

    print("Format text to json...")
    collected = parse_emoji_text(text)
    print(f"Processed emojis: {len(collected['full'])}")

    # write_files() only produces emoji.json, so only that file is announced
    # (the message used to mention an emoji-compact.json that is never
    # written).
    print("Write files: emoji.json\n")
    write_files(collected)

    print(collected['comments'])
def get_test_file(ver):
    """Download ``emoji-test.txt`` for the given emoji version.

    Args:
        ver: Emoji version string inserted into the URL path, e.g. ``'15.1'``.

    Returns:
        The response body decoded as text.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.RequestException: On connection failure or timeout.
    """
    url = f"https://unicode.org/Public/emoji/{ver}/emoji-test.txt"
    # Report the version that is actually requested, not the module constant.
    print(f"Fetch emoji-test.txt (v{ver})", end="", flush=True)
    # Without a timeout requests waits indefinitely on a stalled connection.
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    # The Unicode data files are UTF-8; set it explicitly so that names with
    # non-ASCII letters do not depend on the charset guessed from headers.
    response.encoding = 'utf-8'
    text = response.text
    print()
    return text
def parse_emoji_text(text):
    """Convert the raw contents of ``emoji-test.txt`` into structured data.

    The text is processed line by line:

    * ``# group: X`` and ``# subgroup: Y`` lines update the current
      group / subgroup (each new group is also announced on stdout);
    * any other line starting with ``#`` is appended to the comment text;
    * every remaining line is handed to ``parse_line``; a parsed entry is
      tagged with the current group information and stored, anything else
      (for example a blank line) becomes a paragraph break in the comments.

    Args:
        text: Full contents of the emoji test file.

    Returns:
        A dict with the keys ``'comments'`` (str), ``'full'`` (list of entry
        dicts) and ``'compact'`` (list, left empty by this function).
    """
    group_prefix = '# group: '
    subgroup_prefix = '# subgroup: '

    result = {'comments': '', 'full': [], 'compact': []}
    current_group = None
    current_subgroup = None

    for raw_line in text.strip().split('\n'):
        stripped = raw_line.strip()

        if stripped.startswith(group_prefix):
            current_group = stripped[len(group_prefix):]
            print(f" Processing {current_group}...")
            continue

        if stripped.startswith(subgroup_prefix):
            current_subgroup = stripped[len(subgroup_prefix):]
            continue

        if stripped.startswith('#'):
            result['comments'] = f"{result['comments']}{stripped}\n"
            continue

        entry = parse_line(stripped)
        if not entry:
            # Not a data line: collapse trailing whitespace in the comment
            # text and leave exactly one empty line as separator.
            result['comments'] = result['comments'].strip() + '\n\n'
            continue

        entry.update(
            category=f"{current_group} ({current_subgroup})",
            group=current_group,
            subgroup=current_subgroup,
        )
        result['full'].append(entry)

    return result
# Trailing comment of a data line: "<emoji> E<version> <name>".
_EMOJI_COMMENT_RE = re.compile(r'^.+?\s+E\d+\.\d+\s+(.+)$')


def parse_line(line):
    """Parse one data line of ``emoji-test.txt``.

    Data lines have the form::

        1F636 200D 1F32B FE0F ; fully-qualified # <emoji> E13.1 face in clouds

    Args:
        line: A single line of the file.

    Returns:
        A dict with ``codes`` (the hex code points separated by single
        spaces), ``name`` and ``slug``, or ``None`` when the line is not a
        data line.
    """
    # Split on the field separators rather than on whitespace: the code
    # point field itself contains spaces for multi-code-point sequences, so
    # a whitespace split would keep only the first code point.
    codes_field, semicolon, rest = line.partition(';')
    if not semicolon:
        return None

    # Only the first '#' separates the status from the comment; later ones
    # may belong to the emoji or its name (e.g. "keycap: #").
    _status, hash_mark, comment = rest.partition('#')
    if not hash_mark:
        return None

    match = _EMOJI_COMMENT_RE.match(comment.strip())
    if match is None:
        return None

    codes = ' '.join(codes_field.split())
    if not codes:
        return None

    name = match.group(1)
    return {'codes': codes, 'name': name, 'slug': slugify(name)}
def write_files(data):
    """Serialise the parsed emoji entries to ``../src/data/emoji.json``.

    The target path is resolved relative to this script via ``rel``.

    Args:
        data: Mapping whose ``'full'`` value is the JSON-serialisable list of
            entries to store.
    """
    target = rel('../src/data/emoji.json')
    with open(target, 'w', encoding='utf8') as out:
        # ensure_ascii=False keeps non-ASCII characters readable in the file.
        json.dump(data['full'], out, indent=2, ensure_ascii=False)
def rel(*args):
    """Resolve path components relative to the directory of this script.

    Args:
        *args: Path components joined onto the directory containing this
            file.

    Returns:
        The resulting absolute, normalised path.
    """
    script_dir = os.path.dirname(__file__)
    joined = os.path.join(script_dir, *args)
    return os.path.abspath(joined)
# Run the conversion only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
"""
[
{
"codes": "1F432",
"name": "dragon face",
"slug": "dragon-face",
"category": "Animals & Nature (animal-reptile)",
"group": "Animals & Nature",
"subgroup": "animal-reptile"
},
...
]
"""
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment