Skip to content

Instantly share code, notes, and snippets.

@treyhunner
Created January 21, 2022 23:07
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save treyhunner/c722dffdb7b78ed5475d8865a64bb260 to your computer and use it in GitHub Desktop.
Save treyhunner/c722dffdb7b78ed5475d8865a64bb260 to your computer and use it in GitHub Desktop.
Generates a JSON file of all Unicode characters, their names, and their aliases. This is a hack, but it works.
"""
Tool that names each Unicode character based on its name or its aliases.

Must be run from the CPython repo root directory:
https://github.com/python/cpython/tree/3.10

Relies on Tools.unicode being an importable path due to implicit namespace packages.
"""
from contextlib import redirect_stderr, redirect_stdout
from io import StringIO
import json
from pathlib import Path
from unicodedata import unidata_version
from Tools.unicode.makeunicodedata import UnicodeData, makeunicodename
def get_characters():
    """Return a mapping of each Unicode character to its name and aliases.

    Each key is a one-character string and each value is a dict of the form
    ``{"name": <str>, "aliases": [<str>, ...]}``.

    The table is cached in ``unicode_data.json`` next to this module.  If
    that file exists, its parsed JSON content is returned as-is.  Otherwise
    the table is built from ``UnicodeData(unidata_version)`` (imported from
    CPython's ``Tools.unicode.makeunicodedata``), written to the cache file,
    and returned.
    """
    unicode_data_path = Path(__file__).parent / "unicode_data.json"
    if unicode_data_path.exists():
        # If unicode_data.json path exists, load data from there.
        # json.dump() below uses its default ensure_ascii=True, so the file
        # holds only ASCII; opening it as UTF-8 is explicit and independent
        # of the platform's locale encoding.
        with unicode_data_path.open(mode="rt", encoding="utf-8") as f:
            return json.load(f)

    # Use CPython's makeunicodedata tool which downloads needed data.
    # Anything the tool prints to stdout/stderr is captured and discarded.
    with redirect_stdout(StringIO()), redirect_stderr(StringIO()):
        unicode = UnicodeData(unidata_version)
        makeunicodename(unicode, 1)

    # Record code points are hexadecimal strings; falsy table entries are
    # skipped.
    characters = {
        chr(int(r.codepoint, 16)): {"name": r.name, "aliases": []}
        for r in unicode.table
        if r
    }
    # Aliases are (alias, integer code point) pairs, appended in the order
    # they are listed.
    for alias, codepoint in unicode.aliases:
        characters[chr(codepoint)]["aliases"].append(alias)

    # Cache the result so later calls can skip the generation step.
    with unicode_data_path.open(mode="wt", encoding="utf-8") as f:
        json.dump(characters, f)
    return characters
# Module-level lookup table, built once when this module is imported:
# maps each character to a dict with its "name" and its list of "aliases".
# Importing the module therefore either reads the unicode_data.json cache
# or runs the generation step in get_characters().
characters = get_characters()
def name(character):
    r"""Like unicodedata.name, but works for unnamed characters like \n.

    Looks *character* up in the module-level ``characters`` table.  When the
    recorded name starts with "<" (a placeholder rather than a real name),
    the first recorded alias is returned instead; otherwise the recorded
    name is returned unchanged.

    Raises:
        KeyError: if *character* is not a key of ``characters``.
        IndexError: if the recorded name is a placeholder and the record
            has no aliases.
    """
    record = characters[character]
    if record["name"].startswith("<"):  # If no name, use the first alias
        return record["aliases"][0]
    return record["name"]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment