Skip to content

Instantly share code, notes, and snippets.

@neuschaefer
Created October 6, 2019 19:42
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save neuschaefer/bd3c34e3440c29611c7956d53b695f6f to your computer and use it in GitHub Desktop.
Save neuschaefer/bd3c34e3440c29611c7956d53b695f6f to your computer and use it in GitHub Desktop.
Unicode codepoint lister
#!/usr/bin/python3
import sys
# Default path of the UnicodeData.txt file; used as Table's default data source.
DATAFILE='/usr/share/unicode/UnicodeData.txt'
class Entry:
    """One record (line) of a UnicodeData.txt-style file.

    Attributes are taken from the semicolon-separated fields of the line:
    ``codepoint`` from field 0 (parsed as hexadecimal), ``name`` from
    field 1 and ``description`` from field 10.
    """

    def __init__(self, line):
        """Parse a single semicolon-separated record.

        Args:
            line: one line of the data file, with or without its line
                terminator.

        Raises:
            ValueError: if field 0 is not a hexadecimal number.
            IndexError: if the line has fewer than 11 fields.
        """
        # Drop the line terminator so it can never end up inside a field.
        fields = line.rstrip('\r\n').split(';')
        self.codepoint = int(fields[0], 16)
        self.name = fields[1]
        self.description = fields[10]

    def get_description(self):
        """Return ``description`` if non-empty, else ``name``, else None."""
        # `or None` keeps the "nothing to report" result as None instead of
        # leaking an empty string when both fields are empty.
        return self.description or self.name or None
class Table:
    """Lookup table from integer code point to Entry, loaded from a file."""

    def __init__(self, f=DATAFILE):
        """Load every record of the file *f* into ``self.map``.

        Args:
            f: path of the data file to read; defaults to DATAFILE.
        """
        self.map = {}
        # The context manager closes the file deterministically, and
        # iterating the handle avoids holding every line in memory at once.
        with open(f) as data:
            for line in data:
                # A blank line carries no record and would make Entry fail.
                if not line.strip():
                    continue
                e = Entry(line)
                self.map[e.codepoint] = e
        # NOTE(review): only code points that have their own line in the
        # file are mapped. If the data file describes some blocks with
        # First/Last marker lines (as UnicodeData.txt is documented to do),
        # code points inside such a range resolve to '(unknown)' -- confirm
        # whether range support is wanted.

    def get_description(self, codepoint):
        """Return the description for *codepoint*, or '(unknown)'.

        Args:
            codepoint: an integer code point, or a one-character string,
                which is converted with ord().

        Returns:
            The matching entry's get_description() result, or the string
            '(unknown)' when the code point is not in the table.
        """
        if isinstance(codepoint, str):
            codepoint = ord(codepoint)
        entry = self.map.get(codepoint)
        if entry is None:
            return '(unknown)'
        return entry.get_description()
# Build the lookup table once, then print one tab-separated line per
# character of every command-line argument: the code point as U+XXXX, the
# character itself, and its description from the table.
table = Table()
for arg in sys.argv[1:]:
    for char in arg:
        print("U+%04X\t%s\t%s" % (ord(char), char, table.get_description(char)))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment