Skip to content

Instantly share code, notes, and snippets.

@lethalbit
Created March 10, 2018 02:00
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save lethalbit/979473908c9ed1c990fe5ad27fd5bade to your computer and use it in GitHub Desktop.
Save lethalbit/979473908c9ed1c990fe5ad27fd5bade to your computer and use it in GitHub Desktop.
#!/bin/python3
from os import path
from urllib.request import urlretrieve
# General categories that together form the Unicode "Letter" (L*) group.
classes = ['Lu', 'Ll', 'Lt', 'Lm', 'Lo']

UCD_FILE = 'UnicodeData.txt'
UCD_URL = 'https://www.unicode.org/Public/UNIDATA/UnicodeData.txt'


def letter_spans(lines):
    """Yield ``(first, last)`` inclusive code point spans for letter entries.

    *lines* is an iterable of UnicodeData.txt records (semicolon-separated,
    code point in field 0, name in field 1, general category in field 2).
    Only records whose category is listed in ``classes`` are reported.

    A pair of records named ``<..., First>`` / ``<..., Last>`` is reported as
    one span covering everything in between; any other record is reported as
    a single-code-point span. Lines with fewer than three fields are skipped.
    A ``First`` record that is not followed by a ``Last`` record is dropped.
    """
    pending_first = None  # code point of a "<..., First>" line awaiting its "Last"
    for line in lines:
        fields = line.rstrip('\n').split(';')
        if len(fields) < 3:
            continue
        code_point = int(fields[0], 16)
        name, category = fields[1], fields[2]

        if name.endswith(', First>'):
            pending_first = code_point
            continue

        first = code_point
        if pending_first is not None and name.endswith(', Last>'):
            first = pending_first
        pending_first = None

        if category in classes:
            yield first, code_point


def merge_spans(spans):
    """Coalesce directly adjacent ``(first, last)`` spans into maximal runs.

    *spans* must be in ascending code point order (the order used by
    UnicodeData.txt). The final run is emitted once the input is exhausted.
    """
    run_start = run_end = None
    for first, last in spans:
        if run_end is not None and first == run_end + 1:
            run_end = last  # contiguous with the current run: extend it
            continue
        if run_end is not None:
            yield run_start, run_end
        run_start, run_end = first, last
    if run_end is not None:
        # Flush the trailing run; without this the last range is lost.
        yield run_start, run_end


def format_span(first, last):
    """Render a span as ``U+XXXXX`` or ``U+XXXXX...U+YYYYY`` (5+ hex digits)."""
    if first == last:
        return 'U+{}'.format(format(first, '05X'))
    return 'U+{}...U+{}'.format(format(first, '05X'), format(last, '05X'))


def letter_ranges(lines):
    """Yield the formatted, merged letter ranges found in UnicodeData *lines*."""
    for first, last in merge_spans(letter_spans(lines)):
        yield format_span(first, last)


def main():
    """Download UnicodeData.txt if it is not present, then print letter ranges."""
    if not path.isfile(UCD_FILE):
        urlretrieve(UCD_URL, UCD_FILE)
    with open(UCD_FILE, encoding='utf-8') as ud:
        for text in letter_ranges(ud):
            print(text)


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment