Skip to content

Instantly share code, notes, and snippets.

@jtanx
Created July 21, 2019 02:38
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jtanx/bfa7489acc1af53f7937047908ca7704 to your computer and use it in GitHub Desktop.
Save jtanx/bfa7489acc1af53f7937047908ca7704 to your computer and use it in GitHub Desktop.
Combiners parsing for combiners.h
#!/usr/bin/env python3
import sys,os,re
# Maps a canonical combining class to the FF_UNICODE_* flag expression that is
# emitted for it. The class values are listed at
# http://www.unicode.org/reports/tr44/#Canonical_Combining_Class_Values
#
# NOTE: the keys are hex literals whose digits spell the class number as it
# appears in the data file (class 230 is the key 0x230). This matches run(),
# which parses the class field with base 16.
COMBIN = {
    0x000: '0',
    0x001: 'FF_UNICODE_Overstrike',

    # Marks that touch the base glyph.
    0x202: 'FF_UNICODE_Below|FF_UNICODE_Touching',
    0x214: 'FF_UNICODE_Above|FF_UNICODE_Touching',
    0x216: 'FF_UNICODE_Above|FF_UNICODE_Right|FF_UNICODE_Touching',

    # Marks positioned relative to the base glyph.
    0x228: 'FF_UNICODE_Above|FF_UNICODE_Left',
    0x220: 'FF_UNICODE_Below',
    0x230: 'FF_UNICODE_Above',
    0x232: 'FF_UNICODE_Above|FF_UNICODE_Right',

    # Marks flagged as joining two glyphs.
    0x233: 'FF_UNICODE_Below|FF_UNICODE_Joins2',
    0x234: 'FF_UNICODE_Above|FF_UNICODE_Joins2',

    # There is no dedicated flag for Iota_Subscript, so plain "below" is used.
    0x240: 'FF_UNICODE_Below',
}
def printit(count, uni, parsed):
    """Print one table entry followed by a comma.

    Every entry whose index ``count`` is a multiple of 16 also gets a
    trailing comment showing ``uni`` in upper-case hexadecimal.

    count  -- zero-based index of the entry within the table
    uni    -- code point the entry belongs to
    parsed -- value to emit for the entry
    """
    if count % 16:
        template = ' {0},'
    else:
        # Start of a 16-entry run: tag the line with its code point.
        template = ' {0},\t/* 0x{1:X} */'
    # str.format ignores the unused second argument for the short template.
    print(template.format(parsed, uni))
def run(fn):
    """Print the combining-class table entries for U+0300..U+036F.

    Reads ``fn`` as semicolon-separated records whose first field is a
    hexadecimal code point and whose fourth field is the canonical
    combining class, and prints one entry per code point of the range via
    ``printit``. Code points of the range that have no record, including
    any missing at the end of the range, are emitted as ``0`` so the table
    always covers the whole range without holes.

    fn -- path of the data file to read (UnicodeData.txt layout)

    Raises KeyError if a record inside the range carries a combining class
    that has no entry in ``COMBIN``.
    """
    lo, hi = 0x300, 0x36F
    # Entries printed so far; lo + count is the next code point that is due.
    count = 0

    with open(fn, encoding='utf-8') as fp:
        for line in fp:
            parts = line.strip().split(';')
            if len(parts) < 4:
                # Blank or truncated line: there is no class field to read.
                continue

            uni = int(parts[0], 16)
            if uni < lo:
                continue
            if uni > hi:
                # The gap filling below already relies on ascending code
                # point order, so nothing past ``hi`` can contribute.
                break

            # Zero-fill any code points skipped since the previous record.
            for off in range(lo + count, uni):
                printit(count, off, 0)
                count += 1

            # Base 16 is deliberate: the COMBIN keys are hex literals whose
            # digits spell the class number as written in the file.
            combining = int(parts[3], 16)
            printit(count, uni, COMBIN[combining])
            count += 1

    # Zero-fill whatever is still missing at the end of the range.
    for off in range(lo + count, hi + 1):
        printit(count, off, 0)
        count += 1
if __name__ == '__main__':
    # An optional first command-line argument overrides the input path. With
    # no argument the script reads UnicodeData.txt from the current
    # directory, exactly as before.
    run(sys.argv[1] if len(sys.argv) > 1 else 'UnicodeData.txt')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment