Last active
September 12, 2019 15:14
-
-
Save xyb/a72ac43e22d6f794c2462e17ce11604b to your computer and use it in GitHub Desktop.
A script that generates the list of East Asian wide characters used in https://github.com/robotframework/robotframework/issues/604. For more details, see https://github.com/robotframework/robotframework/blob/89a9b80bcbf697bbcc883b21fb8afea1331027d8/src/robot/utils/charwidth.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# -*- coding: utf-8 -*- | |
# Author: Xie Yanbo <xieyanbo@gmail.com> | |
# Date: 2012-03-21 | |
# see also: | |
# Unicode Standard Annex #11 | |
# East Asian Width | |
# http://unicode.org/reports/tr11/ | |
from unicodedata import east_asian_width as eaw | |
def get_wild_char(): | |
for i in xrange(0x10FFFF + 1): | |
try: | |
c = unichr(i) | |
except ValueError: | |
continue | |
if eaw(c) in 'WF': | |
# East Asian Fullwidth (F) | |
# East Asian Wide (W) | |
yield i | |
def get_ranges(code_points=None):
    """Collapse ascending code points into inclusive ``(first, last)`` runs.

    Consecutive values (each exactly one greater than the previous) are
    merged into a single pair; an isolated value becomes ``(v, v)``.

    :param code_points: iterable of integers in ascending order.  When
        omitted, the values produced by ``get_wild_char()`` are used.
    :returns: generator of ``(first, last)`` tuples, in input order.
    """
    if code_points is None:
        code_points = get_wild_char()
    range_start = None
    prev = None
    for i in code_points:
        if range_start is None:
            # Very first value opens the first run.
            range_start = i
        elif i != prev + 1:
            # Gap found: close the current run and immediately open a new
            # one at ``i``.  The run must stay open here, otherwise the
            # value that follows would overwrite ``range_start`` and ``i``
            # would be silently dropped.
            yield range_start, prev
            range_start = i
        prev = i
    if range_start is not None:
        # Flush the final run (absent only when the input was empty).
        yield range_start, prev
def format_py():
    """Print every range from ``get_ranges()`` as ``START END SIZE``.

    START and END are the inclusive bounds in uppercase hexadecimal and
    SIZE is the number of code points in the range; one range per line.
    """
    for b, e in get_ranges():
        range_size = e - b + 1
        # A single parenthesised argument prints identically under the
        # Python 2 print statement and the Python 3 print function.
        print('%X %X %s' % (b, e, range_size))
# Debugging aid: uncomment to print each range as "START END SIZE".
#format_py()
# Collect the ranges into a list once, at import time; print_source_code()
# reads this module-level name.
wild_chars = list(get_ranges())
def print_source_code(ranges=None):
    """Print Python source defining ``wild_chars`` and ``is_wild_char``.

    The table is written as a list literal of ``(first, last)`` pairs,
    indented by eight spaces and wrapped so that no line reaches 76
    characters, followed by the source of a lookup function.

    :param ranges: iterable of ``(first, last)`` pairs to emit.  When
        omitted, the module-level ``wild_chars`` list is used.
    """
    if ranges is None:
        ranges = wild_chars
    indent = ' ' * 8
    map_lines = []
    line = indent
    for b, e in ranges:
        item = '(%s, %s), ' % (b, e)
        new_line = line + item
        if len(new_line) < 76:
            line = new_line
        else:
            # Current line is full: flush it and start a new one with item.
            map_lines.append(line.rstrip())
            line = indent + item
    # ``line`` always holds at least the indent, so test for real content;
    # otherwise an empty table would emit a stray blank line.
    if line.strip():
        map_lines.append(line.rstrip())
    # Single parenthesised arguments keep the output identical under the
    # Python 2 print statement and the Python 3 print function.
    print('wild_chars = [')
    for line in map_lines:
        print(line)
    print(indent + ']')
    print('''
def is_wild_char(char):
    c = ord(char)
    for b, e in wild_chars:
        if c < b:
            break
        if b <= c <= e:
            return True
    return False
''')
# Called at module level, so the generated source is printed whenever this
# file is executed (or imported).
print_source_code()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Inclusive (first, last) code point ranges that is_wild_char() reports as
# "wild".  The pairs are in ascending order and do not overlap, which is
# what lets the lookup stop at the first range that starts past its target.
# NOTE(review): presumably produced by the accompanying generator script;
# several lower bounds look one code point late (e.g. 19969, where
# U+4E00 = 19968 would be expected) -- verify before relying on the table.
wild_chars = [
    (888, 889), (896, 899), (909, 909), (1316, 1328), (1368, 1368),
    (1416, 1416), (1420, 1424), (1481, 1487), (1516, 1519),
    (1526, 1535), (1541, 1541), (1565, 1565), (1631, 1631),
    (1867, 1868), (1971, 1983), (2044, 2304), (2363, 2363),
    (2383, 2383), (2390, 2391), (2420, 2426), (2436, 2436),
    (2446, 2446), (2450, 2450), (2481, 2481), (2484, 2485),
    (2491, 2491), (2502, 2502), (2506, 2506), (2512, 2518),
    (2521, 2523), (2532, 2533), (2556, 2560), (2571, 2574),
    (2578, 2578), (2609, 2609), (2615, 2615), (2619, 2619),
    (2627, 2630), (2634, 2634), (2639, 2640), (2643, 2648),
    (2655, 2661), (2679, 2688), (2702, 2702), (2729, 2729),
    (2740, 2740), (2747, 2747), (2762, 2762), (2767, 2767),
    (2770, 2783), (2789, 2789), (2802, 2816), (2829, 2830),
    (2834, 2834), (2865, 2865), (2874, 2875), (2886, 2886),
    (2890, 2890), (2895, 2901), (2905, 2907), (2916, 2917),
    (2931, 2945), (2955, 2957), (2966, 2968), (2973, 2973),
    (2977, 2978), (2982, 2983), (2988, 2989), (3003, 3005),
    (3012, 3013), (3022, 3023), (3026, 3030), (3033, 3045),
    (3068, 3072), (3085, 3085), (3113, 3113), (3130, 3132),
    (3145, 3145), (3151, 3156), (3162, 3167), (3173, 3173),
    (3185, 3191), (3201, 3201), (3213, 3213), (3241, 3241),
    (3258, 3259), (3273, 3273), (3279, 3284), (3288, 3293),
    (3300, 3301), (3315, 3329), (3341, 3341), (3369, 3369),
    (3387, 3388), (3401, 3401), (3407, 3414), (3417, 3423),
    (3429, 3429), (3447, 3448), (3457, 3457), (3479, 3481),
    (3516, 3516), (3519, 3519), (3528, 3529), (3532, 3534),
    (3543, 3543), (3553, 3569), (3574, 3584), (3644, 3646),
    (3677, 3712), (3717, 3718), (3723, 3724), (3727, 3731),
    (3744, 3744), (3750, 3750), (3753, 3753), (3770, 3770),
    (3775, 3775), (3783, 3783), (3791, 3791), (3803, 3803),
    (3807, 3839), (3949, 3952), (3981, 3983), (4029, 4029),
    (4053, 4095), (4251, 4253), (4295, 4303), (4350, 4447),
    (4516, 4519), (4603, 4607), (4686, 4687), (4697, 4697),
    (4703, 4703), (4750, 4751), (4790, 4791), (4801, 4801),
    (4807, 4807), (4881, 4881), (4887, 4887), (4956, 4958),
    (4990, 4991), (5019, 5023), (5110, 5120), (5752, 5759),
    (5790, 5791), (5874, 5887), (5909, 5919), (5944, 5951),
    (5973, 5983), (6001, 6001), (6005, 6015), (6111, 6111),
    (6123, 6127), (6139, 6143), (6170, 6175), (6265, 6271),
    (6316, 6399), (6430, 6431), (6445, 6447), (6461, 6463),
    (6466, 6467), (6511, 6511), (6518, 6527), (6571, 6575),
    (6603, 6607), (6619, 6621), (6685, 6685), (6689, 6911),
    (6989, 6991), (7038, 7039), (7084, 7085), (7099, 7167),
    (7225, 7226), (7243, 7244), (7297, 7423), (7656, 7677),
    (7959, 7959), (7967, 7967), (8007, 8007), (8015, 8015),
    (8026, 8026), (8030, 8030), (8063, 8063), (8133, 8133),
    (8149, 8149), (8176, 8177), (8191, 8191), (8294, 8297),
    (8307, 8307), (8341, 8351), (8375, 8399), (8434, 8447),
    (8529, 8530), (8586, 8591), (9002, 9002), (9193, 9215),
    (9256, 9279), (9292, 9311), (9887, 9887), (9918, 9919),
    (9925, 9984), (9994, 9995), (10060, 10060), (10067, 10069),
    (10079, 10080), (10134, 10135), (10175, 10175), (10189, 10191),
    (11086, 11087), (11094, 11263), (11359, 11359), (11390, 11391),
    (11500, 11512), (11559, 11567), (11623, 11630), (11633, 11647),
    (11672, 11679), (11695, 11695), (11711, 11711), (11727, 11727),
    (11743, 11743), (11826, 12350), (12353, 19903), (19969, 42239),
    (42541, 42559), (42593, 42593), (42613, 42619), (42649, 42751),
    (42894, 43002), (43053, 43071), (43129, 43135), (43206, 43213),
    (43227, 43263), (43349, 43358), (43361, 43519), (43576, 43583),
    (43599, 43599), (43611, 43611), (43617, 55295), (63745, 64255),
    (64264, 64274), (64281, 64284), (64317, 64317), (64322, 64322),
    (64434, 64466), (64833, 64847), (64913, 64913), (64969, 65007),
    (65023, 65023), (65041, 65055), (65064, 65135), (65277, 65278),
    (65281, 65376), (65472, 65473), (65481, 65481), (65489, 65489),
    (65497, 65497), (65502, 65511), (65520, 65528), (65535, 65535),
]
def is_wild_char(char):
    """Return True when ``char`` lies inside one of the ``wild_chars`` ranges.

    ``char`` must be a single character accepted by ``ord()``.  The table
    is scanned in order and the scan stops at the first range that starts
    beyond the character's code point.
    """
    code_point = ord(char)
    for first, last in wild_chars:
        if first > code_point:
            # Every remaining range starts even later; give up.
            return False
        # Here first <= code_point, so only the upper bound is left to check.
        if code_point <= last:
            return True
    return False
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment