Last active
July 11, 2017 19:49
-
-
Save arp242/38e41efae4527a48f6efe3b2c05c52d5 to your computer and use it in GitHub Desktop.
Generate Unicode ranges
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/sh
#
# Print the code points of all Unicode letters (general category starting
# with "L") as a single line of concatenated ranges, each formatted as
# \uXXXX-\uXXXX in lowercase hex.
#
# Usage: <this script> [path/to/UnicodeData.txt]   (default: ./UnicodeData.txt)
#
# http://www.unicode.org/reports/tr44/#UnicodeData.txt
# http://www.unicode.org/Public/UNIDATA/UnicodeData.txt

# unicode_letter_ranges FILE
#
# Arguments: $1 - path to a file in UnicodeData.txt format (';'-separated;
#                 field 1 = hex code point, field 2 = name,
#                 field 3 = general category).
# Outputs:   one line on stdout with every run of consecutive letter code
#            points as \uSTART-\uEND (an isolated code point is printed as
#            a range whose start equals its end). The line is empty when no
#            letters are found.
# Returns:   0 on success, 1 if awk could not process FILE.
#
# Note: plain sh has no `local`, so the working variables below are global,
# as they were when this was a flat script.
unicode_letter_ranges() {
  data_file=$1
  total=""
  prev_dec=
  range_start_dec=

  # Match general categories Lu, Ll, Lt, Lm, Lo: field 3 starts with "L".
  # awk strings are 1-based, so the first character is substr(s, 1, 1);
  # a start index of 0 yields an empty string on gawk/mawk.
  #
  # Large blocks (CJK ideographs, Hangul syllables, ...) are listed only as
  # a "<Name, First>" / "<Name, Last>" pair that stands for every code point
  # in between. The "Last" entry is prefixed with "+" so the loop below can
  # extend the current range up to it instead of treating it as a gap.
  codes=$(awk -F ';' '
    substr($3, 1, 1) == "L" {
      if ($2 ~ /, Last>$/) print "+" $1
      else print $1
    }' "$data_file") || return 1

  # Word splitting of $codes is intentional: every entry is a bare hex
  # number, optionally prefixed with "+".
  for entry in $codes; do
    case $entry in
      +*) closes_pair=1 ;;
      *)  closes_pair=0 ;;
    esac
    line_hex=${entry#+}
    line_dec=$(( 0x$line_hex ))

    # Special case for first iteration.
    if [ -z "$range_start_dec" ]; then
      range_start_dec=$line_dec
      prev_dec=$line_dec
      continue
    fi

    # There's a gap, append to total. Set new start.
    if [ "$closes_pair" -eq 0 ] && [ "$(( line_dec - 1 ))" -ne "$prev_dec" ]; then
      total="$total$(printf '\\u%04x-\\u%04x' "$range_start_dec" "$prev_dec")"
      range_start_dec=$line_dec
    fi
    prev_dec=$line_dec
  done

  # Flush the range that was still open when the input ended; without this
  # the last run of letters would be lost.
  if [ -n "$range_start_dec" ]; then
    total="$total$(printf '\\u%04x-\\u%04x' "$range_start_dec" "$prev_dec")"
  fi

  # printf rather than echo: the result is full of backslashes, which some
  # sh implementations' echo would try to interpret as escapes.
  # NOTE: %04x is a minimum width, so code points above U+FFFF are printed
  # with five or six hex digits after \u.
  printf '%s\n' "$total"
}

unicode_letter_ranges "${1:-UnicodeData.txt}"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment