-
-
Save japhb/7b8a68da1b4b6c201e5334c1269b951a to your computer and use it in GitHub Desktop.
japhb's Unicode search/display script 'u'
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env raku | |
use sigpipe; | |
use Text::MiscUtils::Layout; | |
sub show-cp($cp) { | |
my $char = $cp.chr; | |
my $width = duospace-width($char); | |
my $spacer = ' ' x (2 - $width); | |
my $name = $cp.uniname; | |
my $age = $char.uniprop('Age'); | |
$age = -1 if $age eq 'Unassigned'; | |
my $script = $char.uniprop('Script'); | |
my @props = $char.uniprops.unique; | |
my $skip = $script eq 'Unknown' | |
|| 'Cc' ∈ @props || 'Cn' ∈ @props || 'Cs' ∈ @props; | |
my $glyph = $skip ?? '' !! $char; | |
# U+202D = LEFT-TO-RIGHT OVERRIDE (prevents column flipping for RTL characters) | |
my $bidi = $char.uniprop('Bidi_Class'); | |
my $ltro = $*IGNORE-RTL || $bidi eq 'L' ?? '' !! "\x[202D]"; | |
try printf "%5X $ltro%s$ltro%s %d %4.1f %-5s %-11s %s\n", | |
$cp, $glyph, $spacer, $width, $age, ~@props, $script, $name; | |
say "Unable to show codepoint U+{$cp.base(16)}:\n$!" if $!; | |
} | |
#| Describe a single unicode character | |
multi sub MAIN($character where *.chars == 1, Bool :ignore-rtl($*IGNORE-RTL)) { | |
show-cp($_) for $character.ords; | |
} | |
#| Describe a unicode codepoint in +HEX or U+HEX format | |
multi sub MAIN($codepoint where /^ [u|U]? '+' <:hex>+ $/, Bool :ignore-rtl($*IGNORE-RTL)) { | |
my $cp = $codepoint.subst(/[u|U]? '+'/, '0x').Numeric; | |
show-cp($cp); | |
} | |
#| Decode a string into individual codepoints | |
multi sub MAIN('decode', Str:D $string, Bool :ignore-rtl($*IGNORE-RTL)) { | |
show-cp($_) for $string.ords; | |
} | |
#| Search charnames | |
multi sub MAIN( | |
*@substrings, #= Primary search terms (auto-uppercased as substrings) | |
Str :$regex, #= Refinement regex (case insensitive) | |
Str :$prop, #= Required property | |
Str :$script, #= Required script | |
UInt :$first = 0, #= First codepoint to search | |
UInt :$last = 0x1FFFF, #= Last codepoint to search | |
Real :$age = Real, #= Exact Unicode version | |
Real :$oldest = $age || 0, #= Oldest Unicode version | |
Real :$newest = $age || +~Unicode.version, #= Newest Unicode version | |
Bool :ignore-rtl($*IGNORE-RTL), #= Ignore RTL (don't force LTR formatting) | |
) { | |
# See https://irclog.perlgeek.de/perl6/2016-12-12 | |
# for various conversations on this technique | |
my $sieve = $first..$last; | |
for @substrings -> $ss { | |
my $needle = $ss.uc; | |
$sieve .= grep: { uniname($_).contains($needle) } | |
} | |
$sieve .= grep({ $prop ∈ $_.chr.uniprops }) if $prop; | |
$sieve .= grep({ $script.fc eq $_.chr.uniprop('Script').fc }) if $script; | |
$sieve .= grep({ uniname($_).match(rx:i/<$regex>/) }) if $regex; | |
$sieve .= grep({ my $age = $_.chr.uniprop('Age'); | |
$age eq 'Unassigned' ?? !$oldest | |
!! $oldest <= $age.Rat <= $newest }); | |
show-cp($_) for @$sieve; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment