Created
September 29, 2021 06:40
-
-
Save nobrowser/4f4342cb95c7e4c186cba01425123871 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#! /usr/bin/env perl | |
# This program parses the keysym definitions from a X11 source distribution | |
# and one or more XCompose specifications files (XCompose files mentioned | |
# later on the command line will override settings from those mentioned | |
# earlier). It then produces the same bindings (more or less) in the format | |
# suitable for a personal DefaultKeyBinding.dict file. | |
use strict; | |
use warnings; | |
use autodie; | |
use feature qw(unicode_strings state); | |
use Getopt::Long qw(:config require_order no_bundling); | |
use Tree::Trie; | |
use File::Slurp qw(write_file); | |
$main::keysymdef = qq($ENV{'HOME'}/.local/lib/keysymdef.h); | |
$main::dumpsyms = 0; | |
$main::dumpevents = 0; | |
$main::multikey = 0xf70c; # F9 key as mapped by Apple | |
$main::output = ''; | |
# Note: I ignore the keysyms numbers proper and instead I grab the Unicode | |
# point from the comments later on the line. The two coincide *only* in | |
# the Latin-1 range, and the Unicode data is what MacOS in fact expects | |
# in keybindings. If others had success with grabbing the keysym numbers | |
# it was by sheer luck. | |
sub parse_keysymdef { | |
our $keysymdef; | |
open(my $fh, '<', $keysymdef); | |
my (%keysyms, $kw, $xk, $kn, $cs, $uc); | |
while (<$fh>) { | |
($kw, $xk, $kn, $cs, $uc) = split; | |
next unless defined $uc && | |
$kw eq '#define' && | |
$kn =~ m(^0x[0-9a-f]*$)i && | |
$cs eq '/*'; | |
next unless $xk =~ m(^XK_(\S+)$); my $name = $1; | |
next unless $uc =~ m(^U[+]([0-9a-f]{4})$)i; my $code = hex($1); | |
$keysyms{$name} = $code; | |
} | |
$fh->close(); | |
return \%keysyms; | |
} | |
sub dump_keysyms { | |
my ($keysyms) = @_; | |
while (my ($k, $v) = each %{$keysyms}) { | |
$v = '0x' . sprintf('%04x', $v); | |
print($k, ' ', $v, "\n"); | |
} | |
} | |
# The check for dead keys in fact does nothing, as all bindings | |
# which contain dead key events are eliminated by other conditions already. | |
# But I include it anyway to be safe. | |
sub decode_event { | |
my ($keysyms, $e) = @_; | |
return undef if $e =~ m(^dead_); | |
my $rawcode = ( | |
$e =~ m(^U([0-9a-f]{4})$)i ? hex($1) : undef | |
); | |
return ${$keysyms}{$e} // $rawcode; | |
} | |
# By the time this is called, Multi_key is remapped to the pseudo-Unicode | |
# of the desired compose keystroke. This happens in main(). | |
sub parse_xcompose { | |
my %eventmap; | |
my ($keysyms) = @_; | |
for my $xcompose (@ARGV) { | |
open(my $fh, '< :encoding(UTF-8)', $xcompose); | |
while (<$fh>) { | |
my @fields = split; next unless @fields; | |
next unless $fields[0] eq '<Multi_key>'; | |
my @events = ( ); | |
push(@events, $1), shift(@fields) | |
while @fields && $fields[0] =~ m(^<(.*)>$); | |
next unless $#events; | |
@events = map { decode_event($keysyms, $_) } @events; | |
next if grep { not defined($_) } @events; | |
next unless @fields && $fields[0] eq ':'; shift(@fields); | |
# Note the dot is NOT quantified on the next line. | |
# Some bindings in the X11 Compose file ARE multi-character | |
# but that is WEIRD | |
next unless @fields && $fields[0] =~ m(^"(.)"$); | |
my $result = ($1 eq q(\\\\) ? q(\\) : $1); | |
my $eventrep = join(' ', map { '0x' . sprintf('%04x', $_) } @events); | |
$eventmap{$eventrep} = $result; | |
} | |
$fh->close(); | |
} | |
return \%eventmap; | |
} | |
sub dump_eventmap { | |
my ($eventmap) = @_; | |
while (my ($eventrep, $result) = each %{$eventmap}) { | |
print($eventrep, ' : "', $result, '"', "\n"); | |
} | |
} | |
sub build_trie { | |
my ($eventmap) = @_; | |
my $eventtrie = Tree::Trie->new; | |
while (my ($eventrep, $result) = each %{$eventmap}) { | |
my $eventstr = join('', map { chr(hex($_)) } (split(' ', $eventrep))); | |
$eventtrie->add_data($eventstr => $result); | |
} | |
return $eventtrie; | |
} | |
sub wrap_result { | |
my ($result) = @_; | |
return ["insertText:", $result]; | |
} | |
sub tail { | |
my ($subject) = @_; | |
$subject =~ m(^.*(.)$) | |
or die(qq(Boom! Tail of an empty string\n)); | |
return $1; | |
} | |
sub build_multihash { | |
our ($multikey); | |
my ($eventtrie) = @_; | |
my %multihash = (); | |
my @que = (\%multihash, chr($multikey)); | |
QUE: | |
while (@que) { | |
my ($thishash, $prefix) = (shift(@que), shift(@que)); | |
my @suffixes = $eventtrie->lookup($prefix, 1); | |
die(qq(Error: ambiguous prefix ${prefix}\n)) | |
if ($#suffixes && grep { $_ eq '' } @suffixes); | |
my $label = tail($prefix); | |
if ($suffixes[0] eq '') { | |
my $result = $eventtrie->lookup_data($prefix); | |
$thishash->{$label} = wrap_result($result); | |
next QUE; | |
} | |
my $newhash = { }; | |
$thishash->{$label} = $newhash; | |
push(@que, $newhash, $prefix . $_) for @suffixes; | |
} | |
return \%multihash; | |
} | |
# I tried two CPAN packages for the task of translating complex data | |
# into the plist/dict format. Mac::PropertyList requires the inner parts | |
# of a data structure to be already translated before joining them | |
# on the outer level, which is a nonstarter. Data::PropertyList can | |
# translate multilevel structures in one go, but insists on dumping | |
# literal, unescaped string data. That may work (I have not checked) | |
# but looks way too scary with all those weird non-Latin characters | |
# in there. | |
# So I was left with implementing this myself. Luckily for this application | |
# the structure is very restricted (for example I know that arrayrefs | |
# only contain strings) so I can take shortcuts. | |
sub unicode_escape_char { | |
my ($c) = @_; | |
return "\\U" . uc (sprintf('%04x', ord($c))); | |
} | |
sub plist_escape_char { | |
my ($c) = @_; | |
return $c if ord($c) > 0x20 && ord($c) < 0x7f | |
&& $c !~ m([@~#"'^]) && $c ne '$' && $c ne '\\'; | |
return unicode_escape_char($c); | |
} | |
# The parentheses around map are necessary, | |
# otherwise the subsequent quote is taken as another datum for map | |
sub plist_show_string { | |
my ($s) = @_; | |
state %cached; | |
$cached{$s} = join('', | |
'"', | |
(map { plist_escape_char($_) } split('', $s)), | |
'"' | |
) unless defined $cached{$s}; | |
return $cached{$s}; | |
} | |
sub plist_show_arrayref { | |
my ($a) = @_; | |
return '(' . join(', ', map { plist_show_string($_) } @{$a}) . ')'; | |
} | |
sub plist_show_anyref { | |
my ($r, $l) = @_; | |
return plist_show_arrayref($r) if ref($r) =~ m(^ARRAY); | |
return plist_show_hashref($r, $l) if ref($r) =~ m(^HASH); | |
die(qq(Unexpected reference type\n")); | |
} | |
sub plist_show_binding { | |
my ($k, $v, $l) = @_; | |
return ( | |
plist_show_string($k) | |
. ' = ' . | |
plist_show_anyref($v, $l + 2) | |
. ';' | |
); | |
} | |
sub plist_show_hashref { | |
my ($h, $l) = @_; | |
my $sep = "\n" . (' ' x $l); | |
return ( | |
'{' . $sep . | |
join($sep, map { plist_show_binding($_, $h->{$_}, $l) } sort(keys(%{$h}))) | |
. "\n" . (' ' x ($l - 2)) . '}' | |
); | |
} | |
sub main { | |
our ($keysymdef, $dumpsyms, $dumpevents, $multikey, $output); | |
GetOptions( | |
'keysyms=s' => \$keysymdef, | |
'dumpsyms' => \$dumpsyms, | |
'dumpevents' => \$dumpevents, | |
'multikey=i' => \$multikey, | |
'output=s' => \$output | |
) or die(qq(Error in command line arguments\n)); | |
my $keysyms = parse_keysymdef(); | |
$keysyms->{'Multi_key'} = $multikey; | |
dump_keysyms($keysyms), return 1 if $dumpsyms; | |
die(qq(At least one input file must be specified\n)) unless @ARGV; | |
my $eventmap = parse_xcompose($keysyms); | |
dump_eventmap($eventmap), return 1 if $dumpevents; | |
my $eventtrie = build_trie($eventmap); | |
my $multihash = build_multihash($eventtrie); | |
print(plist_show_hashref($multihash, 2)), return 1 unless $output; | |
write_file($output, plist_show_hashref($multihash, 2)); | |
return 1; | |
} | |
main(); | |
__END__ |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment