Skip to content

Instantly share code, notes, and snippets.

@paiv
Created February 26, 2023 10:53
Show Gist options
  • Save paiv/969fad62f9a3803607e2099fce4f9cd5 to your computer and use it in GitHub Desktop.
Save paiv/969fad62f9a3803607e2099fce4f9cd5 to your computer and use it in GitHub Desktop.
AppleKeyboardLayouts.bundle structure analysis
#!/usr/bin/env python
import string
import struct
import subprocess
from pathlib import Path
# https://developer.apple.com/documentation/coreservices/uckeyboardlayout
# /Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX.sdk/System/Library/Frameworks/CoreServices.framework/Versions/A/Frameworks/CarbonCore.framework/Versions/A/Headers/UnicodeUtilities.h
# Location of the system keyboard-layouts bundle this script inspects by default.
_DefaultBundle = Path('/System/Library/Keyboard Layouts/AppleKeyboardLayouts.bundle')

# Default input file: the last '*.dat' resource yielded by the glob, or None
# when the bundle directory has no such file (or does not exist).
_DefaultInput = None
for fn in _DefaultBundle.joinpath('Contents', 'Resources').glob('*.dat'):
    _DefaultInput = fn
class Profiler:
    """Record which half-open byte ranges ``[start, stop)`` have been visited.

    Ranges are stored as a doubly linked list of ``Node`` objects kept in
    ascending order; ``blanks_iter`` reports the gaps between them.
    """

    def __init__(self):
        # Head of the linked list (lowest range), or None while empty.
        self.root = None

    class Node:
        """One recorded range, linked to its neighbours."""

        def __init__(self, start, stop, prev, next):
            self.start = start
            self.stop = stop
            self.prev = prev
            self.next = next
            # Splice the new node in between the given neighbours.
            if prev:
                prev.next = self
            if next:
                next.prev = self

    def touch(self, start, stop):
        """Record the range ``[start, stop)`` and return its node.

        Returns None for an empty range (``start == stop``).  Touching a
        range that is already recorded with identical bounds returns the
        existing node.

        Raises:
            Exception: with ``(start, stop)`` as its argument when the range
                does not fit into a gap between the recorded ranges.
        """
        if start == stop:
            return None

        head = self.root
        if head is None:
            self.root = self.Node(start, stop, None, None)
            return self.root

        # A range lying entirely before the first node becomes the new head.
        # The scan below can only insert *after* an existing node, so without
        # this case such a (valid, non-overlapping) range would be rejected.
        if start < stop <= head.start:
            self.root = self.Node(start, stop, None, head)
            return self.root

        cur = head
        while cur:
            if cur.start == start and cur.stop == stop:
                return cur
            fits_after = cur.stop <= start
            fits_before_next = cur.next is None or cur.next.start >= stop
            if fits_after and fits_before_next:
                return self.Node(start, stop, cur, cur.next)
            cur = cur.next

        # No gap can hold the range.
        raise Exception((start, stop))

    def dump_tree(self):
        """Print every recorded ``(start, stop)`` pair in list order."""
        node = self.root
        while node:
            print((node.start, node.stop))
            node = node.next

    def blanks_iter(self):
        """Yield ``(gap_start, gap_stop, prev_node)`` for each unvisited gap.

        Gaps are measured from offset 0 up to the last recorded range;
        ``prev_node`` is the node preceding the gap (None before the head).
        """
        node = self.root
        cursor = 0
        while node:
            if cursor < node.start:
                yield (cursor, node.start, node.prev)
            cursor = node.stop
            node = node.next
class DatReader:
    """Reader for an AppleKeyboardLayouts ``.dat`` blob.

    Iterating the reader yields ``DatReader.Entry`` objects.  Every byte
    range that is parsed is recorded in ``self.profiler`` so that callers
    can later inspect the parts of the file that were never referenced.
    """

    def __init__(self, data):
        """Load the blob and validate its magic number.

        Args:
            data: the raw ``bytes``, or a ``str``/``Path`` naming a file
                whose contents are read.

        Raises:
            Exception: if ``data`` has an unsupported type, or the first
                little-endian uint32 is not ``0xabcdef02``.
        """
        if isinstance(data, bytes):
            self.data = data
        elif isinstance(data, (str, Path)):
            self.data = Path(data).read_bytes()
        else:
            raise Exception(repr(data))
        self.profiler = Profiler()
        self.profiler.touch(0, 4)
        magic, = struct.unpack('<I', self.data[0:4])
        if magic != 0xabcdef02:
            raise Exception(f'Unexpected file format, magic 0x{magic:08x}')

    class Entry:
        """Plain container for the fields decoded from one record."""

        def __init__(self, pid, name, flags, locale, flags2, layout, icon, mods, imods, unkn):
            self.pid = pid
            self.name = name
            self.flags = flags
            self.flags2 = flags2
            self.locale = locale
            self.layout = layout
            self.icon = icon
            self.mods = mods
            self.imods = imods
            self.unkn = unkn

        def __str__(self):
            return f'({self.pid}) {self.name!r} [{self.flags:b}] {self.locale!r} [{self.flags2:b}]'

    class EntriesIterator:
        """Iterator decoding ``total`` entries from ``data``.

        Args:
            data: the whole blob.
            total: number of entries to produce.
            offset: file offset of the table of 64-byte records.
            entries: file offset of a table of 8-byte items, each holding a
                name-string offset and a record index.
            somes: file offset of a second table of 8-byte items; its values
                are unpacked but not otherwise used.

        ``profiler`` may be assigned after construction; while it is None no
        range tracking takes place.
        """

        def __init__(self, data, total, offset, entries, somes):
            self.data = data
            self.total = total
            self.offset = offset
            self.entries = entries
            self.somes = somes
            self.pos = 0
            self.profiler = None

        def __iter__(self):
            # Iterators must be iterable themselves so that list(), iter()
            # and itertools helpers accept this object.
            return self

        def __next__(self):
            if self.pos >= self.total:
                raise StopIteration()
            i = self.entries + self.pos * 8
            j = self.somes + self.pos * 8
            self.pos += 1
            name_ptr, index = struct.unpack('<II', self.data[i:i+8])
            # Unpacked only to validate that the second table is present.
            _ = struct.unpack('<II', self.data[j:j+8])
            name = self._reads(name_ptr)
            self._touch(i, i + 8)
            self._touch(j, j + 8)

            rec = self.offset + index * 64
            self._touch(rec, rec + 64)
            (_z, sp, eid, flags, cp, f2, ln, lp, xn, xp,
             wn, wp, mn, mp, pn, pp) = struct.unpack('<IIi13I', self.data[rec:rec+64])
            # The record carries its own string pointer; it is read only so
            # that the profiler marks the string as visited.
            self._reads(sp)
            locale = self._reads(cp)
            layout = self.data[lp:lp+ln]
            xxs = self.data[xp:xp+xn*2]
            icon = self.data[wp:wp+wn]
            mods = self.data[mp:mp+mn]
            imods = self.data[pp:pp+pn]
            self._touch(lp, lp + ln)
            self._touch(xp, xp + xn * 2)
            self._touch(wp, wp + wn)
            self._touch(mp, mp + mn)
            self._touch(pp, pp + pn)
            return DatReader.Entry(eid, name, flags, locale, f2, layout, icon, mods, imods, xxs)

        def _touch(self, start, stop):
            """Forward a visited range to the profiler, if one is attached."""
            if self.profiler is not None:
                self.profiler.touch(start, stop)

        def _reads(self, offset):
            """Read the NUL-terminated string at ``offset``.

            Returns None for a zero offset, an empty string, or when no NUL
            is found within 260 bytes.  The result is a ``str`` when every
            byte is in 32..126, otherwise the raw ``bytes``.
            """
            if not offset:
                return None
            n = self.data[offset:offset+260].find(b'\x00')
            if n <= 0:
                return None
            self._touch(offset, offset + n + 1)
            s = self.data[offset:offset+n]
            if all(c in range(32, 127) for c in s):
                s = s.decode('ascii')
            return s

    def __iter__(self):
        """Return a fresh ``EntriesIterator`` built from the header at bytes 4..24."""
        self.profiler.touch(4, 24)
        num, off, _, hs, xs = struct.unpack('<5I', self.data[4:24])
        it = self.EntriesIterator(self.data, num, off, hs, xs)
        it.profiler = self.profiler
        return it
class Dumper:
    """Write the parts of each entry into its own directory below a target."""

    def __init__(self, target):
        """Remember the target directory (None disables dumping)."""
        self.root = None if target is None else Path(target)
        # Translation table for directory names: ASCII letters, digits,
        # '-' and '_' are kept; every other code point below 256 becomes '_'.
        keep = frozenset(string.ascii_letters + string.digits + '-_')
        underscore = ord('_')
        self._nametr = {
            code: (code if chr(code) in keep else underscore)
            for code in range(256)
        }

    def dump(self, entry):
        """Write ``entry`` to ``<root>/<name>/``; do nothing without a root.

        ``info.txt`` is always written; the binary parts are written only
        when they are non-empty.
        """
        if not self.root:
            return
        folder = self.root / self._fname(entry)
        folder.mkdir(parents=True, exist_ok=True)

        info = ''.join((
            f'pid:{entry.pid}\n',
            f'name:{entry.name}\n',
            f'locale:{entry.locale}\n',
            f'flags1:{entry.flags:b}\n',
            f'flags2:{entry.flags2:b}\n',
        ))
        (folder / 'info.txt').write_text(info)

        if entry.icon:
            (folder / 'icon.icns').write_bytes(entry.icon)
        if entry.layout:
            (folder / 'layout.bin').write_bytes(entry.layout)
        if entry.mods:
            self._write_plst(entry.mods, folder / 'mods.plist')
        if entry.imods:
            self._write_plst(entry.imods, folder / 'imods.plist')
        if entry.unkn:
            (folder / 'extra.bin').write_bytes(entry.unkn)

    def _fname(self, entry):
        """Return the directory name for ``entry``.

        A non-empty ``str`` name is sanitised through the translation table;
        anything else falls back to a name derived from ``entry.pid``.
        """
        label = entry.name
        if label and isinstance(label, str):
            return label.translate(self._nametr)
        return f'pid-({entry.pid})'

    def _write_plst(self, data, target):
        """Pipe ``data`` through ``plutil`` to write an XML plist at ``target``."""
        command = ['plutil', '-convert', 'xml1', '-o', str(target), '-']
        subprocess.run(command, input=data)
def main(input, output, verbose):
    """Dump every entry of a DAT file and optionally report unvisited bytes.

    Args:
        input: value handed to ``DatReader`` (bytes, or a path to read).
        output: value handed to ``Dumper``; None disables writing files.
        verbose: when true, print each entry and afterwards a hex dump of
            every non-zero gap the reader's profiler never visited.
    """
    reader = DatReader(input)
    writer = Dumper(output)

    for entry in reader:
        if verbose:
            print(entry)
        writer.dump(entry)

    if not verbose:
        return

    def refs(data, addr):
        # Yield every offset at which ``addr`` occurs as a little-endian uint32.
        needle = struct.pack('<I', addr)
        pos = data.find(needle)
        while pos >= 0:
            yield pos
            pos = data.find(needle, pos + 1)

    for a, b, prev in reader.profiler.blanks_iter():
        # Gaps consisting only of zero bytes are not reported.
        if not any(reader.data[a:b]):
            continue
        print()
        print(hexdump(reader.data, a, b))
        if prev:
            print('refs:', list(refs(reader.data, a)))
def _make_tr_ascii():
    """Build a 256-entry byte translation table for printable output.

    Bytes 32..126 map to themselves; every other byte maps to ``.``.
    """
    dot = ord('.')
    printable = bytes(code if 32 <= code < 127 else dot for code in range(256))
    return bytes.maketrans(bytes(range(256)), printable)


# Shared table used when rendering the character column of a hex dump.
_trascii = _make_tr_ascii()
def hexdump(data, start=None, stop=None, cols=16, chars=True):
    """Format ``data[start:stop]`` as hex-dump text.

    The bounds follow ``range`` conventions: with no bounds the whole buffer
    is dumped, and a single positional bound is taken as ``stop``.

    Args:
        data: bytes to dump.
        start: first offset (defaults to 0).
        stop: offset one past the last byte (defaults to ``len(data)``).
        cols: number of bytes per output line.
        chars: when true, append a column with printable ASCII characters
            (other bytes shown as ``.``).

    Returns:
        The dump as a single string, lines separated by newlines and without
        a trailing newline; each line starts with the 6-digit hex offset.
    """
    if stop is None:
        if start is None:
            start, stop = 0, len(data)
        else:
            # Single bound given: treat it as the stop, like range(n).
            start, stop = 0, start
    elif start is None:
        # An explicit stop with the default start would otherwise reach
        # range(None, ...) and raise TypeError.
        start = 0

    lines = []
    for off in range(start, stop, cols):
        raw = data[off:min(off + cols, stop)]
        hexpart = ' '.join(f'{x:02x}' for x in raw)
        line = f'{off:06x}: {hexpart}'
        if chars:
            # Pad short rows so the character column stays aligned.
            line += ' ' * (cols * 3 - 1 - len(hexpart))
            line += ' ' + raw.translate(_trascii).decode()
        lines.append(line)
    return '\n'.join(lines)
if __name__ == '__main__':
    import argparse

    parser = argparse.ArgumentParser()
    # The positional is optional only when a default DAT file was found.
    # When it is required, nargs must be None rather than 1: nargs=1 makes
    # argparse store a one-element list, which DatReader rejects.
    parser.add_argument(
        'input',
        metavar='DAT',
        nargs=('?' if _DefaultInput else None),
        default=_DefaultInput,
        help='input DAT file' + (f' ({_DefaultInput.name})' if _DefaultInput else ''),
    )
    parser.add_argument('-o', '--output', help='target directory for dumped data')
    parser.add_argument('-v', '--verbose', action='store_true', help='verbose output')
    args = parser.parse_args()

    main(
        input=args.input,
        output=args.output,
        verbose=args.verbose,
    )
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment