Skip to content

Instantly share code, notes, and snippets.

@sethmlarson
Created January 29, 2024 17:09
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save sethmlarson/05d49e7cb98b6a833cd28481f39b0817 to your computer and use it in GitHub Desktop.
Save sethmlarson/05d49e7cb98b6a833cd28481f39b0817 to your computer and use it in GitHub Desktop.
Simple script for constructing small XAR files (License: CC0-1.0)
"""
Simple script for constructing small XAR files.
License: CC0-1.0
"""
import datetime
import gzip
import hashlib
import io
import struct
import xml.etree.ElementTree as ET
import zlib
def _build_xar(files):
    """Return the complete bytes of a XAR archive holding *files*.

    files: iterable of ``(path, data)`` pairs. ``path`` is a "/"-separated
    archive path and ``data`` is the file's uncompressed bytes.

    Layout of the result: 28-byte header, zlib-compressed XML table of
    contents (TOC), then the heap. The heap starts with the 20-byte SHA-1 of
    the compressed TOC, followed by each file's compressed payload.
    """
    xar = ET.Element("xar")
    toc = ET.SubElement(xar, "toc")
    ET.SubElement(toc, "creation-time").text = datetime.datetime.now().strftime(
        "%Y-%m-%dT%H:%M:%S"
    )
    checksum = ET.SubElement(toc, "checksum")
    checksum.attrib["style"] = "sha1"
    ET.SubElement(checksum, "offset").text = "0"
    ET.SubElement(checksum, "size").text = "20"

    sha1_size = hashlib.sha1().digest_size  # 20, matches <checksum><size>
    heap_chunks = []
    heap_offset = sha1_size  # payloads start right after the TOC checksum
    next_id = 0
    # Maps a directory's path components to its <file> element so that files
    # sharing a parent reuse one directory entry instead of duplicating it.
    directories = {}

    for path, data in files:
        # NOTE(review): this stores a gzip container under the
        # "application/x-gzip" encoding; some XAR readers may expect a raw
        # zlib stream for that style - confirm against the intended consumer.
        compressed = gzip.compress(data)

        *dir_parts, name = path.split("/")
        parent = toc
        for depth, part in enumerate(dir_parts, start=1):
            key = tuple(dir_parts[:depth])
            dir_el = directories.get(key)
            if dir_el is None:
                dir_el = ET.SubElement(parent, "file")
                dir_el.attrib["id"] = str(next_id)
                next_id += 1
                ET.SubElement(dir_el, "type").text = "directory"
                ET.SubElement(dir_el, "name").text = part
                directories[key] = dir_el
            parent = dir_el

        file_el = ET.SubElement(parent, "file")
        file_el.attrib["id"] = str(next_id)
        next_id += 1
        ET.SubElement(file_el, "type").text = "file"
        ET.SubElement(file_el, "name").text = name

        data_el = ET.SubElement(file_el, "data")
        ET.SubElement(data_el, "length").text = str(len(compressed))
        ET.SubElement(data_el, "offset").text = str(heap_offset)
        ET.SubElement(data_el, "size").text = str(len(data))
        ET.SubElement(data_el, "encoding").attrib["style"] = "application/x-gzip"
        extracted = ET.SubElement(data_el, "extracted-checksum")
        extracted.attrib["style"] = "sha1"
        extracted.text = hashlib.sha1(data).hexdigest()
        archived = ET.SubElement(data_el, "archived-checksum")
        archived.attrib["style"] = "sha1"
        archived.text = hashlib.sha1(compressed).hexdigest()

        heap_chunks.append(compressed)
        heap_offset += len(compressed)

    toc_bytes = ET.tostring(xar)
    toc_compressed = zlib.compress(toc_bytes)

    # Magic plus the big-endian header fields; the header-size field is
    # derived from the format so the two cannot drift apart.
    header_format = ">4sHHQQI"
    header = struct.pack(
        header_format,
        b"xar!",
        struct.calcsize(header_format),  # header length (28)
        1,  # format version
        len(toc_compressed),
        len(toc_bytes),
        1,  # checksum algorithm id, paired with the sha1 TOC checksum
    )

    # The checksum covers the *compressed* TOC and sits at heap offset 0.
    toc_digest = hashlib.sha1(toc_compressed).digest()
    return b"".join([header, toc_compressed, toc_digest, *heap_chunks])


def main(output_path="a.pkg", files=None):
    """Write a small XAR archive to *output_path*.

    output_path: destination file; overwritten if it exists.
    files: iterable of ``(path, data)`` pairs (``path`` "/"-separated,
        ``data`` bytes). Defaults to a small built-in sample set.

    The archive is assembled fully in memory before the file is opened, so a
    failure while building never leaves a partial file behind.
    """
    if files is None:
        files = [
            ("a", b"x" * 100),
            ("b", b"x" * 100),
            ("c", b"x" * 100),
            ("d/1/2/3/a", b"x" * 10),
        ]
    archive = _build_xar(files)
    with open(output_path, "wb") as f:
        f.write(archive)
# Build the archive only when run as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment