Skip to content

Instantly share code, notes, and snippets.

@staticfloat
Last active June 20, 2023 19:44
Show Gist options
  • Save staticfloat/3346ce5afba1ec3bc46e869b755782af to your computer and use it in GitHub Desktop.
Save staticfloat/3346ce5afba1ec3bc46e869b755782af to your computer and use it in GitHub Desktop.
Cache Scope - inspector for .ji cache rejections
using Pkg
using Base: PkgId, find_all_in_cache_path, parse_cache_header, isvalid_cache_header
using AbstractTrees
# Decomposed build identifier of a `.ji` cache file.
#
# build_id is composed of:
# - jl_module_build_id() in `src/module.c`, which is a 128-bit value
# split into two 64-bit chunks (`hi`, `lo`) which are initialized to (`0xffffffffffffffff`, `jl_hrtime()`)
# - The hi chunk gets the checksum written out to it in `src/staticdata.c`, it is itself split
# into two pieces, (`0xfafbfcfd`, `jl_crc32c(cache_data)`)
struct JICacheBuildId
# Upper 32 bits of the checksum word; `try_parse_cache_header` rejects any value other than `0xfafbfcfd`
magic::UInt32
# Lower 32 bits of the checksum word (the `jl_crc32c(cache_data)` piece described above)
crc::UInt32
# Build id reported for the module by `parse_cache_header()`
# NOTE(review): presumably the `jl_hrtime()`-initialized `lo` chunk described above — confirm
timestamp::UInt64
end
# Split a 64-bit checksum word into its (upper, lower) 32-bit halves.
function split_checksum(checksum::UInt64)
    upper = (checksum >> 32) % UInt32
    lower = checksum % UInt32
    return (upper, lower)
end
# Parsed contents of a single `.ji` file header, as produced by `try_parse_cache_header`.
struct JICacheFileHeader
# The module defined within this cache file header
# NOTE: While the disk format allows a `.ji` to contain multiple modules in the header,
# in practice, I've only ever seen one module per `.ji` file, so I'm not sure why we
# allow multiples. In this struct, I'm making the assertion that we only ever allow
# a single module at a time.
mod::PkgId
# The build_id of this cache file
build_id::JICacheBuildId
# Files included by this `JI` file, which should be checked via timestamps
includes::Vector{Base.CacheHeaderIncludes}
# Modules that are required by `mod`. This gets given to us from `parse_cache_header()`
# as a mapping from source module to dependee module, however since we are asserting that
# there is only one module in each `.ji` file, we simplify this to just a list of dependee
# modules.
requires::Vector{PkgId}
# Every remaining value returned by `parse_cache_header()` after `modules` and
# `(includes, requires)`, stored untyped and uninterpreted. The list below shows the
# full return shape this was written against.
others
#modules, (includes, requires), required_modules, srctextpos, prefs, prefs_hash, clone_targets, flags
end
# One candidate `.ji` file on disk together with the outcome of parsing its header.
# Exactly one of `header` / `header_exception` is non-`nothing` when built by
# `scope_cache_files!`, since `try_parse_cache_header` returns either
# `(header, nothing)` or `(nothing, exception)`.
struct JICacheFile
# Path of the `.ji` file that was inspected
path::String
# Parsed header, or `nothing` if parsing/verification failed
header::Union{Nothing,JICacheFileHeader}
# Exception raised while parsing/verifying the header, or `nothing` on success
header_exception::Union{Exception,Nothing}
end
# Node of the dependency tree: a package, its candidate cache files, and its dependencies.
struct JICache
# Identity (UUID + name) of the package this node describes
id::PkgId
# All candidate `.ji` files found for `id`; may be empty
files::Vector{JICacheFile}
# Nodes for the packages that `id` depends on according to the manifest
deps::Vector{JICache}
end
# Label used for a node when the tree is printed: the bare package name when no
# cache files were found, otherwise the name followed by one mark per candidate
# file (✅ = header parsed without an exception, ❌ = an exception was recorded).
function AbstractTrees.nodevalue(ji::JICache)
    pkg_name = ji.id.name
    isempty(ji.files) && return pkg_name
    marks = map(ji.files) do file
        file.header_exception === nothing ? "✅" : "❌"
    end
    return string(pkg_name, ": ", join(marks, ", "))
end
# Only descend into dependencies that have at least one candidate cache file;
# by default this filters out stdlibs, etc...
function AbstractTrees.children(ji::JICache)
    return filter(dep -> !isempty(dep.files), ji.deps)
end
# Keep these in sync with `staticdata_utils.c`
# Magic byte sequence expected at the very start of a `.ji` file (checked without a trailing NUL)
const JI_MAGIC = "\xfbjli\r\n\x1a\n"
# The only cache format version accepted by `jl_read_verify_header!`
const JI_FORMAT_VERSION = UInt16(12)
# Value the 16-bit byte order marker read from the header must equal
const BOM = UInt16(0xfeff)
# Version string expected in the header: major.minor.patch plus only the FIRST
# prerelease component (if any); further prerelease components and build metadata are dropped
const JULIA_VERSION_STRING = string(VersionNumber(
VERSION.major,
VERSION.minor,
VERSION.patch,
isempty(VERSION.prerelease) ? () : (first(VERSION.prerelease),),
))
# Read bytes from `io` and verify that they match `str` byte-for-byte.
#
# Arguments:
# - `io`: stream positioned at the start of the expected string
# - `name`: label for the field being checked, used only in the error message
# - `str`: the expected string
# - `trailing_null`: when true (the default), a terminating NUL byte is expected after `str`
#
# Returns `nothing` on success. Throws `ArgumentError` at the first mismatching byte;
# reading stops there, so the stream is left just past the offending byte.
function readstr_verify(io::IO, name::String, str::String; trailing_null::Bool = true)
    expected = collect(codeunits(str))
    if trailing_null
        push!(expected, 0x00)
    end
    actual = UInt8[]
    for (idx, want) in enumerate(expected)
        got = read(io, UInt8)
        push!(actual, got)
        if got != want
            # Build the fragments from raw bytes instead of slicing `str[1:idx]`: `idx` is a
            # byte offset that may land inside a multi-byte character, or one past the end
            # of `str` when the mismatch is on the trailing NUL. Slicing the string there
            # would raise StringIndexError/BoundsError and mask the real verification error.
            shown = min(idx, ncodeunits(str))
            expected_fragment = String(expected[1:shown])
            actual_fragment = String(copy(actual))
            throw(ArgumentError("$(name) verification failed on index $(idx), (0x$(string(got, base=16, pad=2)) != 0x$(string(want, base=16, pad=2)), expected string fragment: '$(expected_fragment)...', got '$(actual_fragment)...'"))
        end
    end
    return nothing
end
# Re-implement `jl_read_verify_header()` for greater error control.
#
# Consumes the fixed-layout prefix of a `.ji` file from `io` and throws an
# `ArgumentError` naming the first field that does not match the running Julia.
function jl_read_verify_header!(io::IO)
    # File magic; stored without a NUL terminator
    readstr_verify(io, "JI_MAGIC", JI_MAGIC; trailing_null = false)

    # Format version
    format_version = read(io, UInt16)
    format_version == JI_FORMAT_VERSION ||
        throw(ArgumentError("JI_FORMAT_VERSION mismatch! ($(format_version) != $(JI_FORMAT_VERSION))"))

    # Byte order marker
    bom = read(io, UInt16)
    bom == BOM ||
        throw(ArgumentError("Byte Order Marker mismatch! ($(bom) != $(BOM))"))

    # Word size
    sizeof_void_ptr = read(io, UInt8)
    sizeof_void_ptr == sizeof(Ptr{Cvoid}) ||
        throw(ArgumentError("Pointer size mismatch! ($(sizeof_void_ptr) != $(sizeof(Ptr{Cvoid})))"))

    # NUL-terminated identification strings, in the order they are read from the file:
    # kernel, architecture, Julia version (with the prerelease manually chopped),
    # then git branch and commit
    nul_terminated_fields = (
        ("JL_BUILD_UNAME", string(Sys.KERNEL)),
        ("JL_BUILD_ARCH", string(Sys.ARCH)),
        ("JULIA_VERSION_STRING", JULIA_VERSION_STRING),
        ("jl_git_branch", Base.GIT_VERSION_INFO.branch),
        ("jl_git_commit", Base.GIT_VERSION_INFO.commit),
    )
    for (label, expected) in nul_terminated_fields
        readstr_verify(io, label, expected)
    end
    return nothing
end
# Try to parse and verify the header of the `.ji` file at `path`.
#
# Returns `(header, nothing)` when everything checks out, or `(nothing, exception)`
# with whatever exception was raised while opening, verifying or parsing the file.
function try_parse_cache_header(path::String)
    try
        return open(path; read=true) do io
            # Verify that the header looks good
            jl_read_verify_header!(io)

            # Read in the rest of the pre-header. Values that are not inspected here
            # are still read so that the stream is advanced past them.
            read(io, UInt8)  # pkgimage
            hi, lo = split_checksum(read(io, UInt64))
            lo == 0 && throw(ArgumentError("Checksum == 0"))
            hi == 0xfafbfcfd ||
                throw(ArgumentError("Checksum magic invalid (0x$(string(hi, base=16, pad=8)) != 0xfafbfcfd)"))
            read(io, Int64)  # datastartpos
            read(io, Int64)  # dataendpos

            modules, (includes, requires), others... = parse_cache_header(io)

            # Assert that there is only one module in this `.ji` file
            length(modules) == 1 ||
                throw(ArgumentError("More than one module in `.ji` file?!"))
            mod, raw_build_id = only(modules)
            build_id = JICacheBuildId(hi, lo, raw_build_id)

            # Further assert that `requires` only maps from `mod`, and no other module
            for (source, dependee) in requires
                source == mod ||
                    throw(ArgumentError("Requires tracking dependency from $(source) which should only be top-level module $(mod)"))
            end
            dependees = [dependee for (_, dependee) in requires]

            header = JICacheFileHeader(mod, build_id, includes, dependees, others)
            return header, nothing
        end
    catch err
        return nothing, err
    end
end
# Walk `packages` (a name => UUID mapping) and, depth-first through the manifest,
# record every candidate `.ji` file of each package and of its dependencies.
#
# Keywords:
# - `ctx`: Pkg context supplying the project and manifest
# - `packages`: packages to visit; defaults to the project's direct dependencies
# - `cache`: accumulator, mutated in place; packages already present are skipped
#
# Returns `cache`.
# NOTE(review): an entry is only stored after all of its dependencies have been
# visited, so this appears to assume an acyclic dependency graph — confirm.
function scope_cache_files!(; ctx = Pkg.Types.Context(), packages = ctx.env.project.deps, cache = Dict{PkgId,JICache}())
    for (name, uuid) in packages
        id = PkgId(uuid, name)
        haskey(cache, id) && continue

        # First, recurse into all dependencies for this package
        dep_uuids = ctx.env.manifest[uuid].deps
        scope_cache_files!(; ctx, packages = dep_uuids, cache)
        dep_nodes = JICache[
            cache[PkgId(dep_uuid, dep_name)] for (dep_name, dep_uuid) in dep_uuids
        ]

        # Next, load all of our potential `.ji` files
        files = map(find_all_in_cache_path(id)) do ji_path
            header, exception = try_parse_cache_header(ji_path)
            JICacheFile(ji_path, header, exception)
        end

        cache[id] = JICache(id, files, dep_nodes)
    end
    return cache
end
# Look up an entry of `cache` by package name alone (ignoring the UUID).
# Returns the first entry whose key has a matching `name`, or `nothing` if none does.
function get_cache(cache, name)
    for (id, entry) in pairs(cache)
        id.name == name && return entry
    end
    return nothing
end
# Convenience method: print the tree rooted at the package called `name` in `cache`.
# Returns `nothing` without printing when no package of that name is present.
# `maxdepth` defaults to 10 here; all other keywords are forwarded to `print_tree`.
# NOTE(review): neither argument is type-constrained, so this adds a fully generic
# two-argument method to a function owned by AbstractTrees — consider constraining it.
function AbstractTrees.print_tree(cache, name; maxdepth = 10, kwargs...)
    root = get_cache(cache, name)
    root === nothing && return nothing
    return print_tree(root; maxdepth, kwargs...)
end
# Example usage: scan the cache files of every dependency of the project described by
# the default `Pkg.Types.Context()`
cache = scope_cache_files!();
# Print the dependency tree rooted at "CPUSummary", with a ✅/❌ mark per candidate `.ji` file
print_tree(cache, "CPUSummary")
# Keep the node itself around for interactive inspection (`nothing` if no such package was found)
s = get_cache(cache, "CPUSummary")

Cache Scope

This is very incomplete; it got just far enough for me to find an error and submit a fix (with Jameson's help). Ideally, this would be pushed further so that we can easily determine why something fails to load after precompilation has finished. My dream interface is for all .ji files to be inspected and the matching ones selected; if no matching .ji file can be found for a package, that errant package is selected and all of its candidate .ji files are shown, along with very verbose messages explaining why each of those .ji files could not be loaded.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment