This is very incomplete; it got just far enough for me to find an error and submit a fix (with Jameson's help).
Ideally, this would be pushed further so that we can easily determine why something fails to load after precompilation has finished.
My dream interface is for all `.ji` files to be inspected and the matching ones selected; if no matching `.ji` file can be found, the errant package is flagged and all of its candidate `.ji` files are shown, along with very verbose messages explaining why each `.ji` file could not be loaded.
Last active
June 20, 2023 19:44
-
-
Save staticfloat/3346ce5afba1ec3bc46e869b755782af to your computer and use it in GitHub Desktop.
Cache Scope - inspector for .ji cache rejections
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using Pkg | |
using Base: PkgId, find_all_in_cache_path, parse_cache_header, isvalid_cache_header | |
using AbstractTrees | |
"""
    JICacheBuildId(magic, crc, timestamp)

Decomposed form of the build id associated with a `.ji` cache file.

Author's notes on how the build id is composed:
- `jl_module_build_id()` in `src/module.c` yields a 128-bit value split into two 64-bit
  chunks (`hi`, `lo`), which are initialized to (`0xffffffffffffffff`, `jl_hrtime()`).
- The `hi` chunk gets the checksum written out to it in `src/staticdata.c`; it is itself
  split into two pieces, (`0xfafbfcfd`, `jl_crc32c(cache_data)`).

# Fields
- `magic`: upper 32 bits of the checksum word.
- `crc`: lower 32 bits of the checksum word.
- `timestamp`: the 64-bit build id value stored alongside the module.
"""
struct JICacheBuildId
    magic::UInt32
    crc::UInt32
    timestamp::UInt64
end
"""
    split_checksum(checksum::UInt64)

Split a 64-bit checksum word into its `(upper, lower)` 32-bit halves, returned as a
tuple of two `UInt32` values.
"""
function split_checksum(checksum::UInt64)
    upper = (checksum >> 32) % UInt32
    lower = checksum % UInt32
    return (upper, lower)
end
"""
    JICacheFileHeader

Parsed header of a single `.ji` cache file.

NOTE: While the disk format allows a `.ji` to contain multiple modules in the header, the
author has only ever seen one module per `.ji` file in practice.  This struct therefore
asserts that a cache file defines exactly one module.
"""
struct JICacheFileHeader
    # The module defined within this cache file header.
    mod::PkgId
    # The build_id of this cache file.
    build_id::JICacheBuildId
    # Files included by this `.ji` file, which should be checked via timestamps.
    includes::Vector{Base.CacheHeaderIncludes}
    # Modules that are required by `mod`.  `parse_cache_header()` hands these over as a
    # mapping from source module to dependee module; since only one module per `.ji` file
    # is allowed here, the mapping is simplified to a plain list of dependee modules.
    requires::Vector{PkgId}
    # Remaining values returned by `parse_cache_header()`, stored untyped.  For reference,
    # the author lists the full return as:
    #   modules, (includes, requires), required_modules, srctextpos, prefs, prefs_hash,
    #   clone_targets, flags
    others
end
"""
    JICacheFile

One candidate `.ji` file together with the outcome of parsing its header.

# Fields
- `path`: location of the `.ji` file.
- `header`: the parsed header, or `nothing` when parsing failed.
- `header_exception`: the exception raised while parsing, or `nothing` when parsing
  succeeded.
"""
struct JICacheFile
    path::String
    header::Union{Nothing,JICacheFileHeader}
    header_exception::Union{Exception,Nothing}
end
"""
    JICache

Cache information for one package: a node in the dependency tree.

# Fields
- `id`: the package this node describes.
- `files`: every candidate `.ji` file found for the package (may be empty).
- `deps`: the nodes of the package's dependencies.
"""
struct JICache
    id::PkgId
    files::Vector{JICacheFile}
    deps::Vector{JICache}
end
"""
    AbstractTrees.nodevalue(ji::JICache)

Label for `ji` when it is shown as a tree node.

When the package has no candidate cache files the label is just the package name.
Otherwise it is the name followed by one mark per file: "✅" if the file's header was
parsed without an exception, "❌" if an exception was recorded.
"""
function AbstractTrees.nodevalue(ji::JICache)
    isempty(ji.files) && return ji.id.name
    marks = (file.header_exception === nothing ? "✅" : "❌" for file in ji.files)
    return string(ji.id.name, ": ", join(marks, ", "))
end
# By default, filter out stdlibs, etc...: only dependencies that have at least one
# candidate `.ji` file are exposed as children of a node.
AbstractTrees.children(ji::JICache) = filter(dep -> !isempty(dep.files), ji.deps)
# Keep these in sync with `staticdata_utils.c`

# Byte sequence expected at the very start of a `.ji` file.
const JI_MAGIC = "\xfbjli\r\n\x1a\n"
# The cache format version accepted by the header check.
const JI_FORMAT_VERSION = UInt16(12)
# Byte order marker value expected in the header.
const BOM = UInt16(0xfeff)
# Version string compared against the header: major.minor.patch plus, at most, the first
# prerelease component of `VERSION` (any further components are dropped).
const JULIA_VERSION_STRING = let pre = VERSION.prerelease
    string(VersionNumber(
        VERSION.major,
        VERSION.minor,
        VERSION.patch,
        isempty(pre) ? () : (first(pre),),
    ))
end
"""
    readstr_verify(io::IO, name::String, str::String; trailing_null::Bool = true)

Read bytes from `io` and check, one byte at a time, that they equal the code units of
`str` (followed by a single NUL byte when `trailing_null` is `true`).

`name` is only used to label the field in the error message.  Reading stops at the first
mismatching byte.  Returns `nothing` when every byte matches.

# Throws
- `ArgumentError` on the first mismatching byte; the message reports the index, both byte
  values, and the expected and actual prefixes up to that point.
- Whatever `read(io, UInt8)` throws if `io` runs out of data early.
"""
function readstr_verify(io::IO, name::String, str::String; trailing_null::Bool = true)
    expected = collect(codeunits(str))
    trailing_null && push!(expected, 0x00)

    seen = UInt8[]
    for (idx, want) in enumerate(expected)
        got = read(io, UInt8)
        push!(seen, got)
        got == want && continue
        # Build the expected fragment from the code units instead of slicing `str`:
        # `str[1:idx]` is out of bounds when the mismatch is on the trailing NUL, and is
        # an invalid string index when `idx` falls inside a multi-byte character.  Either
        # case would replace the intended `ArgumentError` with an indexing error.
        expected_fragment = String(expected[1:idx])
        throw(ArgumentError("$(name) verification failed on index $(idx), (0x$(string(got, base=16, pad=2)) != 0x$(string(want, base=16, pad=2)), expected string fragment: '$(expected_fragment)...', got '$(String(seen))...'"))
    end
    return nothing
end
"""
    jl_read_verify_header!(io::IO)

Re-implementation of `jl_read_verify_header()` for greater error control.

Reads the leading header fields from `io`, in order, and compares each with the value
expected for the running Julia process: file magic, format version, byte order marker,
pointer size, kernel name, architecture, Julia version string, git branch and git commit.
`io` is advanced past every field that is read.

Returns `nothing` when all fields match.  Throws an `ArgumentError` describing the first
field that does not match (raised here or by `readstr_verify`).
"""
function jl_read_verify_header!(io::IO)
    # Check the header file magic (the magic is not NUL-terminated)
    readstr_verify(io, "JI_MAGIC", JI_MAGIC; trailing_null = false)

    # Check the format version
    format_version = read(io, UInt16)
    if format_version != JI_FORMAT_VERSION
        throw(ArgumentError("JI_FORMAT_VERSION mismatch! ($(format_version) != $(JI_FORMAT_VERSION))"))
    end

    # Check byte order marker
    bom = read(io, UInt16)
    if bom != BOM
        throw(ArgumentError("Byte Order Marker mismatch! ($(bom) != $(BOM))"))
    end

    # Check word size
    sizeof_void_ptr = read(io, UInt8)
    if sizeof_void_ptr != sizeof(Ptr{Cvoid})
        throw(ArgumentError("Pointer size mismatch! ($(sizeof_void_ptr) != $(sizeof(Ptr{Cvoid})))"))
    end

    # Check ARCH and KERNEL
    readstr_verify(io, "JL_BUILD_UNAME", string(Sys.KERNEL))
    readstr_verify(io, "JL_BUILD_ARCH", string(Sys.ARCH))

    # Check our version string (note we had to manually chop off part of our prerelease)
    readstr_verify(io, "JULIA_VERSION_STRING", JULIA_VERSION_STRING)

    # Check our git version info
    readstr_verify(io, "jl_git_branch", Base.GIT_VERSION_INFO.branch)
    readstr_verify(io, "jl_git_commit", Base.GIT_VERSION_INFO.commit)
    return nothing
end
"""
    try_parse_cache_header(path::String)

Open the `.ji` file at `path`, verify its leading header and parse its cache header.

Returns a 2-tuple:
- `(header, nothing)` with `header::JICacheFileHeader` when everything succeeds;
- `(nothing, exception)` when anything is thrown while opening, verifying or parsing.

Besides the checks done by `jl_read_verify_header!`, this rejects files whose checksum is
zero, whose checksum magic is not `0xfafbfcfd`, that do not define exactly one module, or
whose `requires` entries originate from a module other than the file's own.
"""
function try_parse_cache_header(path::String)
    try
        return open(path; read=true) do io
            # Verify that the header looks good
            jl_read_verify_header!(io)

            # Read in the rest of the pre-header.  Some values are not used here, but they
            # still have to be consumed so that the stream is positioned correctly for
            # `parse_cache_header`.
            read(io, UInt8)  # `pkgimage` (unused)
            magic, checksum = split_checksum(read(io, UInt64))
            if checksum == 0
                throw(ArgumentError("Checksum == 0"))
            end
            if magic != 0xfafbfcfd
                throw(ArgumentError("Checksum magic invalid (0x$(string(magic, base=16, pad=8)) != 0xfafbfcfd)"))
            end
            read(io, Int64)  # `datastartpos` (unused)
            read(io, Int64)  # `dataendpos` (unused)

            # Full return, per the author's note: modules, (includes, requires),
            # required_modules, srctextpos, prefs, prefs_hash, clone_targets, flags
            modules, (includes, requires), others... = parse_cache_header(io)

            # Assert that there is exactly one module in this `.ji` file.  The count is
            # reported because this also triggers when the file lists no module at all.
            if length(modules) != 1
                throw(ArgumentError("Expected exactly one module in `.ji` file, found $(length(modules))"))
            end
            mod, module_build_id = only(modules)
            build_id = JICacheBuildId(magic, checksum, module_build_id)

            # Further assert that `requires` only maps from `mod`, and no other module:
            for (should_be_mod, _) in requires
                if should_be_mod != mod
                    throw(ArgumentError("Requires tracking dependency from $(should_be_mod) which should only be top-level module $(mod)"))
                end
            end
            dependees = [dependee for (_, dependee) in requires]
            return JICacheFileHeader(mod, build_id, includes, dependees, others), nothing
        end
    catch e
        return nothing, e
    end
end
"""
    scope_cache_files!(; ctx = Pkg.Types.Context(), packages = ctx.env.project.deps,
                       cache = Dict{PkgId,JICache}())

Build a `JICache` entry for every package in `packages` and, recursively, for every
dependency listed for it in `ctx.env.manifest`.

# Keywords
- `ctx`: the Pkg context whose manifest is consulted for dependencies.
- `packages`: iterable of `(name, uuid)` pairs to process.
- `cache`: dictionary from `PkgId` to `JICache`; it is mutated in place and packages that
  are already present are skipped.

Returns `cache`.
"""
function scope_cache_files!(;
    ctx = Pkg.Types.Context(),
    packages = ctx.env.project.deps,
    cache = Dict{PkgId,JICache}(),
)
    for (name, uuid) in packages
        id = PkgId(uuid, name)
        haskey(cache, id) && continue

        # First, recurse into all dependencies for this package, so that each of them
        # already has an entry in `cache` when it is looked up below.
        dep_specs = ctx.env.manifest[uuid].deps
        scope_cache_files!(; ctx, packages = dep_specs, cache)
        deps = JICache[cache[PkgId(dep_uuid, dep_name)] for (dep_name, dep_uuid) in dep_specs]

        # Next, load all of our potential `.ji` files
        files = map(find_all_in_cache_path(id)) do path
            header, exception = try_parse_cache_header(path)
            return JICacheFile(path, header, exception)
        end
        cache[id] = JICache(id, files, deps)
    end
    return cache
end
"""
    get_cache(cache, name)

Return the value stored in `cache` under the first key (in iteration order) whose `name`
field equals `name`, or `nothing` when no key matches.
"""
function get_cache(cache, name)
    for (id, entry) in pairs(cache)
        id.name == name && return entry
    end
    return nothing
end
"""
    AbstractTrees.print_tree(cache, name; maxdepth = 10, kwargs...)

Look up the entry called `name` in `cache` (via `get_cache`) and print it as a tree,
forwarding `maxdepth` and any other keyword arguments to `print_tree`.

Returns `nothing` without printing anything when `cache` has no entry with that name.
"""
function AbstractTrees.print_tree(cache, name; maxdepth = 10, kwargs...)
    # NOTE(review): both positional arguments are untyped, so this method extends
    # `AbstractTrees.print_tree` for any two-argument call that has no more specific
    # method — confirm that this is intended.
    data = get_cache(cache, name)
    data === nothing && return nothing
    return print_tree(data; maxdepth, kwargs...)
end
# Example usage: scan the caches using the default keyword arguments, print the tree for
# one package, and keep that package's entry around for further inspection.
cache = scope_cache_files!();
print_tree(cache, "CPUSummary")
s = get_cache(cache, "CPUSummary")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment