Created
August 17, 2022 18:01
-
-
Save staticfloat/2837ec6fa761f2766c7027cd5f4761bb to your computer and use it in GitHub Desktop.
Audit script to print out which packages are using the most artifacts in an environment
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using Pkg, Artifacts, TOML, Base.BinaryPlatforms, Printf | |
# First, collect the list of packages available in this environment
"""
    collect_pkg_roots()

Return a `Dict{String,String}` mapping package names to source directories for the
active environment.

The directory holding the active project file is recorded under its own basename.
Every manifest entry for which `Pkg.Operations.source_path` yields a path is recorded
under the entry's package name; entries without a source path are skipped.

Raises an error if a name would be recorded twice.
"""
function collect_pkg_roots()
    env_context = Pkg.Types.Context()
    project_file = Base.active_project()
    project_dir = dirname(project_file)

    # Seed the map with the active project itself.
    roots = Dict{String,String}(basename(project_dir) => project_dir)

    for entry in values(env_context.env.manifest.deps)
        root = Pkg.Operations.source_path(project_file, entry)
        root === nothing && continue
        haskey(roots, entry.name) &&
            error("Two packages with the same name '$(entry.name)'?!")
        roots[entry.name] = root
    end
    return roots
end
# Package name => source directory, as gathered from the active environment
pkg_roots = collect_pkg_roots()
# Next, find all Artifacts.toml files that exist in these Pkg roots:
"""
    collect_artifacts_tomls(pkg_roots)

Return a `Dict{String,String}` mapping each package name in `pkg_roots` to the path of
the artifacts TOML file found directly inside that package's root directory.

`pkg_roots` is iterated as `name => directory` pairs.  Packages whose root contains
none of the file names listed in `Artifacts.artifact_names` are left out of the result.
"""
function collect_artifacts_tomls(pkg_roots)
    artifacts_tomls = Dict{String,String}()
    for (pkg_name, pkg_root) in pkg_roots
        for fname in Artifacts.artifact_names
            toml_path = joinpath(pkg_root, fname)
            if isfile(toml_path)
                artifacts_tomls[pkg_name] = toml_path
                # Candidates are tried in the order of `Artifacts.artifact_names` and
                # the first hit wins, matching how the `Artifacts` stdlib picks a
                # package's artifacts file.  Without this `break` a later candidate
                # would silently replace an earlier one when both files exist.
                break
            end
        end
    end
    return artifacts_tomls
end
# Package name => path of that package's artifacts TOML file
artifacts_tomls = collect_artifacts_tomls(pkg_roots)
# Next, collect all artifact hashes mentioned in the artifacts tomls:
"""
    collect_artifacts(artifacts_tomls)

Return the artifact entries listed in `artifacts_tomls` that are present on disk, as
a nested dictionary `package name => artifact name => Vector` of entry `Dict`s.

`artifacts_tomls` is iterated as `package name => TOML path` pairs and each file is
read with `Artifacts.load_artifacts_toml`.  An entry is kept only when
`Artifacts.artifact_exists` reports that its `"git-tree-sha1"` is installed; packages
and artifact names with no installed entry do not appear in the result.
"""
function collect_artifacts(artifacts_tomls)
    found = Dict{String,Dict{String,Vector{Dict}}}()
    for (pkg_name, toml_path) in artifacts_tomls
        for (artifact_name, bindings) in Artifacts.load_artifacts_toml(toml_path)
            # A platform-less artifact is a single table instead of a list of tables;
            # wrap it so that both shapes go through the same loop.
            entries = bindings isa Vector ? bindings : [bindings]
            for entry in entries
                tree_hash = Base.SHA1(entry["git-tree-sha1"])
                Artifacts.artifact_exists(tree_hash) || continue
                # Create the nested containers lazily, so only packages and artifact
                # names with at least one installed entry show up.
                by_name = get!(() -> Dict{String,Vector{Dict}}(), found, pkg_name)
                push!(get!(() -> Dict[], by_name, artifact_name), entry)
            end
        end
    end
    return found
end
# Package name => artifact name => installed artifact entries
pkg_artifacts = collect_artifacts(artifacts_tomls)
# Borrowed from Pkg.gc(); we should maybe split this out since it's useful
"""
    recursive_dir_size(path)

Return the summed size, in bytes, of every file reached by walking the directory tree
rooted at `path`.

Sizes are read with `lstat`.  Failures are logged with `@error` instead of being
thrown: a file that cannot be `lstat`ed is skipped, and a failure of the directory walk
itself stops the scan, in which case the total accumulated so far is returned.
"""
function recursive_dir_size(path)
    total = 0
    try
        for (root, _, files) in walkdir(path)
            for file in files
                # Use a separate name for the file: reassigning `path` here would make
                # the outer `catch` report the last file visited rather than the
                # directory being measured.
                file_path = joinpath(root, file)
                try
                    total += lstat(file_path).size
                catch ex
                    @error("Failed to calculate size of $file_path", exception=ex)
                end
            end
        end
    catch ex
        @error("Failed to calculate size of $path", exception=ex)
    end
    return total
end
"""
    pretty_byte_str(size)

Format `size` as a string: a number with three decimal places followed by a unit name
taken from `Base._mem_units`.

The scaled number and the unit index both come from `Base.prettyprint_getunits`, called
with a factor of 1024.
"""
function pretty_byte_str(size)
    units = Base._mem_units
    scaled, unit_idx = Base.prettyprint_getunits(size, length(units), Int64(1024))
    return @sprintf("%.3f %s", scaled, units[unit_idx])
end
"""
    print_summary_sizes(pkg_artifacts; io=stdout)

Print, for every package in `pkg_artifacts`, how many artifact entries it has and how
much disk space they occupy, followed by one line per artifact name of that package.

# Arguments
- `pkg_artifacts`: nested dictionary `package name => artifact name => Vector` of
  artifact entry `Dict`s; each entry must carry a `"git-tree-sha1"` key.

# Keywords
- `io`: stream the report is written to (defaults to `stdout`).

Packages are listed largest first, and so are the artifacts inside each package; ties
are broken by name so that the output order is reproducible.  Artifact figures are
tracked per `(package, artifact name)` pair, so two packages that happen to use the
same artifact name do not have their counts and sizes merged.
"""
function print_summary_sizes(pkg_artifacts; io::IO=stdout)
    # (entry count, bytes) per package and per (package, artifact name) pair.
    pkg_totals = Dict{String,Tuple{Int,Int64}}()
    artifact_totals = Dict{Tuple{String,String},Tuple{Int,Int64}}()

    for (pkg_name, artifacts_by_name) in pkg_artifacts
        pkg_count, pkg_bytes = 0, Int64(0)
        for (artifact_name, artifacts) in artifacts_by_name
            n, bytes = 0, Int64(0)
            for artifact in artifacts
                path = Artifacts.artifact_path(Base.SHA1(artifact["git-tree-sha1"]))
                bytes += recursive_dir_size(path)
                n += 1
            end
            artifact_totals[(pkg_name, artifact_name)] = (n, bytes)
            pkg_count += n
            pkg_bytes += bytes
        end
        pkg_totals[pkg_name] = (pkg_count, pkg_bytes)
    end

    # Sort by size, largest first (negated bytes), then by name for determinism.
    pkg_names = sort!(
        collect(keys(pkg_totals));
        by = name -> (-pkg_totals[name][2], name),
    )
    for pkg_name in pkg_names
        pkg_count, pkg_bytes = pkg_totals[pkg_name]
        println(io, "$(pkg_name)[$(pkg_count)] - $(pretty_byte_str(pkg_bytes))")

        artifact_names = sort!(
            collect(keys(pkg_artifacts[pkg_name]));
            by = name -> (-artifact_totals[(pkg_name, name)][2], name),
        )
        for artifact_name in artifact_names
            n, bytes = artifact_totals[(pkg_name, artifact_name)]
            println(io, " - $(artifact_name)[$(n)] - $(pretty_byte_str(bytes))")
        end
    end
end
# Finally, print the per-package / per-artifact size report
print_summary_sizes(pkg_artifacts)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Example usage:
Note that, in general, each package and each artifact name has only a single artifact installed. However, if we were to force-download artifacts for other platforms (for `GTK3_jll`, for instance):