Skip to content

Instantly share code, notes, and snippets.

@staticfloat
Created August 17, 2022 18:01
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save staticfloat/2837ec6fa761f2766c7027cd5f4761bb to your computer and use it in GitHub Desktop.
Save staticfloat/2837ec6fa761f2766c7027cd5f4761bb to your computer and use it in GitHub Desktop.
Audit script to print out which packages are using the most artifacts in an environment
using Pkg, Artifacts, TOML, Base.BinaryPlatforms, Printf
# First, collect the list of packages available in this environment
"""
    collect_pkg_roots()

Return a `Dict{String,String}` mapping package names to source directories for the
active environment.

The directory containing the active project file is registered under its own
basename. Every manifest dependency for which `Pkg.Operations.source_path` returns
something other than `nothing` is registered under its package name.

# Throws
- `ErrorException` if a dependency name is already present in the mapping.
"""
function collect_pkg_roots()
    manifest_deps = Pkg.Types.Context().env.manifest.deps
    project_dir = dirname(Base.active_project())
    roots = Dict{String,String}(basename(project_dir) => project_dir)
    for pkg in values(manifest_deps)
        src = Pkg.Operations.source_path(Base.active_project(), pkg)
        # Dependencies without a source path are left out of the mapping.
        src === nothing && continue
        haskey(roots, pkg.name) && error("Two packages with the same name '$(pkg.name)'?!")
        roots[pkg.name] = src
    end
    return roots
end
# Script-level result: package name => source directory, consumed by the next stage.
pkg_roots = collect_pkg_roots()
# Next, find all Artifacts.toml files that exist in these Pkg roots:
"""
    collect_artifacts_tomls(pkg_roots)

Return a `Dict{String,String}` mapping each package name in `pkg_roots` to the path
of the artifacts TOML file found directly inside that package's root directory.

# Arguments
- `pkg_roots`: an iterable of `(pkg_name, pkg_root)` pairs, e.g. a `Dict`.

Candidate file names are tried in the order given by `Artifacts.artifact_names`, and
the first one that exists is used. Packages without any such file are omitted.
"""
function collect_artifacts_tomls(pkg_roots)
    artifacts_tomls = Dict{String,String}()
    for (pkg_name, pkg_root) in pkg_roots
        for fname in Artifacts.artifact_names
            toml_path = joinpath(pkg_root, fname)
            if isfile(toml_path)
                artifacts_tomls[pkg_name] = toml_path
                # Stop at the first match. Without this, a later candidate name
                # would silently overwrite an earlier one, so the audit could
                # inspect a different file than the one Julia's own first-match
                # lookup would load for the package.
                break
            end
        end
    end
    return artifacts_tomls
end
# Script-level result: package name => path of its artifacts TOML file.
artifacts_tomls = collect_artifacts_tomls(pkg_roots)
# Next, collect all artifact hashes mentioned in the artifacts tomls:
"""
    collect_artifacts(artifacts_tomls)

Return a nested `Dict` of the form `pkg_name => artifact_name => [bindings...]`
containing only those artifact bindings whose `"git-tree-sha1"` passes
`Artifacts.artifact_exists`.

# Arguments
- `artifacts_tomls`: an iterable of `(pkg_name, toml_path)` pairs.

Packages and artifact names with no installed binding do not appear in the result.
"""
function collect_artifacts(artifacts_tomls)
    found = Dict{String,Dict{String,Vector{Dict}}}()
    for (pkg_name, toml_path) in artifacts_tomls
        for (artifact_name, entry) in Artifacts.load_artifacts_toml(toml_path)
            # An entry is either a single binding or a vector of bindings
            # (e.g. one per platform); treat both uniformly as a list.
            bindings = entry isa Vector ? entry : [entry]
            for binding in bindings
                tree_hash = Base.SHA1(binding["git-tree-sha1"])
                Artifacts.artifact_exists(tree_hash) || continue
                # Create the nested containers lazily so that only packages and
                # artifacts with at least one installed binding get an entry.
                per_pkg = get!(() -> Dict{String,Vector{Dict}}(), found, pkg_name)
                push!(get!(() -> Dict[], per_pkg, artifact_name), binding)
            end
        end
    end
    return found
end
# Script-level result: package name => artifact name => installed artifact bindings.
pkg_artifacts = collect_artifacts(artifacts_tomls)
# Borrowed from Pkg.gc(); we should maybe split this out since it's useful
"""
    recursive_dir_size(path)

Return the sum of `lstat(file).size` over every file that `walkdir(path)` yields.

Failures are logged with `@error` instead of being thrown: a failure on a single
file skips just that file, and a failure of the directory walk itself ends the walk
and returns whatever had been summed so far.
"""
function recursive_dir_size(path)
    total = 0
    try
        for (root, _, files) in walkdir(path)
            for file in files
                # Use a separate name for the per-file path. Reassigning `path`
                # here would make the outer error message below report the last
                # visited file instead of the directory that was being walked.
                file_path = joinpath(root, file)
                try
                    total += lstat(file_path).size
                catch ex
                    @error("Failed to calculate size of $file_path", exception=ex)
                end
            end
        end
    catch ex
        @error("Failed to calculate size of $path", exception=ex)
    end
    return total
end
"""
    pretty_byte_str(size)

Format the byte count `size` as a string with three decimal places followed by a
unit name taken from `Base._mem_units`, scaling by factors of 1024 via
`Base.prettyprint_getunits`.
"""
function pretty_byte_str(size)
    unit_names = Base._mem_units
    # `prettyprint_getunits` returns the scaled value and the index of its unit.
    scaled, unit_idx = Base.prettyprint_getunits(size, length(unit_names), Int64(1024))
    return @sprintf("%.3f %s", scaled, unit_names[unit_idx])
end
"""
    print_summary_sizes(pkg_artifacts; io::IO=stdout)

Print, for every package in `pkg_artifacts`, the number of installed artifact
bindings and their total on-disk size, followed by one indented line per artifact
name of that package. Packages are listed from largest to smallest total size.

# Arguments
- `pkg_artifacts`: nested mapping `pkg_name => artifact_name => [bindings...]`, where
  each binding has a `"git-tree-sha1"` entry.

# Keywords
- `io`: stream to print to (defaults to `stdout`).

Per-artifact totals are tracked per `(pkg_name, artifact_name)` pair, so two packages
that happen to use the same artifact name are reported separately instead of each
showing the combined figure.
"""
function print_summary_sizes(pkg_artifacts; io::IO=stdout)
    new_tally() = Dict{String,Int64}("count" => 0, "total_size" => 0)
    pkg_sizes = Dict{String,Dict{String,Int64}}()
    # Keyed by (package, artifact): artifact names alone are not unique across
    # packages, and keying by name only would merge unrelated artifacts.
    artifact_sizes = Dict{Tuple{String,String},Dict{String,Int64}}()

    for (pkg_name, artifacts_list) in pkg_artifacts
        pkg_tally = get!(new_tally, pkg_sizes, pkg_name)
        for (artifact_name, artifacts) in artifacts_list
            artifact_tally = get!(new_tally, artifact_sizes, (pkg_name, artifact_name))
            for artifact in artifacts
                path = Artifacts.artifact_path(Base.SHA1(artifact["git-tree-sha1"]))
                nbytes = recursive_dir_size(path)
                for tally in (pkg_tally, artifact_tally)
                    tally["count"] += 1
                    tally["total_size"] += nbytes
                end
            end
        end
    end

    # Sort by size, largest first:
    pkg_names_by_size = sort(
        collect(keys(pkg_artifacts));
        by = pkg -> pkg_sizes[pkg]["total_size"],
        rev = true,
    )
    for pkg_name in pkg_names_by_size
        pkg_tally = pkg_sizes[pkg_name]
        pkg_total = pretty_byte_str(pkg_tally["total_size"])
        println(io, "$(pkg_name)[$(pkg_tally["count"])] - $(pkg_total)")
        for artifact_name in keys(pkg_artifacts[pkg_name])
            tally = artifact_sizes[(pkg_name, artifact_name)]
            artifact_total = pretty_byte_str(tally["total_size"])
            println(io, " - $(artifact_name)[$(tally["count"])] - $(artifact_total)")
        end
    end
    return nothing
end
# Final stage: print the per-package and per-artifact size report.
print_summary_sizes(pkg_artifacts)
@staticfloat
Copy link
Author

Example usage:

$ julia --project=. -e 'import Pkg; Pkg.add(["Gtk"])'
....
$ julia --project=. -e 'include("environment_artifact_audit.jl")'
Xorg_libX11_jll[1] - 51.291 MiB
 - Xorg_libX11[1] - 51.291 MiB
GTK3_jll[1] - 39.551 MiB
 - GTK3[1] - 39.551 MiB
adwaita_icon_theme_jll[1] - 22.570 MiB
 - adwaita_icon_theme[1] - 22.570 MiB
Librsvg_jll[1] - 21.411 MiB
 - Librsvg[1] - 21.411 MiB
iso_codes_jll[1] - 18.197 MiB
 - iso_codes[1] - 18.197 MiB
Glib_jll[1] - 15.513 MiB
 - Glib[1] - 15.513 MiB
Gettext_jll[1] - 15.373 MiB
 - Gettext[1] - 15.373 MiB
Xorg_libxcb_jll[1] - 11.389 MiB
 - Xorg_libxcb[1] - 11.389 MiB
Zstd_jll[4] - 7.874 MiB
 - Zstd[4] - 7.874 MiB
Libgcrypt_jll[2] - 6.545 MiB
 - Libgcrypt[2] - 6.545 MiB
Libtiff_jll[1] - 6.364 MiB
 - Libtiff[1] - 6.364 MiB
XML2_jll[1] - 5.444 MiB
 - XML2[1] - 5.444 MiB
Cairo_jll[1] - 4.041 MiB
 - Cairo[1] - 4.041 MiB
gdk_pixbuf_jll[1] - 3.807 MiB
 - gdk_pixbuf[1] - 3.807 MiB
Libiconv_jll[2] - 3.315 MiB
 - Libiconv[2] - 3.315 MiB
Libepoxy_jll[1] - 3.261 MiB
 - Libepoxy[1] - 3.261 MiB
PCRE_jll[1] - 3.058 MiB
 - PCRE[1] - 3.058 MiB
JpegTurbo_jll[1] - 2.911 MiB
 - JpegTurbo[1] - 2.911 MiB
HarfBuzz_jll[1] - 2.711 MiB
 - HarfBuzz[1] - 2.711 MiB
XSLT_jll[2] - 2.422 MiB
 - XSLT[2] - 2.422 MiB
Libgpg_error_jll[2] - 2.288 MiB
 - Libgpg_error[2] - 2.288 MiB
FreeType2_jll[1] - 1.606 MiB
 - FreeType2[1] - 1.606 MiB
Pango_jll[1] - 1.541 MiB
 - Pango[1] - 1.541 MiB
ATK_jll[1] - 1.447 MiB
 - ATK[1] - 1.447 MiB
Fontconfig_jll[1] - 1.301 MiB
 - Fontconfig[1] - 1.301 MiB
libpng_jll[1] - 1.003 MiB
 - libpng[1] - 1.003 MiB
Bzip2_jll[1] - 1013.525 KiB
 - Bzip2[1] - 1013.525 KiB
Pixman_jll[1] - 586.403 KiB
 - Pixman[1] - 586.403 KiB
LZO_jll[1] - 475.406 KiB
 - LZO[1] - 475.406 KiB
LERC_jll[1] - 469.931 KiB
 - LERC[1] - 469.931 KiB
Wayland_protocols_jll[1] - 465.663 KiB
 - Wayland_protocols[1] - 465.663 KiB
Expat_jll[1] - 424.294 KiB
 - Expat[1] - 424.294 KiB
FriBidi_jll[1] - 376.050 KiB
 - FriBidi[1] - 376.050 KiB
Graphite2_jll[1] - 322.448 KiB
 - Graphite2[1] - 322.448 KiB
Xorg_libXdmcp_jll[1] - 302.636 KiB
 - Xorg_libXdmcp[1] - 302.636 KiB
Xorg_xtrans_jll[1] - 241.450 KiB
 - Xorg_xtrans[1] - 241.450 KiB
Xorg_libXau_jll[1] - 134.889 KiB
 - Xorg_libXau[1] - 134.889 KiB
Libffi_jll[1] - 122.527 KiB
 - Libffi[1] - 122.527 KiB
hicolor_icon_theme_jll[1] - 74.348 KiB
 - hicolor_icon_theme[1] - 74.348 KiB
Xorg_libpthread_stubs_jll[1] - 5.074 KiB
 - Xorg_libpthread_stubs[1] - 5.074 KiB

Note that, in general, each package has only a single artifact installed. However, if we force-download artifacts for other platforms (for GTK3_jll, for instance), the audit reports several installed artifacts for that package:

pkg> add GTK3_jll
julia> using Pkg, GTK3_jll, Base.BinaryPlatforms
       for platform in [Platform(arch, "linux") for arch in ("x86_64", "i686", "aarch64")]
           Pkg.Artifacts.ensure_all_artifacts_installed(joinpath(dirname(dirname(Base.pathof(GTK3_jll))), "Artifacts.toml"); platform, include_lazy=true)
       end
$ julia --project=. -e 'include("environment_artifact_audit.jl")'
GTK3_jll[4] - 254.088 MiB
 - GTK3[4] - 254.088 MiB
Xorg_libX11_jll[1] - 51.291 MiB
 - Xorg_libX11[1] - 51.291 MiB
....

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment