Created
July 3, 2021 03:19
-
-
Save Gregliest/ce4a6c03c9443ee7449d787e480a4317 to your computer and use it in GitHub Desktop.
A quick and dirty script that finds broken repository links in the Julia package registry, and then finds any registered packages that depend on those repos.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import Pkg | |
using GitHub | |
using CSV | |
using DataFrames | |
using Dates | |
# Script configuration.
# Don't commit this once you add your auth token!! Prefer exporting GITHUB_AUTH in
# your shell so the secret never lands in the source; the literal is only a fallback.
GITHUB_AUTH = get(ENV, "GITHUB_AUTH", "") # or INSERT your Github auth token as the fallback
# Fail with a clear message instead of an opaque authentication error.
isempty(GITHUB_AUTH) && error("No GitHub token: set ENV[\"GITHUB_AUTH\"] or fill in GITHUB_AUTH")
myauth = GitHub.authenticate(GITHUB_AUTH)
path = "" # INSERT the local path to your clone of the JuliaRegistries/General repo
"""
    buildRegistry(; registrypath=path, auth=myauth)

Scan the registry clone at `registrypath` and look up every listed package on GitHub.

For each entry of `Registry.toml`, the package's `Package.toml` is read to obtain its
repository URL, which is reduced to an `owner/name` slug (the `https://github.com/`
prefix and a trailing `.git`, when present, are removed). The slug is then queried
through `GitHub.repo` using `auth`.

# Returns
A tuple `(df, errors)`:
- `df`: a `DataFrame` with the columns `uuid`, `name`, `path`, `repo`, `lastCommit`,
  `stars` and `description`, one row per package whose lookup succeeded.
- `errors`: a `Vector{Dict{String,Any}}` with at least the keys `"uuid"`, `"name"`,
  `"path"` and `"repo"`, one entry per package whose lookup (or row insertion) threw.

The loop pauses for 15 minutes after every 1000 packages to deal with GitHub rate
limiting.
"""
function buildRegistry(; registrypath=path, auth=myauth)
    df = DataFrame(uuid=String[],
                   name=String[],
                   path=String[],
                   repo=String[],
                   lastCommit=DateTime[],
                   stars=Int[],
                   description=String[])
    registry = Pkg.TOML.parsefile(joinpath(registrypath, "Registry.toml"))
    packages = registry["packages"]
    errors = Dict{String,Any}[]
    for (i, (uuid, info)) in enumerate(packages)
        packageFile = Pkg.TOML.parsefile(joinpath(registrypath, info["path"], "Package.toml"))
        slug = replace(packageFile["repo"], "https://github.com/" => "")
        # Only drop the ".git" suffix when it is really there; blindly cutting four
        # characters mangles URLs that are registered without it.
        if endswith(slug, ".git")
            slug = String(chop(slug; tail=4))
        end
        entry = Dict{String,Any}("uuid" => uuid,
                                 "name" => info["name"],
                                 "path" => info["path"],
                                 "repo" => slug)
        try
            ghrepo = GitHub.repo(slug; auth=auth)
            entry["stars"] = ghrepo.stargazers_count
            entry["lastCommit"] = ghrepo.pushed_at
            entry["description"] = something(ghrepo.description, "")
            push!(df, entry)
        catch e
            # Let Ctrl-C stop the scan instead of recording it as a broken link.
            e isa InterruptException && rethrow()
            push!(errors, entry)
            println(slug, ": ", e)
        end
        # Deal with Github rate limiting
        if i % 1000 == 0
            println("At ", i)
            sleep(15 * 60)
        end
    end
    return df, errors
end
# Run the scan and keep a copy of the successful lookups on disk.
df, errors = buildRegistry()
CSV.write("registry.csv", df) # Save in case we need to reload later
# GitHub-hosted entries are stored as a bare "owner/name" slug, while repos on any
# other host still carry their full URL. Filtering on the URL scheme (rather than on
# the substring "gitlab") keeps GitHub repos whose name merely contains "gitlab" and
# drops every non-GitHub host, so only real GitHub links are printed below.
gitErrors = filter(d -> !occursin("://", d["repo"]), errors)
for e in gitErrors
    println("https://github.com/" * e["repo"])
end
# Find dependencies | |
"""
    findDependents(uuid, df; registrypath=path)

Return the rows of `df` whose package lists `uuid` among its dependencies.

Each row must expose a `path` property: the package directory relative to
`registrypath`. The package's `Deps.toml` is parsed and the row is kept when `uuid`
appears as a value in any of its top-level tables. Rows whose package has no
`Deps.toml` are dropped.

`registrypath` defaults to the script-level `path`, so existing two-argument calls
behave as before.
"""
function findDependents(uuid, df; registrypath=path)
    return filter(df) do row
        dependencyFile = joinpath(registrypath, row.path, "Deps.toml")
        isfile(dependencyFile) || return false
        dependencies = Pkg.TOML.parsefile(dependencyFile)
        # Each top-level table maps dependency names to UUID strings.
        return any(deps -> uuid in values(deps), values(dependencies))
    end
end
# For every package with a broken GitHub link, print its name followed by the
# registry rows that depend on it.
foreach(gitErrors) do broken
    users = findDependents(broken["uuid"], df)
    println(broken["name"])
    println(users)
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment