Skip to content

Instantly share code, notes, and snippets.

@Gregliest
Created July 3, 2021 03:19
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Gregliest/ce4a6c03c9443ee7449d787e480a4317 to your computer and use it in GitHub Desktop.
Save Gregliest/ce4a6c03c9443ee7449d787e480a4317 to your computer and use it in GitHub Desktop.
A quick-and-dirty script that finds broken repository links in the Julia package registry and then lists the packages that depend on those repos.
import Pkg
using GitHub
using CSV
using DataFrames
using Dates
# Don't commit a real auth token! Prefer exporting it as the GITHUB_AUTH environment
# variable; the empty literal below is only the fallback used when that variable is unset.
GITHUB_AUTH = get(ENV, "GITHUB_AUTH", "") # or INSERT your Github auth token as the fallback
# Authentication object passed to every GitHub API call made by this script.
myauth = GitHub.authenticate(GITHUB_AUTH)
path = "" # INSERT the local path to your clone of the JuliaRegistries/General repo
"""
    _repoSlug(repoURL)

Strip the `https://github.com/` prefix and, only when present, the trailing `.git`
from `repoURL`. URLs on other hosts keep their host part, so callers can still
recognise them by substring.
"""
function _repoSlug(repoURL)
    slug = replace(repoURL, "https://github.com/" => "")
    # Only drop the suffix when it is really there; blindly chopping four characters
    # corrupts URLs that do not end in ".git".
    return endswith(slug, ".git") ? String(chop(slug; tail=4)) : slug
end

"""
    buildRegistry()

Build a table of the packages listed in `Registry.toml` under the global `path`,
enriched with repository metadata fetched through GitHub.jl using the global `myauth`.

Returns a tuple `(df, errors)`:

- `df`: a `DataFrame` with the columns `uuid`, `name`, `path`, `repo`, `lastCommit`,
  `stars` and `description`, holding one row per package whose lookup succeeded.
- `errors`: a vector of dicts (with at least the keys `uuid`, `name`, `path`, `repo`)
  for every package whose GitHub lookup or row insertion threw.

The function sleeps for 15 minutes after every 1000 packages to stay under the
GitHub rate limit.
"""
function buildRegistry()
    df = DataFrame(uuid=String[],
                   name=String[],
                   path=String[],
                   repo=String[],
                   lastCommit=DateTime[],
                   stars=Int[],
                   description=String[])
    registry = Pkg.TOML.parsefile(joinpath(path, "Registry.toml"))
    packages = registry["packages"]
    errors = Dict{String,Any}[]
    for (i, (uuid, info)) in enumerate(packages)
        packageFile = Pkg.TOML.parsefile(joinpath(path, info["path"], "Package.toml"))
        slug = _repoSlug(packageFile["repo"])
        # Work on a copy so the parsed registry itself is never mutated;
        # `name` and `path` are already present in the registry entry.
        entry = copy(info)
        entry["uuid"] = uuid
        entry["repo"] = slug
        try
            ghRepo = GitHub.repo(slug; auth=myauth)
            entry["stars"] = ghRepo.stargazers_count
            entry["lastCommit"] = ghRepo.pushed_at
            # A repository without a description reports `nothing`.
            entry["description"] = something(ghRepo.description, "")
            push!(df, entry)
        catch e
            # Best effort: record the failing package and keep going.
            push!(errors, entry)
            println(slug, ": ", e)
        end
        # Deal with Github rate limiting
        if i % 1000 == 0
            println("At ", i)
            sleep(15 * 60)
        end
    end
    return df, errors
end
# Build the package table once and persist it, so a later session can reload the CSV
# instead of querying GitHub again.
df, errors = buildRegistry()
CSV.write("registry.csv", df)
# Keep only the failures whose repo string does not mention gitlab.
gitErrors = filter(errors) do failed
    !contains(failed["repo"], "gitlab")
end
# Print a clickable GitHub URL for every remaining failure.
foreach(gitErrors) do failed
    println("https://github.com/" * failed["repo"])
end
"""
    findDependents(uuid, df)

Return the rows of `df` for which `uuid` appears among the values of any top-level
table in the row's `Deps.toml` (looked up at `joinpath(path, row.path, "Deps.toml")`,
where `path` is the global registry path). Rows without a `Deps.toml` are dropped.
"""
function findDependents(uuid, df)
    function dependsOn(row)
        depsPath = joinpath(path, row.path, "Deps.toml")
        # No Deps.toml file means nothing to match against.
        isfile(depsPath) || return false
        tables = Pkg.TOML.parsefile(depsPath)
        return any(uuid in values(deps) for deps in values(tables))
    end
    return filter(dependsOn, df)
end
# For every package whose repo lookup failed, print its name followed by the
# registry rows that depend on it.
foreach(gitErrors) do failed
    dependents = findDependents(failed["uuid"], df)
    println(failed["name"])
    println(dependents)
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment