Skip to content

Instantly share code, notes, and snippets.

@paolobarbolini
Last active May 14, 2022 06:30
Show Gist options
  • Save paolobarbolini/8515d64f570b8989d02ad3242818f135 to your computer and use it in GitHub Desktop.
Save paolobarbolini/8515d64f570b8989d02ad3242818f135 to your computer and use it in GitHub Desktop.
An experiment exploring the idea of checking crates.io releases against the files in the corresponding git revision
#!/bin/bash
# An experiment exploring the idea of checking crates.io releases against the
# files in the corresponding git revision (and in the future maybe even
# complaining if there's no tag associated with it, or giving you a diff since
# the last release).
#
# Depends on curl, jq, base64, tar, git, find, sort and diff
#
# Still of limited use, as it doesn't handle workspaces: the crate is assumed
# to live at the root of its repository.
#
# Everything is cached below ./cache:
#   cache/api/       crates.io API responses, one JSON file per page
#   cache/registry/  downloaded .crate archives and their extracted contents
#   cache/git/       one clone per crate
set -euo pipefail

# IMPORTANT: change this before testing
# Also read the crates.io crawling policy: https://crates.io/policies#crawlers
user_agent="PaoloBarbolini's experimental crates checking script (paolo@paolo565.org)"
base_url="https://crates.io/api/v1/crates?sort=downloads&per_page=20&page="

mkdir -p cache/api/ cache/registry/ cache/git/

# Pagination support (raise the upper bound to process more pages)
for ((i = 1; i <= 1; i++)); do
  echo "Page ${i}"

  page_file="cache/api/page${i}.json"
  if [[ ! -e "$page_file" ]]; then
    # Be nice to the crates.io API :)
    curl --fail -o "$page_file" -H "User-Agent: ${user_agent}" "${base_url}${i}"
    # Lazy sleep to make sure we don't hit the API too hard
    sleep 2s
  fi

  # Every crate object is emitted base64-encoded so that the JSON survives the
  # word splitting of this loop; it is decoded again right below.
  for crate in $(jq -cr ".crates[] | @base64" "$page_file"); do
    crate=$(echo "${crate}" | base64 --decode)
    name=$(jq -r '.name' <<< "$crate")
    max_stable_version=$(jq -r '.max_stable_version' <<< "$crate")
    repository=$(jq -r '.repository' <<< "$crate")

    # jq -r prints the literal string "null" for a missing/null field. Without
    # a stable version there is nothing to download, so skip the crate instead
    # of letting curl fail and abort the whole run.
    if [[ "$max_stable_version" == "null" ]]; then
      echo "WARNING: ${name} has no stable version"
      continue
    fi

    crate_dir="cache/registry/${name}/${max_stable_version}"
    mkdir -p "$crate_dir"

    crate_tar_gz_file="${crate_dir}/crate.tar.gz"
    if [[ ! -e "$crate_tar_gz_file" ]]; then
      curl --fail -o "$crate_tar_gz_file" -H "User-Agent: ${user_agent}" \
        "https://static.crates.io/crates/${name}/${name}-${max_stable_version}.crate"
    fi

    crate_extract_dir="${crate_dir}/contents"
    if [[ ! -e "$crate_extract_dir" ]]; then
      # Extract into a staging directory and move the result into place only
      # once it is complete; otherwise an interrupted run would leave a
      # "contents" directory that later runs mistake for a finished extraction.
      staging_dir="${crate_extract_dir}_tmp"
      rm -rf -- "$staging_dir"
      mkdir -p "$staging_dir"
      # TODO: do we need any extra flags to make sure we aren't vulnerable to extracting outside of the folder?
      tar -xzf "$crate_tar_gz_file" -C "$staging_dir"
      # The archive holds a single top-level "<name>-<version>" directory.
      extracted_dir="${staging_dir}/${name}-${max_stable_version}"
      # Prefer the manifest saved as Cargo.toml.orig (presumably the author's
      # original, see `cargo package`) when comparing with the repository.
      # Archives that don't ship one keep their Cargo.toml as-is.
      if [[ -e "${extracted_dir}/Cargo.toml.orig" ]]; then
        mv "${extracted_dir}/Cargo.toml.orig" "${extracted_dir}/Cargo.toml"
      fi
      mv "$extracted_dir" "$crate_extract_dir"
      rm -rf -- "$staging_dir"
    fi

    crate_cargo_vcs_info="${crate_extract_dir}/.cargo_vcs_info.json"
    if [[ ! -e "$crate_cargo_vcs_info" ]]; then
      echo "WARNING: ${name} has no cargo vcs info"
      continue
    fi
    git_sha1=$(jq -r '.git.sha1' "$crate_cargo_vcs_info")

    crate_repo_dir="cache/git/${name}"
    if [[ ! -e "$crate_repo_dir" ]]; then
      if [[ "$repository" == "null" ]]; then
        echo "WARNING: ${name} has no repository"
        continue
      fi
      # Reduce GitHub/GitLab URLs that point below the repository root
      # (e.g. .../tree/master/subdir) to the clone URL of the repository.
      re="^https:\/\/(github.com|gitlab.com)\/([^\/]+)\/([^\/]+)\/.*"
      if [[ $repository =~ $re ]]; then
        hostname=${BASH_REMATCH[1]}
        user=${BASH_REMATCH[2]}
        repo=${BASH_REMATCH[3]}
        repository="https://${hostname}/${user}/${repo}.git"
      fi
      # TODO: don't assume it's a git repository
      git clone "$repository" "$crate_repo_dir"
    fi

    # Check out the published revision on every run, not only right after
    # cloning: the clone is cached per crate name, so it may still sit on the
    # revision of a previously checked release. A stale clone might not have
    # the commit yet, hence the fetch-and-retry.
    if ! git -C "$crate_repo_dir" checkout --quiet "$git_sha1"; then
      git -C "$crate_repo_dir" fetch --quiet origin
      git -C "$crate_repo_dir" checkout --quiet "$git_sha1"
    fi

    # Compare every regular, non-hidden file shipped in the crate with its
    # counterpart in the repository. Hidden files and directories are skipped
    # (this also leaves out .cargo_vcs_info.json). The list is NUL-delimited
    # so that file names containing whitespace stay intact, and sorted so the
    # report order is stable between runs.
    while IFS= read -r -d '' file; do
      file="${file#./}" # Remove the starting ./
      if [[ -f "${crate_repo_dir}/${file}" ]]; then
        if ! diff --strip-trailing-cr "${crate_extract_dir}/${file}" "${crate_repo_dir}/${file}" > /dev/null; then
          echo "[${name}]: file $file doesn't match"
        fi
      else
        # Print the path that was looked up, followed by the actual finding.
        echo "${crate_repo_dir}/${file}"
        echo "[${name}]: file $file doesn't exist in the repo"
      fi
    done < <(cd "$crate_extract_dir" && find . -type f ! -path '*/.*' -print0 | LC_ALL=C sort -z)
  done
done
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment