-
-
Save dingzeyuli/f07c126b74371adba4b7dbe181cb57d2 to your computer and use it in GitHub Desktop.
# http://webapps.stackexchange.com/questions/39587/view-estimated-size-of-github-repository-before-cloning
# tested on macOS
# Extract "owner/repo" from the clone URL (an optional trailing ".git" is
# dropped), request that repository from the GitHub REST API, and keep only
# the response lines that contain "size".
# NOTE(review): -k turns off TLS certificate verification in curl; it looks
# unnecessary for api.github.com -- confirm and drop it if so.
echo https://github.com/torvalds/linux.git \
  | perl -ne 'print $1 if m!([^/]+/[^/]+?)(?:\.git)?$!' \
  | xargs -I{} curl -s -k https://api.github.com/repos/'{}' \
  | grep size
# output:
# "size": 1746294,
# Fetch the repository metadata, select its "size" field with jq, and print
# it in IEC units, treating the number as a count of 1024-byte units.
curl --silent https://api.github.com/repos/torvalds/linux \
  | jq '.size' \
  | numfmt --from-unit=1024 --to=iec
What about private repos?
# Same lookup with an Authorization header using the "token" scheme.
# GITHUB_TOKEN is a literal placeholder in this command (no "$" expansion);
# substitute the real token before running it.
curl --silent --header "Authorization: token GITHUB_TOKEN" \
  https://api.github.com/repos/torvalds/linux \
  | jq '.size' \
  | numfmt --from-unit=1024 --to=iec
# Same lookup with an Authorization header using the "Bearer" scheme.
# <GITHUB_TOKEN> is a literal placeholder inside the quotes; substitute the
# real token before running it.
curl --silent --header "Authorization: Bearer <GITHUB_TOKEN>" \
  https://api.github.com/repos/torvalds/linux \
  | jq '.size' \
  | numfmt --from-unit=1024 --to=iec
I had to use the `Bearer` prefix the last time I used this! 🎆
get_github_repo_size
#!/usr/bin/env -S bash -euo pipefail
#######################################
# Print the human-readable size of one or more GitHub repositories.
#
# Each URL is split into domain, owner and repository name; the endpoint
# https://api.<domain>/repos/<owner>/<repo> is fetched with curl, its "size"
# field is read with jq and formatted by numfmt as a count of 1024-byte units.
#
# Globals:
#   GITHUB_TOKEN (read, optional): sent as "Authorization: Bearer <token>"
#     when set and non-empty.
# Arguments:
#   -h | --help as the first argument prints usage and returns.
#   Otherwise: repository URLs such as https://github.com/git/git
#   (a trailing ".git" on the repository name is ignored).
# Outputs:
#   stdout: one "<size> <domain>/<owner>/<repo>" line per successful lookup.
#   stderr: one "error ..." line per failed lookup, plus any diagnostics
#     printed by curl or jq themselves.
# Returns:
#   0 if every lookup succeeded (or no URL was given), 1 otherwise.
#######################################
get_github_repo_size() {
  # "${1:-}" keeps this check from aborting under `set -u` when the function
  # is called without arguments.
  if [[ "${1:-}" == "--help" || "${1:-}" == "-h" ]]; then
    printf '%s\n' \
      "Usage: get_github_repo_size [OPTIONS] {GITHUB_REPO_URLS...}" \
      "" \
      "Retrieve and display the sizes of GitHub repositories." \
      "The function will automatically use the GITHUB_TOKEN if it's set as an environment variable." \
      "" \
      "Options:" \
      "" \
      " -h, --help Display this help message." \
      "" \
      "Environment variables:" \
      "" \
      " GITHUB_TOKEN Use the provided GitHub token for authentication." \
      "" \
      "Exit codes:" \
      "" \
      " 0 Success" \
      " 1 Fail" \
      ""
    return
  fi

  local _url _owner _repo _domain _target _api_url
  local _response _message _size _human
  local _err=0

  # curl's arguments live in an array so the Authorization header stays one
  # word; a flat string would be split on its spaces when expanded.
  # NOTE: the token is visible in curl's argument list (e.g. to `ps`).
  local -a _curl_args=(-sS)
  if [[ -n "${GITHUB_TOKEN:-}" ]]; then
    _curl_args+=(-H "Authorization: Bearer ${GITHUB_TOKEN}")
  fi

  for _url in "$@"; do
    _owner="$(basename -- "$(dirname -- "${_url}")")"
    _repo="$(basename -- "${_url}")"
    _repo="${_repo%.git}" # accept clone URLs such as .../linux.git
    _domain="$(basename -- "$(dirname -- "$(dirname -- "${_url}")")")"
    _target="${_domain}/${_owner}/${_repo}"
    _api_url="https://api.${_domain}/repos/${_owner}/${_repo}"

    # Declaration and assignment are separate so curl's exit status is not
    # masked by `local`; a failed or empty transfer is reported right here.
    if ! _response="$(curl "${_curl_args[@]}" "${_api_url}")" \
      || [[ -z "${_response}" ]]; then
      printf 'error %s Failed to retrieve information about the GitHub repository: %s\n' \
        "${_target}" "dns or http connection error" >&2
      _err=1
      continue
    fi

    # A non-null, non-false "message" field marks an API error response.
    if _message="$(jq -e -r '.message' <<< "${_response}")"; then
      printf 'error %s Failed to retrieve information about the GitHub repository: %s\n' \
        "${_target}" "${_message:-dns or http connection error}" >&2
      _err=1
      continue
    fi

    # Only a plain non-negative integer is handed to numfmt; null, empty or
    # otherwise malformed values are reported as invalid size data.
    _size="$(jq -r '.size' <<< "${_response}")" || _size=""
    if [[ "${_size}" =~ ^[0-9]+$ ]] \
      && _human="$(numfmt --to=iec --from-unit=1024 <<< "${_size}")"; then
      printf '%s %s\n' "${_human}" "${_target}"
    else
      printf 'error %s Invalid size data retrieved.\n' "${_target}" >&2
      _err=1
    fi
  done

  return "${_err}"
}
Example working usage:
$ get_github_repo_size "https://github.com/git/git" "https://github.com/danielmiessler/SecLists"
Expected output (standard output only):
229M github.com/git/git
916M github.com/danielmiessler/SecLists
Example failing usage:
$ get_github_repo_size "https://github.com/gitx/git" "https://github.com/git/gitx" "https://mygithub.net/git/git"
Expected output (standard error only):
error github.com/gitx/git Failed to retrieve information about the GitHub repository: Not Found
error github.com/git/gitx Failed to retrieve information about the GitHub repository: Not Found
curl: (6) Could not resolve host: api.mygithub.net
error mygithub.net/git/git Failed to retrieve information about the GitHub repository: dns or http connection error
I've developed a tool for checking GitHub repository sizes by leveraging the GitHub API.
You can find the tool here:
GitHub Repository Size Checker
I guess the size is in KBs?