Last active
August 17, 2023 17:34
-
-
Save zambonin/1d29a5613a579da08965fcab84f33780 to your computer and use it in GitHub Desktop.
Downloads all attachments from all issues of a GitLab project.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env sh | |
# shellcheck disable=SC2086 | |
# A POSIX-compliant shell script that downloads all uploaded attachments from | |
# GitLab issue descriptions and comments for a single project, following the | |
# longstanding open issue [1]. It reads several environment variables to send | |
# the requests correctly via the GitLab API. | |
# | |
# * `GITLAB_INSTANCE`: the host of the target GitLab instance; | |
# * `PROJECT_ID`: the unique project ID for the instance; | |
# * `PRIVATE_TOKEN`: a personal access token [2] with enough permissions; | |
# * `GITLAB_COOKIE`: the `_gitlab_session` cookie; | |
# * `SCRAPE_NOTES`: optionally, set to 1 to also process comments. | |
# | |
# For instance, if a file `.env` is configured with the variables above, it can | |
# be used with the command below. | |
# | |
# $ env $(cat .env | xargs) sh /download-attachments-from-issues-gitlab.sh | |
# | |
# Dependencies: curl 7.84+ (`--parallel`, `--output-dir`, `%header{}` in `--write-out`).
# | |
# [1] https://gitlab.com/gitlab-org/gitlab/-/issues/24155 | |
# [2] https://docs.gitlab.com/ee/user/profile/personal_access_tokens.html | |
# Wrapper around curl for authenticated requests.
# Globals:   PRIVATE_TOKEN (read) - sent in the `PRIVATE-TOKEN` request header.
# Arguments: any further curl options and URLs; forwarded unchanged.
# Outputs:   whatever curl writes to stdout (progress output is silenced).
# Returns:   curl's exit status.
# NOTE(review): `--insecure` turns off TLS certificate verification while the
# token is being sent; presumably kept for self-signed instances - confirm.
our_curl() {
  curl --silent --insecure --header "PRIVATE-TOKEN: ${PRIVATE_TOKEN}" "$@"
}
# Validate the mandatory settings before any request is made. Every variable
# that is unset or empty is named on stderr so the failure is not silent; the
# exit status stays 1.
MISSING_VARS=""
[ -n "$GITLAB_INSTANCE" ] || MISSING_VARS="${MISSING_VARS} GITLAB_INSTANCE"
[ -n "$PROJECT_ID" ] || MISSING_VARS="${MISSING_VARS} PROJECT_ID"
[ -n "$PRIVATE_TOKEN" ] || MISSING_VARS="${MISSING_VARS} PRIVATE_TOKEN"
[ -n "$GITLAB_COOKIE" ] || MISSING_VARS="${MISSING_VARS} GITLAB_COOKIE"
if [ -n "$MISSING_VARS" ] ; then
  echo "Missing required environment variable(s):${MISSING_VARS}" >&2
  exit 1
fi
# Reads text on stdin and prints every `/uploads/<32 hex digits>/<name>)`
# reference, one per line. The closing parenthesis is kept; it is used as a
# field separator when the download links are built later.
extract_upload_paths() {
  grep -Eo '/uploads/[0-9a-f]{32}/[^)]*\)'
}

# Reads an issues API response on stdin and prints the `notes` link of every
# issue whose preceding `user_notes_count` is greater than zero.
extract_notes_urls() {
  awk 'BEGIN { RS = ","; FS = "\"" }
    {
      # Remember the most recent comment count ...
      if ($2 == "user_notes_count")
        len = split($0, b, ":")
      # ... and keep the notes link that follows it when the count is > 0.
      if ($2 == "notes" && b[len] > 0)
        n[$(NF - 1)] = b[len]
    }
    END { for (a in n) print a }'
}

# Reads a response on stdin whose last line is the `Link` header appended by
# `--write-out`, and prints the URL tagged rel="next" (nothing if absent).
# Only the last line is inspected so that issue text containing the word
# "next" cannot be mistaken for a pagination link.
extract_next_page() {
  tail -n 1 | awk -v RS="," -v FS="[><]" '/rel="next"/ { print $2; exit }'
}

API_LINK="https://${GITLAB_INSTANCE}/api/v4/projects/${PROJECT_ID}"
FILE_PATHS=""
CURRENT_PAGE="$API_LINK/issues?per_page=100"

# Walk the issue list page by page until no next page is advertised.
while [ -n "$CURRENT_PAGE" ] ; do
  # The response body is followed by a newline and the `Link` header value.
  CONTENT="$(our_curl --write-out "\n%header{link}" "$CURRENT_PAGE")"

  # printf instead of echo: the JSON must reach the filters unmodified, and
  # some `echo` implementations interpret backslash escapes.
  FILES="$(printf '%s\n' "$CONTENT" | extract_upload_paths)"
  FILE_PATHS="$(printf "%s\n%s" "$FILE_PATHS" "$FILES")"

  if [ "$SCRAPE_NOTES" = 1 ] ; then
    NOTES="$(printf '%s\n' "$CONTENT" | extract_notes_urls)"
    # Skip the request entirely when no issue on this page has comments;
    # curl would otherwise be invoked without any URL.
    if [ -n "$NOTES" ] ; then
      # $NOTES is deliberately unquoted: one URL per word for --parallel.
      # NOTE(review): only the first page of each notes endpoint is read;
      # issues with many comments may need pagination here too - confirm.
      NOTES_FILES="$(our_curl --parallel $NOTES | extract_upload_paths)"
      FILE_PATHS="$(printf "%s\n%s" "$FILE_PATHS" "$NOTES_FILES")"
    fi
  fi

  CURRENT_PAGE="$(printf '%s\n' "$CONTENT" | extract_next_page)"
done
# Nothing to do when no upload reference was collected.
if [ -z "$FILE_PATHS" ] ; then
  exit
fi

# Downloads land in a directory named after the instance and project.
DIRNAME="files-issues-${GITLAB_INSTANCE}-${PROJECT_ID}"
mkdir -p "$DIRNAME"

# The upload paths are appended to the project's `web_url` to form the
# download links, so fail loudly if it cannot be read from the project API.
WEB_URL="$(our_curl "$API_LINK" \
  | awk '/web_url/ {print $(NF - 1); exit}' RS="," FS="\"")"
if [ -z "$WEB_URL" ] ; then
  echo "Could not determine the web URL of project ${PROJECT_ID}" >&2
  exit 1
fi

# Turn each `/uploads/<hash>/<name>)` path into the curl argument triple
# `<url> -o <hash>-<name>`; prefixing the hash keeps equally named uploads
# apart. Duplicates are removed by `sort -u`.
FILE_LINKS="$(printf '%s\n' "$FILE_PATHS" \
  | awk -v url="$WEB_URL" '
      NF > 0 {
        # Blank the field after the closing parenthesis; this drops the
        # parenthesis and leaves a trailing space before "-o".
        $NF = ""
        len = split($0, a, "/")
        print url $0 "-o " a[len - 1] "-" a[len]
      }' FS=")" \
  | sort -u)"

# Keep only the links whose target file is not already in $DIRNAME. A plain
# read loop avoids process substitution (not available in POSIX sh) and
# compares names literally instead of as regular expressions.
TO_DOWNLOAD="$(printf '%s\n' "$FILE_LINKS" \
  | while read -r LINK FLAG NAME ; do
      [ -n "$NAME" ] || continue
      [ -f "${DIRNAME}/${NAME}" ] || printf '%s %s %s\n' "$LINK" "$FLAG" "$NAME"
    done)"
if [ -z "$TO_DOWNLOAD" ] ; then
  exit
fi

# $TO_DOWNLOAD is deliberately unquoted so that it splits into separate curl
# arguments; globbing is switched off so `?`, `*` or `[` in a URL stay literal.
set -f
if curl --insecure --cookie "_gitlab_session=${GITLAB_COOKIE}" --parallel \
    --output-dir "$DIRNAME" $TO_DOWNLOAD ; then
  echo "Files downloaded to $DIRNAME"
else
  STATUS=$?
  echo "curl exited with status ${STATUS}; files in $DIRNAME may be incomplete" >&2
  exit "$STATUS"
fi
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment