Skip to content

Instantly share code, notes, and snippets.

@zambonin
Last active August 17, 2023 17:34
Show Gist options
  • Save zambonin/1d29a5613a579da08965fcab84f33780 to your computer and use it in GitHub Desktop.
Save zambonin/1d29a5613a579da08965fcab84f33780 to your computer and use it in GitHub Desktop.
Downloads all attachments from all issues of a GitLab project.
#!/usr/bin/env sh
# shellcheck disable=SC2086
# A POSIX-compliant shell script that downloads all uploaded attachments from
# GitLab issue descriptions and comments for a single project, following the
# longstanding open issue [1]. It reads several environment variables to send
# the requests correctly via the GitLab API.
#
# * `GITLAB_INSTANCE`: the host of the target GitLab instance;
# * `PROJECT_ID`: the unique project ID for the instance;
# * `PRIVATE_TOKEN`: a personal access token [2] with enough permissions;
# * `GITLAB_COOKIE`: the `_gitlab_session` cookie;
# * `SCRAPE_NOTES`: optionally, set to 1 to also process comments.
#
# For instance, if a file `.env` is configured with the variables above, it can
# be used with the command below.
#
# $ env $(cat .env | xargs) sh ./download-attachments-from-issues-gitlab.sh
#
# Dependencies: curl 7.84+ (`%header{...}` in `--write-out`, `--output-dir`).
#
# [1] https://gitlab.com/gitlab-org/gitlab/-/issues/24155
# [2] https://docs.gitlab.com/ee/user/profile/personal_access_tokens.html
# Wrapper around curl for authenticated requests.
# Globals:   PRIVATE_TOKEN (read) - sent in the PRIVATE-TOKEN request header
# Arguments: any further curl options and URLs, forwarded unchanged
# Outputs:   whatever curl writes to stdout
# Returns:   the exit status of curl
our_curl() {
  # Prepend the fixed options to the caller's arguments: --silent turns off
  # progress output, --insecure skips TLS certificate verification.
  set -- --silent --insecure --header "PRIVATE-TOKEN: ${PRIVATE_TOKEN}" "$@"
  curl "$@"
}
# Abort early when a mandatory environment variable is unset or empty, naming
# every missing variable on stderr so the failure is not silent.
MISSING_VARS=""
[ -n "$GITLAB_INSTANCE" ] || MISSING_VARS="$MISSING_VARS GITLAB_INSTANCE"
[ -n "$PROJECT_ID" ] || MISSING_VARS="$MISSING_VARS PROJECT_ID"
[ -n "$PRIVATE_TOKEN" ] || MISSING_VARS="$MISSING_VARS PRIVATE_TOKEN"
[ -n "$GITLAB_COOKIE" ] || MISSING_VARS="$MISSING_VARS GITLAB_COOKIE"
if [ -n "$MISSING_VARS" ] ; then
  printf 'error: required environment variable(s) not set:%s\n' \
    "$MISSING_VARS" >&2
  exit 1
fi
# Walk every page of the project's issue list and accumulate in FILE_PATHS
# each upload reference of the form "/uploads/<32 hex chars>/<name>)" found in
# the responses. With SCRAPE_NOTES=1 the comments of issues that have any are
# fetched and scanned as well.
API_LINK="https://${GITLAB_INSTANCE}/api/v4/projects/${PROJECT_ID}"
# ERE equivalent of the PCRE "/uploads/[0-9a-f]{32}/.*?\)": the match ends at
# the first closing parenthesis, so GNU-only `grep -P` is not required.
UPLOAD_PATTERN='/uploads/[0-9a-f]{32}/[^)]*\)'
FILE_PATHS=""
CURRENT_PAGE="$API_LINK/issues?per_page=100"
while [ -n "$CURRENT_PAGE" ] ; do
  # The response body is followed by a newline and the value of the `Link`
  # response header, which names the next page when there is one.
  CONTENT="$(our_curl --write-out "\n%header{link}" "$CURRENT_PAGE")"
  # printf rather than echo: some sh implementations expand the backslash
  # escapes that JSON bodies contain.
  FILES="$(printf '%s\n' "$CONTENT" | grep -Eo "$UPLOAD_PATTERN")"
  FILE_PATHS="$(printf "%s\n%s" "$FILE_PATHS" "$FILES")"
  if [ "$SCRAPE_NOTES" = 1 ] ; then
    # Split the body on commas; remember the most recent "user_notes_count"
    # value and, on reaching a "notes" field, keep its URL when that count
    # is positive. The result is one notes URL per line.
    NOTES="$(printf '%s\n' "$CONTENT" \
      | awk 'BEGIN { RS = ","; FS = "\"" }
        {
          if ($2 == "user_notes_count")
            len = split($0, b, ":")
          if ($2 == "notes" && b[len] > 0)
            n[$(NF - 1)] = b[len]
        }
        END { for (a in n) print a }')"
    # Skip the request when no issue on this page has comments; curl would
    # otherwise be invoked without any URL.
    if [ -n "$NOTES" ] ; then
      # $NOTES is intentionally unquoted: each URL must be its own argument.
      NOTES_FILES="$(our_curl --parallel $NOTES \
        | grep -Eo "$UPLOAD_PATTERN")"
      FILE_PATHS="$(printf "%s\n%s" "$FILE_PATHS" "$NOTES_FILES")"
    fi
  fi
  # Match only the rel="next" entry of the Link header. A bare /next/ would
  # also hit any issue text containing that word and corrupt the URL; JSON
  # escapes its quotes, so the body cannot contain this exact string.
  CURRENT_PAGE="$(printf '%s\n' "$CONTENT" \
    | awk -v RS="," -F '[><]' '/rel="next"/ {print $2; exit}')"
done
# Download every collected upload that is not yet present in the output
# directory. Exits quietly when there is nothing to collect or to download.
if [ -z "$FILE_PATHS" ] ; then
  exit
fi
DIRNAME="files-issues-${GITLAB_INSTANCE}-${PROJECT_ID}"
mkdir -p "$DIRNAME"
# Download links are built on top of the project's `web_url` as reported by
# the API.
WEB_URL="$(our_curl "$API_LINK" \
  | awk '/web_url/ {print $(NF - 1); exit}' RS="," FS="\"")"
# Turn each "/uploads/<hash>/<name>)" match into the curl arguments
# "<web_url>/uploads/<hash>/<name> -o <hash>-<name>", one download per line.
# Blanking the last ")"-separated field drops the closing parenthesis and
# leaves the trailing space that separates the URL from "-o".
FILE_LINKS="$(printf '%s\n' "$FILE_PATHS" \
  | awk -v url="$WEB_URL" '
      NF > 0 {
        $NF = ""
        len = split($0, a, "/")
        print url $0 "-o " a[len - 1] "-" a[len]
      }' FS=")" \
  | sort -u)"
# Keep only the downloads whose target file does not exist yet. A plain read
# loop is used because process substitution is not available in POSIX sh,
# `find -printf` is GNU-only, and matching names with `grep` would treat them
# as regular expressions.
TO_DOWNLOAD="$(printf '%s\n' "$FILE_LINKS" \
  | while read -r URL FLAG NAME ; do
      if [ -n "$NAME" ] && [ ! -f "$DIRNAME/$NAME" ] ; then
        printf '%s %s %s\n' "$URL" "$FLAG" "$NAME"
      fi
    done)"
if [ -z "$TO_DOWNLOAD" ] ; then
  exit
fi
# $TO_DOWNLOAD is intentionally unquoted: it expands to a sequence of
# "<url> -o <name>" argument triples.
if ! curl --insecure --cookie "_gitlab_session=${GITLAB_COOKIE}" --parallel \
  --output-dir "$DIRNAME" $TO_DOWNLOAD ; then
  echo "curl reported a failure while downloading to $DIRNAME" >&2
  exit 1
fi
echo "Files downloaded to $DIRNAME"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment