Skip to content

Instantly share code, notes, and snippets.

@lg
Last active April 22, 2024 04:14
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save lg/5a0e82742936659c5c36cd96f45c505b to your computer and use it in GitHub Desktop.
Save lg/5a0e82742936659c5c36cd96f45c505b to your computer and use it in GitHub Desktop.
Download channels from Discord and save as a webarchive for offline reading
#!/bin/bash
#
# Make sure to have a .env with `DISCORD_TOKEN=your_token_here` in the same directory as this script.
# Get your token as per: https://github.com/Tyrrrz/DiscordChatExporter/blob/master/.docs/Token-and-IDs.md
# Run DiscordChatExporter from its docker image.
# The container is started interactively with a TTY, receives its environment
# from ./.env, and gets ./out (relative to the current directory) mounted at
# /out.
# Arguments: forwarded unchanged to the exporter CLI.
# Returns:   the exit status of `docker run`.
dce() {
  local host_out
  host_out="$(pwd)/out"
  docker run -it --rm \
    --env-file .env \
    --volume "${host_out}:/out" \
    tyrrrz/discordchatexporter:latest "$@"
}
# Run ffmpeg from the linuxserver docker image instead of a local install.
# ./out (relative to the current directory) is mounted at /out, so input and
# output paths passed to this function must be given as /out/... paths.
# Arguments: forwarded unchanged to the image's entrypoint.
# Returns:   the exit status of `docker run`.
ffmpeg() {
  local -a run_args=(
    run --rm
    --volume "$(pwd)/out:/out"
    lscr.io/linuxserver/ffmpeg:latest
  )
  docker "${run_args[@]}" "$@"
}
# Prerequisites: a .env file in the current directory, plus the docker and
# webarchiver commands on PATH. Bail out with status 1 if any one is missing.
prereqs_missing=0
[ -e .env ] || prereqs_missing=1
command -v docker >/dev/null 2>&1 || prereqs_missing=1
command -v webarchiver >/dev/null 2>&1 || prereqs_missing=1
if [ "$prereqs_missing" -ne 0 ]; then
  echo "You need a .env file with DISCORD_TOKEN in it, and you must have docker + webarchiver installed"
  exit 1
fi
# Ask the user what to export. The guild and channel listings are printed by
# the exporter first so the ids can be copied from its output.
printf '%s\n' "Getting guilds"
dce guilds

printf '%s\n' "Which guild id would you like to download from?"
read -r guild_id
dce channels --guild "$guild_id"

printf '%s\n' "Which channel id?"
read -r channel_id

printf '%s\n' "Download from what timestamp/messageid (0 for beginning)?"
read -r start_id

# The name becomes the base name of the generated files under ./out.
printf '%s\n' "What name should we assign to the download (no spaces)?"
read -r name
# Download
# Export the selected channel as dark-themed HTML, downloading referenced
# media into the "media" directory and reusing files that are already there.
# The later steps read the result from out/$name.html, so stop here when the
# exporter fails or that file was not produced, instead of running
# grep/sed/webarchiver against a missing file and still reporting "Completed".
# NOTE(review): "Pacific" does not look like a locale identifier; confirm the
# value accepted by --locale against the exporter's option reference.
if ! dce export --media --reuse-media --media-dir media --markdown True --format HtmlDark --locale Pacific \
  --channel "$channel_id" --after "$start_id" --output "$name.html" \
  || [[ ! -f "out/$name.html" ]]; then
  echo "Export failed: out/$name.html was not created" >&2
  exit 1
fi
echo "Converting videos to animated gifs" # Needed for webarchive for iOS/iPadOS

# Escape text so it is matched literally inside a sed basic regular
# expression that uses `|` as the s-command delimiter.
_sed_escape_pattern() {
  printf '%s' "$1" | sed 's/[][\.*^$|]/\\&/g'
}

# Escape text so it is inserted literally by the replacement half of a sed
# s-command that uses `|` as the delimiter (`&` and `\` are special there).
_sed_escape_replacement() {
  printf '%s' "$1" | sed 's/[\&|]/\\&/g'
}

#######################################
# Convert every mp4 referenced as src=media/... in an exported HTML file to
# an animated gif and swap the enclosing <video> element for an <img>.
# Globals:   errors (set to 1 when any ffmpeg conversion fails)
# Arguments: $1 - path of the HTML file on the host (e.g. out/NAME.html);
#                 media paths found in it are resolved under /out by ffmpeg.
# Outputs:   rewrites the HTML file in place.
# Returns:   0 (failures are reported through the errors global)
#######################################
convert_videos_to_gifs() {
  local html_file=$1
  local videos match mp4_rel gif_rel match_re gif_repl

  # Stop the match at a space, `>` or quote so that one match never spans
  # several attributes on the same line, and require a literal ".mp4".
  # Each distinct file is converted only once.
  videos=$(grep -oE 'src=media/[^" >]*\.mp4' "$html_file" | sort -u)
  [[ -n "$videos" ]] || return 0

  while IFS= read -r match; do
    mp4_rel=${match#src=}
    gif_rel="${mp4_rel%.*}.gif"
    if ! ffmpeg -nostdin -hide_banner -loglevel error -y -i "/out/$mp4_rel" \
      -vf "scale=320:-1,fps=10" "/out/$gif_rel"; then
      errors=1
    fi
    # File names are data, not regex: escape them before building the sed
    # program so characters such as `.`, `[`, `*` or `&` cannot change it.
    match_re=$(_sed_escape_pattern "$match")
    gif_repl=$(_sed_escape_replacement "$gif_rel")
    # NOTE(review): `.*` is greedy, so if a single line holds several <video>
    # elements everything between the first <video and the last </video> is
    # replaced; confirm against the exporter's actual line layout.
    sed -i "" "s|<video.*${match_re}.*</video>|<img class=chatlog__embed-generic-gifv src=\"${gif_repl}\" />|g" "$html_file"
  done <<< "$videos"
  return 0
}

errors=0
convert_videos_to_gifs "out/$name.html"
# Remove lazy loading and generate webarchive files and remove html files
echo "Fixing image urls and converting to webarchive"
# A failed rewrite leaves the page only partly prepared, so record it; the
# html is then kept below instead of being deleted after a bad conversion.
# Disable lazyloading so webarchiver downloads everything
sed -i "" 's/loading="*lazy"*//g' "out/$name.html" || errors=1
# dce has a bug when % is in the url: encode each % inside a src value as %25.
# This is done in two passes through the placeholder @!@ because replacing %
# with %25 directly would keep matching its own output and loop forever.
sed -i "" -e ':a' -e 's/\(src=[^ >]*\)%/\1@!@25/g; t a' "out/$name.html" || errors=1
sed -i "" -e ':a' -e 's/\(src=[^ >]*\)@!@/\1%/g; t a' "out/$name.html" || errors=1
echo "Converting to webarchive"
if ! webarchiver -url "out/$name.html" -output "./out/$name.webarchive"; then errors=1; fi
# Use a real if/else: with `test && rm || echo`, a failing rm would also print
# the "not deleting" message even though no earlier step had failed.
if [[ "${errors:-0}" -eq 0 ]]; then
  rm -- "out/$name.html"
else
  echo "There were errors, not deleting html file out/$name.html"
fi
echo "Completed"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment