Skip to content

Instantly share code, notes, and snippets.

@simonbru
Last active July 7, 2016 14:47
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save simonbru/b0cbe032f9711d1cfce41117dfd722a6 to your computer and use it in GitHub Desktop.
Save simonbru/b0cbe032f9711d1cfce41117dfd722a6 to your computer and use it in GitHub Desktop.
Script using curl and jq to back up reddit saved links/comments
#!/bin/bash
# This script requires curl and jq>=1.5

# Strict mode: abort on unhandled command failure (-e) and on use of an unset
# variable (-u). It is enabled with `set` instead of on the shebang line so
# that it also applies when the script is started as `bash <script>`.
set -eu

# Run every command below in the C locale.
export LC_ALL=C

# Command prefix used for HTTP requests. fetch_saved expands it unquoted so it
# word-splits into the program name and its options; keep it a plain string.
curl='curl --user-agent RedditBackup -L'
#######################################
# Download every page of the saved-links feed and merge them into one JSON
# array of entries.
# Globals:
#   FEED_URL (read) - feed URL; loaded from ./credentials.conf when unset
#   curl     (read) - command string used to perform the HTTP requests
# Arguments:
#   $1 - output file (relative or absolute path) receiving the merged array
# Outputs:
#   Writes each page to saved/<n>.json, the merged result to $1, and
#   diagnostics to stderr.
# Exits:
#   1 when FEED_URL is missing or a response has no .data.children.
#######################################
fetch_saved() {
  # Try to load credentials.conf if FEED_URL is not set. The explicit "./"
  # makes `source` read the file that was just tested instead of searching PATH.
  if [[ ! -v FEED_URL && -e credentials.conf ]]; then
    # shellcheck source=/dev/null
    source ./credentials.conf
  fi
  if [[ ! -v FEED_URL ]]; then
    echo "Error: FEED_URL variable must be set in environment or in credentials.conf" >&2
    exit 1
  fi

  local output="$1"
  local i=1
  local after=""
  local fname
  # Pages fetched by THIS run, in order. Merging only these keeps stale
  # saved/*.json files left over from an earlier, longer run out of the result.
  local -a pages=()

  mkdir -p saved
  while true; do
    fname="saved/${i}.json"
    # shellcheck disable=SC2086 # $curl is deliberately word-split into command + options
    $curl "${FEED_URL}&limit=100&after=${after}" -o "$fname"

    # Ensure that .data.children is there
    if ! jq -e '.data | has("children")' <"$fname" >/dev/null; then
      echo "Error when retrieving saved links:" >&2
      # Pretty-print the response; fall back to the raw bytes if it is not JSON.
      jq . <"$fname" >&2 || cat -- "$fname" >&2
      exit 1
    fi
    pages+=("$fname")

    # The name of the last entry is the cursor for the next page. With -e, jq
    # exits non-zero when that value is null, i.e. the page had no entries.
    if ! after="$(jq -r -e '.data.children[-1].data.name' <"$fname")"; then
      break
    fi
    i=$((i + 1))
  done

  # Merge entries in one file
  jq -s '[ .[] | .data.children[] ]' "${pages[@]}" >"$output"
}
#######################################
# Print the saved entries of a merged JSON file as CSV, one row per entry.
# Columns: name, subreddit, created_utc rendered by jq's `todate`,
# title (or link_title when title is missing), body (or "" when missing).
# Arguments:
#   $1 - path of the JSON file to read
# Outputs:
#   CSV rows on stdout.
#######################################
export_csv() {
  local -r source_json="$1"
  jq -r \
    '.[].data | [.name, .subreddit, (.created_utc | todate), .title // .link_title, .body // ""] | @csv' \
    "$source_json"
}
#######################################
# Print a reduced copy of a merged JSON file that keeps only a handful of
# fields per entry. Entries whose kind is "t3" are emitted with type "post";
# every other entry is emitted with type "comment". `edited` is converted with
# `todate` and becomes null when that conversion fails.
# Arguments:
#   $1 - path of the JSON file to read
# Outputs:
#   The trimmed JSON array on stdout.
#######################################
trim_saved() {
  local -r source_json="$1"
  local filter
  filter='[ .[] |
if .kind == "t3" then (.data | {
type: "post",
name, subreddit, score, num_comments,
created_utc_iso: (.created_utc | todate),
edited: (.edited | try todate catch null),
title, permalink, url
}) else (.data | {
type: "comment",
name, subreddit, score,
created_utc_iso: (.created_utc | todate),
edited: (.edited | try todate catch null),
link_title, body
}) end
]'
  jq "$filter" "$source_json"
}
#######################################
# Print the command-line help.
# Outputs:
#   Five lines on stdout: a synopsis followed by one line per option.
#   Callers redirect to stderr when reporting a usage error.
#######################################
usage() {
  # The synopsis lists every option accepted by the getopts string, including -t.
  printf '%s\n' \
    "Usage: $0 -s <output_file> | -c <input_json_file> | -t <input_json_file> | -h" \
    "-s <output_file>: Fetch reddit saved links and save JSON output" \
    "-c <input_json_file>: Convert saved JSON into trimmed CSV" \
    "-t <input_json_file>: Trim saved JSON to only keep essential fields (e.g. for readability)" \
    "-h: Show this help"
}
# Command-line dispatch: each recognised option runs its action immediately,
# in the order given. An unknown option or a missing option argument prints
# the help to stderr and exits 1.
action_taken=0
while getopts "s:c:t:h" opt; do
  case "$opt" in
    s)
      fetch_saved "$OPTARG"
      action_taken=1
      ;;
    c)
      export_csv "$OPTARG"
      action_taken=1
      ;;
    t)
      trim_saved "$OPTARG"
      action_taken=1
      ;;
    h)
      usage
      action_taken=1
      ;;
    \?)
      usage >&2
      exit 1
      ;;
  esac
done

# Nothing ran: either no arguments at all, or only non-option arguments that
# getopts does not consume. Treat both as a usage error instead of silently
# exiting with success.
if [[ "$action_taken" == 0 ]]; then
  usage >&2
  exit 1
fi
# Saved links feed URL; it can be found in reddit preferences.
# fetch_saved appends "&limit=...&after=..." to this value, so it must already
# contain a query string. The feed/user values below are placeholders.
# NOTE(review): presumably this line belongs in credentials.conf, which
# fetch_saved sources when FEED_URL is not set in the environment - confirm.
FEED_URL='https://www.reddit.com/saved.json?feed=xxxxx&user=yyyyy'
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment