-
-
Save renoirb/e9ae61ea30d5c554c8318cad5cff13a5 to your computer and use it in GitHub Desktop.
Archive news from WebPlatformDaily
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash
#
# Archive news releases from webplatformdaily.org.
#
# For every date listed in RELEASES, the release page is requested with the
# session cookie configured below and saved as
#   archive/webplatformdaily.org/releases/<date>/cache.html
# relative to the current working directory. Releases that already have a
# non-empty cache.html are skipped, so the script can be re-run safely.
#
# Exit status: 0 when every release was archived or already present,
#              1 when at least one download failed.

set -euo pipefail

## STEP 1
# Make sure you have the following software on your machine
#
# curl:
#   To make HTTP requests and write the received HTML to a file
#
# gdate:
#   On macOS, GNU date (available as "gdate") is needed to generate a UNIX
#   timestamp in milliseconds.
#   On Linux, the regular date command works: date +%s%3N

## STEP 2
# Get the list of date stamps so we can iterate
# NOTICE: the command below requires curl and jq
#   curl -s https://webplatformdaily.org/json/releases-2016.json | jq -r '.releases[][0]'
# Format the RELEASES array as shown below.
RELEASES=(
  2016-12-16
  2016-12-15
  2016-01-12
)

## STEP 3
# Go to webplatformdaily.org and log in with your account.
# Open the Inspector and copy the token=... value from the Referer of the
# first request made after logging in.
# Adjust WPD_UID below to represent your email address. Encode @ as %40
# Adjust COOKIE_STRING with the cookie jar your browser currently sends.
TOKEN="..."
# Deliberately not named UID: bash defines UID as a readonly variable holding
# the numeric user id, so assigning to it fails.
WPD_UID="self%40example.org"
COOKIE_STRING='__cfduid=a; connect.sid=b; _gat=c; _ga=d'
UA="Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.95 Safari/537.36"
L="en,en-US;q=0.8"

# NOTE(review): REF and CT are not sent by the request below (it sends no
# referer header and uses "accept: */*"). They are kept as a record of the
# browser session values; confirm whether they should be sent or removed.
# shellcheck disable=SC2034
REF="https://webplatformdaily.org/?token=${TOKEN}&uid=${WPD_UID}"
# shellcheck disable=SC2034
CT="text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8"

# Directory (relative to the working directory) that receives the archive.
ARCHIVE_ROOT="archive/webplatformdaily.org/releases"

# Safety cap on the number of releases handled in a single run.
MAX=9999

#######################################
# Print the current UNIX time in milliseconds.
# Uses gdate when it is on PATH, otherwise date. If the chosen command does
# not produce a purely numeric value for %3N, falls back to whole seconds
# padded with "000".
# Outputs: the timestamp on stdout
#######################################
now_ms() {
  local date_bin=date
  local stamp

  if command -v gdate >/dev/null 2>&1; then
    date_bin=gdate
  fi

  stamp=$("${date_bin}" +%s%3N)
  if [[ ! "${stamp}" =~ ^[0-9]+$ ]]; then
    stamp="$(date +%s)000"
  fi
  printf '%s\n' "${stamp}"
}

#######################################
# Download one release page unless it is already archived.
# Globals:   ARCHIVE_ROOT, L, UA, COOKIE_STRING (read)
# Arguments: $1 - release date stamp, e.g. 2016-12-16
#            $2 - URL to request
# Outputs:   progress on stdout, failures on stderr
# Returns:   0 when the page is archived (now or previously), 1 on failure
#######################################
fetch_release() {
  local release=$1
  local url=$2
  local dir="${ARCHIVE_ROOT}/${release}"
  local dest="${dir}/cache.html"
  local partial="${dest}.part"

  # Test for the file, not the directory: a directory left behind by a
  # failed download must not prevent a retry.
  if [[ -s "${dest}" ]]; then
    echo "File ${dest} already downloaded"
    return 0
  fi

  mkdir -p -- "${dir}"

  # No explicit accept-encoding header: --compressed makes curl request only
  # the encodings it is able to decode.
  # --fail keeps HTTP error pages from being stored as archived content.
  # Write to a .part file and rename on success so an interrupted transfer
  # never looks like a finished download.
  if curl -s -S --fail "${url}" \
    -H 'pragma: no-cache' \
    -H 'x-requested-with: XMLHttpRequest' \
    -H "accept-language: ${L}" \
    -H "user-agent: ${UA}" \
    -H 'accept: */*' \
    -H 'cache-control: no-cache' \
    -H 'authority: webplatformdaily.org' \
    -H "cookie: ${COOKIE_STRING}" \
    --compressed \
    -o "${partial}"; then
    mv -- "${partial}" "${dest}"
  else
    rm -f -- "${partial}"
    echo "Failed to download ${url}" >&2
    return 1
  fi
}

#######################################
# Walk RELEASES and archive each one, stopping after MAX entries.
# Globals: RELEASES, MAX (read)
# Returns: 0 when nothing failed, 1 otherwise
#######################################
main() {
  local iter=0
  local failures=0
  local release now url

  for release in "${RELEASES[@]}"; do
    if ((iter >= MAX)); then
      echo "Iter is greater or equal to ${MAX}"
      break
    fi

    # The site appends a millisecond timestamp to its own requests, e.g.
    #   ?t=1485745262580
    now=$(now_ms)
    url="https://webplatformdaily.org/releases/${release}?t=${now}"

    if ! fetch_release "${release}" "${url}"; then
      failures=$((failures + 1))
    fi

    iter=$((iter + 1))
    echo "Honk ${iter} at ${url}"
  done

  if ((failures > 0)); then
    echo "${failures} release(s) could not be downloaded" >&2
    return 1
  fi
  return 0
}

main "$@"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment