Skip to content

Instantly share code, notes, and snippets.

@sabatale
Last active April 18, 2022 20:01
Show Gist options
  • Save sabatale/5eb36733446dfa2adb34e05605d4ef0e to your computer and use it in GitHub Desktop.
Save sabatale/5eb36733446dfa2adb34e05605d4ef0e to your computer and use it in GitHub Desktop.
Wayback Machine - Automatic save with Google Apps Script
/**
 * Logs a Wayback Machine snapshot URL for a page, requesting a capture first
 * when the availability API reports no snapshot yet.
 *
 * @param {string=} url Page to look up. Any non-string argument (for example
 *     the event object Apps Script hands to trigger-run functions) or an empty
 *     string falls back to 'https://www.google.com/'.
 * @return {?string} The `url` field of the snapshot record that was logged, or
 *     null when a lookup failed or no snapshot could be found.
 */
function wayback(url) {
  var target = (typeof url === 'string' && url !== '') ? url : 'https://www.google.com/';
  // Encode the target so its own '?', '&' or '#' cannot corrupt the query string.
  var availabilityUrl = 'https://archive.org/wayback/available?url=' + encodeURIComponent(target);

  // Does it already exist?
  var archive = waybackQueryAvailability_(availabilityUrl);
  if (!archive) {
    console.log("Failed while checking wayback.");
    return null;
  }
  var existing = waybackAvailableSnapshot_(archive);
  if (existing) {
    console.log(existing.url); // Yes? Here is the saved page URL.
    return existing.url;
  }

  // No? Capture the page.
  // Author's note: the "+ url + '/?capture_all=on'" part of the link is not
  // strictly necessary because the target is also sent in the form payload.
  var archiveLink = "https://web.archive.org/save/" + target + '/?capture_all=on';
  var options = { // Do not specify the Content-Type.
    method: 'POST',
    payload: {
      'url': target
    }
  };
  UrlFetchApp.fetch(archiveLink, options);
  Utilities.sleep(8000); // Not the best. Wait 8sec for Wayback to save the page.

  var savedArchive = waybackQueryAvailability_(availabilityUrl);
  if (!savedArchive) {
    console.log("Failed while checking saved wayback.");
    return null;
  }
  var fresh = waybackAvailableSnapshot_(savedArchive);
  if (fresh) {
    console.log(fresh.url); // Here is the saved page URL.
    return fresh.url;
  }
  console.log("No archive link found.");
  return null;
}

/**
 * Fetches an availability-API URL through getContent_ and parses the JSON body.
 *
 * @param {string} availabilityUrl Fully built availability query URL.
 * @return {?Object} The parsed response, or null when no body was returned.
 */
function waybackQueryAvailability_(availabilityUrl) {
  var body = getContent_(availabilityUrl, {});
  // getContent_ yields no string for non-200 responses; parsing that would
  // throw instead of letting the caller report the failure.
  if (typeof body !== 'string' || body === '') {
    return null;
  }
  return JSON.parse(body);
}

/**
 * Picks the closest snapshot record out of a parsed availability response.
 *
 * @param {Object} archive Parsed availability response.
 * @return {?Object} The `closest` record when it is flagged available, else null.
 */
function waybackAvailableSnapshot_(archive) {
  var snapshots = archive.archived_snapshots;
  var closest = snapshots && snapshots.closest;
  return (closest && closest.available) ? closest : null;
}
/**
 * Fetches a URL with UrlFetchApp and returns the response body only when the
 * server answered with HTTP 200.
 *
 * `muteHttpExceptions` defaults to true so that failure status codes reach the
 * status check below instead of raising from UrlFetchApp.fetch; a caller can
 * still override it through `options`.
 *
 * @param {string} url Address to fetch.
 * @param {Object=} options UrlFetchApp.fetch parameters; copied, never mutated.
 * @return {?string} The response text for a 200 response, otherwise null, so
 *     callers can test the result for truthiness.
 */
function getContent_(url, options) {
  var fetchOptions = { muteHttpExceptions: true };
  var supplied = options || {};
  for (var key in supplied) {
    if (Object.prototype.hasOwnProperty.call(supplied, key)) {
      fetchOptions[key] = supplied[key]; // Caller-provided values win over the default.
    }
  }

  var response = UrlFetchApp.fetch(url, fetchOptions);
  if (response.getResponseCode() === 200) {
    return response.getContentText();
  }
  return null;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment