Last active
February 10, 2021 19:26
-
-
Save ruthtillman/fa7562989f299e4904c7fb0448d1fc83 to your computer and use it in GitHub Desktop.
A Python script that submits a list of URLs to the Internet Archive's Wayback Machine for archiving. It writes a dated results file recording whether each URL was saved successfully, and sleeps 0.5 seconds after each request.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#! /usr/bin/env python
# With gratitude to W. Caleb McDaniel and based on https://gist.github.com/wcaleb/218d210687546c8efd0a
import requests, datetime, time | |
from datetime import date | |
def backupURLS(urls, delay=0.5, timeout=120):
    """Ask the Internet Archive to save each URL and log the outcome.

    For every URL a GET request is sent to ``http://web.archive.org/save/<url>``
    and one line describing the outcome is appended to ``<today>-results.txt``
    (ISO date, created in the current working directory).

    Args:
        urls: Iterable of URL strings to submit.
        delay: Seconds to sleep after each request, to avoid hammering the
            archive. Defaults to 0.5.
        timeout: Seconds to wait for the archive to respond before giving up
            on a URL. Defaults to 120.

    Returns:
        None. Results are reported only through the results file.
    """
    results = str(date.today()) + '-results.txt'
    base_url = 'http://web.archive.org'
    for url in urls:
        try:
            r = requests.get(base_url + '/save/' + url, timeout=timeout)
        except requests.exceptions.RequestException as err:
            # A connection failure or timeout on one URL should be recorded,
            # not abort the rest of the batch.
            line = 'Error sending request: ' + str(err) + ' URL: ' + url + '\n'
        else:
            if r.status_code == requests.codes.ok:
                line = 'Success: archived url ' + url + '\n'
            else:
                line = ('Error in response: ' + str(r.status_code)
                        + ' URL: ' + url + '\n')
        # Append (and close) per URL so completed results survive an
        # interrupted run.
        with open(results, 'a') as resultsfile:
            resultsfile.write(line)
        time.sleep(delay)
# REPLACE THESE WITH YOUR REAL URLS AS A LIST.
urls = ['http://example.org', 'http://example2.org']

# Only contact the Internet Archive when this file is run as a script, so that
# importing it (e.g. to reuse backupURLS) does not trigger network requests.
if __name__ == '__main__':
    backupURLS(urls)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment