Skip to content

Instantly share code, notes, and snippets.

@hughlilly
Created April 1, 2020 23:22
Show Gist options
  • Save hughlilly/660738c4e80ceaaa373b2fbefb40a232 to your computer and use it in GitHub Desktop.
Save hughlilly/660738c4e80ceaaa373b2fbefb40a232 to your computer and use it in GitHub Desktop.
Checks Auckland Museum (AM) API IDs to see whether they exist.
import csv
import time
from collections import namedtuple

import pandas
import requests
from tqdm import tqdm

# Checks Auckland Museum API identifiers for existence.
#
# Takes a .csv of API IDs (header column named "id"), builds a URL for each,
# issues a GET request, and writes the URLs that return 200 OK to a .csv.

infile = "IDs.csv"
outfile = "results.csv"

# Header row for the output file.
headers = ["url"]

# Load every input row as a namedtuple whose fields come from the CSV header.
# newline="" is required by the csv module for correct line-ending handling.
with open(infile, mode='r', newline='') as f:
    reader = csv.reader(f)
    all_records = namedtuple('rec', next(reader))
    records = [all_records._make(row) for row in reader]

with open(outfile, mode='w', newline='') as o:
    w = csv.writer(o)
    # Write header row
    w.writerow(headers)

    num = len(records)
    print("Checking {} records...\n".format(num))

    with tqdm(total=num, bar_format="{percentage:3.0f}% {bar} [{n_fmt}/{total_fmt}] ", ncols=64) as pbar:
        for r in records:
            pbar.update(1)
            # The ".id" attribute must match the input CSV's header
            # (or just rename the column to 'id').
            identifier = r.id
            # Construct URL; update this according to the content type you're checking.
            url = "https://api.aucklandmuseum.com/id/humanhistory/object/" + identifier
            # url = "https://api.aucklandmuseum.com/id/naturalsciences/object/" + identifier
            # url = "https://api.aucklandmuseum.com/id/library/manuscriptsandarchives/" + identifier

            # Redirects are not followed, so 3xx statuses count as misses;
            # only write URLs of 200 OK statuses.
            req = requests.get(url, allow_redirects=False)
            if req.status_code == 200:
                w.writerow([url])
            # Throttle every request (hit or miss) to be polite to the API.
            time.sleep(.25)

# Construct a Pandas dataframe from the output, and report the number of
# entries (= number of 200 OKs).
df = pandas.read_csv(outfile)
print("Found {} 200 OK statuses".format(len(df)))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment