Skip to content

Instantly share code, notes, and snippets.

@mark-cooper
Last active July 18, 2018 23:02
Show Gist options
  • Save mark-cooper/6ca2a5f3604ee90321818088f23c3b21 to your computer and use it in GitHub Desktop.
Save mark-cooper/6ca2a5f3604ee90321818088f23c3b21 to your computer and use it in GitHub Desktop.
ArchivesSpace: basic script to download exported files to `exports` folder in current directory.
#!/usr/bin/env python3
'''
Basic script to download exported files to an `exports` folder in the
current directory. A file is downloaded only if it does not exist locally
or if the manifest's `updated_at` timestamp for it is newer than the local
file's modification time. If the manifest marks a file as deleted, any
local copy of that file is removed.
SETUP:
python3 required [adjust python3 and pip3 refs per system setup]
pip3 install python-dateutil
pip3 install requests[security]
USAGE:
./exports.py \
--location=https://archivesspace.lyrasistechnology.org/files/exports/
# pdf
./exports.py \
--location=https://archivesspace.lyrasistechnology.org/files/exports/ \
--manifest=manifest_ead_pdf.csv
# basic auth
./exports.py \
--location=https://archivesspace.lyrasistechnology.org/files/exports/ \
--username=demo-files \
--password=abc123
'''
import csv
import os
import requests
import sys
import time
from argparse import ArgumentParser
from dateutil import parser as date_parser
from urllib.parse import urljoin
def parse_date(updated_at):
    """Return *updated_at* as a POSIX timestamp in whole seconds.

    The string is parsed with ``dateutil``'s parser and the resulting
    datetime's timestamp is truncated to an ``int``.

    NOTE: ``datetime.timestamp()`` interprets a value that carries no
    timezone information as local time.
    """
    moment = date_parser.parse(updated_at)
    return int(moment.timestamp())
# Command-line interface. Every option is optional and falls back to the
# default given here.
parser = ArgumentParser()

# Base URL of the remote exports folder the manifest is resolved against.
parser.add_argument(
    '-l', '--location',
    default='https://archivesspace.lyrasistechnology.org/files/exports/',
    help='location (url) of exports folder',
)

# Name of the manifest CSV inside that folder.
parser.add_argument(
    '-m', '--manifest',
    default='manifest_ead_xml.csv',
    help='manifest',
)

# Optional basic-auth credentials; both default to the empty string.
parser.add_argument('-u', '--username', default='', help='username')
parser.add_argument('-p', '--password', default='', help='password')
# Resolve the local output folder, read the command-line options, then fetch
# the manifest and store a copy of it inside the output folder.
cwd = os.getcwd()
outputd = os.path.join(cwd, 'exports')

# args parser
args = parser.parse_args()
manifest = args.manifest
# urljoin() replaces the last path segment of a base URL that does not end
# in '/', so '.../files/exports' + manifest would resolve to
# '.../files/<manifest>'. Normalise the folder URL first.
location = args.location if args.location.endswith('/') else args.location + '/'
manifest_url = urljoin(location, manifest)
username = args.username
password = args.password

# exist_ok avoids the check-then-create race of testing os.path.exists first.
os.makedirs(outputd, exist_ok=True)

# Only attach basic-auth credentials when some were supplied; passing the
# empty defaults would send an Authorization header with blank credentials.
auth = (username, password) if (username or password) else None

print('Downloading manifest: %s' % manifest)
response = requests.get(manifest_url, auth=auth)
if not response.ok:
    sys.exit('Failed to download manifest: ' + manifest_url)

# Keep the raw bytes exactly as served; the manifest is re-read as CSV later.
with open(os.path.join(outputd, manifest), 'wb') as f:
    f.write(response.content)
# Walk the downloaded manifest and bring the local output folder in line with
# it: remove local copies of rows flagged as deleted, and download rows whose
# file is missing locally or older than the row's `updated_at`.

# Only attach basic-auth credentials when some were supplied; passing the
# empty defaults would send an Authorization header with blank credentials.
auth = (username, password) if (username or password) else None

# The csv module expects files to be opened with newline='' so that newlines
# embedded in quoted fields are handled correctly.
with open(os.path.join(outputd, manifest), 'r', newline='') as csvfile:
    reader = csv.DictReader(csvfile, quotechar='"')
    for row in reader:
        deleted = row['deleted'].lower() == 'true'
        filename = row['filename']
        url = row['location']
        updated_at = parse_date(row['updated_at'])
        # NOTE(review): filename comes from the remote manifest and is joined
        # unchecked -- confirm it can never contain '..' or an absolute path.
        local_path = os.path.join(outputd, filename)
        exists = os.path.exists(local_path)

        if deleted:
            # A row flagged as deleted must never be downloaded, whether or
            # not a local copy exists; only the removal is conditional.
            if exists:
                print('Deleting file: %s' % filename)
                os.remove(local_path)
            continue

        # A missing local file counts as infinitely old. A literal 0 is used
        # because time.mktime(time.gmtime(0)) is shifted by the local UTC
        # offset and can be negative, which some platforms reject.
        mtime = os.path.getmtime(local_path) if exists else 0
        if int(mtime) < updated_at:
            print('Downloading file: %s' % filename)
            response = requests.get(url, auth=auth)
            if not response.ok:
                # Do not write an HTTP error body to disk: it would replace
                # the real file and its fresh mtime would block any retry.
                print(
                    'Failed to download file: %s (HTTP %s)'
                    % (filename, response.status_code),
                    file=sys.stderr,
                )
                continue
            with open(local_path, 'wb') as f:
                f.write(response.content)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment