Skip to content

Instantly share code, notes, and snippets.

@jjjake
Last active March 16, 2017 23:15
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jjjake/28eab391c97a2ea2691c074447e99b09 to your computer and use it in GitHub Desktop.
Save jjjake/28eab391c97a2ea2691c074447e99b09 to your computer and use it in GitHub Desktop.
Audit GB Shipment
import json
def get_gb_counts(tsv):
    """Load the expected per-barcode file counts from a tab-separated export.

    The first three columns of every row are read as barcode, FLAC count and
    TIFF count; any further columns are ignored. A row whose first column is
    ``barcode`` (compared case-insensitively) is treated as the header and
    skipped, as are blank lines.

    Args:
        tsv: Path of the TSV file to read.

    Returns:
        A dict keyed by the lower-cased barcode, i.e.
        ``{'gbia00000x': {'flacs': 9, 'tifs': 1}}``.

    Raises:
        IndexError: If a data row has fewer than three columns.
        ValueError: If a count column cannot be parsed as an integer.
    """
    counts = {}
    # The context manager closes the file even when a row fails to parse.
    with open(tsv) as fh:
        for line in fh:
            # Tolerate blank lines (e.g. a trailing empty line in the export).
            if not line.strip():
                continue
            # Split once instead of re-splitting the row for every column.
            fields = line.rstrip('\r\n').split('\t')
            barcode = fields[0].strip().lower()
            # Skip header row.
            if barcode == 'barcode':
                continue
            # Add 1 to GB count because we make a copy of the best version.
            flacs = int(fields[1]) + 1
            tifs = int(fields[2])
            counts[barcode] = {'flacs': flacs, 'tifs': tifs}
    return counts
def audit_shipment(metadata_file, gb_counts):
    r"""Audit shipment with counts from GB.

    BEFORE running this script, generate a JSONL document of all metadata for
    all items for the given shipment:

        ia search 'collection:georgeblood AND shiptracking:15446' -p scope:all -i \
            | parallel 'ia md {}' \
            | pv -acbrl > 15446.jsonl

    For every non-blank line of ``metadata_file`` the item's files are
    counted (names ending in ``.flac``, and entries whose ``format`` is
    ``TIFF``) and compared with the expected counts stored in ``gb_counts``
    under the item's lower-cased ``collection-catalog-number``. A success
    message is printed once every item has been checked.

    Args:
        metadata_file: Path of the JSONL file, one item metadata document
            per line, each with ``files`` and ``metadata`` keys.
        gb_counts: Mapping of lower-cased barcode to a dict with ``flacs``
            and ``tifs`` counts, as returned by ``get_gb_counts``.

    Raises:
        AssertionError: If an item's FLAC or TIFF count differs from the
            expected count.
        KeyError: If an item's barcode is missing from ``gb_counts`` or a
            document lacks one of the keys read here.
    """
    # The context manager closes the file even when the audit fails midway.
    with open(metadata_file) as fh:
        for line in fh:
            line = line.strip()
            # Tolerate blank lines (e.g. a trailing empty line).
            if not line:
                continue
            item = json.loads(line)
            files = item['files']
            flacs = sum(1 for f in files if f['name'].endswith('.flac'))
            tifs = sum(1 for f in files if f['format'] == 'TIFF')
            barcode = item['metadata']['collection-catalog-number'].lower()
            expected = gb_counts[barcode]
            # Raise explicitly rather than use `assert`: assert statements are
            # removed under `python -O`, which would make the audit pass
            # without checking anything. The exception type is unchanged.
            if flacs != expected['flacs']:
                raise AssertionError(
                    '{0}: found {1} flacs, expected {2}'.format(
                        barcode, flacs, expected['flacs']))
            if tifs != expected['tifs']:
                raise AssertionError(
                    '{0}: found {1} tifs, expected {2}'.format(
                        barcode, tifs, expected['tifs']))
    print('success, audit complete!')
if __name__ == '__main__':
    # Script entry point: load the expected counts from the TSV export, then
    # check them against the item metadata dumped to 15446.jsonl.
    counts = get_gb_counts(
        tsv='/Users/archive/78rpm-utils/spreadsheets/iArchiveExport_FileCount_20170315.tsv',
    )
    audit_shipment(metadata_file='15446.jsonl', gb_counts=counts)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment