Created
January 25, 2020 14:40
-
-
Save miku/64543ca8c16d62a20fa4a7e34ba7bee9 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Take a CSV from: | |
psql fatcat_prod -c "COPY (select id, doi, release_stage, work_ident_id from release_rev where extra_json->>'datacite' is not null) TO '/tmp/datacite-throwaway.csv' WITH CSV DELIMITER ',';" | |
and generate ingest requests, one per line. | |
""" | |
from __future__ import print_function

import csv
import json
import sys
# Path the psql COPY export is written to; used when no path is given on the
# command line.
DEFAULT_CSV_PATH = "/tmp/datacite-throwaway.csv"

# Columns of the export, in order: release id, DOI, release stage, work id.
EXPECTED_COLUMNS = 4


def make_ingest_request(doi, release_stage=None):
    """Build one PDF ingest request document for a DOI.

    Args:
        doi: DOI string. It is used verbatim as the link source id, as the
            ``doi`` external id and as the path of the ``https://doi.org/``
            base URL.
        release_stage: optional release stage; the key is only added to the
            document when the value is non-empty.

    Returns:
        A JSON-serializable dict describing the ingest request.
    """
    # NOTE(review): the DOI is interpolated into the URL without
    # percent-encoding. Confirm whether consumers expect the raw form before
    # changing this.
    doc = {
        'ingest_type': 'pdf',
        'ingest_request_source': 'fatcat-ingest',
        'link_source': 'doi',
        'link_source_id': doi,
        'base_url': 'https://doi.org/{}'.format(doi),
        'ext_ids': {
            'doi': doi,
        },
    }
    if release_stage:
        doc['release_stage'] = release_stage
    return doc


def iter_ingest_requests(rows):
    """Yield an ingest request for every row that carries a DOI.

    Args:
        rows: iterable of row sequences (e.g. a ``csv.reader``), each with
            the four fields release id, DOI, release stage, work id.

    Yields:
        One request dict per row with a non-empty DOI, in input order.

    Raises:
        ValueError: if a non-empty row does not have exactly four fields.
            The message names the offending row (1-based).
    """
    for index, row in enumerate(rows, 1):
        # csv.reader yields an empty list for a blank line; such lines carry
        # no data and must not abort the whole run.
        if not row:
            continue
        if len(row) != EXPECTED_COLUMNS:
            raise ValueError(
                'row {}: expected {} columns, got {}'.format(
                    index, EXPECTED_COLUMNS, len(row)))
        _release_id, doi, release_stage, _work_id = row
        if not doi:
            continue
        yield make_ingest_request(doi, release_stage)


def main(argv=None):
    """Read the CSV export and print one JSON ingest request per line.

    Args:
        argv: argument list in ``sys.argv`` layout; defaults to ``sys.argv``.
            ``argv[1]``, when present, overrides the default CSV path.
    """
    if argv is None:
        argv = sys.argv
    filename = argv[1] if len(argv) > 1 else DEFAULT_CSV_PATH
    with open(filename) as handle:
        reader = csv.reader(handle, delimiter=',')
        for doc in iter_ingest_requests(reader):
            print(json.dumps(doc))


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment