Skip to content

Instantly share code, notes, and snippets.

@brienna
Created October 9, 2018 01:56
Show Gist options
  • Save brienna/c8d199e26793acfcab1bc38994b32839 to your computer and use it in GitHub Desktop.
Save brienna/c8d199e26793acfcab1bc38994b32839 to your computer and use it in GitHub Desktop.
Check arXiv metadata
import boto3, configparser, os, botocore
def download_file(key):
    """
    Download one object from the ``arxiv`` S3 bucket to the same relative path.

    The object is requested with ``RequestPayer='requester'``. The local
    destination path is identical to ``key``, so any directory component of
    ``key`` is created before the download starts.

    Parameters
    ----------
    key : str
        Key of the object in the bucket; also used as the local file path.

    Raises
    ------
    botocore.exceptions.ClientError
        For any S3 client error other than a 404. A 404 is reported on
        stdout instead of being raised.

    Notes
    -----
    Relies on a module-level ``s3resource`` (a boto3 S3 resource) having been
    defined before this function is called.
    """
    # Make sure the directory the file will land in exists. It is derived
    # from the key rather than hard-coded to 'src', so keys under any other
    # prefix get a valid destination too. exist_ok avoids a check-then-create
    # race.
    target_dir = os.path.dirname(key)
    if target_dir:
        os.makedirs(target_dir, exist_ok=True)

    # Download file
    print('\nDownloading s3://arxiv/{} to {}...'.format(key, key))
    try:
        s3resource.meta.client.download_file(
            Bucket='arxiv',
            Key=key,  # name of file to download from
            Filename=key,  # path to file to download to
            ExtraArgs={'RequestPayer': 'requester'})
    except botocore.exceptions.ClientError as e:
        if e.response['Error']['Code'] == "404":
            print('ERROR: ' + key + " does not exist in arxiv bucket")
        else:
            # Any other client error (access denied, bad credentials, ...)
            # must not pass silently; let the caller see it.
            raise
if __name__ == '__main__':
    # Runs only when the script is invoked from the command line.

    # download_file() reads the module-level name `s3resource`, which nothing
    # else in this file defines, so create the S3 resource here before the
    # first download. Credentials are resolved by boto3's default lookup
    # chain (environment variables, shared credentials file, ...).
    s3resource = boto3.resource('s3')

    # Download the manifest file; it is written to src/arXiv_src_manifest.xml
    download_file('src/arXiv_src_manifest.xml')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment