Skip to content

Instantly share code, notes, and snippets.

@rafaelpezzuto
Created June 10, 2021 00:05
Show Gist options
  • Save rafaelpezzuto/6bb35664590d89f34049266ec9129ec1 to your computer and use it in GitHub Desktop.
Save rafaelpezzuto/6bb35664590d89f34049266ec9129ec1 to your computer and use it in GitHub Desktop.
check last records - SciELO OAI-PMH
import requests.exceptions as r_exceptions
import urllib3.exceptions as u_exceptions
from articlemeta.client import RestfulClient
from datetime import datetime, timedelta
from sickle import Sickle
from sickle.oaiexceptions import NoRecordsMatch
# URL templates; the single %s placeholder is filled with a collection's domain.
URL_STATIC_PDF_FILES = "http://%s/static_pdf_files.txt"
URL_OAI_PMH = "http://%s/oai/scielo-oai.php"

# Collection acronyms that main() actually checks.
COLLECTIONS = [
    "col",
]

# Additional collection acronyms; not referenced by the code visible in this file.
UCOLS = [
    "bol",
    "cri",
    "rve",
    "psi",
    "ury",
]
def main():
    """Print, for each configured collection, the first date that has OAI-PMH records.

    Collections are fetched from the ArticleMeta client and filtered by the
    acronyms in ``COLLECTIONS``. For every selected collection the OAI-PMH
    endpoint is queried with ``metadataPrefix=oai_dc`` and a ``from`` date that
    starts at today and moves back one day per attempt (at most 1000
    attempts). The search for a collection stops as soon as one request
    yields a record; that record's identifier is printed.

    A ``NoRecordsMatch`` answer simply moves on to the previous day. Network
    and HTTP failures are reported on stdout and the search continues with
    the previous day (best effort), so a failing endpoint is visible in the
    output instead of looking like "no records".
    """
    # Transport-level failures that must not abort the whole scan.
    network_errors = (
        u_exceptions.NewConnectionError,
        u_exceptions.MaxRetryError,
        u_exceptions.TimeoutError,
        r_exceptions.ConnectionError,
        r_exceptions.ReadTimeout,
        r_exceptions.HTTPError,
    )

    am = RestfulClient()
    active_collections = list(am.collections())

    # Candidate 'from' dates, newest first; computed once from a single clock
    # reading so every collection is probed with the same sequence.
    now = datetime.now()
    from_dates = [
        (now - timedelta(days=offset)).strftime('%Y-%m-%d')
        for offset in range(1000)
    ]

    for ac in active_collections:
        acron = ac['acron']
        if acron not in COLLECTIONS:
            continue

        endpoint = URL_OAI_PMH % ac['domain']
        print(acron, endpoint)
        oai_client = Sickle(endpoint, max_retries=1, verify=False)

        for from_date in from_dates:
            print(acron, 'getting records', from_date)
            try:
                records = oai_client.ListRecords(
                    **{'metadataPrefix': 'oai_dc', 'from': from_date}
                )
                # Only the existence of a record matters, so take the first one.
                first = next(iter(records), None)
            except NoRecordsMatch:
                # Nothing at or after this date: try an earlier one.
                continue
            except network_errors as exc:
                print(acron, 'request failed', from_date, repr(exc))
                continue

            if first is not None:
                print(acron, 'getting records', from_date, first.header.identifier)
                # Found the most recent date with records; stop probing.
                break
# Run the check only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment